您现在的位置是:首页 >技术交流 >阿里云部署ui-tar模型网站首页技术交流
阿里云部署 UI-TARS 模型
简介:阿里云部署 UI-TARS 模型
1、快捷部署
2、访问测试
import os
import json
import base64
import io
import openai
import re
import math
from PIL import Image, ImageDraw
def encode_image(image):
    """Serialize an image as PNG and return the bytes as a base64 string.

    Args:
        image: Object exposing ``save(fp, format=...)``; the caller in this
            file passes a PIL image.

    Returns:
        The PNG payload, base64-encoded and decoded to ``str`` as UTF-8.
    """
    with io.BytesIO() as buffer:
        image.save(buffer, format="PNG")
        png_bytes = buffer.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode("utf-8")
def resize_image(image):
    """Rescale an image to a pixel budget chosen from its current area.

    Images whose area exceeds ``6000 * 28 * 28`` pixels are scaled to about
    ``2700 * 28 * 28`` pixels; every other image is scaled to about
    ``1340 * 28 * 28`` pixels. The rescale is unconditional, so an image
    smaller than its budget is enlarged. Both sides use the same scale
    factor and are truncated to integers.

    Args:
        image: Object exposing ``width``, ``height`` and ``resize(size)``.

    Returns:
        Whatever ``image.resize((width, height))`` returns.
    """
    unit = 28 * 28
    area = image.width * image.height
    budget = (2700 if area > 6000 * unit else 1340) * unit
    scale = math.sqrt(budget / area)
    new_size = (int(image.width * scale), int(image.height * scale))
    return image.resize(new_size)
def draw_coordinates(image, coordinates):
    """Mark a point on the image with a red ring and a small red dot.

    Args:
        image: Image to draw on; it is drawn on directly and then returned.
        coordinates: ``(x, y)`` pair on a 0-1000 scale relative to the image
            width and height.

    Returns:
        The same ``image`` object that was passed in.
    """
    canvas = ImageDraw.Draw(image)
    ring = min(image.width, image.height) // 15
    dot = 2

    norm_x, norm_y = coordinates
    # Map the 0-1000 scale onto pixel positions of this image.
    cx = round(norm_x / 1000 * image.width)
    cy = round(norm_y / 1000 * image.height)

    ring_box = (cx - ring, cy - ring, cx + ring, cy + ring)
    dot_box = (cx - dot, cy - dot, cx + dot, cy + dot)
    canvas.ellipse(ring_box, outline='red', width=2)
    canvas.ellipse(dot_box, fill='red')
    return image
def send_request(image, query):
    """Ask the deployed model to locate a UI element and return its coordinates.

    The image is rescaled with ``resize_image``, PNG/base64-encoded with
    ``encode_image`` and sent together with ``query`` as a single
    chat-completion request. The first ``(x, y)`` integer pair found in the
    reply text is returned.

    Args:
        image: Screenshot to analyse (handed to ``resize_image``).
        query: Text description of the element to locate.

    Returns:
        ``(x, y)`` as a tuple of two ints parsed from the model reply.

    Raises:
        ValueError: If the API address or key is empty, or if the reply does
            not contain an ``(x, y)`` pair.
    """
    # Configure the API endpoint and key (fill in your own values).
    openai.api_base = "your访问地址/v1"
    openai.api_key = "your token"
    model_name = "UI-TARS-7B-DPO"  # name of your deployed model

    # NOTE: both values are assigned inline above, so this guard only trips
    # if one of them is replaced with an empty string.
    if not openai.api_base or not openai.api_key:
        raise ValueError("Missing OPENAI_API_BASE or OPENAI_API_KEY in environment variables.")

    # Rescale the image, then encode it for embedding in a data URL.
    image = resize_image(image)
    base64_image = encode_image(image)

    # Build the request payload: one user turn with the image and the prompt.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}},
                {"type": "text", "text": "Output only the coordinate of one box in your response. " + query},
            ],
        }
    ]

    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 openai-python
    # interface -- confirm the installed package version supports it.
    response = openai.ChatCompletion.create(
        model=model_name,
        messages=messages,
        temperature=1.0,
        top_p=0.7,
        max_tokens=128,
        frequency_penalty=1,
    )
    output_text = response["choices"][0]["message"]["content"]

    # Match a parenthesised integer pair such as "(123,456)". The parentheses
    # and \d must be escaped; optional whitespace around the numbers is
    # tolerated so "(123, 456)" parses as well.
    pattern = r"\(\s*(\d+)\s*,\s*(\d+)\s*\)"
    match = re.search(pattern, output_text)
    if match is None:
        raise ValueError("No valid coordinates found in model response.")
    return (int(match.group(1)), int(match.group(2)))
# Test code
if __name__ == "__main__":
    # Replace the path with your own test image. Opening it in a ``with``
    # block closes the underlying file when the script is done with it.
    with Image.open(r"your image path") as test_image:
        # Query text sent to the model (a "registration method" dropdown button).
        test_query = "报名方式下拉按钮"
        coordinates = send_request(test_image, test_query)
        print(f"Detected coordinates: {coordinates}")
        # Draw the detected point on the original image and display it.
        result_image = draw_coordinates(test_image, coordinates)
        result_image.show()
代码中的 model_name 填写所部署的预训练模型名称(例如 UI-TARS-7B-DPO)。
访问地址(api_base)和 token(api_key)可在部署服务的“调用信息”中查看。
风语者!平时喜欢研究各种技术,目前在从事后端开发工作,热爱生活、热爱工作。