我用了 flask 和 fastapi 部署都是这种情况。问了 gpt,回答得不太行。
server 端:
# -*- coding: utf-8 -*-
import time

import numpy as np
import requests
import torch
import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from loguru import logger
from transformers import AutoTokenizer, AutoModel
app = FastAPI()

# Checkpoint served by this process.
# Alternative checkpoint: "Alibaba-NLP/gte-Qwen2-7B-instruct".
model_name = 'DMetaSoul/Dmeta-embedding-zh'

# Load the tokenizer and the model once at import time; the model is cast to
# half precision, moved to the GPU and switched to evaluation mode.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
model = model.half()
model = model.cuda()
model.eval()

# Warm-up: push one throwaway input through the model before serving requests.
dummy_input = ["预热"]
inputs = tokenizer(dummy_input, return_tensors="pt", padding=True, truncation=True)
inputs = inputs.to("cuda")
with torch.no_grad():
    _ = model(**inputs)
def _masked_mean_pool(last_hidden_state, attention_mask):
    """Average token embeddings over non-padding positions only.

    A plain ``mean(dim=1)`` also averages the padding positions, so in a
    batch with texts of different lengths the shorter texts get distorted
    embeddings. For an unpadded input the result matches a plain mean.

    Args:
        last_hidden_state: Token embeddings; assumed (batch, seq, hidden).
        attention_mask: 1 for real tokens, 0 for padding; assumed (batch, seq).

    Returns:
        A float32 tensor with one pooled vector per batch row.
    """
    mask = attention_mask.unsqueeze(-1).to(torch.float32)
    # Accumulate in float32 so the sum does not lose precision in fp16.
    summed = (last_hidden_state.to(torch.float32) * mask).sum(dim=1)
    # Guard against division by zero for a row that is entirely padding.
    token_counts = mask.sum(dim=1).clamp(min=1.0)
    return summed / token_counts


@app.post("/vectorize")
async def vectorize(request: Request):
    """Embed the texts in the request body and return their vectors.

    Expects a JSON object of the form ``{"texts": [...]}``.

    Returns:
        ``{"vectors": [...]}`` with HTTP 200 on success. On failure the body
        is ``{"error": "<message>"}`` with HTTP 400 for a bad request or
        HTTP 500 for an inference error, so clients that check the status
        code can detect the failure.
    """
    try:
        data = await request.json()
    except ValueError:
        # Malformed JSON (JSONDecodeError is a ValueError subclass).
        return JSONResponse(status_code=400, content={"error": "Request body must be valid JSON"})

    texts = data.get("texts", []) if isinstance(data, dict) else []
    if not texts:
        return JSONResponse(status_code=400, content={"error": "No texts provided"})

    try:
        start = time.time()
        inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True).to("cuda")
        with torch.no_grad():
            outputs = model(**inputs)
        pooled = _masked_mean_pool(outputs.last_hidden_state, inputs["attention_mask"])
        vectors = pooled.cpu().numpy().tolist()
        logger.info(f'转化耗时: {time.time() - start:.2f} seconds')
        return {"vectors": vectors}
    except Exception as e:
        # Top-level boundary: log with traceback and report the failure.
        logger.exception(f"Error occurred: {str(e)}")
        return JSONResponse(status_code=500, content={"error": str(e)})
# Script entry point: start the uvicorn server for this app.
if __name__ == "__main__":
    uvicorn.run(app=app, host="0.0.0.0", port=5000)
客户端:
# -*- coding: utf-8 -*-
import time
import requests
import numpy as np
from loguru import logger
class TextVectorizationClient:
    """HTTP client for the ``/vectorize`` embedding endpoint.

    Args:
        api_url: Base URL of the embedding service.
        timeout: Seconds to wait for the server before giving up; passed to
            every request so a stalled server cannot hang the caller forever.
        session: Optional object with a ``requests.Session``-compatible
            ``post`` method. When omitted, a ``requests.Session`` is created
            so repeated calls reuse one connection.

    NOTE(review): if every call shows a fixed delay of about two seconds
    while the server reports a much shorter time, "localhost" may be
    resolving to IPv6 (::1) first while the server only listens on IPv4.
    Try ``api_url="http://127.0.0.1:5000"`` - to be confirmed on the
    affected machine.
    """

    def __init__(self, api_url="http://localhost:5000", timeout=30.0, session=None):
        self.api_url = api_url
        self.timeout = timeout
        self._session = session if session is not None else requests.Session()

    def vectorize(self, text):
        """Return the embedding of ``text`` as a NumPy array.

        Raises:
            Exception: If the server answers with a non-200 status, or with a
                body that carries no ``"vectors"`` entry. The message includes
                the server's ``"error"`` field when one is present.
        """
        start_time = time.time()
        response = self._session.post(
            f"{self.api_url}/vectorize",
            json={"texts": [text]},
            timeout=self.timeout,
        )
        end_time = time.time()
        print('接口响应时间:', end_time - start_time)

        # The body may not be JSON at all (e.g. a proxy error page).
        try:
            payload = response.json()
        except ValueError:
            payload = {}
        if not isinstance(payload, dict):
            payload = {}

        # A 200 reply can still carry an error body instead of vectors.
        if response.status_code != 200 or "vectors" not in payload:
            raise Exception(f"API request failed: {payload.get('error', 'Unknown error')}")
        return np.array(payload["vectors"][0])
if __name__ == "__main__":
    # Demo run: embed one sample paragraph and log the total elapsed time.
    text = """
天津历史上名医辈出,中医和中西医结合的发展成就位居全国前列。和平区作为天津市中心城区的核心区域,是近代津沽名中医的汇聚地,也是中医药事业的奠基之地。天津市和平区中医医院开展了“寻访近代津沽名中医和平印迹活动”,旨在挖掘中医药文化的传承脉络,践行天津市中医药强市行动计划的“文化”要求。
"""
    client = TextVectorizationClient()
    start_time = time.time()
    vector = client.vectorize(text)
    elapsed = time.time() - start_time
    logger.info(f"Vectorization completed in {elapsed:.2f} seconds.")
#服务器 log:
2024-11-13 17:49:26.580 | INFO | __main__:vectorize:40 - 转化耗时: 0.12 seconds
INFO: 127.0.0.1:52711 - "POST /vectorize HTTP/1.1" 200 OK
INFO: 127.0.0.1:52739 - "POST /vectorize HTTP/1.1" 200 OK
2024-11-13 17:49:28.740 | INFO | __main__:vectorize:40 - 转化耗时: 0.12 seconds
# 客户端 log:
接口响应时间:2.15596079826355
接口响应时间:2.1456186771392822
这是一个专为移动设备优化的页面(即为了让你能够在 Google 搜索结果里秒开这个页面),如果你希望参与 V2EX 社区的讨论,你可以继续到 V2EX 上打开本讨论主题的完整版本。
V2EX 是创意工作者们的社区,是一个分享自己正在做的有趣事物、交流想法,可以遇见新朋友甚至新机会的地方。
V2EX is a community of developers, designers and creative people.