from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel
import torch
from typing import List
import uvicorn

# Define the request and response models
class EmbeddingRequest(BaseModel):
    input: str
    model: str = "jinaai/jina-embeddings-v3"

class EmbeddingResponse(BaseModel):
    status: str
    embeddings: List[List[float]]

# Create the FastAPI application
app = FastAPI(
    title="Jina Embeddings API",
    description="Text embedding generation service using jina-embeddings-v3",
    version="1.0.0"
)

# Load the model and tokenizer
model_name = "jinaai/jina-embeddings-v3"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModel.from_pretrained(model_name, trust_remote_code=True)

# Register the same handler under several paths so the service also answers
# OpenAI-style /embeddings and /chat/completions routes
@app.post("/generate_embeddings", response_model=EmbeddingResponse)
@app.post("/api/v1/embeddings", response_model=EmbeddingResponse)
@app.post("/hf/v1/embeddings", response_model=EmbeddingResponse)
@app.post("/api/v1/chat/completions", response_model=EmbeddingResponse)
@app.post("/hf/v1/chat/completions", response_model=EmbeddingResponse)
async def generate_embeddings(request: EmbeddingRequest):
    try:
        # Tokenize the input text
        inputs = tokenizer(request.input, return_tensors="pt", truncation=True, max_length=512)
        # Generate an embedding by mean-pooling the last hidden state
        with torch.no_grad():
            embeddings = model(**inputs).last_hidden_state.mean(dim=1)
        return EmbeddingResponse(
            status="success",
            embeddings=embeddings.numpy().tolist()
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))

@app.get("/")
async def root():
    return {
        "status": "active",
        "model": model_name,
        "usage": "Send POST request to /generate_embeddings or /api/v1/embeddings or /hf/v1/embeddings"
    }

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)
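
For reference, a minimal client call might look like the sketch below. It is not part of the original service code: the base URL http://localhost:7860 (matching the uvicorn port above) and the use of the requests library are assumptions for local testing; on Hugging Face Spaces you would use the Space's public URL instead.

import requests

# Hypothetical local test: POST a text to the /api/v1/embeddings route exposed
# above and read back the embedding matrix. The URL is an assumption.
resp = requests.post(
    "http://localhost:7860/api/v1/embeddings",
    json={"input": "Hello, world!"},
)
resp.raise_for_status()
data = resp.json()
print(data["status"])               # "success"
print(len(data["embeddings"][0]))   # embedding dimension of the pooled hidden state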