from fastapi import FastAPI
from pydantic import BaseModel
from typing import Union, Dict, List, Any
import json
import time
import requests
class LLM:
    """Minimal client for DeepInfra's OpenAI-compatible chat completions endpoint."""

    def __init__(self, model: str, system_message: str = "You are a Helpful AI."):
        self.model = model
        # Seed the conversation with the system message; chat() prepends it to every request.
        self.conversation_history = [{"role": "system", "content": system_message}]
    def chat(self, messages: List[Dict[str, str]]) -> Union[str, None]:
        """Send the stored system message plus `messages`; return the reply text, or None on failure."""
        url = "https://api.deepinfra.com/v1/openai/chat/completions"
        # Browser-like headers matching DeepInfra's web embed.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Accept-Language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/json',
            'Origin': 'https://deepinfra.com',
            'Pragma': 'no-cache',
            'Referer': 'https://deepinfra.com/',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site',
            'X-Deepinfra-Source': 'web-embed',
            'accept': 'text/event-stream',
            'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"'
        }
        data = json.dumps(
            {
                'model': self.model,
                # Prepend the stored system message so it is actually sent with each request.
                'messages': self.conversation_history + messages,
                'temperature': 0.7,
                'max_tokens': 8028,
                'stop': [],
                'stream': False  # don't change it
            }, separators=(',', ':')
        )
        try:
            result = requests.post(url=url, data=data, headers=headers, timeout=60)
            return result.json()['choices'][0]['message']['content']
        except Exception:
            return None
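
# Illustrative usage sketch of the LLM wrapper above (the model name is an
# assumption, picked from the catalogue below; kept in a comment so importing
# this module stays side-effect free):
#
#     llm = LLM(model="meta-llama/Meta-Llama-3-8B-Instruct")
#     print(llm.chat([{"role": "user", "content": "Say hello in one sentence."}]))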
app = FastAPI()

class Model(BaseModel):
    id: str
    object: str
    created: int
    owned_by: str

class Message(BaseModel):
    role: str
    content: str

class CompletionRequest(BaseModel):
    model: str
    messages: List[Message]

class CompletionResponse(BaseModel):
    id: str
    object: str
    created: int
    model: str
    choices: List[Dict[str, Any]]
    usage: Dict[str, int]
models = [
    {"id": "meta-llama/Meta-Llama-3-70B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"},
    {"id": "google/gemma-2-27b-it", "object": "model", "created": 1686935002, "owned_by": "google"},
    {"id": "google/gemma-2-9b-it", "object": "model", "created": 1686935002, "owned_by": "google"},
    {"id": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", "object": "model", "created": 1686935002, "owned_by": "cognitivecomputations"},
    {"id": "nvidia/Nemotron-4-340B-Instruct", "object": "model", "created": 1686935002, "owned_by": "nvidia"},
    {"id": "Qwen/Qwen2-72B-Instruct", "object": "model", "created": 1686935002, "owned_by": "Qwen"},
    {"id": "openchat/openchat-3.6-8b", "object": "model", "created": 1686935002, "owned_by": "openchat"},
    {"id": "mistralai/Mistral-7B-Instruct-v0.3", "object": "model", "created": 1686935002, "owned_by": "mistral"},
    {"id": "meta-llama/Meta-Llama-3-8B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"},
    {"id": "mistralai/Mixtral-8x22B-Instruct-v0.1", "object": "model", "created": 1686935002, "owned_by": "mistral"},
    {"id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "object": "model", "created": 1686935002, "owned_by": "mistral"},
    {"id": "Qwen/Qwen2-7B-Instruct", "object": "model", "created": 1686935002, "owned_by": "Qwen"},
    {"id": "meta-llama/Meta-Llama-3.1-405B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"}
]
# OpenAI-compatible chat completions route.
@app.post("/v1/chat/completions")
def handle_completions(completion_request: CompletionRequest):
    system_prompt = next((message.content for message in completion_request.messages if message.role == 'system'), None)
    user_query = next((message.content for message in completion_request.messages if message.role == 'user'), None)
    response_text = generative(query=user_query, system_prompt=system_prompt, model=completion_request.model) or ""
    # Whitespace-split counts are a rough approximation, not a real tokenizer.
    prompt_tokens = sum(len(message.content.split()) for message in completion_request.messages)
    response = CompletionResponse(
        id="chatcmpl-1",
        object="chat.completion",
        created=int(time.time()),
        model=completion_request.model,
        choices=[{"index": 0, "message": {"role": "assistant", "content": response_text}, "finish_reason": "stop"}],
        usage={"prompt_tokens": prompt_tokens, "total_tokens": prompt_tokens + len(response_text.split())}
    )
    return response
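
# Illustrative request against the chat route above, assuming the server is
# running locally on port 8000:
#
#     curl http://localhost:8000/v1/chat/completions \
#       -H "Content-Type: application/json" \
#       -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct",
#            "messages": [{"role": "user", "content": "Hi"}]}'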
@app.get("/v1/models")
def get_models():
    """Return the model catalogue in OpenAI's list format."""
    return {"object": "list", "data": models}
# OpenAI-compatible text completions route; scalar parameters arrive as query parameters.
@app.post("/v1/completions")
def create_completion(prompt: str, model: str, best_of: int = 1, echo: bool = False, frequency_penalty: float = 0.0):
    response_text = generative(system_prompt="You are a helpful assistant.", query=prompt, model=model) or ""
    response = {
        "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
        "object": "text_completion",
        "created": int(time.time()),
        "model": model,
        "system_fingerprint": "fp_44709d6fcb",
        "choices": [{"text": response_text, "index": 0, "logprobs": None, "finish_reason": "length"}]
    }
    return response
def generative(system_prompt, query, model):
    """Run a single-turn chat; fall back to the default system message when none is supplied."""
    llm = LLM(model=model, system_message=system_prompt or "You are a Helpful AI.")
    messages = [{"role": "user", "content": query}]
    response = llm.chat(messages)
    return response
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
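
# Illustrative client-side sketch, assuming the `openai` Python package (>= 1.0)
# is installed and the server above is running locally; the api_key value is a
# placeholder, since this proxy does not check it:
#
#     from openai import OpenAI
#     client = OpenAI(base_url="http://localhost:8000/v1", api_key="unused")
#     resp = client.chat.completions.create(
#         model="meta-llama/Meta-Llama-3-8B-Instruct",
#         messages=[{"role": "user", "content": "Hello!"}],
#     )
#     print(resp.choices[0].message.content)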