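"""OpenAI-compatible FastAPI proxy that forwards chat and completion requests to DeepInfra's web-embed endpoint."""
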
from fastapi import FastAPI
from pydantic import BaseModel
from typing import Union, Dict, List, Any
import json
import requests

class LLM:
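    """Thin client for DeepInfra's OpenAI-compatible chat completions endpoint."""
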
    def __init__(self, model: str, system_message: str = "You are a Helpful AI."):
        self.model = model
        self.conversation_history = [{"role": "system", "content": system_message}]

    def chat(self, messages: List[Dict[str, str]]) -> Union[str, None]:
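        """Send the conversation to DeepInfra and return the assistant's reply, or None on failure."""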
        url = "https://api.deepinfra.com/v1/openai/chat/completions"
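        # Browser-like headers matching DeepInfra's web embed; no API key is sent.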
        headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
            'Accept-Language': 'en,fr-FR;q=0.9,fr;q=0.8,es-ES;q=0.7,es;q=0.6,en-US;q=0.5,am;q=0.4,de;q=0.3',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Content-Type': 'application/json',
            'Origin': 'https://deepinfra.com',
            'Pragma': 'no-cache',
            'Referer': 'https://deepinfra.com/',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site',
            'X-Deepinfra-Source': 'web-embed',
            'accept': 'text/event-stream',
            'sec-ch-ua': '"Google Chrome";v="119", "Chromium";v="119", "Not?A_Brand";v="24"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"'
        }
        data = json.dumps(
            {
                'model': self.model,
                # Prepend the stored system message so the system prompt actually reaches the API.
                'messages': self.conversation_history + messages,
                'temperature': 0.7,
                'max_tokens': 8028,
                'stop': [],
                'stream': False  # streaming is disabled; the response is parsed as a single JSON body
            }, separators=(',', ':')
        )
        try:
            result = requests.post(url=url, data=data, headers=headers, timeout=60)
            result.raise_for_status()
            return result.json()['choices'][0]['message']['content']
        except (requests.RequestException, KeyError, IndexError, ValueError):
            # Network error or unexpected response shape; callers treat None as a failure.
            return None

app = FastAPI()

class Model(BaseModel):
    id: str
    object: str
    created: int
    owned_by: str

class Message(BaseModel):
    role: str
    content: str

class CompletionRequest(BaseModel):
    model: str
    messages: List[Message]

class CompletionResponse(BaseModel):
    id: str
    object: str
    created: int
    model: str
    choices: List[Dict[str, Any]]
    usage: Dict[str, int]

models = [
    {"id": "meta-llama/Meta-Llama-3-70B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"},
    {"id": "google/gemma-2-27b-it", "object": "model", "created": 1686935002, "owned_by": "meta"},
    {"id": "google/gemma-2-9b-it", "object": "model", "created": 1686935002, "owned_by": "ConsiousAI"},
    {"id": "cognitivecomputations/dolphin-2.9.1-llama-3-70b", "object": "model", "created": 1686935002, "owned_by": "cognitivecomputations"},
    {"id": "nvidia/Nemotron-4-340B-Instruct", "object": "model", "created": 1686935002, "owned_by": "nvidia"},
    {"id": "Qwen/Qwen2-72B-Instruct", "object": "model", "created": 1686935002, "owned_by": "qwen"},
    {"id": "google/gemma-2-9b-it", "object": "model", "created": 1686935002, "owned_by": "ConsiousAI"},
    {"id": "openchat/openchat-3.6-8b", "object": "model", "created": 1686935002, "owned_by": "unknown"},
    {"id": "mistralai/Mistral-7B-Instruct-v0.3", "object": "model", "created": 1686935002, "owned_by": "mistral"},
    {"id": "meta-llama/Meta-Llama-3-8B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"},
    {"id": "mistralai/Mixtral-8x22B-Instruct-v0.1", "object": "model", "created": 1686935002, "owned_by": "mistral"},
    {"id": "mistralai/Mixtral-8x7B-Instruct-v0.1", "object": "model", "created": 1686935002, "owned_by": "mistral"},
    {"id": "Qwen/Qwen2-7B-Instruct", "object": "model", "created": 1686935002, "owned_by": "Qwen"},
    {"id": "meta-llama/Meta-Llama-3.1-405B-Instruct", "object": "model", "created": 1686935002, "owned_by": "meta"}

]

@app.post("/v1/chat/completions")
def handle_completions(completion_request: CompletionRequest):
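    """OpenAI-style chat completions endpoint; proxies the request to DeepInfra."""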
    system_prompt = next((message.content for message in completion_request.messages if message.role == 'system'), None)
    user_query = next((message.content for message in completion_request.messages if message.role == 'user'), None)

    response_text = generative(query=user_query, system_prompt=system_prompt, model=completion_request.model)
    if response_text is None:
        response_text = ""

    prompt_tokens = sum(len(message.content.split()) for message in completion_request.messages)
    completion_tokens = len(response_text.split())

    response = CompletionResponse(
        id="chatcmpl-1",
        object="chat.completion",
        created=1234567890,
        model=completion_request.model,
        choices=[{"index": 0, "message": {"role": "assistant", "content": response_text}, "finish_reason": "stop"}],
        usage={"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, "total_tokens": prompt_tokens + completion_tokens}
    )
    return response

@app.get("/v1/models")
def get_models():
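    """Return the static model catalogue in OpenAI's list format."""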
    return {"object": "list", "data": models}

@app.post("/v1/completions")
def create_completion(prompt: str, model: str, best_of: int = 1, echo: bool = False, frequency_penalty: float = 0.0):
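    """Legacy text-completions endpoint; prompt and model are read as query parameters."""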
    response_text = generative(system_prompt="You are a helpful assistant.", query=prompt, model=model)
    if response_text is None:
        response_text = ""

    response = {
        "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
        "object": "text_completion",
        "created": 1589478378,
        "model": model,
        "system_fingerprint": "fp_44709d6fcb",
        "choices": [{"text": response_text, "index": 0, "logprobs": None, "finish_reason": "length"}]
    }
    return response

def generative(system_prompt, query, model):
    # Fall back to the default system message when the caller does not supply one.
    llm = LLM(model=model, system_message=system_prompt or "You are a Helpful AI.")
    messages = [{"role": "user", "content": query}]
    return llm.chat(messages)
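
# Example request against the running server (a sketch; assumes the uvicorn command
# below is used, so the app listens on http://localhost:8000):
#
#   curl http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "meta-llama/Meta-Llama-3-8B-Instruct",
#          "messages": [{"role": "system", "content": "You are a helpful assistant."},
#                       {"role": "user", "content": "Hello"}]}'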

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)