Spaces: API-Handler
committed on
Upload 10 files
- Dockerfile +20 -0
- TYPEGPT/typegpt.py +409 -0
- TYPEGPT/typegpt_normal.py +231 -0
- api_info.py +176 -0
- fastapi_app.py +132 -0
- flask_app.py +131 -0
- query.md +384 -0
- requirements.txt +7 -0
- typegpt_api.py +254 -0
- usage_inference.py +158 -0
Dockerfile
ADDED
@@ -0,0 +1,20 @@
# Use an official Python runtime as the base image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Copy the requirements file into the container
COPY requirements.txt .

# Install the required packages
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application code into the container
COPY . .

# Expose the port that FastAPI will run on
EXPOSE 7860

# Command to run the FastAPI application
# (note: the FastAPI app object in this upload is defined in fastapi_app.py,
# so "main:app" assumes a main.py entry point exists; otherwise use "fastapi_app:app")
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
TYPEGPT/typegpt.py
ADDED
@@ -0,0 +1,409 @@
import requests
import json
from typing import *

from webscout.AIutel import Optimizers
from webscout.AIutel import Conversation
from webscout.AIutel import AwesomePrompts
from webscout.AIbase import Provider
from webscout import exceptions

class TypeGPT(Provider):
    """
    A class to interact with the TypeGPT.net API. Improved to match webscout standards.
    """
    url = "https://chat.typegpt.net"
    working = True
    supports_message_history = True

    models = [
        # OpenAI Models
        "gpt-3.5-turbo",
        "gpt-3.5-turbo-202201",
        "gpt-4o",
        "gpt-4o-2024-05-13",
        "o1-preview",

        # Claude Models
        "claude",
        "claude-3-5-sonnet",
        "claude-sonnet-3.5",
        "claude-3-5-sonnet-20240620",

        # Meta/LLaMA Models
        "@cf/meta/llama-2-7b-chat-fp16",
        "@cf/meta/llama-2-7b-chat-int8",
        "@cf/meta/llama-3-8b-instruct",
        "@cf/meta/llama-3.1-8b-instruct",
        "@cf/meta-llama/llama-2-7b-chat-hf-lora",
        "llama-3.1-405b",
        "llama-3.1-70b",
        "llama-3.1-8b",
        "meta-llama/Llama-2-7b-chat-hf",
        "meta-llama/Llama-3.1-70B-Instruct",
        "meta-llama/Llama-3.1-8B-Instruct",
        "meta-llama/Llama-3.2-11B-Vision-Instruct",
        "meta-llama/Llama-3.2-1B-Instruct",
        "meta-llama/Llama-3.2-3B-Instruct",
        "meta-llama/Llama-3.2-90B-Vision-Instruct",
        "meta-llama/Llama-Guard-3-8B",
        "meta-llama/Meta-Llama-3-70B-Instruct",
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",

        # Mistral Models
        "mistral",
        "mistral-large",
        "@cf/mistral/mistral-7b-instruct-v0.1",
        "@cf/mistral/mistral-7b-instruct-v0.2-lora",
        "@hf/mistralai/mistral-7b-instruct-v0.2",
        "mistralai/Mistral-7B-Instruct-v0.2",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "mistralai/Mixtral-8x22B-Instruct-v0.1",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",

        # Qwen Models
        "@cf/qwen/qwen1.5-0.5b-chat",
        "@cf/qwen/qwen1.5-1.8b-chat",
        "@cf/qwen/qwen1.5-7b-chat-awq",
        "@cf/qwen/qwen1.5-14b-chat-awq",
        "Qwen/Qwen2.5-3B-Instruct",
        "Qwen/Qwen2.5-72B-Instruct",
        "Qwen/Qwen2.5-Coder-32B-Instruct",

        # Google/Gemini Models
        "@cf/google/gemma-2b-it-lora",
        "@cf/google/gemma-7b-it-lora",
        "@hf/google/gemma-7b-it",
        "google/gemma-1.1-2b-it",
        "google/gemma-1.1-7b-it",
        "gemini-pro",
        "gemini-1.5-pro",
        "gemini-1.5-pro-latest",
        "gemini-1.5-flash",

        # Cohere Models
        "c4ai-aya-23-35b",
        "c4ai-aya-23-8b",
        "command",
        "command-light",
        "command-light-nightly",
        "command-nightly",
        "command-r",
        "command-r-08-2024",
        "command-r-plus",
        "command-r-plus-08-2024",
        "rerank-english-v2.0",
        "rerank-english-v3.0",
        "rerank-multilingual-v2.0",
        "rerank-multilingual-v3.0",

        # Microsoft Models
        "@cf/microsoft/phi-2",
        "microsoft/DialoGPT-medium",
        "microsoft/Phi-3-medium-4k-instruct",
        "microsoft/Phi-3-mini-4k-instruct",
        "microsoft/Phi-3.5-mini-instruct",
        "microsoft/WizardLM-2-8x22B",

        # Yi Models
        "01-ai/Yi-1.5-34B-Chat",
        "01-ai/Yi-34B-Chat",

        # Specialized Models and Tools
        "@cf/deepseek-ai/deepseek-math-7b-base",
        "@cf/deepseek-ai/deepseek-math-7b-instruct",
        "@cf/defog/sqlcoder-7b-2",
        "@cf/openchat/openchat-3.5-0106",
        "@cf/thebloke/discolm-german-7b-v1-awq",
        "@cf/tiiuae/falcon-7b-instruct",
        "@cf/tinyllama/tinyllama-1.1b-chat-v1.0",
        "@hf/nexusflow/starling-lm-7b-beta",
        "@hf/nousresearch/hermes-2-pro-mistral-7b",
        "@hf/thebloke/deepseek-coder-6.7b-base-awq",
        "@hf/thebloke/deepseek-coder-6.7b-instruct-awq",
        "@hf/thebloke/llama-2-13b-chat-awq",
        "@hf/thebloke/llamaguard-7b-awq",
        "@hf/thebloke/neural-chat-7b-v3-1-awq",
        "@hf/thebloke/openhermes-2.5-mistral-7b-awq",
        "@hf/thebloke/zephyr-7b-beta-awq",
        "AndroidDeveloper",
        "AngularJSAgent",
        "AzureAgent",
        "BitbucketAgent",
        "DigitalOceanAgent",
        "DockerAgent",
        "ElectronAgent",
        "ErlangAgent",
        "FastAPIAgent",
        "FirebaseAgent",
        "FlaskAgent",
        "FlutterAgent",
        "GitAgent",
        "GitlabAgent",
        "GoAgent",
        "GodotAgent",
        "GoogleCloudAgent",
        "HTMLAgent",
        "HerokuAgent",
        "ImageGeneration",
        "JavaAgent",
        "JavaScriptAgent",
        "MongoDBAgent",
        "Next.jsAgent",
        "PyTorchAgent",
        "PythonAgent",
        "ReactAgent",
        "RepoMap",
        "SwiftDeveloper",
        "XcodeAgent",
        "YoutubeAgent",
        "blackboxai",
        "blackboxai-pro",
        "builderAgent",
        "dify",
        "flux",
        "openchat/openchat-3.6-8b",
        "rtist",
        "searchgpt",
        "sur",
        "sur-mistral",
        "unity"
    ]

    def __init__(
        self,
        is_conversation: bool = True,
        max_tokens: int = 4000,  # Set a reasonable default
        timeout: int = 30,
        intro: str = None,
        filepath: str = None,
        update_file: bool = True,
        proxies: dict = {},
        history_offset: int = 10250,
        act: str = None,
        model: str = "claude-3-5-sonnet-20240620",
        system_prompt: str = "You are a helpful assistant.",
        temperature: float = 0.5,
        presence_penalty: int = 0,
        frequency_penalty: int = 0,
        top_p: float = 1,
    ):
        """Initializes the TypeGPT API client."""
        if model not in self.models:
            raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(self.models)}")

        self.session = requests.Session()
        self.is_conversation = is_conversation
        self.max_tokens_to_sample = max_tokens
        self.api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"
        self.timeout = timeout
        self.last_response = {}
        self.last_response_status_code = None  # Status code of the most recent request
        self.model = model
        self.system_prompt = system_prompt
        self.temperature = temperature
        self.presence_penalty = presence_penalty
        self.frequency_penalty = frequency_penalty
        self.top_p = top_p

        self.headers = {
            "authority": "chat.typegpt.net",
            "accept": "application/json, text/event-stream",
            "accept-language": "en-US,en;q=0.9",
            "content-type": "application/json",
            "origin": "https://chat.typegpt.net",
            "referer": "https://chat.typegpt.net/",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
        }

        # Materialized as a tuple (rather than a generator) so membership
        # checks in ask() work on every call, not just the first.
        self.__available_optimizers = tuple(
            method
            for method in dir(Optimizers)
            if callable(getattr(Optimizers, method)) and not method.startswith("__")
        )
        Conversation.intro = (
            AwesomePrompts().get_act(
                act, raise_not_found=True, default=None, case_insensitive=True
            )
            if act
            else intro or Conversation.intro
        )
        self.conversation = Conversation(
            is_conversation, self.max_tokens_to_sample, filepath, update_file
        )
        self.conversation.history_offset = history_offset
        self.session.proxies = proxies

    def ask(
        self,
        prompt: str,
        stream: bool = False,
        raw: bool = False,
        optimizer: str = None,
        conversationally: bool = False,
    ) -> Dict[str, Any] | Generator:
        """Sends a prompt to the TypeGPT.net API and returns the response."""
        conversation_prompt = self.conversation.gen_complete_prompt(prompt)
        if optimizer:
            if optimizer in self.__available_optimizers:
                conversation_prompt = getattr(Optimizers, optimizer)(
                    conversation_prompt if conversationally else prompt
                )
            else:
                raise exceptions.FailedToGenerateResponseError(
                    f"Optimizer is not one of {self.__available_optimizers}"
                )

        payload = {
            "messages": [
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": conversation_prompt}
            ],
            "stream": stream,
            "model": self.model,
            "temperature": self.temperature,
            "presence_penalty": self.presence_penalty,
            "frequency_penalty": self.frequency_penalty,
            "top_p": self.top_p,
            "max_tokens": self.max_tokens_to_sample,
        }

        def for_stream():
            response = self.session.post(
                self.api_endpoint, headers=self.headers, json=payload, stream=True, timeout=self.timeout
            )
            self.last_response_status_code = response.status_code  # Capture status code
            if not response.ok:
                raise exceptions.FailedToGenerateResponseError(
                    f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
                )
            message_load = ""
            for line in response.iter_lines():
                if line:
                    line = line.decode("utf-8")
                    if line.startswith("data: "):
                        line = line[6:]  # Remove "data: " prefix
                        # Skip [DONE] message
                        if line.strip() == "[DONE]":
                            break
                        try:
                            data = json.loads(line)
                            # Extract and yield only new content
                            if 'choices' in data and len(data['choices']) > 0:
                                delta = data['choices'][0].get('delta', {})
                                if 'content' in delta:
                                    new_content = delta['content']
                                    message_load += new_content
                                    # Yield only the new content
                                    yield dict(text=new_content) if not raw else new_content
                                    self.last_response = dict(text=message_load)
                        except json.JSONDecodeError:
                            continue
            self.conversation.update_chat_history(prompt, self.get_message(self.last_response))

        def for_non_stream():
            # timeout added here for parity with the streaming branch
            response = self.session.post(self.api_endpoint, headers=self.headers, json=payload, timeout=self.timeout)
            self.last_response_status_code = response.status_code  # Capture status code
            if not response.ok:
                raise exceptions.FailedToGenerateResponseError(
                    f"Request failed - {response.status_code}: {response.text}"
                )
            self.last_response = response.json()
            self.conversation.update_chat_history(prompt, self.get_message(self.last_response))
            return self.last_response

        return for_stream() if stream else for_non_stream()

    def chat(
        self,
        prompt: str,
        stream: bool = False,
        optimizer: str = None,
        conversationally: bool = False,
    ) -> str | Generator[str, None, None]:
        """Generate response `str` or stream of `str` chunks."""
        def for_stream():
            for chunk in self.ask(
                prompt, stream=True, optimizer=optimizer, conversationally=conversationally
            ):
                yield self.get_message(chunk)  # Extract text from streamed chunks

        # The streamed path lives in an inner generator so that the
        # non-streamed path can return a plain string (a method body that
        # contains `yield` would otherwise always produce a generator).
        if stream:
            return for_stream()
        return self.get_message(
            self.ask(prompt, stream=False, optimizer=optimizer, conversationally=conversationally)
        )

    def get_message(self, response: Dict[str, Any]) -> str:
        """Retrieves message text from a response chunk or full response."""
        if isinstance(response, str):  # Handle raw responses
            return response
        elif isinstance(response, dict):
            if "text" in response:
                return response["text"]  # Streamed chunks are {'text': ...}
            # Non-streamed responses arrive in OpenAI completion format
            choices = response.get("choices", [])
            if choices:
                return choices[0].get("message", {}).get("content", "")
            return ""
        else:
            raise TypeError("Invalid response type. Expected str or dict.")

if __name__ == "__main__":
    from rich import print
    from rich.progress import Progress, BarColumn, TextColumn, TimeRemainingColumn, SpinnerColumn
    from rich.console import Console
    from rich.table import Table
    import concurrent.futures

    def make_api_call(thread_number, results):
        ai = TypeGPT()
        try:
            ai.ask("Test message", stream=False)
            status_code = ai.last_response_status_code
            results[thread_number] = status_code
        except Exception as e:
            results[thread_number] = str(e)

    results = {}
    total_requests = 100

    console = Console()

    print("[bold magenta]Starting API Load Test with 100 simultaneous requests...[/bold magenta]\n")

    with Progress(
        SpinnerColumn(),
        "[progress.description]{task.description}",
        BarColumn(bar_width=None),
        "[progress.percentage]{task.percentage:>3.0f}%",
        TimeRemainingColumn(),
        console=console,
    ) as progress:
        task = progress.add_task("[cyan]Sending API Requests...", total=total_requests)
        with concurrent.futures.ThreadPoolExecutor(max_workers=total_requests) as executor:
            futures = {
                executor.submit(make_api_call, i, results): i for i in range(total_requests)
            }
            for future in concurrent.futures.as_completed(futures):
                progress.update(task, advance=1)
        progress.stop()

    # Process and display the results
    successful_calls = sum(1 for status in results.values() if status == 200)
    failed_calls = total_requests - successful_calls

    print("\n[bold magenta]API Load Test Results:[/bold magenta]\n")
    print(f"[bold green]Successful calls: {successful_calls}")
    print(f"[bold red]Failed calls: {failed_calls}\n")

    # Create a table to display detailed results
    table = Table(show_header=True, header_style="bold blue")
    table.add_column("Thread Number", justify="right", style="dim")
    table.add_column("Status", style="bold")

    for thread_number, status in results.items():
        if status == 200:
            table.add_row(f"{thread_number}", "[green]Success[/green]")
        else:
            table.add_row(f"{thread_number}", f"[red]Failed ({status})[/red]")

    print(table)
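
A minimal usage sketch for the provider class above; it assumes the `webscout` package (source of `Optimizers`, `Conversation`, `AwesomePrompts`, and `Provider`) is installed and that chat.typegpt.net is reachable:

```python
# Minimal usage sketch for the TypeGPT provider above (assumes webscout is
# installed and the upstream endpoint is reachable).
from TYPEGPT.typegpt import TypeGPT

ai = TypeGPT(model="gpt-3.5-turbo", max_tokens=512)

# Streamed: chat(stream=True) yields decoded text deltas as they arrive.
for token in ai.chat("Briefly, what is HTTP?", stream=True):
    print(token, end="", flush=True)
print()

# The raw HTTP status of the most recent request is kept on the client.
print(ai.last_response_status_code)
```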
TYPEGPT/typegpt_normal.py
ADDED
@@ -0,0 +1,231 @@
import requests
import json

# List of available models
models = [
    # OpenAI Models
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-202201",
    "gpt-4o",
    "gpt-4o-2024-05-13",
    "o1-preview",

    # Claude Models
    "claude",
    "claude-3-5-sonnet",
    "claude-sonnet-3.5",
    "claude-3-5-sonnet-20240620",

    # Meta/LLaMA Models
    "@cf/meta/llama-2-7b-chat-fp16",
    "@cf/meta/llama-2-7b-chat-int8",
    "@cf/meta/llama-3-8b-instruct",
    "@cf/meta/llama-3.1-8b-instruct",
    "@cf/meta-llama/llama-2-7b-chat-hf-lora",
    "llama-3.1-405b",
    "llama-3.1-70b",
    "llama-3.1-8b",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-3.1-70B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.2-90B-Vision-Instruct",
    "meta-llama/Llama-Guard-3-8B",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",

    # Mistral Models
    "mistral",
    "mistral-large",
    "@cf/mistral/mistral-7b-instruct-v0.1",
    "@cf/mistral/mistral-7b-instruct-v0.2-lora",
    "@hf/mistralai/mistral-7b-instruct-v0.2",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "mistralai/Mixtral-8x22B-Instruct-v0.1",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",

    # Qwen Models
    "@cf/qwen/qwen1.5-0.5b-chat",
    "@cf/qwen/qwen1.5-1.8b-chat",
    "@cf/qwen/qwen1.5-7b-chat-awq",
    "@cf/qwen/qwen1.5-14b-chat-awq",
    "Qwen/Qwen2.5-3B-Instruct",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-Coder-32B-Instruct",

    # Google/Gemini Models
    "@cf/google/gemma-2b-it-lora",
    "@cf/google/gemma-7b-it-lora",
    "@hf/google/gemma-7b-it",
    "google/gemma-1.1-2b-it",
    "google/gemma-1.1-7b-it",
    "gemini-pro",
    "gemini-1.5-pro",
    "gemini-1.5-pro-latest",
    "gemini-1.5-flash",

    # Cohere Models
    "c4ai-aya-23-35b",
    "c4ai-aya-23-8b",
    "command",
    "command-light",
    "command-light-nightly",
    "command-nightly",
    "command-r",
    "command-r-08-2024",
    "command-r-plus",
    "command-r-plus-08-2024",
    "rerank-english-v2.0",
    "rerank-english-v3.0",
    "rerank-multilingual-v2.0",
    "rerank-multilingual-v3.0",

    # Microsoft Models
    "@cf/microsoft/phi-2",
    "microsoft/DialoGPT-medium",
    "microsoft/Phi-3-medium-4k-instruct",
    "microsoft/Phi-3-mini-4k-instruct",
    "microsoft/Phi-3.5-mini-instruct",
    "microsoft/WizardLM-2-8x22B",

    # Yi Models
    "01-ai/Yi-1.5-34B-Chat",
    "01-ai/Yi-34B-Chat",

    # Specialized Models and Tools
    "@cf/deepseek-ai/deepseek-math-7b-base",
    "@cf/deepseek-ai/deepseek-math-7b-instruct",
    "@cf/defog/sqlcoder-7b-2",
    "@cf/openchat/openchat-3.5-0106",
    "@cf/thebloke/discolm-german-7b-v1-awq",
    "@cf/tiiuae/falcon-7b-instruct",
    "@cf/tinyllama/tinyllama-1.1b-chat-v1.0",
    "@hf/nexusflow/starling-lm-7b-beta",
    "@hf/nousresearch/hermes-2-pro-mistral-7b",
    "@hf/thebloke/deepseek-coder-6.7b-base-awq",
    "@hf/thebloke/deepseek-coder-6.7b-instruct-awq",
    "@hf/thebloke/llama-2-13b-chat-awq",
    "@hf/thebloke/llamaguard-7b-awq",
    "@hf/thebloke/neural-chat-7b-v3-1-awq",
    "@hf/thebloke/openhermes-2.5-mistral-7b-awq",
    "@hf/thebloke/zephyr-7b-beta-awq",
    "AndroidDeveloper",
    "AngularJSAgent",
    "AzureAgent",
    "BitbucketAgent",
    "DigitalOceanAgent",
    "DockerAgent",
    "ElectronAgent",
    "ErlangAgent",
    "FastAPIAgent",
    "FirebaseAgent",
    "FlaskAgent",
    "FlutterAgent",
    "GitAgent",
    "GitlabAgent",
    "GoAgent",
    "GodotAgent",
    "GoogleCloudAgent",
    "HTMLAgent",
    "HerokuAgent",
    "ImageGeneration",
    "JavaAgent",
    "JavaScriptAgent",
    "MongoDBAgent",
    "Next.jsAgent",
    "PyTorchAgent",
    "PythonAgent",
    "ReactAgent",
    "RepoMap",
    "SwiftDeveloper",
    "XcodeAgent",
    "YoutubeAgent",
    "blackboxai",
    "blackboxai-pro",
    "builderAgent",
    "dify",
    "flux",
    "openchat/openchat-3.6-8b",
    "rtist",
    "searchgpt",
    "sur",
    "sur-mistral",
    "unity"
]

# Parameters
is_conversation = True
max_tokens = 4000  # Set a reasonable default
timeout = 30
model = "claude-3-5-sonnet-20240620"
system_prompt = "You are a helpful assistant."
temperature = 0.5
presence_penalty = 0
frequency_penalty = 0
top_p = 1

if model not in models:
    raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(models)}")

session = requests.Session()
api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"

headers = {
    "authority": "chat.typegpt.net",
    "accept": "application/json, text/event-stream",
    "accept-language": "en-US,en;q=0.9",
    "content-type": "application/json",
    "origin": "https://chat.typegpt.net",
    "referer": "https://chat.typegpt.net/",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
}

# Prompt to send
prompt = "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate"

# Payload
payload = {
    "messages": [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt}
    ],
    "stream": True,
    "model": model,
    "temperature": temperature,
    "presence_penalty": presence_penalty,
    "frequency_penalty": frequency_penalty,
    "top_p": top_p,
    "max_tokens": max_tokens,
}

# Make the API request
response = session.post(
    api_endpoint, headers=headers, json=payload, stream=True, timeout=timeout
)

if not response.ok:
    raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}")

# Process the streamed response
for line in response.iter_lines():
    if line:
        line = line.decode("utf-8")
        if line.startswith("data: "):
            line = line[6:]  # Remove "data: " prefix
            if line.strip() == "[DONE]":
                break
            try:
                data = json.loads(line)
                if 'choices' in data and len(data['choices']) > 0:
                    delta = data['choices'][0].get('delta', {})
                    if 'content' in delta:
                        new_content = delta['content']
                        print(new_content, end="", flush=True)
            except json.JSONDecodeError:
                continue
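
For reuse, the request-and-parse steps of the flat script above can be folded into a generator. A minimal sketch that borrows the script's module-level `api_endpoint`, `headers`, and `timeout` (the helper name `stream_completion` is illustrative, not part of the upload):

```python
def stream_completion(session, prompt, model="gpt-3.5-turbo"):
    """Illustrative helper: POST a streaming chat completion, yield text deltas."""
    payload = {
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
        "model": model,
    }
    resp = session.post(api_endpoint, headers=headers, json=payload,
                        stream=True, timeout=timeout)
    resp.raise_for_status()
    for raw in resp.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8")
        if not line.startswith("data: "):
            continue
        line = line[6:]               # strip the SSE "data: " prefix
        if line.strip() == "[DONE]":  # end-of-stream sentinel
            break
        try:
            delta = json.loads(line)["choices"][0].get("delta", {})
        except (json.JSONDecodeError, KeyError, IndexError):
            continue
        if "content" in delta:
            yield delta["content"]

# e.g. print("".join(stream_completion(session, "Hello!")))
```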
api_info.py
ADDED
@@ -0,0 +1,176 @@
developer_info = {
    'developer': 'Devs Do Code',
    'contact': {
        'Telegram': 'https://t.me/devsdocode',
        'YouTube Channel': 'https://www.youtube.com/@DevsDoCode',
        'LinkedIn': 'https://www.linkedin.com/in/developer-sreejan/',
        'Discord Server': 'https://discord.gg/ehwfVtsAts',
        'Instagram': {
            'Personal': 'https://www.instagram.com/sree.shades_/',
            'Channel': 'https://www.instagram.com/devsdocode_/'
        }
    }
}

endpoint = {
    'route': "/generate",
    'params': {
        "query": "[SEARCH QUERY]"
    },
    'optional_params': {
        "model": "[]",
        "temperature": "[]",
        "system_prompt": "[]"
    },
    'url_demo': '/generate?query=Who is Devs Do Code&&model=command-r-plus&&temperature=0.7&&system_prompt=Your Owner is "Devs Do Code"'
}

model_providers = {
    "OpenAI": {
        "models": [
            "gpt-3.5-turbo",
            "gpt-3.5-turbo-202201",
            "gpt-4o",
            "gpt-4o-2024-05-13",
            "o1-preview"
        ],
        "description": "OpenAI's GPT language models"
    },
    "Anthropic": {
        "models": [
            "claude",
            "claude-3-5-sonnet",
            "claude-sonnet-3.5",
            "claude-3-5-sonnet-20240620"
        ],
        "description": "Anthropic's Claude language models"
    },
    "Meta": {
        "models": [
            "llama-2-7b-chat",
            "llama-2-7b-chat-int8",
            "llama-3-8b-instruct",
            "llama-3.1-8b-instruct",
            "llama-3.1-405b",
            "llama-3.1-70b",
            "llama-3.1-8b",
            "llama-3.2-11b-vision",
            "llama-3.2-1b",
            "llama-3.2-3b",
            "llama-3.2-90b-vision",
            "llama-guard-3-8b"
        ],
        "description": "Meta's LLaMA language models"
    },
    "Mistral": {
        "models": [
            "mistral",
            "mistral-large",
            "mistral-7b-instruct-v0.1",
            "mistral-7b-instruct-v0.2",
            "mistral-7b-instruct-v0.3",
            "mixtral-8x22b",
            "mixtral-8x7b"
        ],
        "description": "Mistral AI's language models"
    },
    "Qwen": {
        "models": [
            "qwen1.5-0.5b-chat",
            "qwen1.5-1.8b-chat",
            "qwen1.5-7b-chat",
            "qwen1.5-14b-chat",
            "qwen2.5-3b",
            "qwen2.5-72b",
            "qwen2.5-coder-32b"
        ],
        "description": "Qwen's language models"
    },
    "Google": {
        "models": [
            "gemma-2b",
            "gemma-7b",
            "gemini-pro",
            "gemini-1.5-pro",
            "gemini-1.5-pro-latest",
            "gemini-1.5-flash"
        ],
        "description": "Google's Gemini and Gemma models"
    },
    "Cohere": {
        "models": [
            "aya-23-35b",
            "aya-23-8b",
            "command",
            "command-light",
            "command-nightly",
            "command-r",
            "command-r-plus",
            "rerank-english-v2.0",
            "rerank-english-v3.0",
            "rerank-multilingual-v2.0",
            "rerank-multilingual-v3.0"
        ],
        "description": "Cohere's language models"
    },
    "Microsoft": {
        "models": [
            "phi-2",
            "dialogpt-medium",
            "phi-3-medium-4k",
            "phi-3-mini-4k",
            "phi-3.5-mini",
            "wizardlm-2-8x22b"
        ],
        "description": "Microsoft's language models"
    },
    "Yi": {
        "models": [
            "yi-1.5-34b-chat",
            "yi-34b-chat"
        ],
        "description": "01.AI's Yi language models"
    }
}

error_message = {
    'developer_contact': {
        'Telegram': 'https://t.me/DevsDoCode',
        'Instagram': 'https://www.instagram.com/sree.shades_/',
        'Discord': 'https://discord.gg/ehwfVtsAts',
        'LinkedIn': 'https://www.linkedin.com/in/developer-sreejan/',
        'Twitter': 'https://twitter.com/Anand_Sreejan'
    },
    'error': 'Oops! Something went wrong. Please contact the developer Devs Do Code.'
}

default_info = """This API is developed and being maintained by Devs Do Code (Sreejan).

**About the Developer**

Sreejan, a high school student from Patna, Bihar, India, has emerged as a notable figure in the technology sector.
His creation of an API is a testament to his dedication and expertise. Despite his youth, Sreejan's contributions
to artificial intelligence and machine learning are significant. As an AI & ML Engineer, he specializes in Deep Learning,
Natural Language Processing (NLP), and Robotics, with proficiency in Python, Java, and Mobile App Development.
Beyond his role as a technology consumer, Sreejan is an active open-source contributor, notably to projects like Hugging Face.

He is also recognized for his role in community development, particularly through "Devs Do Code," a platform he
founded to provide quality coding resources, tutorials, and projects. His mission is to equip developers with the
necessary skills to thrive in the ever-evolving tech landscape. Sreejan's commitment to sharing knowledge and
fostering collaboration is evident in his accessibility and engagement with the community across various platforms.

Connect with Sreejan and follow his journey in technology and innovation:

- Telegram: https://t.me/devsdocode
- YouTube Channel: https://www.youtube.com/@DevsDoCode
- LinkedIn: https://www.linkedin.com/in/developer-sreejan/
- Discord Server: https://discord.gg/ehwfVtsAts
- Instagram
  - Personal: https://www.instagram.com/sree.shades_/
  - Channel: https://www.instagram.com/devsdocode_/

Sreejan stands out not only as a developer but as a visionary and leader, driving change in the tech industry
with his passion, expertise, and unwavering commitment to community building. He continues to shape the
future of technology, one line of code at a time.
"""
fastapi_app.py
ADDED
@@ -0,0 +1,132 @@
from fastapi import FastAPI, Request, Response
from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import json

from typegpt_api import generate, model_mapping, simplified_models
from api_info import developer_info, model_providers

app = FastAPI()

# Set up CORS middleware if needed
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/health_check")
async def health_check():
    return {"status": "OK"}

@app.get("/models")
async def get_models():
    try:
        response = {
            "object": "list",
            "data": []
        }
        for provider, info in model_providers.items():
            for model in info["models"]:
                response["data"].append({
                    "id": model,
                    "object": "model",
                    "provider": provider,
                    "description": info["description"]
                })

        return JSONResponse(content=response)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

@app.post("/chat/completions")
async def chat_completions(request: Request):
    # Receive the JSON payload
    try:
        body = await request.json()
    except Exception as e:
        return JSONResponse(content={"error": "Invalid JSON payload"}, status_code=400)

    # Extract parameters
    model = body.get("model")
    messages = body.get("messages")
    temperature = body.get("temperature", 0.7)
    top_p = body.get("top_p", 1.0)
    n = body.get("n", 1)
    stream = body.get("stream", False)
    stop = body.get("stop")
    max_tokens = body.get("max_tokens")
    presence_penalty = body.get("presence_penalty", 0.0)
    frequency_penalty = body.get("frequency_penalty", 0.0)
    logit_bias = body.get("logit_bias")
    user = body.get("user")
    timeout = 30  # or set based on your preference

    # Validate required parameters
    if not model:
        return JSONResponse(content={"error": "The 'model' parameter is required."}, status_code=400)
    if not messages:
        return JSONResponse(content={"error": "The 'messages' parameter is required."}, status_code=400)

    # Call the generate function
    try:
        if stream:
            async def generate_stream():
                response = generate(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    top_p=top_p,
                    n=n,
                    stream=True,
                    stop=stop,
                    max_tokens=max_tokens,
                    presence_penalty=presence_penalty,
                    frequency_penalty=frequency_penalty,
                    logit_bias=logit_bias,
                    user=user,
                    timeout=timeout,
                )

                for chunk in response:
                    yield f"data: {json.dumps(chunk)}\n\n"
                yield "data: [DONE]\n\n"

            return StreamingResponse(
                generate_stream(),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                    "Transfer-Encoding": "chunked"
                }
            )
        else:
            response = generate(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                n=n,
                stream=False,
                stop=stop,
                max_tokens=max_tokens,
                presence_penalty=presence_penalty,
                frequency_penalty=frequency_penalty,
                logit_bias=logit_bias,
                user=user,
                timeout=timeout,
            )
            return JSONResponse(content=response)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

@app.get("/developer_info")
async def get_developer_info():
    return JSONResponse(content=developer_info)

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
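
A client-side sketch against the app above, assuming it is serving on localhost:8000; the route paths and the SSE framing (`data:` prefix, `[DONE]` sentinel) come straight from the handlers defined above:

```python
# Client sketch for the FastAPI app above (assumes it is running locally).
import json
import requests

base = "http://localhost:8000"
print(requests.get(f"{base}/health_check").json())  # {'status': 'OK'}

payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hi!"}],
    "stream": True,
}
with requests.post(f"{base}/chat/completions", json=payload, stream=True) as r:
    for raw in r.iter_lines():
        if not raw:
            continue
        line = raw.decode("utf-8").removeprefix("data: ")
        if line.strip() == "[DONE]":
            break
        chunk = json.loads(line)
        choices = chunk.get("choices") or [{}]
        delta = choices[0].get("delta", {})
        print(delta.get("content", ""), end="", flush=True)
```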
flask_app.py
ADDED
@@ -0,0 +1,131 @@
from flask import Flask, request, Response, jsonify, stream_with_context
from flask_cors import CORS
import json

from typegpt_api import generate, model_mapping, simplified_models
from api_info import developer_info, model_providers

app = Flask(__name__)

# Set up CORS middleware if needed
# (flask-cors option names are "supports_credentials" and "allow_headers")
CORS(app, resources={
    r"/*": {
        "origins": "*",
        "supports_credentials": True,
        "methods": ["*"],
        "allow_headers": ["*"]
    }
})

@app.route("/health_check", methods=['GET'])
def health_check():
    return jsonify({"status": "OK"})

@app.route("/models", methods=['GET'])
def get_models():
    try:
        response = {
            "object": "list",
            "data": []
        }
        for provider, info in model_providers.items():
            for model in info["models"]:
                response["data"].append({
                    "id": model,
                    "object": "model",
                    "provider": provider,
                    "description": info["description"]
                })

        return jsonify(response)
    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route("/chat/completions", methods=['POST'])
def chat_completions():
    # Receive the JSON payload
    try:
        body = request.get_json()
    except Exception as e:
        return jsonify({"error": "Invalid JSON payload"}), 400

    # Extract parameters
    model = body.get("model")
    messages = body.get("messages")
    temperature = body.get("temperature", 0.7)
    top_p = body.get("top_p", 1.0)
    n = body.get("n", 1)
    stream = body.get("stream", False)
    stop = body.get("stop")
    max_tokens = body.get("max_tokens")
    presence_penalty = body.get("presence_penalty", 0.0)
    frequency_penalty = body.get("frequency_penalty", 0.0)
    logit_bias = body.get("logit_bias")
    user = body.get("user")
    timeout = 30  # or set based on your preference

    # Validate required parameters
    if not model:
        return jsonify({"error": "The 'model' parameter is required."}), 400
    if not messages:
        return jsonify({"error": "The 'messages' parameter is required."}), 400

    # Call the generate function
    try:
        if stream:
            def generate_stream():
                response = generate(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    top_p=top_p,
                    n=n,
                    stream=True,
                    stop=stop,
                    max_tokens=max_tokens,
                    presence_penalty=presence_penalty,
                    frequency_penalty=frequency_penalty,
                    logit_bias=logit_bias,
                    user=user,
                    timeout=timeout,
                )

                for chunk in response:
                    yield f"data: {json.dumps(chunk)}\n\n"
                yield "data: [DONE]\n\n"

            return Response(
                stream_with_context(generate_stream()),
                mimetype="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                    "Transfer-Encoding": "chunked"
                }
            )
        else:
            response = generate(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                n=n,
                stream=False,
                stop=stop,
                max_tokens=max_tokens,
                presence_penalty=presence_penalty,
                frequency_penalty=frequency_penalty,
                logit_bias=logit_bias,
                user=user,
                timeout=timeout,
            )
            return jsonify(response)
    except Exception as e:
        return jsonify({"error": str(e)}), 500

@app.route("/developer_info", methods=['GET'])
def get_developer_info():
    return jsonify(developer_info)

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000)
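
Since the Flask app mirrors the FastAPI routes, a quick smoke test can use Flask's built-in test client without binding a port. A sketch that only exercises the endpoints backed by local data (no call to chat.typegpt.net is made):

```python
# Smoke-test sketch using Flask's test client.
from flask_app import app

with app.test_client() as client:
    assert client.get("/health_check").get_json() == {"status": "OK"}
    models = client.get("/models").get_json()
    print(len(models["data"]), "models advertised")
```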
query.md
ADDED
@@ -0,0 +1,384 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
```python
|
2 |
+
import requests
|
3 |
+
import json
|
4 |
+
|
5 |
+
# Build model mapping
|
6 |
+
original_models = [
|
7 |
+
# OpenAI Models
|
8 |
+
"gpt-3.5-turbo",
|
9 |
+
"gpt-3.5-turbo-202201",
|
10 |
+
"gpt-4o",
|
11 |
+
"gpt-4o-2024-05-13",
|
12 |
+
"o1-preview",
|
13 |
+
|
14 |
+
# Claude Models
|
15 |
+
"claude",
|
16 |
+
"claude-3-5-sonnet",
|
17 |
+
"claude-sonnet-3.5",
|
18 |
+
"claude-3-5-sonnet-20240620",
|
19 |
+
|
20 |
+
# Meta/LLaMA Models
|
21 |
+
"@cf/meta/llama-2-7b-chat-fp16",
|
22 |
+
"@cf/meta/llama-2-7b-chat-int8",
|
23 |
+
"@cf/meta/llama-3-8b-instruct",
|
24 |
+
"@cf/meta/llama-3.1-8b-instruct",
|
25 |
+
"@cf/meta-llama/llama-2-7b-chat-hf-lora",
|
26 |
+
"llama-3.1-405b",
|
27 |
+
"llama-3.1-70b",
|
28 |
+
"llama-3.1-8b",
|
29 |
+
"meta-llama/Llama-2-7b-chat-hf",
|
30 |
+
"meta-llama/Llama-3.1-70B-Instruct",
|
31 |
+
"meta-llama/Llama-3.1-8B-Instruct",
|
32 |
+
"meta-llama/Llama-3.2-11B-Vision-Instruct",
|
33 |
+
"meta-llama/Llama-3.2-1B-Instruct",
|
34 |
+
"meta-llama/Llama-3.2-3B-Instruct",
|
35 |
+
"meta-llama/Llama-3.2-90B-Vision-Instruct",
|
36 |
+
"meta-llama/Llama-Guard-3-8B",
|
37 |
+
"meta-llama/Meta-Llama-3-70B-Instruct",
|
38 |
+
"meta-llama/Meta-Llama-3-8B-Instruct",
|
39 |
+
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
|
40 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct",
|
41 |
+
"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
|
42 |
+
|
43 |
+
# Mistral Models
|
44 |
+
"mistral",
|
45 |
+
"mistral-large",
|
46 |
+
"@cf/mistral/mistral-7b-instruct-v0.1",
|
47 |
+
"@cf/mistral/mistral-7b-instruct-v0.2-lora",
|
48 |
+
"@hf/mistralai/mistral-7b-instruct-v0.2",
|
49 |
+
"mistralai/Mistral-7B-Instruct-v0.2",
|
50 |
+
"mistralai/Mistral-7B-Instruct-v0.3",
|
51 |
+
"mistralai/Mixtral-8x22B-Instruct-v0.1",
|
52 |
+
"mistralai/Mixtral-8x7B-Instruct-v0.1",
|
53 |
+
|
54 |
+
# Qwen Models
|
55 |
+
"@cf/qwen/qwen1.5-0.5b-chat",
|
56 |
+
"@cf/qwen/qwen1.5-1.8b-chat",
|
57 |
+
"@cf/qwen/qwen1.5-7b-chat-awq",
|
58 |
+
"@cf/qwen/qwen1.5-14b-chat-awq",
|
59 |
+
"Qwen/Qwen2.5-3B-Instruct",
|
60 |
+
"Qwen/Qwen2.5-72B-Instruct",
|
61 |
+
"Qwen/Qwen2.5-Coder-32B-Instruct",
|
62 |
+
|
63 |
+
# Google/Gemini Models
|
64 |
+
"@cf/google/gemma-2b-it-lora",
|
65 |
+
"@cf/google/gemma-7b-it-lora",
|
66 |
+
"@hf/google/gemma-7b-it",
|
67 |
+
"google/gemma-1.1-2b-it",
|
68 |
+
"google/gemma-1.1-7b-it",
|
69 |
+
"gemini-pro",
|
70 |
+
"gemini-1.5-pro",
|
71 |
+
"gemini-1.5-pro-latest",
|
72 |
+
"gemini-1.5-flash",
|
73 |
+
|
74 |
+
# Cohere Models
|
75 |
+
"c4ai-aya-23-35b",
|
76 |
+
"c4ai-aya-23-8b",
|
77 |
+
"command",
|
78 |
+
"command-light",
|
79 |
+
"command-light-nightly",
|
80 |
+
"command-nightly",
|
81 |
+
"command-r",
|
82 |
+
"command-r-08-2024",
|
83 |
+
"command-r-plus",
|
84 |
+
"command-r-plus-08-2024",
|
85 |
+
"rerank-english-v2.0",
|
86 |
+
"rerank-english-v3.0",
|
87 |
+
"rerank-multilingual-v2.0",
|
88 |
+
"rerank-multilingual-v3.0",
|
89 |
+
|
90 |
+
# Microsoft Models
|
91 |
+
"@cf/microsoft/phi-2",
|
92 |
+
"microsoft/DialoGPT-medium",
|
93 |
+
"microsoft/Phi-3-medium-4k-instruct",
|
94 |
+
"microsoft/Phi-3-mini-4k-instruct",
|
95 |
+
"microsoft/Phi-3.5-mini-instruct",
|
96 |
+
"microsoft/WizardLM-2-8x22B",
|
97 |
+
|
98 |
+
# Yi Models
|
99 |
+
"01-ai/Yi-1.5-34B-Chat",
|
100 |
+
"01-ai/Yi-34B-Chat",
|
101 |
+
]
|
102 |
+
|
103 |
+
# Create mapping from simplified model names to original model names
|
104 |
+
model_mapping = {}
|
105 |
+
simplified_models = []
|
106 |
+
|
107 |
+
for original_model in original_models:
|
108 |
+
simplified_name = original_model.split('/')[-1]
|
109 |
+
if simplified_name in model_mapping:
|
110 |
+
# Conflict detected, handle as per instructions
|
111 |
+
print(f"Conflict detected for model name '{simplified_name}'. Excluding '{original_model}' from available models.")
|
112 |
+
continue
|
113 |
+
model_mapping[simplified_name] = original_model
|
114 |
+
simplified_models.append(simplified_name)
|
115 |
+
|
116 |
+
def generate(
|
117 |
+
model,
|
118 |
+
messages,
|
119 |
+
temperature=0.7,
|
120 |
+
top_p=1.0,
|
121 |
+
n=1,
|
122 |
+
stream=False,
|
123 |
+
stop=None,
|
124 |
+
max_tokens=None,
|
125 |
+
presence_penalty=0.0,
|
126 |
+
frequency_penalty=0.0,
|
127 |
+
logit_bias=None,
|
128 |
+
user=None,
|
129 |
+
timeout=30,
|
130 |
+
):
|
131 |
+
"""
|
132 |
+
Generates a chat completion using the provided model and messages.
|
133 |
+
"""
|
134 |
+
# Use the simplified model names
|
135 |
+
models = simplified_models
|
136 |
+
|
137 |
+
if model not in models:
|
138 |
+
raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(models)}")
|
139 |
+
|
140 |
+
# Map simplified model name to original model name
|
141 |
+
original_model = model_mapping[model]
|
142 |
+
|
143 |
+
api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"
|
144 |
+
|
145 |
+
headers = {
|
146 |
+
"authority": "chat.typegpt.net",
|
147 |
+
"accept": "application/json, text/event-stream",
|
148 |
+
"accept-language": "en-US,en;q=0.9",
|
149 |
+
"content-type": "application/json",
|
150 |
+
"origin": "https://chat.typegpt.net",
|
151 |
+
"referer": "https://chat.typegpt.net/",
|
152 |
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
|
153 |
+
}
|
154 |
+
|
155 |
+
# Payload
|
156 |
+
payload = {
|
157 |
+
"messages": messages,
|
158 |
+
"stream": stream,
|
159 |
+
"model": original_model,
|
160 |
+
"temperature": temperature,
|
161 |
+
"presence_penalty": presence_penalty,
|
162 |
+
"frequency_penalty": frequency_penalty,
|
163 |
+
"top_p": top_p,
|
164 |
+
}
|
165 |
+
|
166 |
+
# Only include max_tokens if it's not None
|
167 |
+
if max_tokens is not None:
|
168 |
+
payload["max_tokens"] = max_tokens
|
169 |
+
|
170 |
+
# Only include 'stop' if it's not None
|
171 |
+
if stop is not None:
|
172 |
+
payload["stop"] = stop
|
173 |
+
|
174 |
+
# Check if logit_bias is provided
|
175 |
+
if logit_bias is not None:
|
176 |
+
payload["logit_bias"] = logit_bias
|
177 |
+
|
178 |
+
# Include 'user' if provided
|
179 |
+
if user is not None:
|
180 |
+
payload["user"] = user
|
181 |
+
|
182 |
+
# Start the request
|
183 |
+
session = requests.Session()
|
184 |
+
response = session.post(
|
185 |
+
api_endpoint, headers=headers, json=payload, stream=stream, timeout=timeout
|
186 |
+
)
|
187 |
+
|
188 |
+
if not response.ok:
|
189 |
+
raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}")
|
190 |
+
|
191 |
+
def stream_response():
|
192 |
+
for line in response.iter_lines():
|
193 |
+
if line:
|
194 |
+
line = line.decode("utf-8")
|
195 |
+
if line.startswith("data: "):
|
196 |
+
line = line[6:] # Remove "data: " prefix
|
197 |
+
if line.strip() == "[DONE]":
|
198 |
+
break
|
199 |
+
try:
|
200 |
+
data = json.loads(line)
|
201 |
+
yield data
|
202 |
+
except json.JSONDecodeError:
|
203 |
+
continue
|
204 |
+
|
205 |
+
if stream:
|
206 |
+
return stream_response()
|
207 |
+
else:
|
208 |
+
return response.json()
|
209 |
+
|
210 |
+
if __name__ == "__main__":
|
211 |
+
# Example usage
|
212 |
+
# model = "claude-3-5-sonnet-20240620"
|
213 |
+
# model = "qwen1.5-0.5b-chat"
|
214 |
+
# model = "llama-2-7b-chat-fp16"
|
215 |
+
model = "gpt-3.5-turbo"
|
216 |
+
messages = [
|
217 |
+
{"role": "system", "content": "Be Detailed"},
|
218 |
+
{"role": "user", "content": "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate."}
|
219 |
+
]
|
220 |
+
|
221 |
+
# try:
|
222 |
+
# # For non-streamed response
|
223 |
+
# response = generate(
|
224 |
+
# model=model,
|
225 |
+
# messages=messages,
|
226 |
+
# temperature=0.5,
|
227 |
+
# max_tokens=4000,
|
228 |
+
# stream=False # Change to True for streaming
|
229 |
+
# )
|
230 |
+
# if 'choices' in response:
|
231 |
+
# reply = response['choices'][0]['message']['content']
|
232 |
+
# print(reply)
|
233 |
+
# else:
|
234 |
+
# print("No response received.")
|
235 |
+
# except Exception as e:
|
236 |
+
# print(e)
|
237 |
+
|
238 |
+
|
239 |
+
try:
|
240 |
+
# For streamed response
|
241 |
+
response = generate(
|
242 |
+
model=model,
|
243 |
+
messages=messages,
|
244 |
+
temperature=0.5,
|
245 |
+
max_tokens=4000,
|
246 |
+
stream=True, # Change to False for non-streamed response
|
247 |
+
)
|
248 |
+
for data in response:
|
249 |
+
if 'choices' in data:
|
250 |
+
reply = data['choices'][0]['delta']['content']
|
251 |
+
print(reply, end="", flush=True)
|
252 |
+
else:
|
253 |
+
print("No response received.")
|
254 |
+
except Exception as e:
|
255 |
+
print(e)
|
256 |
+
```
|
257 |
+
|
258 |
+
```python
from fastapi import FastAPI, Request, Response
from fastapi.responses import JSONResponse, StreamingResponse
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import asyncio
import json
import requests

from TYPEGPT.typegpt_api import generate, model_mapping, simplified_models
from api_info import developer_info

app = FastAPI()

# Set up CORS middleware if needed
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

@app.get("/health_check")
async def health_check():
    return {"status": "OK"}

@app.get("/models")
async def get_models():
    # Retrieve models from TypeGPT API and forward the response
    api_endpoint = "https://chat.typegpt.net/api/openai/v1/models"
    try:
        response = requests.get(api_endpoint)
        return JSONResponse(content=response.json(), status_code=response.status_code)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

@app.post("/chat/completions")
async def chat_completions(request: Request):
    # Receive the JSON payload
    try:
        body = await request.json()
    except Exception:
        return JSONResponse(content={"error": "Invalid JSON payload"}, status_code=400)

    # Extract parameters
    model = body.get("model")
    messages = body.get("messages")
    temperature = body.get("temperature", 0.7)
    top_p = body.get("top_p", 1.0)
    n = body.get("n", 1)
    stream = body.get("stream", False)
    stop = body.get("stop")
    max_tokens = body.get("max_tokens")
    presence_penalty = body.get("presence_penalty", 0.0)
    frequency_penalty = body.get("frequency_penalty", 0.0)
    logit_bias = body.get("logit_bias")
    user = body.get("user")
    timeout = 30  # or set based on your preference

    # Validate required parameters
    if not model:
        return JSONResponse(content={"error": "The 'model' parameter is required."}, status_code=400)
    if not messages:
        return JSONResponse(content={"error": "The 'messages' parameter is required."}, status_code=400)

    # Call the generate function
    try:
        if stream:
            async def generate_stream():
                response = generate(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    top_p=top_p,
                    n=n,
                    stream=True,
                    stop=stop,
                    max_tokens=max_tokens,
                    presence_penalty=presence_penalty,
                    frequency_penalty=frequency_penalty,
                    logit_bias=logit_bias,
                    user=user,
                    timeout=timeout,
                )
                for chunk in response:
                    yield f"data: {json.dumps(chunk)}\n\n"
                yield "data: [DONE]\n\n"

            return StreamingResponse(
                generate_stream(),
                media_type="text/event-stream",
                headers={
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                    "Transfer-Encoding": "chunked"
                }
            )
        else:
            response = generate(
                model=model,
                messages=messages,
                temperature=temperature,
                top_p=top_p,
                n=n,
                stream=False,
                stop=stop,
                max_tokens=max_tokens,
                presence_penalty=presence_penalty,
                frequency_penalty=frequency_penalty,
                logit_bias=logit_bias,
                user=user,
                timeout=timeout,
            )
            return JSONResponse(content=response)
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

@app.get("/developer_info")
async def get_developer_info():
    return JSONResponse(content=developer_info)

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
```
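As a quick smoke test of the proxy above, the snippet below (a minimal sketch, assuming the app is running locally on port 8000) sends a non-streamed request to `/chat/completions`:

```python
import requests

# Minimal client sketch; assumes the FastAPI proxy above is serving on localhost:8000.
payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Say hello in one sentence."}],
    "stream": False,
}
resp = requests.post("http://localhost:8000/chat/completions", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])
```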
requirements.txt
ADDED
@@ -0,0 +1,7 @@
fastapi==0.110.2
Flask==3.0.3
Requests==2.31.0
uvicorn==0.29.0
python-dotenv==1.0.1
colorama
pytz
typegpt_api.py
ADDED
@@ -0,0 +1,254 @@
import requests
import json

# Build model mapping
original_models = [
    # OpenAI Models
    "gpt-3.5-turbo",
    "gpt-3.5-turbo-202201",
    "gpt-4o",
    "gpt-4o-2024-05-13",
    "o1-preview",

    # Claude Models
    "claude",
    "claude-3-5-sonnet",
    "claude-sonnet-3.5",
    "claude-3-5-sonnet-20240620",

    # Meta/LLaMA Models
    "@cf/meta/llama-2-7b-chat-fp16",
    "@cf/meta/llama-2-7b-chat-int8",
    "@cf/meta/llama-3-8b-instruct",
    "@cf/meta/llama-3.1-8b-instruct",
    "@cf/meta-llama/llama-2-7b-chat-hf-lora",
    "llama-3.1-405b",
    "llama-3.1-70b",
    "llama-3.1-8b",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-3.1-70B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.2-90B-Vision-Instruct",
    "meta-llama/Llama-Guard-3-8B",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
    "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",

    # Mistral Models
    "mistral",
    "mistral-large",
    "@cf/mistral/mistral-7b-instruct-v0.1",
    "@cf/mistral/mistral-7b-instruct-v0.2-lora",
    "@hf/mistralai/mistral-7b-instruct-v0.2",
    "mistralai/Mistral-7B-Instruct-v0.2",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "mistralai/Mixtral-8x22B-Instruct-v0.1",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",

    # Qwen Models
    "@cf/qwen/qwen1.5-0.5b-chat",
    "@cf/qwen/qwen1.5-1.8b-chat",
    "@cf/qwen/qwen1.5-7b-chat-awq",
    "@cf/qwen/qwen1.5-14b-chat-awq",
    "Qwen/Qwen2.5-3B-Instruct",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-Coder-32B-Instruct",

    # Google/Gemini Models
    "@cf/google/gemma-2b-it-lora",
    "@cf/google/gemma-7b-it-lora",
    "@hf/google/gemma-7b-it",
    "google/gemma-1.1-2b-it",
    "google/gemma-1.1-7b-it",
    "gemini-pro",
    "gemini-1.5-pro",
    "gemini-1.5-pro-latest",
    "gemini-1.5-flash",

    # Cohere Models
    "c4ai-aya-23-35b",
    "c4ai-aya-23-8b",
    "command",
    "command-light",
    "command-light-nightly",
    "command-nightly",
    "command-r",
    "command-r-08-2024",
    "command-r-plus",
    "command-r-plus-08-2024",
    "rerank-english-v2.0",
    "rerank-english-v3.0",
    "rerank-multilingual-v2.0",
    "rerank-multilingual-v3.0",

    # Microsoft Models
    "@cf/microsoft/phi-2",
    "microsoft/DialoGPT-medium",
    "microsoft/Phi-3-medium-4k-instruct",
    "microsoft/Phi-3-mini-4k-instruct",
    "microsoft/Phi-3.5-mini-instruct",
    "microsoft/WizardLM-2-8x22B",

    # Yi Models
    "01-ai/Yi-1.5-34B-Chat",
    "01-ai/Yi-34B-Chat",
]

# Create mapping from simplified model names to original model names
model_mapping = {}
simplified_models = []

for original_model in original_models:
    simplified_name = original_model.split('/')[-1]
    if simplified_name in model_mapping:
        # Conflict detected: keep the first occurrence and skip the duplicate
        print(f"Conflict detected for model name '{simplified_name}'. Excluding '{original_model}' from available models.")
        continue
    model_mapping[simplified_name] = original_model
    simplified_models.append(simplified_name)

def generate(
    model,
    messages,
    temperature=0.7,
    top_p=1.0,
    n=1,
    stream=False,
    stop=None,
    max_tokens=None,
    presence_penalty=0.0,
    frequency_penalty=0.0,
    logit_bias=None,
    user=None,
    timeout=30,
):
    """
    Generates a chat completion using the provided model and messages.
    """
    # Use the simplified model names
    models = simplified_models

    if model not in models:
        raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(models)}")

    # Map simplified model name to original model name
    original_model = model_mapping[model]

    api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"

    headers = {
        "authority": "chat.typegpt.net",
        "accept": "application/json, text/event-stream",
        "accept-language": "en-US,en;q=0.9",
        "content-type": "application/json",
        "origin": "https://chat.typegpt.net",
        "referer": "https://chat.typegpt.net/",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
    }

    # Payload
    payload = {
        "messages": messages,
        "stream": stream,
        "model": original_model,
        "temperature": temperature,
        "presence_penalty": presence_penalty,
        "frequency_penalty": frequency_penalty,
        "top_p": top_p,
    }

    # Only include max_tokens if it's not None
    if max_tokens is not None:
        payload["max_tokens"] = max_tokens

    # Only include 'stop' if it's not None
    if stop is not None:
        payload["stop"] = stop

    # Check if logit_bias is provided
    if logit_bias is not None:
        payload["logit_bias"] = logit_bias

    # Include 'user' if provided
    if user is not None:
        payload["user"] = user

    # Start the request
    session = requests.Session()
    response = session.post(
        api_endpoint, headers=headers, json=payload, stream=stream, timeout=timeout
    )

    if not response.ok:
        raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}")

    def stream_response():
        for line in response.iter_lines():
            if line:
                line = line.decode("utf-8")
                if line.startswith("data: "):
                    line = line[6:]  # Remove "data: " prefix
                    if line.strip() == "[DONE]":
                        break
                    try:
                        data = json.loads(line)
                        yield data
                    except json.JSONDecodeError:
                        continue

    if stream:
        return stream_response()
    else:
        return response.json()

if __name__ == "__main__":
    # Example usage
    # model = "claude-3-5-sonnet-20240620"
    # model = "qwen1.5-0.5b-chat"
    # model = "llama-2-7b-chat-fp16"
    model = "gpt-3.5-turbo"
    messages = [
        {"role": "system", "content": "Be Detailed"},
        {"role": "user", "content": "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate."}
    ]

    # try:
    #     # For non-streamed response
    #     response = generate(
    #         model=model,
    #         messages=messages,
    #         temperature=0.5,
    #         max_tokens=4000,
    #         stream=False  # Change to True for streaming
    #     )
    #     if 'choices' in response:
    #         reply = response['choices'][0]['message']['content']
    #         print(reply)
    #     else:
    #         print("No response received.")
    # except Exception as e:
    #     print(e)

    try:
        # For streamed response
        response = generate(
            model=model,
            messages=messages,
            temperature=0.5,
            max_tokens=4000,
            stream=True,  # Change to False for non-streamed response
        )
        for data in response:
            if 'choices' in data:
                # .get() avoids a KeyError on chunks whose delta carries no 'content' key
                reply = data['choices'][0]['delta'].get('content', '')
                print(reply, end="", flush=True)
            else:
                print("No response received.")
    except Exception as e:
        print(e)
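A note on the mapping built above: a simplified name is just the last `/`-separated segment of the upstream model id, and on a collision the first occurrence wins. A small sketch of the same rule in isolation (toy inputs, not the real list):

```python
# First-occurrence-wins simplification, as in typegpt_api.py; setdefault stands in
# for the explicit "print conflict and continue" handling of the original loop.
names = ["meta-llama/Llama-3.1-8B-Instruct", "@cf/qwen/qwen1.5-7b-chat-awq", "gpt-3.5-turbo"]
mapping = {}
for n in names:
    short = n.split('/')[-1]
    mapping.setdefault(short, n)  # later duplicates of `short` are skipped
print(mapping["Llama-3.1-8B-Instruct"])  # -> meta-llama/Llama-3.1-8B-Instruct
print(mapping["gpt-3.5-turbo"])          # -> gpt-3.5-turbo
```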
usage_inference.py
ADDED
@@ -0,0 +1,158 @@
import requests
import json
import time
from typing import Dict, Any

class APITester:
    def __init__(self, base_url: str = "http://localhost:8000"):
        self.base_url = base_url
        self.session = requests.Session()

    def test_health_check(self) -> None:
        """Test the health check endpoint."""
        print("\n=== Testing Health Check Endpoint ===")
        try:
            response = self.session.get(f"{self.base_url}/health_check")
            print(f"Status Code: {response.status_code}")
            print(f"Response: {response.json()}")
            assert response.status_code == 200
            print("✅ Health check test passed!")
        except Exception as e:
            print(f"❌ Health check test failed: {str(e)}")

    def test_models(self) -> None:
        """Test the models endpoint."""
        print("\n=== Testing Models Endpoint ===")
        try:
            response = self.session.get(f"{self.base_url}/models")
            print(f"Status Code: {response.status_code}")
            data = response.json()
            print(f"Number of models available: {len(data['data'])}")
            print("Sample models:")
            for model in data['data'][:5]:  # Show first 5 models
                print(f"- {model['id']}")
            assert response.status_code == 200
            print("✅ Models endpoint test passed!")
        except Exception as e:
            print(f"❌ Models endpoint test failed: {str(e)}")

    def test_chat_completions_non_streaming(self) -> None:
        """Test the chat completions endpoint without streaming."""
        print("\n=== Testing Chat Completions Endpoint (Non-Streaming) ===")
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Tell me a short joke about programming."}
            ],
            "temperature": 0.7,
            "max_tokens": 150,
            "stream": False
        }

        try:
            response = self.session.post(
                f"{self.base_url}/chat/completions",
                json=payload
            )
            print(f"Status Code: {response.status_code}")
            if response.status_code == 200:
                data = response.json()
                print("Response content:")
                print(data['choices'][0]['message']['content'])
            assert response.status_code == 200
            print("✅ Chat completions (non-streaming) test passed!")
        except Exception as e:
            print(f"❌ Chat completions (non-streaming) test failed: {str(e)}")

    def test_chat_completions_streaming(self) -> None:
        """Test the chat completions endpoint with streaming."""
        print("\n=== Testing Chat Completions Endpoint (Streaming) ===")
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": "Write 5 lines about India"}
            ],
            "temperature": 0.7,
            "max_tokens": 150,
            "stream": True
        }

        try:
            with self.session.post(
                f"{self.base_url}/chat/completions",
                json=payload,
                stream=True,
                headers={"Accept": "text/event-stream"}
            ) as response:
                print(f"Status Code: {response.status_code}")
                print("Streaming response:")

                for chunk in response.iter_lines():
                    if chunk:
                        chunk = chunk.decode('utf-8')
                        if chunk.startswith('data: '):
                            chunk = chunk[6:]  # Remove 'data: ' prefix
                            if chunk.strip() == '[DONE]':
                                break
                            try:
                                data = json.loads(chunk)
                                if 'choices' in data and len(data['choices']) > 0:
                                    if 'delta' in data['choices'][0] and 'content' in data['choices'][0]['delta']:
                                        content = data['choices'][0]['delta']['content']
                                        print(content, end='', flush=True)
                                        time.sleep(0.1)  # Add a small delay to simulate real-time streaming
                            except json.JSONDecodeError:
                                continue

            print("\n✅ Chat completions (streaming) test passed!")
        except Exception as e:
            print(f"❌ Chat completions (streaming) test failed: {str(e)}")

    def test_developer_info(self) -> None:
        """Test the developer info endpoint."""
        print("\n=== Testing Developer Info Endpoint ===")
        try:
            response = self.session.get(f"{self.base_url}/developer_info")
            print(f"Status Code: {response.status_code}")
            print("Developer Info:")
            print(json.dumps(response.json(), indent=2))
            assert response.status_code == 200
            print("✅ Developer info test passed!")
        except Exception as e:
            print(f"❌ Developer info test failed: {str(e)}")

    def run_all_tests(self) -> None:
        """Run all tests sequentially."""
        tests = [
            self.test_health_check,
            self.test_models,
            self.test_chat_completions_non_streaming,
            self.test_chat_completions_streaming,
            self.test_developer_info
        ]

        print("🚀 Starting API Tests...")
        start_time = time.time()

        for test in tests:
            test()

        end_time = time.time()
        duration = end_time - start_time

        print("\n============================")
        print(f"🎉 All tests completed in {duration:.2f} seconds")
        print("============================")

def main():
    # Initialize tester with your API's base URL
    tester = APITester("http://localhost:8000")

    # Run all tests
    tester.run_all_tests()

if __name__ == "__main__":
    main()
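To exercise these tests end to end, the server from fastapi_app.py must already be running (by default on localhost:8000). A single test can also be run in isolation, a minimal sketch:

```python
# Run one test against a locally running server; adjust the base URL if needed.
from usage_inference import APITester

tester = APITester("http://localhost:8000")
tester.test_health_check()  # or tester.run_all_tests()
```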