API-Handler committed on
Commit 501c69f · verified · 1 Parent(s): 175bc11

Upload 10 files

Files changed (10)
  1. Dockerfile +20 -0
  2. TYPEGPT/typegpt.py +409 -0
  3. TYPEGPT/typegpt_normal.py +231 -0
  4. api_info.py +176 -0
  5. fastapi_app.py +132 -0
  6. flask_app.py +131 -0
  7. query.md +384 -0
  8. requirements.txt +8 -0
  9. typegpt_api.py +254 -0
  10. usage_inference.py +158 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
+ # Use an official Python runtime as the base image
+ FROM python:3.9-slim
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Copy the requirements file into the container
+ COPY requirements.txt .
+
+ # Install the required packages
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the rest of the application code into the container
+ COPY . .
+
+ # Expose the port that FastAPI will run on
+ EXPOSE 7860
+
+ # Command to run the FastAPI application (the app object lives in fastapi_app.py; there is no main.py in this upload)
+ CMD ["uvicorn", "fastapi_app:app", "--host", "0.0.0.0", "--port", "7860"]
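
For a quick smoke test once the image is built and running — a minimal sketch, not part of the commit, assuming the container was started with the host port published (e.g. `docker run -p 7860:7860 <image>`) and that `fastapi_app.py` below provides the routes:

```python
# Hypothetical smoke test against a locally running container.
import requests

BASE_URL = "http://localhost:7860"  # assumption: host port 7860 is mapped

# /chat/completions is defined in fastapi_app.py below.
payload = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Say hello."}],
    "stream": False,
}
resp = requests.post(f"{BASE_URL}/chat/completions", json=payload, timeout=30)
print(resp.status_code, resp.json())
```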
TYPEGPT/typegpt.py ADDED
@@ -0,0 +1,409 @@
+ from __future__ import annotations  # allow `X | Y` annotations on the Python 3.9 base image
+
+ import requests
+ import json
+ from typing import Any, Dict, Generator
+
+ from webscout.AIutel import Optimizers
+ from webscout.AIutel import Conversation
+ from webscout.AIutel import AwesomePrompts
+ from webscout.AIbase import Provider
+ from webscout import exceptions
+
+ class TypeGPT(Provider):
+     """
+     A class to interact with the TypeGPT.net API. Improved to match webscout standards.
+     """
+     url = "https://chat.typegpt.net"
+     working = True
+     supports_message_history = True
+
+     models = [
+         # OpenAI Models
+         "gpt-3.5-turbo",
+         "gpt-3.5-turbo-202201",
+         "gpt-4o",
+         "gpt-4o-2024-05-13",
+         "o1-preview",
+
+         # Claude Models
+         "claude",
+         "claude-3-5-sonnet",
+         "claude-sonnet-3.5",
+         "claude-3-5-sonnet-20240620",
+
+         # Meta/LLaMA Models
+         "@cf/meta/llama-2-7b-chat-fp16",
+         "@cf/meta/llama-2-7b-chat-int8",
+         "@cf/meta/llama-3-8b-instruct",
+         "@cf/meta/llama-3.1-8b-instruct",
+         "@cf/meta-llama/llama-2-7b-chat-hf-lora",
+         "llama-3.1-405b",
+         "llama-3.1-70b",
+         "llama-3.1-8b",
+         "meta-llama/Llama-2-7b-chat-hf",
+         "meta-llama/Llama-3.1-70B-Instruct",
+         "meta-llama/Llama-3.1-8B-Instruct",
+         "meta-llama/Llama-3.2-11B-Vision-Instruct",
+         "meta-llama/Llama-3.2-1B-Instruct",
+         "meta-llama/Llama-3.2-3B-Instruct",
+         "meta-llama/Llama-3.2-90B-Vision-Instruct",
+         "meta-llama/Llama-Guard-3-8B",
+         "meta-llama/Meta-Llama-3-70B-Instruct",
+         "meta-llama/Meta-Llama-3-8B-Instruct",
+         "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+         "meta-llama/Meta-Llama-3.1-8B-Instruct",
+         "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+
+         # Mistral Models
+         "mistral",
+         "mistral-large",
+         "@cf/mistral/mistral-7b-instruct-v0.1",
+         "@cf/mistral/mistral-7b-instruct-v0.2-lora",
+         "@hf/mistralai/mistral-7b-instruct-v0.2",
+         "mistralai/Mistral-7B-Instruct-v0.2",
+         "mistralai/Mistral-7B-Instruct-v0.3",
+         "mistralai/Mixtral-8x22B-Instruct-v0.1",
+         "mistralai/Mixtral-8x7B-Instruct-v0.1",
+
+         # Qwen Models
+         "@cf/qwen/qwen1.5-0.5b-chat",
+         "@cf/qwen/qwen1.5-1.8b-chat",
+         "@cf/qwen/qwen1.5-7b-chat-awq",
+         "@cf/qwen/qwen1.5-14b-chat-awq",
+         "Qwen/Qwen2.5-3B-Instruct",
+         "Qwen/Qwen2.5-72B-Instruct",
+         "Qwen/Qwen2.5-Coder-32B-Instruct",
+
+         # Google/Gemini Models
+         "@cf/google/gemma-2b-it-lora",
+         "@cf/google/gemma-7b-it-lora",
+         "@hf/google/gemma-7b-it",
+         "google/gemma-1.1-2b-it",
+         "google/gemma-1.1-7b-it",
+         "gemini-pro",
+         "gemini-1.5-pro",
+         "gemini-1.5-pro-latest",
+         "gemini-1.5-flash",
+
+         # Cohere Models
+         "c4ai-aya-23-35b",
+         "c4ai-aya-23-8b",
+         "command",
+         "command-light",
+         "command-light-nightly",
+         "command-nightly",
+         "command-r",
+         "command-r-08-2024",
+         "command-r-plus",
+         "command-r-plus-08-2024",
+         "rerank-english-v2.0",
+         "rerank-english-v3.0",
+         "rerank-multilingual-v2.0",
+         "rerank-multilingual-v3.0",
+
+         # Microsoft Models
+         "@cf/microsoft/phi-2",
+         "microsoft/DialoGPT-medium",
+         "microsoft/Phi-3-medium-4k-instruct",
+         "microsoft/Phi-3-mini-4k-instruct",
+         "microsoft/Phi-3.5-mini-instruct",
+         "microsoft/WizardLM-2-8x22B",
+
+         # Yi Models
+         "01-ai/Yi-1.5-34B-Chat",
+         "01-ai/Yi-34B-Chat",
+
+         # Specialized Models and Tools
+         "@cf/deepseek-ai/deepseek-math-7b-base",
+         "@cf/deepseek-ai/deepseek-math-7b-instruct",
+         "@cf/defog/sqlcoder-7b-2",
+         "@cf/openchat/openchat-3.5-0106",
+         "@cf/thebloke/discolm-german-7b-v1-awq",
+         "@cf/tiiuae/falcon-7b-instruct",
+         "@cf/tinyllama/tinyllama-1.1b-chat-v1.0",
+         "@hf/nexusflow/starling-lm-7b-beta",
+         "@hf/nousresearch/hermes-2-pro-mistral-7b",
+         "@hf/thebloke/deepseek-coder-6.7b-base-awq",
+         "@hf/thebloke/deepseek-coder-6.7b-instruct-awq",
+         "@hf/thebloke/llama-2-13b-chat-awq",
+         "@hf/thebloke/llamaguard-7b-awq",
+         "@hf/thebloke/neural-chat-7b-v3-1-awq",
+         "@hf/thebloke/openhermes-2.5-mistral-7b-awq",
+         "@hf/thebloke/zephyr-7b-beta-awq",
+         "AndroidDeveloper",
+         "AngularJSAgent",
+         "AzureAgent",
+         "BitbucketAgent",
+         "DigitalOceanAgent",
+         "DockerAgent",
+         "ElectronAgent",
+         "ErlangAgent",
+         "FastAPIAgent",
+         "FirebaseAgent",
+         "FlaskAgent",
+         "FlutterAgent",
+         "GitAgent",
+         "GitlabAgent",
+         "GoAgent",
+         "GodotAgent",
+         "GoogleCloudAgent",
+         "HTMLAgent",
+         "HerokuAgent",
+         "ImageGeneration",
+         "JavaAgent",
+         "JavaScriptAgent",
+         "MongoDBAgent",
+         "Next.jsAgent",
+         "PyTorchAgent",
+         "PythonAgent",
+         "ReactAgent",
+         "RepoMap",
+         "SwiftDeveloper",
+         "XcodeAgent",
+         "YoutubeAgent",
+         "blackboxai",
+         "blackboxai-pro",
+         "builderAgent",
+         "dify",
+         "flux",
+         "openchat/openchat-3.6-8b",
+         "rtist",
+         "searchgpt",
+         "sur",
+         "sur-mistral",
+         "unity"
+     ]
+
+     def __init__(
+         self,
+         is_conversation: bool = True,
+         max_tokens: int = 4000,  # Set a reasonable default
+         timeout: int = 30,
+         intro: str = None,
+         filepath: str = None,
+         update_file: bool = True,
+         proxies: dict = {},
+         history_offset: int = 10250,
+         act: str = None,
+         model: str = "claude-3-5-sonnet-20240620",
+         system_prompt: str = "You are a helpful assistant.",
+         temperature: float = 0.5,
+         presence_penalty: int = 0,
+         frequency_penalty: int = 0,
+         top_p: float = 1,
+     ):
+         """Initializes the TypeGPT API client."""
+         if model not in self.models:
+             raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(self.models)}")
+
+         self.session = requests.Session()
+         self.is_conversation = is_conversation
+         self.max_tokens_to_sample = max_tokens
+         self.api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"
+         self.timeout = timeout
+         self.last_response = {}
+         self.last_response_status_code = None  # Added line for status code
+         self.model = model
+         self.system_prompt = system_prompt
+         self.temperature = temperature
+         self.presence_penalty = presence_penalty
+         self.frequency_penalty = frequency_penalty
+         self.top_p = top_p
+
+         self.headers = {
+             "authority": "chat.typegpt.net",
+             "accept": "application/json, text/event-stream",
+             "accept-language": "en-US,en;q=0.9",
+             "content-type": "application/json",
+             "origin": "https://chat.typegpt.net",
+             "referer": "https://chat.typegpt.net/",
+             "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+         }
+
+         self.__available_optimizers = (
+             method
+             for method in dir(Optimizers)
+             if callable(getattr(Optimizers, method)) and not method.startswith("__")
+         )
+         Conversation.intro = (
+             AwesomePrompts().get_act(
+                 act, raise_not_found=True, default=None, case_insensitive=True
+             )
+             if act
+             else intro or Conversation.intro
+         )
+         self.conversation = Conversation(
+             is_conversation, self.max_tokens_to_sample, filepath, update_file
+         )
+         self.conversation.history_offset = history_offset
+         self.session.proxies = proxies
+
+     def ask(
+         self,
+         prompt: str,
+         stream: bool = False,
+         raw: bool = False,
+         optimizer: str = None,
+         conversationally: bool = False,
+     ) -> Dict[str, Any] | Generator:
+         """Sends a prompt to the TypeGPT.net API and returns the response."""
+         conversation_prompt = self.conversation.gen_complete_prompt(prompt)
+         if optimizer:
+             if optimizer in self.__available_optimizers:
+                 conversation_prompt = getattr(Optimizers, optimizer)(
+                     conversation_prompt if conversationally else prompt
+                 )
+             else:
+                 raise exceptions.FailedToGenerateResponseError(
+                     f"Optimizer is not one of {self.__available_optimizers}"
+                 )
+
+         payload = {
+             "messages": [
+                 {"role": "system", "content": self.system_prompt},
+                 {"role": "user", "content": conversation_prompt}
+             ],
+             "stream": stream,
+             "model": self.model,
+             "temperature": self.temperature,
+             "presence_penalty": self.presence_penalty,
+             "frequency_penalty": self.frequency_penalty,
+             "top_p": self.top_p,
+             "max_tokens": self.max_tokens_to_sample,
+         }
+
+         def for_stream():
+             response = self.session.post(
+                 self.api_endpoint, headers=self.headers, json=payload, stream=True, timeout=self.timeout
+             )
+             self.last_response_status_code = response.status_code  # Capture status code
+             if not response.ok:
+                 raise exceptions.FailedToGenerateResponseError(
+                     f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
+                 )
+             message_load = ""
+             for line in response.iter_lines():
+                 if line:
+                     line = line.decode("utf-8")
+                     if line.startswith("data: "):
+                         line = line[6:]  # Remove "data: " prefix
+                         # Skip [DONE] message
+                         if line.strip() == "[DONE]":
+                             break
+                         try:
+                             data = json.loads(line)
+                             # Extract and yield only new content
+                             if 'choices' in data and len(data['choices']) > 0:
+                                 delta = data['choices'][0].get('delta', {})
+                                 if 'content' in delta:
+                                     new_content = delta['content']
+                                     message_load += new_content
+                                     # Yield only the new content
+                                     yield dict(text=new_content) if not raw else new_content
+                                     self.last_response = dict(text=message_load)
+                         except json.JSONDecodeError:
+                             continue
+             self.conversation.update_chat_history(prompt, self.get_message(self.last_response))
+
+         def for_non_stream():
+             response = self.session.post(
+                 self.api_endpoint, headers=self.headers, json=payload, timeout=self.timeout
+             )
+             self.last_response_status_code = response.status_code  # Capture status code
+             if not response.ok:
+                 raise exceptions.FailedToGenerateResponseError(
+                     f"Request failed - {response.status_code}: {response.text}"
+                 )
+             self.last_response = response.json()
+             self.conversation.update_chat_history(prompt, self.get_message(self.last_response))
+             return self.last_response
+
+         return for_stream() if stream else for_non_stream()
+
+     def chat(
+         self,
+         prompt: str,
+         stream: bool = False,
+         optimizer: str = None,
+         conversationally: bool = False,
+     ) -> str | Generator[str, None, None]:
+         """Generate response `str` or stream."""
+         # A bare `yield` in this method body would make every call return a
+         # generator, so the streamed path lives in an inner function instead.
+         def for_stream():
+             for chunk in self.ask(
+                 prompt, stream=True, optimizer=optimizer, conversationally=conversationally
+             ):
+                 yield self.get_message(chunk)  # Extract text from streamed chunks
+
+         if stream:
+             return for_stream()
+         return self.get_message(
+             self.ask(prompt, stream=False, optimizer=optimizer, conversationally=conversationally)
+         )
+
+     def get_message(self, response: Dict[str, Any]) -> str:
+         """Retrieves message text from a raw string, a streamed chunk, or a full API payload."""
+         if isinstance(response, str):  # Handle raw responses
+             return response
+         elif isinstance(response, dict):
+             if "text" in response:  # Streamed chunks are wrapped as {"text": ...}
+                 return response["text"]
+             try:  # Non-streamed responses are full OpenAI-style payloads
+                 return response["choices"][0]["message"]["content"]
+             except (KeyError, IndexError, TypeError):
+                 return ""
+         else:
+             raise TypeError("Invalid response type. Expected str or dict.")
+
+ if __name__ == "__main__":
+     from rich import print
+     from rich.progress import Progress, BarColumn, TextColumn, TimeRemainingColumn, SpinnerColumn
+     from rich.console import Console
+     from rich.table import Table
+     import concurrent.futures
+
+     def make_api_call(thread_number, results):
+         ai = TypeGPT()
+         try:
+             ai.ask("Test message", stream=False)
+             status_code = ai.last_response_status_code
+             results[thread_number] = status_code
+         except Exception as e:
+             results[thread_number] = str(e)
+
+     results = {}
+     total_requests = 100
+
+     console = Console()
+
+     print("[bold magenta]Starting API Load Test with 100 simultaneous requests...[/bold magenta]\n")
+
+     with Progress(
+         SpinnerColumn(),
+         "[progress.description]{task.description}",
+         BarColumn(bar_width=None),
+         "[progress.percentage]{task.percentage:>3.0f}%",
+         TimeRemainingColumn(),
+         console=console,
+     ) as progress:
+         task = progress.add_task("[cyan]Sending API Requests...", total=total_requests)
+         with concurrent.futures.ThreadPoolExecutor(max_workers=total_requests) as executor:
+             futures = {
+                 executor.submit(make_api_call, i, results): i for i in range(total_requests)
+             }
+             for future in concurrent.futures.as_completed(futures):
+                 progress.update(task, advance=1)
+
+     # Process and display the results
+     successful_calls = sum(1 for status in results.values() if status == 200)
+     failed_calls = total_requests - successful_calls
+
+     print("\n[bold magenta]API Load Test Results:[/bold magenta]\n")
+     print(f"[bold green]Successful calls: {successful_calls}")
+     print(f"[bold red]Failed calls: {failed_calls}\n")
+
+     # Create a table to display detailed results
+     table = Table(show_header=True, header_style="bold blue")
+     table.add_column("Thread Number", justify="right", style="dim")
+     table.add_column("Status", style="bold")
+
+     for thread_number, status in results.items():
+         if status == 200:
+             table.add_row(f"{thread_number}", "[green]Success[/green]")
+         else:
+             table.add_row(f"{thread_number}", f"[red]Failed ({status})[/red]")
+
+     print(table)
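
A minimal usage sketch for the provider above (not part of the commit; it assumes the `webscout` dependency is installed, that `TYPEGPT` is importable as a package, and that TypeGPT.net is reachable):

```python
# Sketch: non-streamed and streamed chat with the TypeGPT provider above.
from TYPEGPT.typegpt import TypeGPT

ai = TypeGPT(model="gpt-3.5-turbo", timeout=30)

# Non-streamed: chat() returns the reply text.
print(ai.chat("Name three uses of Python.", stream=False))

# Streamed: chat() yields text chunks as they arrive.
for chunk in ai.chat("Now name three more.", stream=True):
    print(chunk, end="", flush=True)
```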
TYPEGPT/typegpt_normal.py ADDED
@@ -0,0 +1,231 @@
+ import requests
+ import json
+
+ # List of available models
+ models = [
+     # OpenAI Models
+     "gpt-3.5-turbo",
+     "gpt-3.5-turbo-202201",
+     "gpt-4o",
+     "gpt-4o-2024-05-13",
+     "o1-preview",
+
+     # Claude Models
+     "claude",
+     "claude-3-5-sonnet",
+     "claude-sonnet-3.5",
+     "claude-3-5-sonnet-20240620",
+
+     # Meta/LLaMA Models
+     "@cf/meta/llama-2-7b-chat-fp16",
+     "@cf/meta/llama-2-7b-chat-int8",
+     "@cf/meta/llama-3-8b-instruct",
+     "@cf/meta/llama-3.1-8b-instruct",
+     "@cf/meta-llama/llama-2-7b-chat-hf-lora",
+     "llama-3.1-405b",
+     "llama-3.1-70b",
+     "llama-3.1-8b",
+     "meta-llama/Llama-2-7b-chat-hf",
+     "meta-llama/Llama-3.1-70B-Instruct",
+     "meta-llama/Llama-3.1-8B-Instruct",
+     "meta-llama/Llama-3.2-11B-Vision-Instruct",
+     "meta-llama/Llama-3.2-1B-Instruct",
+     "meta-llama/Llama-3.2-3B-Instruct",
+     "meta-llama/Llama-3.2-90B-Vision-Instruct",
+     "meta-llama/Llama-Guard-3-8B",
+     "meta-llama/Meta-Llama-3-70B-Instruct",
+     "meta-llama/Meta-Llama-3-8B-Instruct",
+     "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+     "meta-llama/Meta-Llama-3.1-8B-Instruct",
+     "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+
+     # Mistral Models
+     "mistral",
+     "mistral-large",
+     "@cf/mistral/mistral-7b-instruct-v0.1",
+     "@cf/mistral/mistral-7b-instruct-v0.2-lora",
+     "@hf/mistralai/mistral-7b-instruct-v0.2",
+     "mistralai/Mistral-7B-Instruct-v0.2",
+     "mistralai/Mistral-7B-Instruct-v0.3",
+     "mistralai/Mixtral-8x22B-Instruct-v0.1",
+     "mistralai/Mixtral-8x7B-Instruct-v0.1",
+
+     # Qwen Models
+     "@cf/qwen/qwen1.5-0.5b-chat",
+     "@cf/qwen/qwen1.5-1.8b-chat",
+     "@cf/qwen/qwen1.5-7b-chat-awq",
+     "@cf/qwen/qwen1.5-14b-chat-awq",
+     "Qwen/Qwen2.5-3B-Instruct",
+     "Qwen/Qwen2.5-72B-Instruct",
+     "Qwen/Qwen2.5-Coder-32B-Instruct",
+
+     # Google/Gemini Models
+     "@cf/google/gemma-2b-it-lora",
+     "@cf/google/gemma-7b-it-lora",
+     "@hf/google/gemma-7b-it",
+     "google/gemma-1.1-2b-it",
+     "google/gemma-1.1-7b-it",
+     "gemini-pro",
+     "gemini-1.5-pro",
+     "gemini-1.5-pro-latest",
+     "gemini-1.5-flash",
+
+     # Cohere Models
+     "c4ai-aya-23-35b",
+     "c4ai-aya-23-8b",
+     "command",
+     "command-light",
+     "command-light-nightly",
+     "command-nightly",
+     "command-r",
+     "command-r-08-2024",
+     "command-r-plus",
+     "command-r-plus-08-2024",
+     "rerank-english-v2.0",
+     "rerank-english-v3.0",
+     "rerank-multilingual-v2.0",
+     "rerank-multilingual-v3.0",
+
+     # Microsoft Models
+     "@cf/microsoft/phi-2",
+     "microsoft/DialoGPT-medium",
+     "microsoft/Phi-3-medium-4k-instruct",
+     "microsoft/Phi-3-mini-4k-instruct",
+     "microsoft/Phi-3.5-mini-instruct",
+     "microsoft/WizardLM-2-8x22B",
+
+     # Yi Models
+     "01-ai/Yi-1.5-34B-Chat",
+     "01-ai/Yi-34B-Chat",
+
+     # Specialized Models and Tools
+     "@cf/deepseek-ai/deepseek-math-7b-base",
+     "@cf/deepseek-ai/deepseek-math-7b-instruct",
+     "@cf/defog/sqlcoder-7b-2",
+     "@cf/openchat/openchat-3.5-0106",
+     "@cf/thebloke/discolm-german-7b-v1-awq",
+     "@cf/tiiuae/falcon-7b-instruct",
+     "@cf/tinyllama/tinyllama-1.1b-chat-v1.0",
+     "@hf/nexusflow/starling-lm-7b-beta",
+     "@hf/nousresearch/hermes-2-pro-mistral-7b",
+     "@hf/thebloke/deepseek-coder-6.7b-base-awq",
+     "@hf/thebloke/deepseek-coder-6.7b-instruct-awq",
+     "@hf/thebloke/llama-2-13b-chat-awq",
+     "@hf/thebloke/llamaguard-7b-awq",
+     "@hf/thebloke/neural-chat-7b-v3-1-awq",
+     "@hf/thebloke/openhermes-2.5-mistral-7b-awq",
+     "@hf/thebloke/zephyr-7b-beta-awq",
+     "AndroidDeveloper",
+     "AngularJSAgent",
+     "AzureAgent",
+     "BitbucketAgent",
+     "DigitalOceanAgent",
+     "DockerAgent",
+     "ElectronAgent",
+     "ErlangAgent",
+     "FastAPIAgent",
+     "FirebaseAgent",
+     "FlaskAgent",
+     "FlutterAgent",
+     "GitAgent",
+     "GitlabAgent",
+     "GoAgent",
+     "GodotAgent",
+     "GoogleCloudAgent",
+     "HTMLAgent",
+     "HerokuAgent",
+     "ImageGeneration",
+     "JavaAgent",
+     "JavaScriptAgent",
+     "MongoDBAgent",
+     "Next.jsAgent",
+     "PyTorchAgent",
+     "PythonAgent",
+     "ReactAgent",
+     "RepoMap",
+     "SwiftDeveloper",
+     "XcodeAgent",
+     "YoutubeAgent",
+     "blackboxai",
+     "blackboxai-pro",
+     "builderAgent",
+     "dify",
+     "flux",
+     "openchat/openchat-3.6-8b",
+     "rtist",
+     "searchgpt",
+     "sur",
+     "sur-mistral",
+     "unity"
+ ]
+
+ # Parameters
+ is_conversation = True
+ max_tokens = 4000  # Set a reasonable default
+ timeout = 30
+ model = "claude-3-5-sonnet-20240620"
+ system_prompt = "You are a helpful assistant."
+ temperature = 0.5
+ presence_penalty = 0
+ frequency_penalty = 0
+ top_p = 1
+
+ if model not in models:
+     raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(models)}")
+
+ session = requests.Session()
+ api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"
+
+ headers = {
+     "authority": "chat.typegpt.net",
+     "accept": "application/json, text/event-stream",
+     "accept-language": "en-US,en;q=0.9",
+     "content-type": "application/json",
+     "origin": "https://chat.typegpt.net",
+     "referer": "https://chat.typegpt.net/",
+     "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+ }
+
+ # Prompt to send
+ prompt = "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate"
+
+ # Payload
+ payload = {
+     "messages": [
+         {"role": "system", "content": system_prompt},
+         {"role": "user", "content": prompt}
+     ],
+     "stream": True,
+     "model": model,
+     "temperature": temperature,
+     "presence_penalty": presence_penalty,
+     "frequency_penalty": frequency_penalty,
+     "top_p": top_p,
+     "max_tokens": max_tokens,
+ }
+
+ # Make the API request
+ response = session.post(
+     api_endpoint, headers=headers, json=payload, stream=True, timeout=timeout
+ )
+
+ if not response.ok:
+     raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}")
+
+ # Process the streamed response
+ for line in response.iter_lines():
+     if line:
+         line = line.decode("utf-8")
+         if line.startswith("data: "):
+             line = line[6:]  # Remove "data: " prefix
+             if line.strip() == "[DONE]":
+                 break
+             try:
+                 data = json.loads(line)
+                 if 'choices' in data and len(data['choices']) > 0:
+                     delta = data['choices'][0].get('delta', {})
+                     if 'content' in delta:
+                         new_content = delta['content']
+                         print(new_content, end="", flush=True)
+             except json.JSONDecodeError:
+                 continue
api_info.py ADDED
@@ -0,0 +1,176 @@
+ developer_info = {
+     'developer': 'Devs Do Code',
+     'contact': {
+         'Telegram': 'https://t.me/devsdocode',
+         'YouTube Channel': 'https://www.youtube.com/@DevsDoCode',
+         'LinkedIn': 'https://www.linkedin.com/in/developer-sreejan/',
+         'Discord Server': 'https://discord.gg/ehwfVtsAts',
+         'Instagram': {
+             'Personal': 'https://www.instagram.com/sree.shades_/',
+             'Channel': 'https://www.instagram.com/devsdocode_/'
+         }
+     }
+ }
+
+ endpoint = {
+     'route': "/generate",
+     'params': {
+         "query": "[SEARCH QUERY]"
+     },
+     'optional_params': {
+         "model": "[]",
+         "temperature": "[]",
+         "system_prompt": "[]"
+     },
+     'url_demo': '/generate?query=Who is Devs Do Code&model=command-r-plus&temperature=0.7&system_prompt=Your Owner is "Devs Do Code"'
+ }
+
+ model_providers = {
+     "OpenAI": {
+         "models": [
+             "gpt-3.5-turbo",
+             "gpt-3.5-turbo-202201",
+             "gpt-4o",
+             "gpt-4o-2024-05-13",
+             "o1-preview"
+         ],
+         "description": "OpenAI's GPT language models"
+     },
+     "Anthropic": {
+         "models": [
+             "claude",
+             "claude-3-5-sonnet",
+             "claude-sonnet-3.5",
+             "claude-3-5-sonnet-20240620"
+         ],
+         "description": "Anthropic's Claude language models"
+     },
+     "Meta": {
+         "models": [
+             "llama-2-7b-chat",
+             "llama-2-7b-chat-int8",
+             "llama-3-8b-instruct",
+             "llama-3.1-8b-instruct",
+             "llama-3.1-405b",
+             "llama-3.1-70b",
+             "llama-3.1-8b",
+             "llama-3.2-11b-vision",
+             "llama-3.2-1b",
+             "llama-3.2-3b",
+             "llama-3.2-90b-vision",
+             "llama-guard-3-8b"
+         ],
+         "description": "Meta's LLaMA language models"
+     },
+     "Mistral": {
+         "models": [
+             "mistral",
+             "mistral-large",
+             "mistral-7b-instruct-v0.1",
+             "mistral-7b-instruct-v0.2",
+             "mistral-7b-instruct-v0.3",
+             "mixtral-8x22b",
+             "mixtral-8x7b"
+         ],
+         "description": "Mistral AI's language models"
+     },
+     "Qwen": {
+         "models": [
+             "qwen1.5-0.5b-chat",
+             "qwen1.5-1.8b-chat",
+             "qwen1.5-7b-chat",
+             "qwen1.5-14b-chat",
+             "qwen2.5-3b",
+             "qwen2.5-72b",
+             "qwen2.5-coder-32b"
+         ],
+         "description": "Qwen's language models"
+     },
+     "Google": {
+         "models": [
+             "gemma-2b",
+             "gemma-7b",
+             "gemini-pro",
+             "gemini-1.5-pro",
+             "gemini-1.5-pro-latest",
+             "gemini-1.5-flash"
+         ],
+         "description": "Google's Gemini and Gemma models"
+     },
+     "Cohere": {
+         "models": [
+             "aya-23-35b",
+             "aya-23-8b",
+             "command",
+             "command-light",
+             "command-nightly",
+             "command-r",
+             "command-r-plus",
+             "rerank-english-v2.0",
+             "rerank-english-v3.0",
+             "rerank-multilingual-v2.0",
+             "rerank-multilingual-v3.0"
+         ],
+         "description": "Cohere's language models"
+     },
+     "Microsoft": {
+         "models": [
+             "phi-2",
+             "dialogpt-medium",
+             "phi-3-medium-4k",
+             "phi-3-mini-4k",
+             "phi-3.5-mini",
+             "wizardlm-2-8x22b"
+         ],
+         "description": "Microsoft's language models"
+     },
+     "Yi": {
+         "models": [
+             "yi-1.5-34b-chat",
+             "yi-34b-chat"
+         ],
+         "description": "01.AI's Yi language models"
+     }
+ }
+
+ error_message = {
+     'developer_contact': {
+         'Telegram': 'https://t.me/DevsDoCode',
+         'Instagram': 'https://www.instagram.com/sree.shades_/',
+         'Discord': 'https://discord.gg/ehwfVtsAts',
+         'LinkedIn': 'https://www.linkedin.com/in/developer-sreejan/',
+         'Twitter': 'https://twitter.com/Anand_Sreejan'
+     },
+     'error': 'Oops! Something went wrong. Please contact the developer Devs Do Code.'
+ }
+
+ default_info = """This API is developed and being maintained by Devs Do Code (Sreejan).
+
+ **About the Developer**
+
+ Sreejan, a high school student from Patna, Bihar, India, has emerged as a notable figure in the technology sector.
+ His creation of an API is a testament to his dedication and expertise. Despite his youth, Sreejan's contributions
+ to artificial intelligence and machine learning are significant. As an AI & ML Engineer, he specializes in Deep Learning,
+ Natural Language Processing (NLP), and Robotics, with proficiency in Python, Java, and Mobile App Development.
+ Beyond consuming technology, Sreejan is an active open-source contributor, notably to projects like Hugging Face.
+
+ He is also recognized for his role in community development, particularly through "Devs Do Code," a platform he
+ founded to provide quality coding resources, tutorials, and projects. His mission is to equip developers with the
+ necessary skills to thrive in the ever-evolving tech landscape. Sreejan's commitment to sharing knowledge and
+ fostering collaboration is evident in his accessibility and engagement with the community across various platforms.
+
+ Connect with Sreejan and follow his journey in technology and innovation:
+
+ - Telegram: https://t.me/devsdocode
+ - YouTube Channel: https://www.youtube.com/@DevsDoCode
+ - LinkedIn: https://www.linkedin.com/in/developer-sreejan/
+ - Discord Server: https://discord.gg/ehwfVtsAts
+ - Instagram
+   - Personal: https://www.instagram.com/sree.shades_/
+   - Channel: https://www.instagram.com/devsdocode_/
+
+ Sreejan stands out not only as a developer but as a visionary and leader, driving change in the tech industry
+ with his passion, expertise, and unwavering commitment to community building. He continues to shape the
+ future of technology, one line of code at a time.
+ """
+
fastapi_app.py ADDED
@@ -0,0 +1,132 @@
+ from fastapi import FastAPI, Request, Response
+ from fastapi.responses import JSONResponse, StreamingResponse
+ from fastapi.middleware.cors import CORSMiddleware
+ import uvicorn
+ import json
+
+ from typegpt_api import generate, model_mapping, simplified_models
+ from api_info import developer_info, model_providers
+
+ app = FastAPI()
+
+ # Set up CORS middleware if needed
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ @app.get("/health_check")
+ async def health_check():
+     return {"status": "OK"}
+
+ @app.get("/models")
+ async def get_models():
+     try:
+         response = {
+             "object": "list",
+             "data": []
+         }
+         for provider, info in model_providers.items():
+             for model in info["models"]:
+                 response["data"].append({
+                     "id": model,
+                     "object": "model",
+                     "provider": provider,
+                     "description": info["description"]
+                 })
+
+         return JSONResponse(content=response)
+     except Exception as e:
+         return JSONResponse(content={"error": str(e)}, status_code=500)
+
+ @app.post("/chat/completions")
+ async def chat_completions(request: Request):
+     # Receive the JSON payload
+     try:
+         body = await request.json()
+     except Exception as e:
+         return JSONResponse(content={"error": "Invalid JSON payload"}, status_code=400)
+
+     # Extract parameters
+     model = body.get("model")
+     messages = body.get("messages")
+     temperature = body.get("temperature", 0.7)
+     top_p = body.get("top_p", 1.0)
+     n = body.get("n", 1)
+     stream = body.get("stream", False)
+     stop = body.get("stop")
+     max_tokens = body.get("max_tokens")
+     presence_penalty = body.get("presence_penalty", 0.0)
+     frequency_penalty = body.get("frequency_penalty", 0.0)
+     logit_bias = body.get("logit_bias")
+     user = body.get("user")
+     timeout = 30  # or set based on your preference
+
+     # Validate required parameters
+     if not model:
+         return JSONResponse(content={"error": "The 'model' parameter is required."}, status_code=400)
+     if not messages:
+         return JSONResponse(content={"error": "The 'messages' parameter is required."}, status_code=400)
+
+     # Call the generate function
+     try:
+         if stream:
+             async def generate_stream():
+                 response = generate(
+                     model=model,
+                     messages=messages,
+                     temperature=temperature,
+                     top_p=top_p,
+                     n=n,
+                     stream=True,
+                     stop=stop,
+                     max_tokens=max_tokens,
+                     presence_penalty=presence_penalty,
+                     frequency_penalty=frequency_penalty,
+                     logit_bias=logit_bias,
+                     user=user,
+                     timeout=timeout,
+                 )
+
+                 for chunk in response:
+                     yield f"data: {json.dumps(chunk)}\n\n"
+                 yield "data: [DONE]\n\n"
+
+             return StreamingResponse(
+                 generate_stream(),
+                 media_type="text/event-stream",
+                 headers={
+                     "Cache-Control": "no-cache",
+                     "Connection": "keep-alive",
+                     "Transfer-Encoding": "chunked"
+                 }
+             )
+         else:
+             response = generate(
+                 model=model,
+                 messages=messages,
+                 temperature=temperature,
+                 top_p=top_p,
+                 n=n,
+                 stream=False,
+                 stop=stop,
+                 max_tokens=max_tokens,
+                 presence_penalty=presence_penalty,
+                 frequency_penalty=frequency_penalty,
+                 logit_bias=logit_bias,
+                 user=user,
+                 timeout=timeout,
+             )
+             return JSONResponse(content=response)
+     except Exception as e:
+         return JSONResponse(content={"error": str(e)}, status_code=500)
+
+ @app.get("/developer_info")
+ async def get_developer_info():
+     return JSONResponse(content=developer_info)
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=8000)
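
One caveat in the streaming branch above: `generate()` returns a synchronous `requests` generator, so iterating it inside `async def generate_stream()` blocks the event loop between chunks. A hedged alternative sketch, assuming Starlette's `iterate_in_threadpool` helper (bundled with the pinned FastAPI) is available:

```python
# Sketch: stream a synchronous generator without blocking the event loop.
import json
from starlette.concurrency import iterate_in_threadpool

async def generate_stream_threaded(sync_gen):
    # Each pull on the sync generator runs on a worker thread, so the
    # event loop stays free to serve other requests between chunks.
    async for chunk in iterate_in_threadpool(sync_gen):
        yield f"data: {json.dumps(chunk)}\n\n"
    yield "data: [DONE]\n\n"
```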
flask_app.py ADDED
@@ -0,0 +1,131 @@
+ from flask import Flask, request, Response, jsonify, stream_with_context
+ from flask_cors import CORS
+ import json
+
+ from typegpt_api import generate, model_mapping, simplified_models
+ from api_info import developer_info, model_providers
+
+ app = Flask(__name__)
+
+ # Set up CORS middleware if needed
+ CORS(app, resources={
+     r"/*": {
+         "origins": "*",
+         "allow_credentials": True,
+         "methods": ["*"],
+         "headers": ["*"]
+     }
+ })
+
+ @app.route("/health_check", methods=['GET'])
+ def health_check():
+     return jsonify({"status": "OK"})
+
+ @app.route("/models", methods=['GET'])
+ def get_models():
+     try:
+         response = {
+             "object": "list",
+             "data": []
+         }
+         for provider, info in model_providers.items():
+             for model in info["models"]:
+                 response["data"].append({
+                     "id": model,
+                     "object": "model",
+                     "provider": provider,
+                     "description": info["description"]
+                 })
+
+         return jsonify(response)
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+ @app.route("/chat/completions", methods=['POST'])
+ def chat_completions():
+     # Receive the JSON payload
+     try:
+         body = request.get_json()
+     except Exception as e:
+         return jsonify({"error": "Invalid JSON payload"}), 400
+
+     # Extract parameters
+     model = body.get("model")
+     messages = body.get("messages")
+     temperature = body.get("temperature", 0.7)
+     top_p = body.get("top_p", 1.0)
+     n = body.get("n", 1)
+     stream = body.get("stream", False)
+     stop = body.get("stop")
+     max_tokens = body.get("max_tokens")
+     presence_penalty = body.get("presence_penalty", 0.0)
+     frequency_penalty = body.get("frequency_penalty", 0.0)
+     logit_bias = body.get("logit_bias")
+     user = body.get("user")
+     timeout = 30  # or set based on your preference
+
+     # Validate required parameters
+     if not model:
+         return jsonify({"error": "The 'model' parameter is required."}), 400
+     if not messages:
+         return jsonify({"error": "The 'messages' parameter is required."}), 400
+
+     # Call the generate function
+     try:
+         if stream:
+             def generate_stream():
+                 response = generate(
+                     model=model,
+                     messages=messages,
+                     temperature=temperature,
+                     top_p=top_p,
+                     n=n,
+                     stream=True,
+                     stop=stop,
+                     max_tokens=max_tokens,
+                     presence_penalty=presence_penalty,
+                     frequency_penalty=frequency_penalty,
+                     logit_bias=logit_bias,
+                     user=user,
+                     timeout=timeout,
+                 )
+
+                 for chunk in response:
+                     yield f"data: {json.dumps(chunk)}\n\n"
+                 yield "data: [DONE]\n\n"
+
+             return Response(
+                 stream_with_context(generate_stream()),
+                 mimetype="text/event-stream",
+                 headers={
+                     "Cache-Control": "no-cache",
+                     "Connection": "keep-alive",
+                     "Transfer-Encoding": "chunked"
+                 }
+             )
+         else:
+             response = generate(
+                 model=model,
+                 messages=messages,
+                 temperature=temperature,
+                 top_p=top_p,
+                 n=n,
+                 stream=False,
+                 stop=stop,
+                 max_tokens=max_tokens,
+                 presence_penalty=presence_penalty,
+                 frequency_penalty=frequency_penalty,
+                 logit_bias=logit_bias,
+                 user=user,
+                 timeout=timeout,
+             )
+             return jsonify(response)
+     except Exception as e:
+         return jsonify({"error": str(e)}), 500
+
+ @app.route("/developer_info", methods=['GET'])
+ def get_developer_info():
+     return jsonify(developer_info)
+
+ if __name__ == "__main__":
+     app.run(host="0.0.0.0", port=8000)
query.md ADDED
@@ -0,0 +1,384 @@
+ ```python
+ import requests
+ import json
+
+ # Build model mapping
+ original_models = [
+     # OpenAI Models
+     "gpt-3.5-turbo",
+     "gpt-3.5-turbo-202201",
+     "gpt-4o",
+     "gpt-4o-2024-05-13",
+     "o1-preview",
+
+     # Claude Models
+     "claude",
+     "claude-3-5-sonnet",
+     "claude-sonnet-3.5",
+     "claude-3-5-sonnet-20240620",
+
+     # Meta/LLaMA Models
+     "@cf/meta/llama-2-7b-chat-fp16",
+     "@cf/meta/llama-2-7b-chat-int8",
+     "@cf/meta/llama-3-8b-instruct",
+     "@cf/meta/llama-3.1-8b-instruct",
+     "@cf/meta-llama/llama-2-7b-chat-hf-lora",
+     "llama-3.1-405b",
+     "llama-3.1-70b",
+     "llama-3.1-8b",
+     "meta-llama/Llama-2-7b-chat-hf",
+     "meta-llama/Llama-3.1-70B-Instruct",
+     "meta-llama/Llama-3.1-8B-Instruct",
+     "meta-llama/Llama-3.2-11B-Vision-Instruct",
+     "meta-llama/Llama-3.2-1B-Instruct",
+     "meta-llama/Llama-3.2-3B-Instruct",
+     "meta-llama/Llama-3.2-90B-Vision-Instruct",
+     "meta-llama/Llama-Guard-3-8B",
+     "meta-llama/Meta-Llama-3-70B-Instruct",
+     "meta-llama/Meta-Llama-3-8B-Instruct",
+     "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+     "meta-llama/Meta-Llama-3.1-8B-Instruct",
+     "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+
+     # Mistral Models
+     "mistral",
+     "mistral-large",
+     "@cf/mistral/mistral-7b-instruct-v0.1",
+     "@cf/mistral/mistral-7b-instruct-v0.2-lora",
+     "@hf/mistralai/mistral-7b-instruct-v0.2",
+     "mistralai/Mistral-7B-Instruct-v0.2",
+     "mistralai/Mistral-7B-Instruct-v0.3",
+     "mistralai/Mixtral-8x22B-Instruct-v0.1",
+     "mistralai/Mixtral-8x7B-Instruct-v0.1",
+
+     # Qwen Models
+     "@cf/qwen/qwen1.5-0.5b-chat",
+     "@cf/qwen/qwen1.5-1.8b-chat",
+     "@cf/qwen/qwen1.5-7b-chat-awq",
+     "@cf/qwen/qwen1.5-14b-chat-awq",
+     "Qwen/Qwen2.5-3B-Instruct",
+     "Qwen/Qwen2.5-72B-Instruct",
+     "Qwen/Qwen2.5-Coder-32B-Instruct",
+
+     # Google/Gemini Models
+     "@cf/google/gemma-2b-it-lora",
+     "@cf/google/gemma-7b-it-lora",
+     "@hf/google/gemma-7b-it",
+     "google/gemma-1.1-2b-it",
+     "google/gemma-1.1-7b-it",
+     "gemini-pro",
+     "gemini-1.5-pro",
+     "gemini-1.5-pro-latest",
+     "gemini-1.5-flash",
+
+     # Cohere Models
+     "c4ai-aya-23-35b",
+     "c4ai-aya-23-8b",
+     "command",
+     "command-light",
+     "command-light-nightly",
+     "command-nightly",
+     "command-r",
+     "command-r-08-2024",
+     "command-r-plus",
+     "command-r-plus-08-2024",
+     "rerank-english-v2.0",
+     "rerank-english-v3.0",
+     "rerank-multilingual-v2.0",
+     "rerank-multilingual-v3.0",
+
+     # Microsoft Models
+     "@cf/microsoft/phi-2",
+     "microsoft/DialoGPT-medium",
+     "microsoft/Phi-3-medium-4k-instruct",
+     "microsoft/Phi-3-mini-4k-instruct",
+     "microsoft/Phi-3.5-mini-instruct",
+     "microsoft/WizardLM-2-8x22B",
+
+     # Yi Models
+     "01-ai/Yi-1.5-34B-Chat",
+     "01-ai/Yi-34B-Chat",
+ ]
+
+ # Create mapping from simplified model names to original model names
+ model_mapping = {}
+ simplified_models = []
+
+ for original_model in original_models:
+     simplified_name = original_model.split('/')[-1]
+     if simplified_name in model_mapping:
+         # Conflict detected, handle as per instructions
+         print(f"Conflict detected for model name '{simplified_name}'. Excluding '{original_model}' from available models.")
+         continue
+     model_mapping[simplified_name] = original_model
+     simplified_models.append(simplified_name)
+
+ def generate(
+     model,
+     messages,
+     temperature=0.7,
+     top_p=1.0,
+     n=1,
+     stream=False,
+     stop=None,
+     max_tokens=None,
+     presence_penalty=0.0,
+     frequency_penalty=0.0,
+     logit_bias=None,
+     user=None,
+     timeout=30,
+ ):
+     """
+     Generates a chat completion using the provided model and messages.
+     """
+     # Use the simplified model names
+     models = simplified_models
+
+     if model not in models:
+         raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(models)}")
+
+     # Map simplified model name to original model name
+     original_model = model_mapping[model]
+
+     api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"
+
+     headers = {
+         "authority": "chat.typegpt.net",
+         "accept": "application/json, text/event-stream",
+         "accept-language": "en-US,en;q=0.9",
+         "content-type": "application/json",
+         "origin": "https://chat.typegpt.net",
+         "referer": "https://chat.typegpt.net/",
+         "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+     }
+
+     # Payload
+     payload = {
+         "messages": messages,
+         "stream": stream,
+         "model": original_model,
+         "temperature": temperature,
+         "presence_penalty": presence_penalty,
+         "frequency_penalty": frequency_penalty,
+         "top_p": top_p,
+     }
+
+     # Only include max_tokens if it's not None
+     if max_tokens is not None:
+         payload["max_tokens"] = max_tokens
+
+     # Only include 'stop' if it's not None
+     if stop is not None:
+         payload["stop"] = stop
+
+     # Check if logit_bias is provided
+     if logit_bias is not None:
+         payload["logit_bias"] = logit_bias
+
+     # Include 'user' if provided
+     if user is not None:
+         payload["user"] = user
+
+     # Start the request
+     session = requests.Session()
+     response = session.post(
+         api_endpoint, headers=headers, json=payload, stream=stream, timeout=timeout
+     )
+
+     if not response.ok:
+         raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}")
+
+     def stream_response():
+         for line in response.iter_lines():
+             if line:
+                 line = line.decode("utf-8")
+                 if line.startswith("data: "):
+                     line = line[6:]  # Remove "data: " prefix
+                     if line.strip() == "[DONE]":
+                         break
+                     try:
+                         data = json.loads(line)
+                         yield data
+                     except json.JSONDecodeError:
+                         continue
+
+     if stream:
+         return stream_response()
+     else:
+         return response.json()
+
+ if __name__ == "__main__":
+     # Example usage
+     # model = "claude-3-5-sonnet-20240620"
+     # model = "qwen1.5-0.5b-chat"
+     # model = "llama-2-7b-chat-fp16"
+     model = "gpt-3.5-turbo"
+     messages = [
+         {"role": "system", "content": "Be Detailed"},
+         {"role": "user", "content": "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate."}
+     ]
+
+     # try:
+     #     # For non-streamed response
+     #     response = generate(
+     #         model=model,
+     #         messages=messages,
+     #         temperature=0.5,
+     #         max_tokens=4000,
+     #         stream=False  # Change to True for streaming
+     #     )
+     #     if 'choices' in response:
+     #         reply = response['choices'][0]['message']['content']
+     #         print(reply)
+     #     else:
+     #         print("No response received.")
+     # except Exception as e:
+     #     print(e)
+
+     try:
+         # For streamed response
+         response = generate(
+             model=model,
+             messages=messages,
+             temperature=0.5,
+             max_tokens=4000,
+             stream=True,  # Change to False for non-streamed response
+         )
+         for data in response:
+             if 'choices' in data:
+                 reply = data['choices'][0]['delta']['content']
+                 print(reply, end="", flush=True)
+             else:
+                 print("No response received.")
+     except Exception as e:
+         print(e)
+ ```
+
+ ```python
+ from fastapi import FastAPI, Request, Response
+ from fastapi.responses import JSONResponse, StreamingResponse
+ from fastapi.middleware.cors import CORSMiddleware
+ import uvicorn
+ import asyncio
+ import json
+ import requests
+
+ from TYPEGPT.typegpt_api import generate, model_mapping, simplified_models
+ from api_info import developer_info
+
+ app = FastAPI()
+
+ # Set up CORS middleware if needed
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ @app.get("/health_check")
+ async def health_check():
+     return {"status": "OK"}
+
+ @app.get("/models")
+ async def get_models():
+     # Retrieve models from TypeGPT API and forward the response
+     api_endpoint = "https://chat.typegpt.net/api/openai/v1/models"
+     try:
+         response = requests.get(api_endpoint)
+         # return response.text
+         return JSONResponse(content=response.json(), status_code=response.status_code)
+     except Exception as e:
+         return JSONResponse(content={"error": str(e)}, status_code=500)
+
+ @app.post("/chat/completions")
+ async def chat_completions(request: Request):
+     # Receive the JSON payload
+     try:
+         body = await request.json()
+     except Exception as e:
+         return JSONResponse(content={"error": "Invalid JSON payload"}, status_code=400)
+
+     # Extract parameters
+     model = body.get("model")
+     messages = body.get("messages")
+     temperature = body.get("temperature", 0.7)
+     top_p = body.get("top_p", 1.0)
+     n = body.get("n", 1)
+     stream = body.get("stream", False)
+     stop = body.get("stop")
+     max_tokens = body.get("max_tokens")
+     presence_penalty = body.get("presence_penalty", 0.0)
+     frequency_penalty = body.get("frequency_penalty", 0.0)
+     logit_bias = body.get("logit_bias")
+     user = body.get("user")
+     timeout = 30  # or set based on your preference
+
+     # Validate required parameters
+     if not model:
+         return JSONResponse(content={"error": "The 'model' parameter is required."}, status_code=400)
+     if not messages:
+         return JSONResponse(content={"error": "The 'messages' parameter is required."}, status_code=400)
+
+     # Call the generate function
+     try:
+         if stream:
+             async def generate_stream():
+                 response = generate(
+                     model=model,
+                     messages=messages,
+                     temperature=temperature,
+                     top_p=top_p,
+                     n=n,
+                     stream=True,
+                     stop=stop,
+                     max_tokens=max_tokens,
+                     presence_penalty=presence_penalty,
+                     frequency_penalty=frequency_penalty,
+                     logit_bias=logit_bias,
+                     user=user,
+                     timeout=timeout,
+                 )
+
+                 for chunk in response:
+                     yield f"data: {json.dumps(chunk)}\n\n"
+                 yield "data: [DONE]\n\n"
+
+             return StreamingResponse(
+                 generate_stream(),
+                 media_type="text/event-stream",
+                 headers={
+                     "Cache-Control": "no-cache",
+                     "Connection": "keep-alive",
+                     "Transfer-Encoding": "chunked"
+                 }
+             )
+         else:
+             response = generate(
+                 model=model,
+                 messages=messages,
+                 temperature=temperature,
+                 top_p=top_p,
+                 n=n,
+                 stream=False,
+                 stop=stop,
+                 max_tokens=max_tokens,
+                 presence_penalty=presence_penalty,
+                 frequency_penalty=frequency_penalty,
+                 logit_bias=logit_bias,
+                 user=user,
+                 timeout=timeout,
+             )
+             return JSONResponse(content=response)
+     except Exception as e:
+         return JSONResponse(content={"error": str(e)}, status_code=500)
+
+ @app.get("/developer_info")
+ async def get_developer_info():
+     return JSONResponse(content=developer_info)
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=8000)
+ ```
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ fastapi==0.110.2
+ Flask==3.0.3
+ Requests==2.31.0
+ uvicorn==0.29.0
+ python-dotenv==1.0.1
+ colorama
+ pytz
+ flask-cors  # required by flask_app.py, which imports flask_cors
typegpt_api.py ADDED
@@ -0,0 +1,254 @@
+ import requests
+ import json
+
+ # Build model mapping
+ original_models = [
+     # OpenAI Models
+     "gpt-3.5-turbo",
+     "gpt-3.5-turbo-202201",
+     "gpt-4o",
+     "gpt-4o-2024-05-13",
+     "o1-preview",
+
+     # Claude Models
+     "claude",
+     "claude-3-5-sonnet",
+     "claude-sonnet-3.5",
+     "claude-3-5-sonnet-20240620",
+
+     # Meta/LLaMA Models
+     "@cf/meta/llama-2-7b-chat-fp16",
+     "@cf/meta/llama-2-7b-chat-int8",
+     "@cf/meta/llama-3-8b-instruct",
+     "@cf/meta/llama-3.1-8b-instruct",
+     "@cf/meta-llama/llama-2-7b-chat-hf-lora",
+     "llama-3.1-405b",
+     "llama-3.1-70b",
+     "llama-3.1-8b",
+     "meta-llama/Llama-2-7b-chat-hf",
+     "meta-llama/Llama-3.1-70B-Instruct",
+     "meta-llama/Llama-3.1-8B-Instruct",
+     "meta-llama/Llama-3.2-11B-Vision-Instruct",
+     "meta-llama/Llama-3.2-1B-Instruct",
+     "meta-llama/Llama-3.2-3B-Instruct",
+     "meta-llama/Llama-3.2-90B-Vision-Instruct",
+     "meta-llama/Llama-Guard-3-8B",
+     "meta-llama/Meta-Llama-3-70B-Instruct",
+     "meta-llama/Meta-Llama-3-8B-Instruct",
+     "meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
+     "meta-llama/Meta-Llama-3.1-8B-Instruct",
+     "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
+
+     # Mistral Models
+     "mistral",
+     "mistral-large",
+     "@cf/mistral/mistral-7b-instruct-v0.1",
+     "@cf/mistral/mistral-7b-instruct-v0.2-lora",
+     "@hf/mistralai/mistral-7b-instruct-v0.2",
+     "mistralai/Mistral-7B-Instruct-v0.2",
+     "mistralai/Mistral-7B-Instruct-v0.3",
+     "mistralai/Mixtral-8x22B-Instruct-v0.1",
+     "mistralai/Mixtral-8x7B-Instruct-v0.1",
+
+     # Qwen Models
+     "@cf/qwen/qwen1.5-0.5b-chat",
+     "@cf/qwen/qwen1.5-1.8b-chat",
+     "@cf/qwen/qwen1.5-7b-chat-awq",
+     "@cf/qwen/qwen1.5-14b-chat-awq",
+     "Qwen/Qwen2.5-3B-Instruct",
+     "Qwen/Qwen2.5-72B-Instruct",
+     "Qwen/Qwen2.5-Coder-32B-Instruct",
+
+     # Google/Gemini Models
+     "@cf/google/gemma-2b-it-lora",
+     "@cf/google/gemma-7b-it-lora",
+     "@hf/google/gemma-7b-it",
+     "google/gemma-1.1-2b-it",
+     "google/gemma-1.1-7b-it",
+     "gemini-pro",
+     "gemini-1.5-pro",
+     "gemini-1.5-pro-latest",
+     "gemini-1.5-flash",
+
+     # Cohere Models
+     "c4ai-aya-23-35b",
+     "c4ai-aya-23-8b",
+     "command",
+     "command-light",
+     "command-light-nightly",
+     "command-nightly",
+     "command-r",
+     "command-r-08-2024",
+     "command-r-plus",
+     "command-r-plus-08-2024",
+     "rerank-english-v2.0",
+     "rerank-english-v3.0",
+     "rerank-multilingual-v2.0",
+     "rerank-multilingual-v3.0",
+
+     # Microsoft Models
+     "@cf/microsoft/phi-2",
+     "microsoft/DialoGPT-medium",
+     "microsoft/Phi-3-medium-4k-instruct",
+     "microsoft/Phi-3-mini-4k-instruct",
+     "microsoft/Phi-3.5-mini-instruct",
+     "microsoft/WizardLM-2-8x22B",
+
+     # Yi Models
+     "01-ai/Yi-1.5-34B-Chat",
+     "01-ai/Yi-34B-Chat",
+ ]
+
+ # Create mapping from simplified model names to original model names
+ model_mapping = {}
+ simplified_models = []
+
+ for original_model in original_models:
+     simplified_name = original_model.split('/')[-1]
+     if simplified_name in model_mapping:
+         # Conflict detected, handle as per instructions
+         print(f"Conflict detected for model name '{simplified_name}'. Excluding '{original_model}' from available models.")
+         continue
+     model_mapping[simplified_name] = original_model
+     simplified_models.append(simplified_name)
+
+ def generate(
+     model,
+     messages,
+     temperature=0.7,
+     top_p=1.0,
+     n=1,
+     stream=False,
+     stop=None,
+     max_tokens=None,
+     presence_penalty=0.0,
+     frequency_penalty=0.0,
+     logit_bias=None,
+     user=None,
+     timeout=30,
+ ):
+     """
+     Generates a chat completion using the provided model and messages.
+     """
+     # Use the simplified model names
+     models = simplified_models
+
+     if model not in models:
+         raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(models)}")
+
+     # Map simplified model name to original model name
+     original_model = model_mapping[model]
+
+     api_endpoint = "https://chat.typegpt.net/api/openai/v1/chat/completions"
+
+     headers = {
+         "authority": "chat.typegpt.net",
+         "accept": "application/json, text/event-stream",
+         "accept-language": "en-US,en;q=0.9",
+         "content-type": "application/json",
+         "origin": "https://chat.typegpt.net",
+         "referer": "https://chat.typegpt.net/",
+         "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+     }
+
+     # Payload
+     payload = {
+         "messages": messages,
+         "stream": stream,
+         "model": original_model,
+         "temperature": temperature,
+         "presence_penalty": presence_penalty,
+         "frequency_penalty": frequency_penalty,
+         "top_p": top_p,
+     }
+
+     # Only include max_tokens if it's not None
+     if max_tokens is not None:
+         payload["max_tokens"] = max_tokens
+
+     # Only include 'stop' if it's not None
+     if stop is not None:
+         payload["stop"] = stop
+
+     # Check if logit_bias is provided
+     if logit_bias is not None:
+         payload["logit_bias"] = logit_bias
+
+     # Include 'user' if provided
+     if user is not None:
+         payload["user"] = user
+
+     # Start the request
+     session = requests.Session()
+     response = session.post(
+         api_endpoint, headers=headers, json=payload, stream=stream, timeout=timeout
+     )
+
+     if not response.ok:
+         raise Exception(f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}")
+
+     def stream_response():
+         for line in response.iter_lines():
+             if line:
+                 line = line.decode("utf-8")
+                 if line.startswith("data: "):
+                     line = line[6:]  # Remove "data: " prefix
+                     if line.strip() == "[DONE]":
+                         break
+                     try:
+                         data = json.loads(line)
+                         yield data
+                     except json.JSONDecodeError:
+                         continue
+
+     if stream:
+         return stream_response()
+     else:
+         return response.json()
+
+ if __name__ == "__main__":
+     # Example usage
+     # model = "claude-3-5-sonnet-20240620"
+     # model = "qwen1.5-0.5b-chat"
+     # model = "llama-2-7b-chat-fp16"
+     model = "gpt-3.5-turbo"
+     messages = [
+         {"role": "system", "content": "Be Detailed"},
+         {"role": "user", "content": "What is the knowledge cut off? Be specific and also specify the month, year and date. If not sure, then provide approximate."}
+     ]
+
+     # try:
+     #     # For non-streamed response
+     #     response = generate(
+     #         model=model,
+     #         messages=messages,
+     #         temperature=0.5,
+     #         max_tokens=4000,
+     #         stream=False  # Change to True for streaming
+     #     )
+     #     if 'choices' in response:
+     #         reply = response['choices'][0]['message']['content']
+     #         print(reply)
+     #     else:
+     #         print("No response received.")
+     # except Exception as e:
+     #     print(e)
+
+     try:
+         # For streamed response
+         response = generate(
+             model=model,
+             messages=messages,
+             temperature=0.5,
+             max_tokens=4000,
+             stream=True,  # Change to False for non-streamed response
+         )
+         for data in response:
+             if 'choices' in data and data['choices']:
+                 # Guard with .get(): role-only or final chunks may omit 'content'
+                 reply = data['choices'][0].get('delta', {}).get('content', '')
+                 print(reply, end="", flush=True)
+             else:
+                 print("No response received.")
+     except Exception as e:
+         print(e)
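
To make the mapping concrete: each ID keeps only the segment after the last `/`, the first occurrence wins on a collision, and slash-free names map to themselves. A small illustrative sketch (a REPL-style check, not part of the commit):

```python
# Sketch: how simplified names map back to provider-qualified IDs.
from typegpt_api import model_mapping

print(model_mapping["llama-3-8b-instruct"])         # -> "@cf/meta/llama-3-8b-instruct"
print(model_mapping["Mixtral-8x7B-Instruct-v0.1"])  # -> "mistralai/Mixtral-8x7B-Instruct-v0.1"
print("gpt-3.5-turbo" in model_mapping)             # flat names map to themselves -> True
```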
usage_inference.py ADDED
@@ -0,0 +1,158 @@
+ import requests
+ import json
+ import time
+ from typing import Dict, Any
+
+ class APITester:
+     def __init__(self, base_url: str = "http://localhost:8000"):
+         self.base_url = base_url
+         self.session = requests.Session()
+
+     def test_health_check(self) -> None:
+         """Test the health check endpoint."""
+         print("\n=== Testing Health Check Endpoint ===")
+         try:
+             response = self.session.get(f"{self.base_url}/health_check")
+             print(f"Status Code: {response.status_code}")
+             print(f"Response: {response.json()}")
+             assert response.status_code == 200
+             print("✅ Health check test passed!")
+         except Exception as e:
+             print(f"❌ Health check test failed: {str(e)}")
+
+     def test_models(self) -> None:
+         """Test the models endpoint."""
+         print("\n=== Testing Models Endpoint ===")
+         try:
+             response = self.session.get(f"{self.base_url}/models")
+             print(f"Status Code: {response.status_code}")
+             data = response.json()
+             print(f"Number of models available: {len(data['data'])}")
+             print("Sample models:")
+             for model in data['data'][:5]:  # Show first 5 models
+                 print(f"- {model['id']}")
+             assert response.status_code == 200
+             print("✅ Models endpoint test passed!")
+         except Exception as e:
+             print(f"❌ Models endpoint test failed: {str(e)}")
+
+     def test_chat_completions_non_streaming(self) -> None:
+         """Test the chat completions endpoint without streaming."""
+         print("\n=== Testing Chat Completions Endpoint (Non-Streaming) ===")
+         payload = {
+             "model": "gpt-3.5-turbo",
+             "messages": [
+                 {"role": "system", "content": "You are a helpful assistant."},
+                 {"role": "user", "content": "Tell me a short joke about programming."}
+             ],
+             "temperature": 0.7,
+             "max_tokens": 150,
+             "stream": False
+         }
+
+         try:
+             response = self.session.post(
+                 f"{self.base_url}/chat/completions",
+                 json=payload
+             )
+             print(f"Status Code: {response.status_code}")
+             if response.status_code == 200:
+                 data = response.json()
+                 print("Response content:")
+                 print(data['choices'][0]['message']['content'])
+             assert response.status_code == 200
+             print("✅ Chat completions (non-streaming) test passed!")
+         except Exception as e:
+             print(f"❌ Chat completions (non-streaming) test failed: {str(e)}")
+
+     def test_chat_completions_streaming(self) -> None:
+         """Test the chat completions endpoint with streaming."""
+         print("\n=== Testing Chat Completions Endpoint (Streaming) ===")
+         payload = {
+             "model": "gpt-3.5-turbo",
+             "messages": [
+                 {"role": "system", "content": "You are a helpful assistant."},
+                 {"role": "user", "content": "Write 5 lines about India"}
+             ],
+             "temperature": 0.7,
+             "max_tokens": 150,
+             "stream": True
+         }
+
+         try:
+             with self.session.post(
+                 f"{self.base_url}/chat/completions",
+                 json=payload,
+                 stream=True,
+                 headers={"Accept": "text/event-stream"}
+             ) as response:
+                 print(f"Status Code: {response.status_code}")
+                 print("Streaming response:")
+
+                 buffer = ""
+                 for chunk in response.iter_lines():
+                     if chunk:
+                         chunk = chunk.decode('utf-8')
+                         if chunk.startswith('data: '):
+                             chunk = chunk[6:]  # Remove 'data: ' prefix
+                             if chunk.strip() == '[DONE]':
+                                 break
+                             try:
+                                 data = json.loads(chunk)
+                                 if 'choices' in data and len(data['choices']) > 0:
+                                     if 'delta' in data['choices'][0] and 'content' in data['choices'][0]['delta']:
+                                         content = data['choices'][0]['delta']['content']
+                                         print(content, end='', flush=True)
+                                         time.sleep(0.1)  # Add a small delay to simulate real-time streaming
+                             except json.JSONDecodeError:
+                                 continue
+
+             print("\n✅ Chat completions (streaming) test passed!")
+         except Exception as e:
+             print(f"❌ Chat completions (streaming) test failed: {str(e)}")
+
+     def test_developer_info(self) -> None:
+         """Test the developer info endpoint."""
+         print("\n=== Testing Developer Info Endpoint ===")
+         try:
+             response = self.session.get(f"{self.base_url}/developer_info")
+             print(f"Status Code: {response.status_code}")
+             print("Developer Info:")
+             print(json.dumps(response.json(), indent=2))
+             assert response.status_code == 200
+             print("✅ Developer info test passed!")
+         except Exception as e:
+             print(f"❌ Developer info test failed: {str(e)}")
+
+     def run_all_tests(self) -> None:
+         """Run all tests sequentially."""
+         tests = [
+             self.test_health_check,
+             self.test_models,
+             self.test_chat_completions_non_streaming,
+             self.test_chat_completions_streaming,
+             self.test_developer_info
+         ]
+
+         print("🚀 Starting API Tests...")
+         start_time = time.time()
+
+         for test in tests:
+             test()
+
+         end_time = time.time()
+         duration = end_time - start_time
+
+         print("\n============================")
+         print(f"🏁 All tests completed in {duration:.2f} seconds")
+         print("============================")
+
+ def main():
+     # Initialize tester with your API's base URL
+     tester = APITester("http://localhost:8000")
+
+     # Run all tests
+     tester.run_all_tests()
+
+ if __name__ == "__main__":
+     main()