from llama_cpp import Llama
from concurrent.futures import ThreadPoolExecutor
import uvicorn
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
import os
import sys
from dotenv import load_dotenv
import gradio as gr
import requests
from pydantic import BaseModel

# Read the Hugging Face token from .env (if present) so hub downloads can authenticate.
load_dotenv()
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

global_data = {'models': {}, 'tokens': {k: k + '_token' for k in ['eos', 'pad', 'padding', 'unk', 'bos', 'sep', 'cls', 'mask']}}
model_configs = [{"repo_id": "Hjgugugjhuhjggg/mergekit-ties-tzamfyy-Q2_K-GGUF", "filename": "mergekit-ties-tzamfyy-q2_k.gguf", "name": "my_model"}]
models = {}
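
# Download each configured GGUF checkpoint from the Hugging Face Hub and wrap it in a llama.cpp model.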
def load_model(model_config):
    model_name = model_config['name']
    try:
        model = Llama.from_pretrained(repo_id=model_config['repo_id'], filename=model_config['filename'], use_auth_token=HUGGINGFACE_TOKEN)
        models[model_name] = model
        global_data['models'] = models
        return model
    except Exception as e:
        print(f"Error loading model {model_name}: {e}")
        return None

for config in model_configs:
    model = load_model(config)
    if model is None:
        sys.exit(1)
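
# Request schema for /generate; the helpers below trim whitespace and drop duplicated lines from model output.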
class ChatRequest(BaseModel):
    message: str

def normalize_input(input_text):
    return input_text.strip()

def remove_duplicates(text):
    lines = [line.strip() for line in text.split('\n') if line.strip()]
    return '\n'.join(dict.fromkeys(lines))
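
# Run a single model on one chunk of input and clean up the raw completion text.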
def generate_model_response(model, inputs):
    try:
        if model is None:
            return "Model loading failed."
        response = model(inputs, max_tokens=512)  # max_tokens adjusted for practicality
        return remove_duplicates(response['choices'][0]['text'])
    except Exception as e:
        print(f"Error generating response: {e}")
        return f"Error: {e}"
app = FastAPI()
origins = ["*"]
app.add_middleware(
    CORSMiddleware, allow_origins=origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"]
)
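
# POST /generate: split the prompt into chunks and fan each chunk out to every loaded model in parallel.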
@app.post("/generate")
async def generate(request: ChatRequest):
    inputs = normalize_input(request.message)
    chunk_size = 400  # reduced chunk size to keep prompts manageable
    chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), chunk_size)]
    overall_response = ""
    for chunk in chunks:
        with ThreadPoolExecutor() as executor:
            # Pair each model name with its own future so responses are attributed correctly.
            futures = {name: executor.submit(generate_model_response, model, chunk) for name, model in models.items()}
            responses = [{'model': name, 'response': future.result()} for name, future in futures.items()]
        for response in responses:
            overall_response += f"**{response['model']}:**\n{response['response']}\n\n"
    return {"response": overall_response}
async def process_message(message, history):
    try:
        port = int(os.environ.get("PORT", 7860))
        response = requests.post(f"http://localhost:{port}/generate", json={"message": message}).json()
        formatted_response = response["response"]
        history.append((message, formatted_response))
        return history, history
    except requests.exceptions.RequestException as e:
        # Report the error in the chat history rather than returning a bare string as the State output.
        history.append((message, f"Error: {e}"))
        return history, history
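
# Gradio chat front end; it forwards each message to the local /generate endpoint and keeps history in State.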
iface = gr.Interface(
    fn=process_message,
    inputs=[gr.Textbox(lines=2, placeholder="Enter your message here..."), gr.State([])],
    outputs=[gr.Chatbot(), gr.State([])],
    title="Multi-Model LLM API", description="Enter a message and get responses from multiple LLMs."
)
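
# uvicorn.run() blocks, so launching Gradio afterwards (and on the same port) would never execute.
# One common pattern is to mount the Gradio UI onto the FastAPI app with gr.mount_gradio_app so a
# single server on one port serves both the API and the UI.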
if __name__ == "__main__":
    port = int(os.environ.get("PORT", 7860))
    app = gr.mount_gradio_app(app, iface, path="/")
    uvicorn.run(app, host="0.0.0.0", port=port)