"""Minimal FastAPI service exposing a local Mistral GGUF model for CPU inference.

POST a raw UTF-8 prompt as the request body to ``//llm_on_cpu``; the response
is JSON: ``{"generated_text": "..."}``.
"""

import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from langchain_community.llms.ctransformers import CTransformers

# --- Model configuration -------------------------------------------------
MODEL_TYPE = 'mistral'
MODEL_BIN_PATH = "mistral-7b-instruct-v0.1.Q3_K_S.gguf"
MAX_NEW_TOKEN = 600       # cap on tokens generated per request
TEMPERATURE = 0.01        # near-greedy sampling for mostly deterministic output
TEMPRATURE = TEMPERATURE  # backward-compat alias for the original misspelling
CONTEXT_LENGTH = 6000     # context window passed to ctransformers

# Model is loaded once at import time; inference is synchronous and CPU-bound.
llm = CTransformers(
    model=MODEL_BIN_PATH,
    model_type=MODEL_TYPE,
    config={
        'max_new_tokens': MAX_NEW_TOKEN,
        'temperature': TEMPERATURE,
        'context_length': CONTEXT_LENGTH,
    },
)

app = FastAPI()


# NOTE(review): the double slash in the route is preserved byte-for-byte —
# FastAPI matches it literally, and changing it would break existing clients.
@app.post("//llm_on_cpu")
async def generate_text(request: Request):
    """Generate text from the raw prompt sent in the request body.

    Returns:
        ``{"generated_text": ...}`` on success;
        HTTP 400 with ``{"error": ...}`` when the body is empty;
        ``{"generated_text": "Error!"}`` if inference raises.
    """
    raw_data = await request.body()  # raw bytes of the request body
    prompt = raw_data.decode('utf-8')
    if not prompt:
        # BUG FIX: the original returned a `(dict, 400)` tuple — a Flask
        # idiom that FastAPI serializes as a 200 JSON array. Use
        # JSONResponse so the client actually receives HTTP 400.
        return JSONResponse(status_code=400,
                            content={'error': 'Prompt is required'})
    try:
        generated_text = llm(prompt)
        print(generated_text)
        return {'generated_text': generated_text}
    except Exception:
        # BUG FIX: the original had `try:` followed directly by `else:` with
        # no `except` clause — a SyntaxError. The intended fallback payload
        # is preserved unchanged.
        return {"generated_text": "Error!"}


if __name__ == "__main__":
    # The original imported uvicorn but never used it; run the app directly
    # when this file is executed as a script.
    uvicorn.run(app, host="0.0.0.0", port=8000)