import uvicorn
from fastapi import FastAPI, Form
from llama_cpp import Llama

# Initialize the FastAPI app
app = FastAPI()
# Download and load the Llama model from the Hugging Face Hub
# (the GGUF file is fetched and cached locally on first use)
llm = Llama.from_pretrained(
    repo_id="HuggingFaceTB/SmolLM2-360M-Instruct-GGUF",
    filename="smollm2-360m-instruct-q8_0.gguf",  # GGUF file name within the repo
)
# Endpoint to generate a response from the model based on user input.
# The original listing omitted the route decorator; "/ask" is an assumed path.
# Note: Form fields require the python-multipart package to be installed.
@app.post("/ask")
async def ask_question(prompt: str = Form(...)):
    # Format the prompt as a single-turn chat message
    messages = [
        {"role": "user", "content": prompt}
    ]
    # Generate a response using Llama (returns an OpenAI-style completion dict)
    response = llm.create_chat_completion(messages=messages)
    response_content = response["choices"][0]["message"]["content"]
    return {"response": response_content}
# Optional endpoint to sanity-check the model with a fixed question.
# The "/test" route path is likewise an assumption; the decorator was missing.
@app.get("/test")
async def test():
    messages = [{"role": "user", "content": "What is the capital of France?"}]
    response = llm.create_chat_completion(messages=messages)
    response_content = response["choices"][0]["message"]["content"]
    return {"test_response": response_content}
if __name__ == "__main__":
    # Serve the API with uvicorn on all interfaces, port 8000
    # ("main:app" assumes this file is saved as main.py)
    uvicorn.run("main:app", host="0.0.0.0", port=8000)
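# A minimal client sketch for the "/ask" endpoint, kept as a comment so the
# server script stays self-contained. It assumes the server is running locally
# on port 8000 and uses the "/ask" route path assumed above; run it from a
# separate process:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8000/ask",
#       data={"prompt": "What is the capital of France?"},
#   )
#   resp.raise_for_status()
#   print(resp.json()["response"])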