import logging

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI()

MODEL_NAME = 'tinyllama'
# MODEL_NAME = 'llama3.2'


def get_llm():
    # Stream generated tokens to stdout as the model produces them.
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    return Ollama(model=MODEL_NAME, callback_manager=callback_manager)


class Question(BaseModel):
    text: str


@app.get("/")
def read_root():
    return {"Hello": "This API uses LangChain with Ollama"}


@app.post("/ask")
async def ask_question(question: Question):
    llm = get_llm()
    try:
        # invoke() is the current LangChain entry point; calling llm(...) directly is deprecated.
        response = llm.invoke(question.text)
    except Exception as exc:
        logger.exception("LLM call failed")
        raise HTTPException(status_code=500, detail=str(exc))
    return {"response": response}


@app.on_event("startup")
async def startup_event():
    logger.info(f"Starting up with model: {MODEL_NAME}")


@app.on_event("shutdown")
async def shutdown_event():
    logger.info("Shutting down")
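
# A minimal sketch of a local entry point, assuming the uvicorn package is
# installed; the host and port values are illustrative, not prescribed by the
# example above. With this block the file can also be launched directly
# (e.g. `python main.py`) instead of via the `uvicorn` CLI.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)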