# gpt_tools.py
"""Lightweight chat / code-assistant helpers backed by a Hugging Face pipeline."""

import os

from transformers import pipeline

# Load small and fast open-access model.
# NOTE(review): falcon-rw-1b is a plain causal LM; the [INST] ... [/INST]
# wrapper below is a Llama-style chat template — presumably kept for prompt
# shaping, but confirm the model actually benefits from it.
chat_model = pipeline(
    "text-generation",
    model="tiiuae/falcon-rw-1b",  # fast & lightweight model for Hugging Face Spaces
    token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
)


def handle_chat(message, history=None):
    """Generate a chat reply and append the (message, reply) pair to history.

    Args:
        message: The user's input text.
        history: Optional list of (message, reply) tuples; mutated in place
            when provided. Defaults to a fresh list per call.

    Returns:
        The updated history list.
    """
    # Bug fix: the original used `history=[]`, a mutable default shared
    # across ALL calls — every chat session without an explicit history
    # accumulated into the same list. Use the None-sentinel idiom instead.
    if history is None:
        history = []
    prompt = f"[INST] {message} [/INST]"
    response = chat_model(prompt, max_new_tokens=300)[0]['generated_text']
    # The pipeline echoes the prompt in generated_text; strip it to keep
    # only the model's continuation.
    reply = response.replace(prompt, "").strip()
    history.append((message, reply))
    return history


def handle_code(code):
    """Ask the model to fix, optimize, and explain a code snippet.

    Args:
        code: The source code to analyze, as a string.

    Returns:
        The model's response with the prompt echo removed.
    """
    prompt = f"[INST] Fix, optimize, and explain this code:\n{code} [/INST]"
    response = chat_model(prompt, max_new_tokens=300)[0]['generated_text']
    return response.replace(prompt, "").strip()