"""Send one prompt to a fine-tuned Llama-2 model via the Hugging Face Inference API.

The script reads the API token from the ``HF_API_TOKEN`` environment variable
(optionally loaded from a ``.env`` file), posts a single formatted prompt to
the model endpoint and prints the JSON response.
"""

import os
import sys
import time

import requests
from dotenv import load_dotenv

load_dotenv()  # Load environment variables from a .env file, if one exists

# The Hugging Face API token must be supplied through the environment.
api_token = os.getenv("HF_API_TOKEN")

# Abort early when no token is available.
if api_token is None:
    print("API token is not set. Please set the 'HF_API_TOKEN' environment variable.")
    # sys.exit is used instead of the builtin exit(): the latter is injected by
    # the `site` module and is not guaranteed to exist in every runtime.
    sys.exit(1)

# Authorization and content-type headers sent with every request.
headers = {
    "Authorization": f"Bearer {api_token}",
    "Content-Type": "application/json",
}

# URL for the Hugging Face model inference endpoint.
model_url = "https://api-inference.huggingface.co/models/Ouiam123/Llama-2-7b-chat-finetune-tourism"

# Input text to send to the model, wrapped in the Llama-2 chat instruction
# markers. NOTE(review): the closing marker used to read "[/INST>", which looks
# like a typo for "[/INST]" -- confirm against the prompt format used when the
# model was fine-tuned.
input_text = "What should I do if I get lost in Morocco?"
formatted_prompt = f"[INST] {input_text} [/INST]"

# Request payload: the prompt plus generation parameters.
payload = {
    "inputs": formatted_prompt,
    "parameters": {
        "max_new_tokens": 500,
        "temperature": 0.7,
        "top_p": 0.95,
        "repetition_penalty": 1.15,
    },
}


def get_model_response(max_retries=10, retry_delay=20):
    """Post the module-level ``payload`` to ``model_url`` and return the JSON reply.

    A 503 (service unavailable) response is retried after ``retry_delay``
    seconds, at most ``max_retries`` times. Any other non-200 status, a
    request-level failure, or a body that is not valid JSON is reported on
    stdout and ends the call immediately.

    Args:
        max_retries: Maximum number of retries after the first attempt when
            the service answers 503. Negative values are treated as 0.
        retry_delay: Seconds to sleep between 503 retries.

    Returns:
        The decoded JSON response on success, otherwise ``None``.
    """
    retries_allowed = max(0, max_retries)

    # Iterative retry: a bounded loop instead of unbounded recursion, so a long
    # outage can neither hang forever nor exhaust the call stack.
    for attempt in range(retries_allowed + 1):
        try:
            response = requests.post(
                model_url,
                headers=headers,
                json=payload,
                timeout=30,
            )
        except requests.exceptions.RequestException as e:
            print(f"Request error: {e}")
            return None

        if response.status_code == 200:
            try:
                return response.json()
            except ValueError as e:
                # Covers JSON decode errors on every requests version (older
                # releases raise a plain ValueError subclass here).
                print(f"Request error: {e}")
                return None

        if response.status_code != 503:
            print(f"Error {response.status_code}: {response.text}")
            return None

        # 503: wait and try again unless the retry budget is spent.
        if attempt < retries_allowed:
            print("Service unavailable, retrying...")
            time.sleep(retry_delay)

    print(f"Service still unavailable after {retries_allowed} retries; giving up.")
    return None


# Get the model response
model_response = get_model_response()

# Output the result
if model_response:
    print("Response:", model_response)
else:
    print("Failed to get a valid response from the model.")