import asyncio
import threading

import torch
import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer

# Application object that the route decorators below register against.
app = FastAPI()

# Identifier handed to from_pretrained() for both the tokenizer and the model.
model_id = 'model_result'

# Quantization settings: 4-bit NF4 weights with double quantization and a
# bfloat16 compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# The tokenizer reuses its end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

# Load the model with the quantization config above, let the loader choose
# device placement, and put it in evaluation mode (eval() returns the module).
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
).eval()

# Define request and response models
# Request body accepted by POST /analyze_security_logs.
class SecurityLogRequest(BaseModel):
    # Raw security log text; it is inserted as-is into the user prompt sent to the model.
    log_data: str

# Response body returned by POST /analyze_security_logs.
class SecurityAnalysisResponse(BaseModel):
    # Text produced by the model for the submitted logs.
    analysis: str

# Inference function
def generate_response(input_text: str) -> str:
    """Generate a security analysis of ``input_text`` with the loaded model.

    The log text is wrapped in a fixed system/user chat prompt, rendered with
    the tokenizer's chat template and passed to ``model.generate``. Tokens are
    also echoed to stdout through a ``TextStreamer`` while they are produced.

    Args:
        input_text: Raw security log text to analyze.

    Returns:
        The newly generated text only, decoded with special tokens removed.
        The prompt (system and user messages) is not part of the result.
    """
    streamer = TextStreamer(tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True)

    messages = [
        {"role": "system", "content": "You are an information security AI assistant specialized in analyzing security logs. Identify potential threats, suspicious IP addresses, unauthorized access attempts, and recommend actions based on the logs."},
        {"role": "user", "content": f"Please analyze the following security logs and provide insights on any potential malicious activity:\n{input_text}"}
    ]

    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)

    outputs = model.generate(
        input_ids,
        # A single, unpadded prompt: every position is a real token. Passing
        # the mask explicitly matters because the pad token equals the EOS
        # token, so generate() cannot infer the mask from the ids.
        attention_mask=torch.ones_like(input_ids),
        pad_token_id=tokenizer.pad_token_id,
        streamer=streamer,
        max_new_tokens=512,  # Limit max tokens for faster response
        num_beams=1,
        do_sample=True,
        temperature=0.1,
        top_p=0.95,
        top_k=10
    )

    # For a causal LM, generate() returns the prompt ids followed by the new
    # ids. Drop the prompt so callers get only the analysis, not an echo of
    # the system/user messages.
    prompt_length = input_ids.shape[-1]
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)

# Define the route for security log analysis
# Guards model inference within this process. The handler below runs inference
# on a worker thread instead of the event loop; the lock keeps generation
# strictly one-at-a-time, as it was when the call blocked the loop.
_inference_lock = threading.Lock()


def _run_inference(log_data: str) -> str:
    """Call ``generate_response`` while holding the process-wide inference lock."""
    with _inference_lock:
        return generate_response(log_data)


@app.post("/analyze_security_logs", response_model=SecurityAnalysisResponse)
async def analyze_security_logs(request: SecurityLogRequest):
    """Analyze the submitted security logs with the language model.

    Inference is blocking and can take seconds, so it is executed in the
    event loop's default thread pool; the loop stays free to accept and
    answer other requests while generation is in progress.

    Args:
        request: Request body carrying the raw log text in ``log_data``.

    Returns:
        A ``SecurityAnalysisResponse`` whose ``analysis`` is the model output.

    Raises:
        HTTPException: Status 500 with the error message if inference or
            response construction fails.
    """
    try:
        loop = asyncio.get_running_loop()
        analysis_text = await loop.run_in_executor(
            None, _run_inference, request.log_data
        )
        return SecurityAnalysisResponse(analysis=analysis_text)
    except Exception as e:
        # Chain the original exception so the traceback is preserved in logs.
        raise HTTPException(status_code=500, detail=str(e)) from e

# Run the FastAPI app using uvicorn
if __name__ == "__main__":
    # With reload enabled uvicorn runs a single server process and ignores
    # (with a warning) any `workers` value, so that argument is omitted rather
    # than kept as a misleading no-op. For a multi-process deployment, drop
    # reload=True and pass workers=N instead; note that the model is loaded at
    # import time, so every worker process would load its own copy.
    # NOTE(review): "app:app" assumes this file is importable as module `app`
    # (i.e. saved as app.py) — confirm.
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)