Spaces:

saifeddinemk
/

mlai

Sleeping

App Files Files Community

saifeddinemk committed 6 days ago

Commit

9b9a132

•

1 Parent(s): 6c70ef6

Fixed app v2

Browse files

Files changed (1) hide show

app.py +62 -51

app.py CHANGED Viewed

@@ -1,68 +1,79 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
-from llama_cpp import Llama
-from functools import lru_cache
-import asyncio
 import uvicorn
 # Initialize FastAPI app
 app = FastAPI()
-# Lazy load the Llama model with float16 precision
-@lru_cache(maxsize=1)
-def load_model():
-    try:
-        return Llama.from_pretrained(
-            repo_id="QuantFactory/SecurityLLM-GGUF",
-            filename="SecurityLLM.Q8_0.gguf",
-            torch_dtype="float16"  # Specify FP16 precision
-        )
-    except Exception as e:
-        raise RuntimeError(f"Failed to load model: {e}")
-# Define request model for log data
-class LogRequest(BaseModel):
     log_data: str
-# Define response model
-class AnalysisResponse(BaseModel):
     analysis: str
 # Define the route for security log analysis
-@app.post("/analyze_security_logs", response_model=AnalysisResponse)
-async def analyze_security_logs(request: LogRequest):
-    llm = load_model()
     try:
-        # Security-focused prompt
-        prompt = (
-            "You are an advanced cybersecurity analysis assistant. Carefully analyze the following network log data for any indicators of malicious or suspicious activity. "
-            "Specifically, look for patterns or unusual events that might suggest unauthorized access, data exfiltration, suspicious IP addresses, frequent access attempts, "
-            "or other anomalies. Provide a detailed analysis that includes:\n\n"
-            "1. A list of any suspicious IP addresses with explanations of why they are flagged as such.\n"
-            "2. Any patterns or sequences in the logs that could indicate an ongoing attack or probing activity.\n"
-            "3. Identified unauthorized access attempts, with details on the methods or vulnerabilities being exploited, if detectable.\n"
-            "4. Recommendations on immediate actions or mitigations the system administrator should take to address any identified threats.\n"
-            "5. An assessment of the overall security posture based on the log data, including any potential weaknesses or areas for improvement.\n\n"
-            "Log Data:\n"
-            f"{request.log_data}\n\n"
-            "Please provide a comprehensive response addressing all points in detail."
-        )
-        # Generate response with controlled max tokens
-        response = await asyncio.to_thread(
-            llm.create_chat_completion,
-            messages=[
-                {
-                    "role": "user",
-                    "content": prompt
-                }
-            ],
-            max_tokens=1024  # Adjust to limit the response length
-        )
-        # Extract and return the analysis text
-        analysis_text = response["choices"][0]["message"]["content"]
-        return AnalysisResponse(analysis=analysis_text)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))

 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
 import uvicorn
 # Initialize FastAPI app
 app = FastAPI()
+# Configure and load the quantized model
+model_id = 'model_result'
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_use_double_quant=True,
+)
+# Load tokenizer and model with 4-bit quantization settings
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+tokenizer.pad_token = tokenizer.eos_token
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    quantization_config=bnb_config,
+    device_map="auto",
+)
+model.eval()
+# Define request and response models
+class SecurityLogRequest(BaseModel):
     log_data: str
+class SecurityAnalysisResponse(BaseModel):
     analysis: str
+# Inference function
+def generate_response(input_text: str) -> str:
+    """Generate a security analysis of ``input_text`` with the chat model.
+
+    Wraps the raw log text in a system + user chat prompt, samples up to
+    512 new tokens from the module-level ``model``, and returns only the
+    newly generated reply.
+
+    Args:
+        input_text: Raw security log text to analyze.
+
+    Returns:
+        The decoded model reply with special tokens removed. The prompt
+        (system message and submitted logs) is not included.
+    """
+    # TextStreamer echoes generated tokens to stdout while generation runs;
+    # skip_prompt=True keeps the prompt out of that stream.
+    streamer = TextStreamer(tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True)
+    messages = [
+        {"role": "system", "content": "You are an information security AI assistant specialized in analyzing security logs. Identify potential threats, suspicious IP addresses, unauthorized access attempts, and recommend actions based on the logs."},
+        {"role": "user", "content": f"Please analyze the following security logs and provide insights on any potential malicious activity:\n{input_text}"}
+    ]
+    input_ids = tokenizer.apply_chat_template(
+        messages,
+        tokenize=True,
+        add_generation_prompt=True,
+        return_tensors="pt",
+    ).to(model.device)
+    # Generate response with the model
+    outputs = model.generate(
+        input_ids,
+        streamer=streamer,
+        max_new_tokens=512,  # Limit max tokens for faster response
+        num_beams=1,
+        do_sample=True,
+        temperature=0.1,
+        top_p=0.95,
+        top_k=10
+    )
+    # For a causal LM, generate() returns the prompt tokens followed by the
+    # new tokens. Drop the prompt so the caller gets only the analysis
+    # instead of the system prompt and logs echoed back.
+    prompt_length = input_ids.shape[-1]
+    response_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
+    return response_text
 # Define the route for security log analysis
+@app.post("/analyze_security_logs", response_model=SecurityAnalysisResponse)
+async def analyze_security_logs(request: SecurityLogRequest):
+    """Analyze the submitted security logs and return the model's findings.
+
+    Args:
+        request: Request body carrying the raw log text in ``log_data``.
+
+    Returns:
+        A ``SecurityAnalysisResponse`` whose ``analysis`` field holds the
+        generated text.
+
+    Raises:
+        HTTPException: Status 500 with the exception message as detail if
+            inference fails.
+    """
+    # Function-scope import: this revision of the file no longer imports
+    # asyncio at module level.
+    import asyncio
     try:
+        # generate_response blocks for the whole generation; run it in a
+        # worker thread so the event loop can keep serving other requests.
+        analysis_text = await asyncio.to_thread(generate_response, request.log_data)
+        return SecurityAnalysisResponse(analysis=analysis_text)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))