saifeddinemk committed
Commit efadfea
1 Parent(s): 9e9c793

Fixed app v2

Files changed (1): app.py +25 -14
app.py CHANGED
@@ -1,18 +1,23 @@
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from llama_cpp import Llama
+from functools import lru_cache
+import asyncio
+import uvicorn
 
 # Initialize FastAPI app
 app = FastAPI()
 
-# Load the Llama model
-try:
-    llm = Llama.from_pretrained(
-        repo_id="QuantFactory/SecurityLLM-GGUF",
-        filename="SecurityLLM.Q8_0.gguf",
-    )
-except Exception as e:
-    raise RuntimeError(f"Failed to load model: {e}")
+# Lazily load the Llama model; lru_cache keeps one instance per worker process
+@lru_cache(maxsize=1)
+def load_model():
+    try:
+        return Llama.from_pretrained(
+            repo_id="QuantFactory/SecurityLLM-GGUF",
+            filename="SecurityLLM.Q8_0.gguf",
+        )
+    except Exception as e:
+        raise RuntimeError(f"Failed to load model: {e}")
 
 # Define request model for log data
 class LogRequest(BaseModel):
@@ -25,6 +30,7 @@ class AnalysisResponse(BaseModel):
 # Define the route for security log analysis
 @app.post("/analyze_security_logs", response_model=AnalysisResponse)
 async def analyze_security_logs(request: LogRequest):
+    llm = load_model()
     try:
         # Security-focused prompt
         prompt = (
@@ -33,21 +39,26 @@ async def analyze_security_logs(request: LogRequest):
             "Provide details on potential threats, IPs involved, and suggest actions if any threats are detected.\n\n"
             f"{request.log_data}"
         )
-
-        # Generate response from the model
-        response = llm.create_chat_completion(
+
+        # Generate the response in a worker thread so the event loop stays free
+        response = await asyncio.to_thread(
+            llm.create_chat_completion,
             messages=[
                 {
                     "role": "user",
                     "content": prompt
                 }
-            ]
+            ],
+            max_tokens=512  # Limit the response length
         )
-
+
         # Extract and return the analysis text
         analysis_text = response["choices"][0]["message"]["content"]
         return AnalysisResponse(analysis=analysis_text)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
-# To run the app, use: uvicorn app:app --reload
+# Run the app with uvicorn; uvicorn ignores "workers" when reload is enabled,
+# so reload is omitted here (run "uvicorn app:app --reload" during development)
+if __name__ == "__main__":
+    uvicorn.run("app:app", host="0.0.0.0", port=8000, workers=4)
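
For reference, a minimal client sketch for the updated endpoint (not part of the commit): it assumes the server is reachable at localhost:8000 as configured in the uvicorn.run call above, uses the third-party requests package, and sends an invented SSH log line through the log_data field that the handler reads.

import requests

# Hypothetical example request; 203.0.113.7 is a documentation-range IP
payload = {"log_data": "Failed password for root from 203.0.113.7 port 22 ssh2"}

resp = requests.post(
    "http://localhost:8000/analyze_security_logs",  # assumes a local deployment
    json=payload,
    timeout=120,  # local model inference can take a while
)
resp.raise_for_status()
print(resp.json()["analysis"])  # the AnalysisResponse.analysis field

Because create_chat_completion now runs through asyncio.to_thread, a long generation no longer blocks the event loop, and lru_cache(maxsize=1) means each worker process pays the model-load cost only once, on its first request.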