Spaces:
Sleeping
Sleeping
saifeddinemk
committed on
Commit
•
efadfea
1
Parent(s):
9e9c793
Fixed app v2
Browse files
app.py
CHANGED
@@ -1,18 +1,24 @@
|
|
1 |
from fastapi import FastAPI, HTTPException
|
2 |
from pydantic import BaseModel
|
3 |
from llama_cpp import Llama
|
|
|
|
|
|
|
4 |
|
5 |
# Initialize FastAPI app
|
6 |
app = FastAPI()
|
7 |
|
8 |
-
#
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
16 |
|
17 |
# Define request model for log data
|
18 |
class LogRequest(BaseModel):
|
@@ -25,6 +31,7 @@ class AnalysisResponse(BaseModel):
|
|
25 |
# Define the route for security log analysis
|
26 |
@app.post("/analyze_security_logs", response_model=AnalysisResponse)
|
27 |
async def analyze_security_logs(request: LogRequest):
|
|
|
28 |
try:
|
29 |
# Security-focused prompt
|
30 |
prompt = (
|
@@ -33,21 +40,25 @@ async def analyze_security_logs(request: LogRequest):
|
|
33 |
"Provide details on potential threats, IPs involved, and suggest actions if any threats are detected.\n\n"
|
34 |
f"{request.log_data}"
|
35 |
)
|
36 |
-
|
37 |
-
# Generate response
|
38 |
-
response =
|
|
|
39 |
messages=[
|
40 |
{
|
41 |
"role": "user",
|
42 |
"content": prompt
|
43 |
}
|
44 |
-
]
|
|
|
45 |
)
|
46 |
-
|
47 |
# Extract and return the analysis text
|
48 |
analysis_text = response["choices"][0]["message"]["content"]
|
49 |
return AnalysisResponse(analysis=analysis_text)
|
50 |
except Exception as e:
|
51 |
raise HTTPException(status_code=500, detail=str(e))
|
52 |
|
53 |
-
#
|
|
|
|
|
|
1 |
from fastapi import FastAPI, HTTPException
|
2 |
from pydantic import BaseModel
|
3 |
from llama_cpp import Llama
|
4 |
+
from functools import lru_cache
|
5 |
+
import asyncio
|
6 |
+
import uvicorn
|
7 |
|
8 |
# Initialize FastAPI app
|
9 |
app = FastAPI()
|
10 |
|
11 |
+
# Lazily load the GGUF model once and reuse the same instance afterwards
@lru_cache(maxsize=1)
def load_model():
    """Load the SecurityLLM GGUF model via ``Llama.from_pretrained``.

    The call is memoized with ``lru_cache(maxsize=1)``: the model is built
    on the first call and the same object is returned on later calls.
    ``lru_cache`` does not store raised exceptions, so a failed load is
    retried on the next call.

    Returns:
        The object returned by ``Llama.from_pretrained`` for the
        ``SecurityLLM.Q8_0.gguf`` file of ``QuantFactory/SecurityLLM-GGUF``.

    Raises:
        RuntimeError: If loading fails for any reason; the original
            exception is chained as ``__cause__``.
    """
    try:
        # No torch_dtype argument: that is a Hugging Face transformers
        # option, not a llama_cpp one. The numeric format here is fixed by
        # the quantized file itself (Q8_0).
        return Llama.from_pretrained(
            repo_id="QuantFactory/SecurityLLM-GGUF",
            filename="SecurityLLM.Q8_0.gguf",
        )
    except Exception as e:
        # Chain the cause so the underlying traceback is preserved.
        raise RuntimeError(f"Failed to load model: {e}") from e
|
22 |
|
23 |
# Define request model for log data
|
24 |
class LogRequest(BaseModel):
|
|
|
31 |
# Define the route for security log analysis
|
32 |
@app.post("/analyze_security_logs", response_model=AnalysisResponse)
|
33 |
async def analyze_security_logs(request: LogRequest):
|
34 |
+
llm = load_model()
|
35 |
try:
|
36 |
# Security-focused prompt
|
37 |
prompt = (
|
|
|
40 |
"Provide details on potential threats, IPs involved, and suggest actions if any threats are detected.\n\n"
|
41 |
f"{request.log_data}"
|
42 |
)
|
43 |
+
|
44 |
+
# Generate response with controlled max tokens
|
45 |
+
response = await asyncio.to_thread(
|
46 |
+
llm.create_chat_completion,
|
47 |
messages=[
|
48 |
{
|
49 |
"role": "user",
|
50 |
"content": prompt
|
51 |
}
|
52 |
+
],
|
53 |
+
max_tokens=512 # Adjust to limit the response length
|
54 |
)
|
55 |
+
|
56 |
# Extract and return the analysis text
|
57 |
analysis_text = response["choices"][0]["message"]["content"]
|
58 |
return AnalysisResponse(analysis=analysis_text)
|
59 |
except Exception as e:
|
60 |
raise HTTPException(status_code=500, detail=str(e))
|
61 |
|
62 |
+
# Run the FastAPI app using uvicorn
|
63 |
+
if __name__ == "__main__":
    # uvicorn ignores `workers` whenever `reload` is enabled (it only logs a
    # warning), so the previous workers=4 had no effect and is dropped here.
    # NOTE(review): `reload=True` is a development convenience; turn it off
    # for a deployed instance. Be aware that each extra worker process would
    # load its own copy of the model.
    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
|