Hzqhssn committed
Commit c6e91d5 · 1 Parent(s): 46736e9

initial push

Files changed (4)
  1. .gitignore +2 -0
  2. Dockerfile +40 -0
  3. app.py +28 -0
  4. requirements.txt +27 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
+ # Virtual environments
+ venv
Dockerfile ADDED
@@ -0,0 +1,40 @@
+ # Use the official Python image with a specific version
+ FROM python:3.12.5-slim
+
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ # Set the working directory in the container
+ WORKDIR /app
+
+ # Install system dependencies as root
+ USER root
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     gcc \
+     g++ \
+     cmake \
+     git \
+     && apt-get clean \
+     && rm -rf /var/lib/apt/lists/*
+
+
+ # Switch back to the non-root user
+ USER user
+
+ # Copy the requirements file into the container
+ COPY --chown=user ./requirements.txt requirements.txt
+
+ # Install dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the application code into the container
+ COPY --chown=user ./app.py /app/app.py
+ COPY --chown=user ./llama-3.2-1b-instruct-q4_k_m.gguf /app/llama-3.2-1b-instruct-q4_k_m.gguf
+
+ # Expose the application port
+ EXPOSE 7860
+
+ # Define the command to run the application
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,28 @@
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from llama_cpp import Llama
+
+ # Initialize the LLM once when the application starts
+ llm = Llama.from_pretrained(
+     repo_id="hugging-quants/Llama-3.2-1B-Instruct-Q4_K_M-GGUF",
+     filename="llama-3.2-1b-instruct-q4_k_m.gguf"
+ )
+
+ app = FastAPI()
+
+ class ChatRequest(BaseModel):
+     message: str
+
+ @app.post("/chat")
+ async def chat_completion(request: ChatRequest):
+     try:
+         response = llm.create_chat_completion(
+             messages=[
+                 {"role": "user", "content": request.message}
+             ]
+         )
+         return {
+             "response": response['choices'][0]['message']['content']
+         }
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=str(e))
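
Once the container is running, the /chat endpoint can be exercised with a small client. A minimal sketch using the requests package from requirements.txt (the localhost URL and sample message are assumptions; it presumes port 7860 is published on the same host):

    import requests

    # Send a single-turn chat message to the running service
    resp = requests.post(
        "http://localhost:7860/chat",
        json={"message": "Hello, who are you?"},
        timeout=120,
    )
    resp.raise_for_status()
    print(resp.json()["response"])
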
requirements.txt ADDED
@@ -0,0 +1,27 @@
+ annotated-types==0.7.0
+ anyio==4.6.2.post1
+ certifi==2024.8.30
+ charset-normalizer==3.4.0
+ click==8.1.7
+ diskcache==5.6.3
+ fastapi==0.115.5
+ filelock==3.16.1
+ fsspec==2024.10.0
+ h11==0.14.0
+ huggingface-hub==0.26.2
+ idna==3.10
+ Jinja2==3.1.4
+ llama_cpp_python==0.3.2
+ MarkupSafe==3.0.2
+ numpy==2.1.3
+ packaging==24.2
+ pydantic==2.9.2
+ pydantic_core==2.23.4
+ PyYAML==6.0.2
+ requests==2.32.3
+ sniffio==1.3.1
+ starlette==0.41.2
+ tqdm==4.67.0
+ typing_extensions==4.12.2
+ urllib3==2.2.3
+ uvicorn==0.32.0