Spaces (status: Sleeping)
jerukperas committed
Commit 01a9f90 • 1 Parent(s): 39f9c9e
wip
Browse files
- Dockerfile +40 -0
- app.py +24 -0
Dockerfile ADDED
@@ -0,0 +1,40 @@
+FROM python:3.10
+
+RUN apt-get update && \
+    apt-get install -y \
+    build-essential \
+    libssl-dev \
+    libffi-dev \
+    libbz2-dev \
+    liblzma-dev \
+    zlib1g-dev \
+    libsqlite3-dev \
+    wget \
+    curl \
+    git \
+    cmake \
+    libblis64-4 \
+    libblis64-4-openmp \
+    libblis64-4-pthread \
+    libblis64-4-serial \
+    libblis64-dev \
+    libblis64-openmp-dev \
+    libblis64-pthread-dev \
+    libblis64-serial-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN useradd -m -u 1000 user
+USER user
+ENV PATH="/home/user/.local/bin:$PATH"
+ENV GOMP_CPU_AFFINITY="0-19"
+ENV BLIS_NUM_THREADS=14
+
+WORKDIR /app
+
+RUN pip install --no-cache-dir --upgrade pip
+RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=FLAME" \
+    pip install llama-cpp-python
+
+COPY --chown=user ./app.py /app/app.py
+CMD ["python", "app.py"]
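To try the image outside Spaces, it can be built and run locally. A minimal sketch, assuming Docker is installed; the image tag "gemma-space" is arbitrary, and port 7860 matches the Gradio server configured in app.py below:

docker build -t gemma-space .
docker run -p 7860:7860 gemma-space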
app.py ADDED
@@ -0,0 +1,24 @@
+import gradio as gr
+from llama_cpp import Llama
+
+print("Downloading model")
+llm = Llama.from_pretrained(
+    repo_id="bartowski/gemma-2-2b-it-abliterated-GGUF",
+    filename="gemma-2-2b-it-abliterated-IQ4_XS.gguf",
+    numa=True,
+    n_ctx=4096,
+)
+
+
+def respond(prompt: str):
+    stream = llm.create_chat_completion(stream=True, messages=[{"role": "user", "content": prompt}])
+
+    response = ""
+    for chunk in stream:
+        if "content" in chunk["choices"][0]["delta"]:
+            response += chunk["choices"][0]["delta"]["content"]
+        yield response
+
+
+demo = gr.Interface(fn=respond, inputs=[gr.TextArea("What is the capital of France?")], outputs=[gr.TextArea()])
+demo.launch(server_name="0.0.0.0", server_port=7860)
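Because respond is a generator that yields the cumulative response after each streamed chunk, it can also be exercised without the Gradio UI. A minimal sketch, assuming the model download above has completed; the prompt and the suffix-printing loop are illustrative, not part of the commit:

# Hypothetical smoke test: print only the newly streamed suffix of each yield.
previous = ""
for partial in respond("What is the capital of France?"):
    print(partial[len(previous):], end="", flush=True)
    previous = partial
print()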