robinroy03 committed on
Commit
1d2e069
·
1 Parent(s): dce77ea
Files changed (4) hide show
  1. Dockerfile +5 -35
  2. app.py +0 -10
  3. entrypoint.sh +0 -21
  4. ollama-api-demo.ipynb +0 -220
Dockerfile CHANGED
@@ -1,39 +1,9 @@
1
- # Builder stage
2
- FROM ubuntu:latest
3
 
4
- # Update packages and install curl and gnupg
5
- RUN apt-get update && apt-get install -y \
6
- curl \
7
- gnupg
8
-
9
- # Add NVIDIA package repositories
10
- RUN curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
11
- && echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/deb/ $(. /etc/os-release; echo $UBUNTU_CODENAME) main" > /etc/apt/sources.list.d/nvidia-container-toolkit.list
12
-
13
- # Install NVIDIA container toolkit (Check for any updated methods or URLs for Ubuntu jammy)
14
- RUN apt-get update && apt-get install -y nvidia-container-toolkit || true
15
-
16
- # Install application
17
- RUN curl https://ollama.ai/install.sh | sh
18
- # Below is to fix embedding bug as per
19
- # RUN curl -fsSL https://ollama.com/install.sh | sed 's#https://ollama.com/download#https://github.com/jmorganca/ollama/releases/download/v0.1.29#' | sh
20
-
21
-
22
- # Create the directory and give appropriate permissions
23
  RUN mkdir -p /.ollama && chmod 777 /.ollama
24
 
25
- WORKDIR /.ollama
26
-
27
- # Copy the entry point script
28
- COPY entrypoint.sh /entrypoint.sh
29
- RUN chmod +x /entrypoint.sh
30
-
31
- # Set the entry point script as the default command
32
- ENTRYPOINT ["/entrypoint.sh"]
33
- CMD ["ollama", "serve"]
34
-
35
- # Set the model as an environment variable (this can be overridden)
36
- ENV model=${model}
37
 
38
- # Expose the server port
39
- EXPOSE 7860
 
1
+ FROM ollama/ollama
 
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  RUN mkdir -p /.ollama && chmod 777 /.ollama
4
 
5
+ ENV OLLAMA_MAX_LOADED_MODELS=20 OLLAMA_NUM_PARALLEL=20
6
+ ENV OLLAMA_HOST=0.0.0.0:7860
 
 
 
 
 
 
 
 
 
 
7
 
8
+ CMD ["serve"]
9
+ EXPOSE 7860
app.py DELETED
@@ -1,10 +0,0 @@
1
- from langchain_community.llms import Ollama
2
- import gradio as gr
3
-
4
- def generate(user_input):
5
- llm = Ollama(model="openchat")
6
- response = llm.invoke(user_input)
7
- return response
8
-
9
- iface = gr.Interface(fn=generate, inputs="text", outputs="text", title="Ollama Chat", description="Chat with Ollama by entering your message.")
10
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
entrypoint.sh DELETED
@@ -1,21 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Starting server
4
- echo "Starting server"
5
- ollama serve &
6
- sleep 1
7
-
8
- # Splitting the models by comma and pulling each
9
- IFS=',' read -ra MODELS <<< "$model"
10
- for m in "${MODELS[@]}"; do
11
- echo "Pulling $m"
12
- ollama pull "$m"
13
- sleep 5
14
- # echo "Running $m"
15
- # ollama run "$m"
16
- # No need to sleep here unless you want to give some delay between each pull for some reason
17
- python3 app.py
18
- done
19
-
20
- # Keep the script running to prevent the container from exiting
21
- wait
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ollama-api-demo.ipynb DELETED
@@ -1,220 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "attachments": {},
5
- "cell_type": "markdown",
6
- "metadata": {},
7
- "source": [
8
- "### Dependencies"
9
- ]
10
- },
11
- {
12
- "cell_type": "code",
13
- "execution_count": null,
14
- "metadata": {},
15
- "outputs": [],
16
- "source": [
17
- "%pip install openai --upgrade"
18
- ]
19
- },
20
- {
21
- "attachments": {},
22
- "cell_type": "markdown",
23
- "metadata": {},
24
- "source": [
25
- "## API Response"
26
- ]
27
- },
28
- {
29
- "cell_type": "code",
30
- "execution_count": 68,
31
- "metadata": {},
32
- "outputs": [
33
- {
34
- "name": "stdout",
35
- "output_type": "stream",
36
- "text": [
37
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.839736985Z\",\"response\":\"```\",\"done\":false}\n",
38
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.859007873Z\",\"response\":\"\\n\",\"done\":false}\n",
39
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.878431213Z\",\"response\":\"def\",\"done\":false}\n",
40
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.897784641Z\",\"response\":\" add\",\"done\":false}\n",
41
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.91718876Z\",\"response\":\"(\",\"done\":false}\n",
42
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.936866527Z\",\"response\":\"a\",\"done\":false}\n",
43
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.95776024Z\",\"response\":\",\",\"done\":false}\n",
44
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.979133947Z\",\"response\":\" b\",\"done\":false}\n",
45
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.000494731Z\",\"response\":\"):\",\"done\":false}\n",
46
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.021318934Z\",\"response\":\"\\n\",\"done\":false}\n",
47
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.041779731Z\",\"response\":\" \",\"done\":false}\n",
48
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.062190588Z\",\"response\":\" return\",\"done\":false}\n",
49
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.082505875Z\",\"response\":\" a\",\"done\":false}\n",
50
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.102662719Z\",\"response\":\" +\",\"done\":false}\n",
51
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.122760355Z\",\"response\":\" b\",\"done\":false}\n",
52
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.142907745Z\",\"response\":\"\\n\",\"done\":false}\n",
53
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.163285108Z\",\"response\":\"```\",\"done\":false}\n",
54
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.18370624Z\",\"response\":\"\\n\",\"done\":false}\n",
55
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.203963933Z\",\"response\":\"Example\",\"done\":false}\n",
56
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.224025854Z\",\"response\":\" usage\",\"done\":false}\n",
57
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.244386112Z\",\"response\":\":\",\"done\":false}\n",
58
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.264846213Z\",\"response\":\"\\n\",\"done\":false}\n",
59
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.285448321Z\",\"response\":\"```\",\"done\":false}\n",
60
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.305657169Z\",\"response\":\"\\n\",\"done\":false}\n",
61
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.325782131Z\",\"response\":\"print\",\"done\":false}\n",
62
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.346353022Z\",\"response\":\"(\",\"done\":false}\n",
63
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.366430166Z\",\"response\":\"add\",\"done\":false}\n",
64
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.386881006Z\",\"response\":\"(\",\"done\":false}\n",
65
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.406680624Z\",\"response\":\"3\",\"done\":false}\n",
66
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.426827031Z\",\"response\":\",\",\"done\":false}\n",
67
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.447157302Z\",\"response\":\" \",\"done\":false}\n",
68
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.467234406Z\",\"response\":\"5\",\"done\":false}\n",
69
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.487442969Z\",\"response\":\"))\",\"done\":false}\n",
70
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.50753674Z\",\"response\":\" #\",\"done\":false}\n",
71
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.527739408Z\",\"response\":\" Output\",\"done\":false}\n",
72
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.54789446Z\",\"response\":\":\",\"done\":false}\n",
73
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.568672362Z\",\"response\":\" \",\"done\":false}\n",
74
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.591076535Z\",\"response\":\"8\",\"done\":false}\n",
75
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.614757129Z\",\"response\":\"\\n\",\"done\":false}\n",
76
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.637841098Z\",\"response\":\"```\",\"done\":false}\n",
77
- "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.660407109Z\",\"response\":\"\",\"done\":true,\"context\":[518,25580,29962,3532,14816,29903,29958,5299,829,14816,29903,6778,13,13,6113,5132,775,304,788,29871,29906,3694,518,29914,25580,29962,13,28956,13,1753,788,29898,29874,29892,289,1125,13,1678,736,263,718,289,13,28956,13,14023,8744,29901,13,28956,13,2158,29898,1202,29898,29941,29892,29871,29945,876,396,10604,29901,29871,29947,13,28956],\"total_duration\":10037918982,\"load_duration\":9097178085,\"prompt_eval_count\":28,\"prompt_eval_duration\":119308000,\"eval_count\":41,\"eval_duration\":820449000}\n"
78
- ]
79
- }
80
- ],
81
- "source": [
82
- "!curl https://thewise-ollama-server.hf.space/api/generate -d '''{\"model\": \"codellama\",\"prompt\":\"Write Python code to add 2 numbers\"}'''"
83
- ]
84
- },
85
- {
86
- "attachments": {},
87
- "cell_type": "markdown",
88
- "metadata": {},
89
- "source": [
90
- "## Langchain Demo"
91
- ]
92
- },
93
- {
94
- "cell_type": "code",
95
- "execution_count": 69,
96
- "metadata": {},
97
- "outputs": [],
98
- "source": [
99
- "from langchain.llms import Ollama\n",
100
- "from langchain.callbacks.manager import CallbackManager\n",
101
- "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler"
102
- ]
103
- },
104
- {
105
- "attachments": {},
106
- "cell_type": "markdown",
107
- "metadata": {},
108
- "source": [
109
- "##### CODELLAMA"
110
- ]
111
- },
112
- {
113
- "cell_type": "code",
114
- "execution_count": 70,
115
- "metadata": {},
116
- "outputs": [
117
- {
118
- "name": "stdout",
119
- "output_type": "stream",
120
- "text": [
121
- "```\n",
122
- "def add(a, b):\n",
123
- " return a + b\n",
124
- "```\n",
125
- "This function takes two arguments `a` and `b`, adds them together, and returns the result. You can call this function by passing in two numbers, like this:\n",
126
- "```\n",
127
- "print(add(3, 5)) # prints 8\n",
128
- "```"
129
- ]
130
- },
131
- {
132
- "data": {
133
- "text/plain": [
134
- "'```\\ndef add(a, b):\\n return a + b\\n```\\nThis function takes two arguments `a` and `b`, adds them together, and returns the result. You can call this function by passing in two numbers, like this:\\n```\\nprint(add(3, 5)) # prints 8\\n```'"
135
- ]
136
- },
137
- "execution_count": 70,
138
- "metadata": {},
139
- "output_type": "execute_result"
140
- }
141
- ],
142
- "source": [
143
- "llm = Ollama(\n",
144
- " model=\"codellama\",\n",
145
- " base_url=\"https://thewise-ollama-server.hf.space\",\n",
146
- " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))\n",
147
- "\n",
148
- "llm('Write Python code to add 2 numbers')"
149
- ]
150
- },
151
- {
152
- "attachments": {},
153
- "cell_type": "markdown",
154
- "metadata": {},
155
- "source": [
156
- "##### LLAMA2"
157
- ]
158
- },
159
- {
160
- "cell_type": "code",
161
- "execution_count": 71,
162
- "metadata": {},
163
- "outputs": [
164
- {
165
- "name": "stdout",
166
- "output_type": "stream",
167
- "text": [
168
- "```\n",
169
- "# Adding two numbers\n",
170
- "a = 5\n",
171
- "b = 3\n",
172
- "result = a + b\n",
173
- "print(result) # Output: 8\n",
174
- "```"
175
- ]
176
- },
177
- {
178
- "data": {
179
- "text/plain": [
180
- "'```\\n# Adding two numbers\\na = 5\\nb = 3\\nresult = a + b\\nprint(result) # Output: 8\\n```'"
181
- ]
182
- },
183
- "execution_count": 71,
184
- "metadata": {},
185
- "output_type": "execute_result"
186
- }
187
- ],
188
- "source": [
189
- "llm = Ollama(\n",
190
- " model=\"llama2\",\n",
191
- " base_url=\"https://thewise-ollama-server.hf.space\",\n",
192
- " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))\n",
193
- "\n",
194
- "llm('Write Python code to add 2 numbers')"
195
- ]
196
- }
197
- ],
198
- "metadata": {
199
- "kernelspec": {
200
- "display_name": "langchain",
201
- "language": "python",
202
- "name": "python3"
203
- },
204
- "language_info": {
205
- "codemirror_mode": {
206
- "name": "ipython",
207
- "version": 3
208
- },
209
- "file_extension": ".py",
210
- "mimetype": "text/x-python",
211
- "name": "python",
212
- "nbconvert_exporter": "python",
213
- "pygments_lexer": "ipython3",
214
- "version": "3.11.4"
215
- },
216
- "orig_nbformat": 4
217
- },
218
- "nbformat": 4,
219
- "nbformat_minor": 2
220
- }