zhengr committed on
Commit
d6611b3
·
verified ·
1 Parent(s): a077c9d

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +67 -0
  2. README.md +3 -3
  3. app.py +1 -0
  4. entrypoint.sh +20 -0
  5. ollama-api-demo.ipynb +220 -0
Dockerfile ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Builder stage
2
+ # FROM ubuntu:latest
3
+
4
+ # # Update packages and install curl and gnupg
5
+ # RUN apt-get update && apt-get install -y \
6
+ # curl \
7
+ # gnupg
8
+
9
+ # # Add NVIDIA package repositories
10
+ # RUN curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
11
+ # && echo "deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://nvidia.github.io/libnvidia-container/stable/deb/ $(. /etc/os-release; echo $UBUNTU_CODENAME) main" > /etc/apt/sources.list.d/nvidia-container-toolkit.list
12
+
13
+ # # Install NVIDIA container toolkit (Check for any updated methods or URLs for Ubuntu jammy)
14
+ # RUN apt-get update && apt-get install -y nvidia-container-toolkit || true
15
+
16
+ # # Install application
17
+ # RUN curl https://ollama.ai/install.sh | sh
18
+ # # Below is to fix embedding bug as per
19
+ # # RUN curl -fsSL https://ollama.com/install.sh | sed 's#https://ollama.com/download#https://github.com/jmorganca/ollama/releases/download/v0.1.29#' | sh
20
+
21
+
22
+ # # Create the directory and give appropriate permissions
23
+ # RUN mkdir -p /.ollama && chmod 777 /.ollama
24
+
25
+ # WORKDIR /.ollama
26
+
27
+ # # Copy the entry point script
28
+ # COPY entrypoint.sh /entrypoint.sh
29
+ # RUN chmod +x /entrypoint.sh
30
+
31
+ # # Set the entry point script as the default command
32
+ # ENTRYPOINT ["/entrypoint.sh"]
33
+ # CMD ["ollama", "serve"]
34
+
35
+ # # Set the model as an environment variable (this can be overridden)
36
+ # ENV model=${model}
37
+
38
+ # Runtime image: the official Ollama base image plus Python and the LiteLLM proxy.
39
+ # NOTE(review): the :latest tag is unpinned; consider pinning a version for reproducible builds.
40
+ FROM ollama/ollama:latest
41
+
42
+ # apt-get has a stable scripting interface (apt does not); drop the package lists to keep the layer small.
43
+ RUN apt-get update && apt-get install -y python3 python3-pip \
44
+  && rm -rf /var/lib/apt/lists/*
45
+ # The [proxy] extra installs litellm itself as well, so one pip invocation is enough.
46
+ RUN pip install --no-cache-dir 'litellm[proxy]'
47
+
48
+ # Create a world-writable directory for Ollama data in a single layer
49
+ RUN mkdir -p /.ollama && chmod -R 777 /.ollama
50
+
51
+ WORKDIR /.ollama
52
+
53
+ # Copy the entry point script and make it executable
54
+ COPY entrypoint.sh /entrypoint.sh
55
+ RUN chmod +x /entrypoint.sh
56
+
57
+ # Run the entry point script when the container starts
58
+ ENTRYPOINT ["/entrypoint.sh"]
59
+
60
+ # Comma-separated model list; ARG lets a build argument seed it, and a runtime env var can still override it.
61
+ ARG model
62
+ ENV model=${model}
63
+ # NOTE(review): nothing in this file makes Ollama listen on 7860 - confirm OLLAMA_HOST is configured elsewhere.
64
+ EXPOSE 7860
65
+
66
+ # Passed to /entrypoint.sh as its arguments (exec-form CMD combined with ENTRYPOINT)
67
+ CMD ["serve"]
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
  title: Ollama Server
3
- emoji: 🦀
4
- colorFrom: yellow
5
- colorTo: yellow
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
 
1
  ---
2
  title: Ollama Server
3
+ emoji: 🦙⚡︎
4
+ colorFrom: blue
5
+ colorTo: gray
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
app.py ADDED
@@ -0,0 +1 @@
 
 
1
+ print("running server....")
entrypoint.sh ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Boots the Ollama server, then pulls and loads each model named in the comma-separated $model variable.
3
+ echo "Starting server"
4
+ ollama serve &
5
+ # Poll (at most 60 tries) until the API answers; a fixed `sleep 1` can lose the race with server start-up.
6
+ for _ in {1..60}; do ollama list >/dev/null 2>&1 && break; sleep 1; done
7
+
8
+ # Split $model on commas and process each entry in turn.
9
+ IFS=',' read -ra MODELS <<< "$model"
10
+ for m in "${MODELS[@]}"; do
11
+   [[ -n "$m" ]] || continue  # skip empty entries such as the gap in "a,,b"
12
+   echo "Pulling $m"
13
+   ollama pull "$m" || echo "Pull failed for $m" >&2
14
+   sleep 5
15
+   echo "Running $m"
16
+   ollama run "$m" --keepalive -1s  # negative keep-alive: ask the server to keep the model loaded
17
+ done
18
+
19
+ # Block on the background server job so the container does not exit.
20
+ wait
ollama-api-demo.ipynb ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "attachments": {},
5
+ "cell_type": "markdown",
6
+ "metadata": {},
7
+ "source": [
8
+ "### Dependencies"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": null,
14
+ "metadata": {},
15
+ "outputs": [],
16
+ "source": [
17
+ "%pip install openai --upgrade"
18
+ ]
19
+ },
20
+ {
21
+ "attachments": {},
22
+ "cell_type": "markdown",
23
+ "metadata": {},
24
+ "source": [
25
+ "## API Response"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "code",
30
+ "execution_count": 68,
31
+ "metadata": {},
32
+ "outputs": [
33
+ {
34
+ "name": "stdout",
35
+ "output_type": "stream",
36
+ "text": [
37
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.839736985Z\",\"response\":\"```\",\"done\":false}\n",
38
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.859007873Z\",\"response\":\"\\n\",\"done\":false}\n",
39
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.878431213Z\",\"response\":\"def\",\"done\":false}\n",
40
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.897784641Z\",\"response\":\" add\",\"done\":false}\n",
41
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.91718876Z\",\"response\":\"(\",\"done\":false}\n",
42
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.936866527Z\",\"response\":\"a\",\"done\":false}\n",
43
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.95776024Z\",\"response\":\",\",\"done\":false}\n",
44
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:42:59.979133947Z\",\"response\":\" b\",\"done\":false}\n",
45
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.000494731Z\",\"response\":\"):\",\"done\":false}\n",
46
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.021318934Z\",\"response\":\"\\n\",\"done\":false}\n",
47
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.041779731Z\",\"response\":\" \",\"done\":false}\n",
48
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.062190588Z\",\"response\":\" return\",\"done\":false}\n",
49
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.082505875Z\",\"response\":\" a\",\"done\":false}\n",
50
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.102662719Z\",\"response\":\" +\",\"done\":false}\n",
51
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.122760355Z\",\"response\":\" b\",\"done\":false}\n",
52
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.142907745Z\",\"response\":\"\\n\",\"done\":false}\n",
53
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.163285108Z\",\"response\":\"```\",\"done\":false}\n",
54
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.18370624Z\",\"response\":\"\\n\",\"done\":false}\n",
55
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.203963933Z\",\"response\":\"Example\",\"done\":false}\n",
56
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.224025854Z\",\"response\":\" usage\",\"done\":false}\n",
57
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.244386112Z\",\"response\":\":\",\"done\":false}\n",
58
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.264846213Z\",\"response\":\"\\n\",\"done\":false}\n",
59
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.285448321Z\",\"response\":\"```\",\"done\":false}\n",
60
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.305657169Z\",\"response\":\"\\n\",\"done\":false}\n",
61
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.325782131Z\",\"response\":\"print\",\"done\":false}\n",
62
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.346353022Z\",\"response\":\"(\",\"done\":false}\n",
63
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.366430166Z\",\"response\":\"add\",\"done\":false}\n",
64
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.386881006Z\",\"response\":\"(\",\"done\":false}\n",
65
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.406680624Z\",\"response\":\"3\",\"done\":false}\n",
66
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.426827031Z\",\"response\":\",\",\"done\":false}\n",
67
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.447157302Z\",\"response\":\" \",\"done\":false}\n",
68
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.467234406Z\",\"response\":\"5\",\"done\":false}\n",
69
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.487442969Z\",\"response\":\"))\",\"done\":false}\n",
70
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.50753674Z\",\"response\":\" #\",\"done\":false}\n",
71
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.527739408Z\",\"response\":\" Output\",\"done\":false}\n",
72
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.54789446Z\",\"response\":\":\",\"done\":false}\n",
73
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.568672362Z\",\"response\":\" \",\"done\":false}\n",
74
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.591076535Z\",\"response\":\"8\",\"done\":false}\n",
75
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.614757129Z\",\"response\":\"\\n\",\"done\":false}\n",
76
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.637841098Z\",\"response\":\"```\",\"done\":false}\n",
77
+ "{\"model\":\"codellama\",\"created_at\":\"2024-03-20T17:43:00.660407109Z\",\"response\":\"\",\"done\":true,\"context\":[518,25580,29962,3532,14816,29903,29958,5299,829,14816,29903,6778,13,13,6113,5132,775,304,788,29871,29906,3694,518,29914,25580,29962,13,28956,13,1753,788,29898,29874,29892,289,1125,13,1678,736,263,718,289,13,28956,13,14023,8744,29901,13,28956,13,2158,29898,1202,29898,29941,29892,29871,29945,876,396,10604,29901,29871,29947,13,28956],\"total_duration\":10037918982,\"load_duration\":9097178085,\"prompt_eval_count\":28,\"prompt_eval_duration\":119308000,\"eval_count\":41,\"eval_duration\":820449000}\n"
78
+ ]
79
+ }
80
+ ],
81
+ "source": [
82
+ "!curl https://thewise-ollama-server.hf.space/api/generate -d '''{\"model\": \"codellama\",\"prompt\":\"Write Python code to add 2 numbers\"}'''"
83
+ ]
84
+ },
85
+ {
86
+ "attachments": {},
87
+ "cell_type": "markdown",
88
+ "metadata": {},
89
+ "source": [
90
+ "## Langchain Demo"
91
+ ]
92
+ },
93
+ {
94
+ "cell_type": "code",
95
+ "execution_count": 69,
96
+ "metadata": {},
97
+ "outputs": [],
98
+ "source": [
99
+ "from langchain.llms import Ollama\n",
100
+ "from langchain.callbacks.manager import CallbackManager\n",
101
+ "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler"
102
+ ]
103
+ },
104
+ {
105
+ "attachments": {},
106
+ "cell_type": "markdown",
107
+ "metadata": {},
108
+ "source": [
109
+ "##### CODELLAMA"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": 70,
115
+ "metadata": {},
116
+ "outputs": [
117
+ {
118
+ "name": "stdout",
119
+ "output_type": "stream",
120
+ "text": [
121
+ "```\n",
122
+ "def add(a, b):\n",
123
+ " return a + b\n",
124
+ "```\n",
125
+ "This function takes two arguments `a` and `b`, adds them together, and returns the result. You can call this function by passing in two numbers, like this:\n",
126
+ "```\n",
127
+ "print(add(3, 5)) # prints 8\n",
128
+ "```"
129
+ ]
130
+ },
131
+ {
132
+ "data": {
133
+ "text/plain": [
134
+ "'```\\ndef add(a, b):\\n return a + b\\n```\\nThis function takes two arguments `a` and `b`, adds them together, and returns the result. You can call this function by passing in two numbers, like this:\\n```\\nprint(add(3, 5)) # prints 8\\n```'"
135
+ ]
136
+ },
137
+ "execution_count": 70,
138
+ "metadata": {},
139
+ "output_type": "execute_result"
140
+ }
141
+ ],
142
+ "source": [
143
+ "llm = Ollama(\n",
144
+ " model=\"codellama\",\n",
145
+ " base_url=\"https://thewise-ollama-server.hf.space\",\n",
146
+ " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))\n",
147
+ "\n",
148
+ "llm('Write Python code to add 2 numbers')"
149
+ ]
150
+ },
151
+ {
152
+ "attachments": {},
153
+ "cell_type": "markdown",
154
+ "metadata": {},
155
+ "source": [
156
+ "##### LLAMA2"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": 71,
162
+ "metadata": {},
163
+ "outputs": [
164
+ {
165
+ "name": "stdout",
166
+ "output_type": "stream",
167
+ "text": [
168
+ "```\n",
169
+ "# Adding two numbers\n",
170
+ "a = 5\n",
171
+ "b = 3\n",
172
+ "result = a + b\n",
173
+ "print(result) # Output: 8\n",
174
+ "```"
175
+ ]
176
+ },
177
+ {
178
+ "data": {
179
+ "text/plain": [
180
+ "'```\\n# Adding two numbers\\na = 5\\nb = 3\\nresult = a + b\\nprint(result) # Output: 8\\n```'"
181
+ ]
182
+ },
183
+ "execution_count": 71,
184
+ "metadata": {},
185
+ "output_type": "execute_result"
186
+ }
187
+ ],
188
+ "source": [
189
+ "llm = Ollama(\n",
190
+ " model=\"llama2\",\n",
191
+ " base_url=\"https://thewise-ollama-server.hf.space\",\n",
192
+ " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))\n",
193
+ "\n",
194
+ "llm('Write Python code to add 2 numbers')"
195
+ ]
196
+ }
197
+ ],
198
+ "metadata": {
199
+ "kernelspec": {
200
+ "display_name": "langchain",
201
+ "language": "python",
202
+ "name": "python3"
203
+ },
204
+ "language_info": {
205
+ "codemirror_mode": {
206
+ "name": "ipython",
207
+ "version": 3
208
+ },
209
+ "file_extension": ".py",
210
+ "mimetype": "text/x-python",
211
+ "name": "python",
212
+ "nbconvert_exporter": "python",
213
+ "pygments_lexer": "ipython3",
214
+ "version": "3.11.4"
215
+ },
216
+ "orig_nbformat": 4
217
+ },
218
+ "nbformat": 4,
219
+ "nbformat_minor": 2
220
+ }