Commit
·
5535b95
1
Parent(s):
1e042d7
install deps in /tmp
Browse files
- Dockerfile +4 -2
- src/index.mts +1 -1
Dockerfile
CHANGED
@@ -48,10 +48,12 @@ RUN pnpm --version
|
|
48 |
|
49 |
RUN pnpm install
|
50 |
|
|
|
|
|
51 |
# ok! let's try to compile llama-node
|
52 |
RUN git clone https://github.com/Atome-FE/llama-node.git
|
53 |
|
54 |
-
WORKDIR
|
55 |
|
56 |
RUN git submodule update --init --recursive
|
57 |
|
@@ -62,7 +64,7 @@ RUN pnpm build:llama-cpp
|
|
62 |
|
63 |
RUN pnpm:build
|
64 |
|
65 |
-
WORKDIR
|
66 |
|
67 |
RUN pnpm build:cuda
|
68 |
|
|
|
48 |
|
49 |
RUN pnpm install
|
50 |
|
51 |
+
WORKDIR /tmp
|
52 |
+
|
53 |
# ok! let's try to compile llama-node
|
54 |
RUN git clone https://github.com/Atome-FE/llama-node.git
|
55 |
|
56 |
+
WORKDIR /tmp/llama-node
|
57 |
|
58 |
RUN git submodule update --init --recursive
|
59 |
|
|
|
64 |
|
65 |
RUN pnpm:build
|
66 |
|
67 |
+
WORKDIR /tmp/llama-node/packages/llama-cpp
|
68 |
|
69 |
RUN pnpm build:cuda
|
70 |
|
src/index.mts
CHANGED
@@ -93,7 +93,7 @@ app.get("/app", async (req, res) => {
|
|
93 |
|
94 |
// naive implementation: we say we are out of capacity
|
95 |
if (pending.queue.length >= maxParallelRequests) {
|
96 |
-
res.write('
|
97 |
res.end()
|
98 |
return
|
99 |
}
|
|
|
93 |
|
94 |
// naive implementation: we say we are out of capacity
|
95 |
if (pending.queue.length >= maxParallelRequests) {
|
96 |
+
res.write('Sorry, max nb of parallel requests reached. A new slot should be available in < 5 min.')
|
97 |
res.end()
|
98 |
return
|
99 |
}
|