Spaces:
Running
Running
move some tasks to small model (#479)
Browse files
.env
CHANGED
@@ -61,6 +61,7 @@ MODELS=`[
|
|
61 |
}
|
62 |
]`
|
63 |
OLD_MODELS=`[]`# any removed models, `{ name: string, displayName?: string, id?: string }`
|
|
|
64 |
|
65 |
PUBLIC_ORIGIN=#https://huggingface.co
|
66 |
PUBLIC_SHARE_PREFIX=#https://hf.co/chat
|
|
|
61 |
}
|
62 |
]`
|
63 |
OLD_MODELS=`[]`# any removed models, `{ name: string, displayName?: string, id?: string }`
|
64 |
+
TASK_MODEL='' # name of the model used for auxiliary tasks such as summarizing a title or creating a search query; if empty or not matching any model name, the default model is used
|
65 |
|
66 |
PUBLIC_ORIGIN=#https://huggingface.co
|
67 |
PUBLIC_SHARE_PREFIX=#https://hf.co/chat
|
src/lib/server/generateFromDefaultEndpoint.ts
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import {
|
2 |
import { modelEndpoint } from "./modelEndpoint";
|
3 |
import { trimSuffix } from "$lib/utils/trimSuffix";
|
4 |
import { trimPrefix } from "$lib/utils/trimPrefix";
|
@@ -16,12 +16,12 @@ export async function generateFromDefaultEndpoint(
|
|
16 |
parameters?: Partial<Parameters>
|
17 |
): Promise<string> {
|
18 |
const newParameters = {
|
19 |
-
...
|
20 |
...parameters,
|
21 |
return_full_text: false,
|
22 |
};
|
23 |
|
24 |
-
const randomEndpoint = modelEndpoint(
|
25 |
|
26 |
const abortController = new AbortController();
|
27 |
|
|
|
1 |
+
import { smallModel } from "$lib/server/models";
|
2 |
import { modelEndpoint } from "./modelEndpoint";
|
3 |
import { trimSuffix } from "$lib/utils/trimSuffix";
|
4 |
import { trimPrefix } from "$lib/utils/trimPrefix";
|
|
|
16 |
parameters?: Partial<Parameters>
|
17 |
): Promise<string> {
|
18 |
const newParameters = {
|
19 |
+
...smallModel.parameters,
|
20 |
...parameters,
|
21 |
return_full_text: false,
|
22 |
};
|
23 |
|
24 |
+
const randomEndpoint = modelEndpoint(smallModel);
|
25 |
|
26 |
const abortController = new AbortController();
|
27 |
|
src/lib/server/models.ts
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import { HF_ACCESS_TOKEN, MODELS, OLD_MODELS } from "$env/static/private";
|
2 |
import type { ChatTemplateInput, WebSearchQueryTemplateInput } from "$lib/types/Template";
|
3 |
import { compileTemplate } from "$lib/utils/template";
|
4 |
import { z } from "zod";
|
@@ -133,6 +133,8 @@ export type Endpoint = z.infer<typeof endpoint>;
|
|
133 |
|
134 |
export const defaultModel = models[0];
|
135 |
|
|
|
|
|
136 |
export const validateModel = (_models: BackendModel[]) => {
|
137 |
// Zod enum function requires 2 parameters
|
138 |
return z.enum([_models[0].id, ..._models.slice(1).map((m) => m.id)]);
|
|
|
1 |
+
import { HF_ACCESS_TOKEN, MODELS, OLD_MODELS, TASK_MODEL } from "$env/static/private";
|
2 |
import type { ChatTemplateInput, WebSearchQueryTemplateInput } from "$lib/types/Template";
|
3 |
import { compileTemplate } from "$lib/utils/template";
|
4 |
import { z } from "zod";
|
|
|
133 |
|
134 |
export const defaultModel = models[0];
|
135 |
|
136 |
+
/**
 * Model used for auxiliary generation tasks: the entry of `models` whose
 * `name` equals `TASK_MODEL`, or `defaultModel` when no entry matches
 * (including when `TASK_MODEL` is left empty).
 */
export const smallModel = models.find((m) => m.name === TASK_MODEL) || defaultModel;
|
137 |
+
|
138 |
export const validateModel = (_models: BackendModel[]) => {
|
139 |
// Zod enum function requires 2 parameters
|
140 |
return z.enum([_models[0].id, ..._models.slice(1).map((m) => m.id)]);
|
src/lib/server/summarize.ts
CHANGED
@@ -7,8 +7,18 @@ export async function summarize(prompt: string) {
|
|
7 |
|
8 |
const summaryPrompt = await buildPrompt({
|
9 |
messages: [{ from: "user", content: userPrompt }],
|
10 |
-
preprompt:
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
model: defaultModel,
|
13 |
});
|
14 |
|
|
|
7 |
|
8 |
const summaryPrompt = await buildPrompt({
|
9 |
messages: [{ from: "user", content: userPrompt }],
|
10 |
+
preprompt: `
|
11 |
+
You are a summarization AI. Your task is to summarize user requests, in a single sentence of less than 5 words. Do not try to answer questions, just summarize the user's request. Start your answer with an emoji relevant to the summary."
|
12 |
+
|
13 |
+
Example: "Who is the president of France ?"
|
14 |
+
Summary: "🇫🇷 President of France request"
|
15 |
+
|
16 |
+
Example: "What are the latest news ?"
|
17 |
+
Summary: "📰 Latest news"
|
18 |
+
|
19 |
+
Example: "Can you debug this python code?"
|
20 |
+
Summary: "🐍 Python code debugging request"
|
21 |
+
`,
|
22 |
model: defaultModel,
|
23 |
});
|
24 |
|
src/routes/conversation/[id]/+page.svelte
CHANGED
@@ -161,8 +161,6 @@
|
|
161 |
}
|
162 |
} else if (update.type === "webSearch") {
|
163 |
webSearchMessages = [...webSearchMessages, update];
|
164 |
-
} else {
|
165 |
-
console.log();
|
166 |
}
|
167 |
} catch (parseError) {
|
168 |
// in case of parsing error we wait for the next message
|
|
|
161 |
}
|
162 |
} else if (update.type === "webSearch") {
|
163 |
webSearchMessages = [...webSearchMessages, update];
|
|
|
|
|
164 |
}
|
165 |
} catch (parseError) {
|
166 |
// in case of parsing error we wait for the next message
|