<script lang="ts">
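    // Minimal in-browser playground: WebLLM loads the model and runs text
    // completions locally via WebGPU, so prompts never leave the page.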
    import Textarea from "@/lib/components/ui/textarea/textarea.svelte";
    import Badge from "@/lib/components/ui/badge/badge.svelte";
    import * as webllm from "@mlc-ai/web-llm";
    import { onMount } from 'svelte';

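    // UI state: the engine handle plus the flags and strings the template renders.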
    let engine: webllm.MLCEngineInterface;
    let isLoading = false;
    let loadingStatus = '';
    let inputText = '';
    let outputText = '';
    let error = '';
    let completionSpeed: number | null = null;
    let selectedModel = "SmolLM-360M-Instruct-q4f16_1-MLC";
    let isGenerating = false;

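    // Fetch the model and initialize the WebLLM engine, streaming download
    // progress into loadingStatus via the init callback.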
    async function loadWebLLM() {
        isLoading = true;
        error = '';
        const initProgressCallback = (report: webllm.InitProgressReport) => {
            loadingStatus = report.text;
        };

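        // Resolve the weights from the mlc-ai Hugging Face org and pair them
        // with the prebuilt WebGPU runtime; note the .wasm name is pinned to
        // this specific model and quantization.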
        const appConfig: webllm.AppConfig = {
            model_list: [{
                model: `https://huggingface.co/mlc-ai/${selectedModel}`,
                model_id: selectedModel,
                model_lib: `${webllm.modelLibURLPrefix}${webllm.modelVersion}/SmolLM-360M-Instruct-q4f16_1-ctx2k_cs1k-webgpu.wasm`,
                overrides: { context_window_size: 2048 },
            }],
        };

        try {
            engine = await webllm.CreateMLCEngine(selectedModel, {
                appConfig,
                initProgressCallback,
                logLevel: "INFO",
            });
        } catch (err) {
            error = `Failed to load the model: ${(err as Error).message}`;
        } finally {
            isLoading = false;
        }
    }

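    // Raw text completion (no chat template). echo: false returns only the
    // generated text, and the small max_tokens keeps per-keystroke calls cheap.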
    async function generateCompletion() {
        if (!engine || !inputText.trim() || isGenerating) return;

        isGenerating = true;
        const startTime = performance.now();
        try {
            const reply = await engine.completions.create({
                prompt: inputText,
                echo: false,
                max_tokens: 10,
            });

            outputText = reply.choices[0]?.text ?? "";
            completionSpeed = Math.round(performance.now() - startTime);
            error = '';
        } catch (err) {
            error = `Error: ${(err as Error).message}`;
        } finally {
            isGenerating = false;
        }
    }

    onMount(loadWebLLM);
</script>

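<!-- Prompt input, live completion output, and loading/error/latency status -->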
<div class="flex my-20 flex-col items-center gap-4 max-w-lg mx-auto">
    <h1 class="text-center font-mono font-bold text-4xl">Mini Playground</h1>
    <p class="text-center font-mono text-sm mb-4">Powered by `{selectedModel}`</p>
    <Textarea 
        bind:value={inputText} 
        on:input={() => {
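            // Fires on every keystroke; skip while a request is in flight
            // (generateCompletion repeats this guard defensively).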
            if (!isGenerating) {
                generateCompletion();
            }
        }} 
        disabled={isLoading}
        class="w-full" 
        placeholder="Enter your prompt here"
    />
    <pre class="text-lg whitespace-pre-wrap">{outputText}</pre>
    {#if isLoading}
        <p class="text-sm text-slate-600 text-center">{loadingStatus}</p>
    {:else if error}
        <p class="text-sm text-red-600">{error}</p>
    {:else}
        <div class="flex gap-2">
            {#if completionSpeed !== null}
                <Badge>{completionSpeed}ms</Badge>
            {/if}
            <Badge class="bg-green-700">{selectedModel}</Badge>
        </div>
    {/if}
</div>