martia_f committed on
Commit
df38d58
·
1 Parent(s): fe5cf35

idk what changed here

Browse files
.vscode/c_cpp_properties.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "configurations": [
3
+ {
4
+ "name": "macos-clang-x64",
5
+ "includePath": [
6
+ "${workspaceFolder}/**"
7
+ ],
8
+ "compilerPath": "/usr/bin/clang",
9
+ "cStandard": "c23",
10
+ "cppStandard": "c++20",
11
+ "intelliSenseMode": "macos-clang-x64",
12
+ "compilerArgs": [
13
+ ""
14
+ ]
15
+ }
16
+ ],
17
+ "version": 4
18
+ }
.vscode/launch.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.2.0",
3
+ "configurations": [
4
+ {
5
+ "name": "C/C++ Runner: Debug Session",
6
+ "type": "lldb",
7
+ "request": "launch",
8
+ "args": [],
9
+ "cwd": "/Users/martia/phi-offline",
10
+ "program": "/Users/martia/phi-offline/build/Debug/outDebug"
11
+ }
12
+ ]
13
+ }
.vscode/settings.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "C_Cpp_Runner.cCompilerPath": "clang",
3
+ "C_Cpp_Runner.cppCompilerPath": "clang++",
4
+ "C_Cpp_Runner.debuggerPath": "lldb",
5
+ "C_Cpp_Runner.cStandard": "c23",
6
+ "C_Cpp_Runner.cppStandard": "c++20",
7
+ "C_Cpp_Runner.msvcBatchPath": "",
8
+ "C_Cpp_Runner.useMsvc": false,
9
+ "C_Cpp_Runner.warnings": [
10
+ "-Wall",
11
+ "-Wextra",
12
+ "-Wpedantic",
13
+ "-Wshadow",
14
+ "-Wformat=2",
15
+ "-Wcast-align",
16
+ "-Wconversion",
17
+ "-Wsign-conversion",
18
+ "-Wnull-dereference"
19
+ ],
20
+ "C_Cpp_Runner.msvcWarnings": [
21
+ "/W4",
22
+ "/permissive-",
23
+ "/w14242",
24
+ "/w14287",
25
+ "/w14296",
26
+ "/w14311",
27
+ "/w14826",
28
+ "/w44062",
29
+ "/w44242",
30
+ "/w14905",
31
+ "/w14906",
32
+ "/w14263",
33
+ "/w44265",
34
+ "/w14928"
35
+ ],
36
+ "C_Cpp_Runner.enableWarnings": true,
37
+ "C_Cpp_Runner.warningsAsError": false,
38
+ "C_Cpp_Runner.compilerArgs": [],
39
+ "C_Cpp_Runner.linkerArgs": [],
40
+ "C_Cpp_Runner.includePaths": [],
41
+ "C_Cpp_Runner.includeSearch": [
42
+ "*",
43
+ "**/*"
44
+ ],
45
+ "C_Cpp_Runner.excludeSearch": [
46
+ "**/build",
47
+ "**/build/**",
48
+ "**/.*",
49
+ "**/.*/**",
50
+ "**/.vscode",
51
+ "**/.vscode/**"
52
+ ],
53
+ "C_Cpp_Runner.useAddressSanitizer": false,
54
+ "C_Cpp_Runner.useUndefinedSanitizer": false,
55
+ "C_Cpp_Runner.useLeakSanitizer": false,
56
+ "C_Cpp_Runner.showCompilationTime": false,
57
+ "C_Cpp_Runner.useLinkTimeOptimization": false,
58
+ "C_Cpp_Runner.msvcSecureNoWarnings": false
59
+ }
README.md CHANGED
@@ -1,11 +1 @@
1
- ---
2
- title: Candle Phi Wasm Demo
3
- emoji: 🕯️ɸ
4
- colorFrom: gray
5
- colorTo: purple
6
- sdk: static
7
- pinned: false
8
- disable_embedding: true
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ # Candle phi 1.5 / 2 demo
 
 
 
 
 
 
 
 
 
 
build/{m_bg.wasm.d.ts → agent.d.ts} RENAMED
File without changes
build/{m_bg.wasm → agent.wasm} RENAMED
File without changes
build/m.js CHANGED
@@ -453,7 +453,7 @@ async function __wbg_init(input) {
453
  if (wasm !== undefined) return wasm;
454
 
455
  if (typeof input === 'undefined') {
456
- input = new URL('m_bg.wasm', import.meta.url);
457
  }
458
  const imports = __wbg_get_imports();
459
 
 
453
  if (wasm !== undefined) return wasm;
454
 
455
  if (typeof input === 'undefined') {
456
+ input = new URL('agent.wasm', import.meta.url);
457
  }
458
  const imports = __wbg_get_imports();
459
 
phi-offline/.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
phi-offline/README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Candle Phi Wasm Demo
3
+ emoji: 🕯️ɸ
4
+ colorFrom: gray
5
+ colorTo: purple
6
+ sdk: static
7
+ pinned: false
8
+ disable_embedding: true
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
phi-offline/build/m.d.ts ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+ /**
4
+ */
5
+ export class Model {
6
+ free(): void;
7
+ /**
8
+ * @param {Uint8Array} weights
9
+ * @param {Uint8Array} tokenizer
10
+ * @param {Uint8Array} config
11
+ * @param {boolean} quantized
12
+ */
13
+ constructor(weights: Uint8Array, tokenizer: Uint8Array, config: Uint8Array, quantized: boolean);
14
+ /**
15
+ * @param {string} prompt
16
+ * @param {number} temp
17
+ * @param {number} top_p
18
+ * @param {number} repeat_penalty
19
+ * @param {number} repeat_last_n
20
+ * @param {bigint} seed
21
+ * @returns {string}
22
+ */
23
+ init_with_prompt(prompt: string, temp: number, top_p: number, repeat_penalty: number, repeat_last_n: number, seed: bigint): string;
24
+ /**
25
+ * @returns {string}
26
+ */
27
+ next_token(): string;
28
+ }
29
+
30
+ export type InitInput = RequestInfo | URL | Response | BufferSource | WebAssembly.Module;
31
+
32
+ export interface InitOutput {
33
+ readonly memory: WebAssembly.Memory;
34
+ readonly __wbg_model_free: (a: number) => void;
35
+ readonly model_load: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number) => void;
36
+ readonly model_init_with_prompt: (a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number, i: number) => void;
37
+ readonly model_next_token: (a: number, b: number) => void;
38
+ readonly main: (a: number, b: number) => number;
39
+ readonly __wbindgen_add_to_stack_pointer: (a: number) => number;
40
+ readonly __wbindgen_malloc: (a: number, b: number) => number;
41
+ readonly __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number;
42
+ readonly __wbindgen_free: (a: number, b: number, c: number) => void;
43
+ readonly __wbindgen_exn_store: (a: number) => void;
44
+ readonly __wbindgen_start: () => void;
45
+ }
46
+
47
+ export type SyncInitInput = BufferSource | WebAssembly.Module;
48
+ /**
49
+ * Instantiates the given `module`, which can either be bytes or
50
+ * a precompiled `WebAssembly.Module`.
51
+ *
52
+ * @param {SyncInitInput} module
53
+ *
54
+ * @returns {InitOutput}
55
+ */
56
+ export function initSync(module: SyncInitInput): InitOutput;
57
+
58
+ /**
59
+ * If `module_or_path` is {RequestInfo} or {URL}, makes a request and
60
+ * for everything else, calls `WebAssembly.instantiate` directly.
61
+ *
62
+ * @param {InitInput | Promise<InitInput>} module_or_path
63
+ *
64
+ * @returns {Promise<InitOutput>}
65
+ */
66
+ export default function __wbg_init (module_or_path?: InitInput | Promise<InitInput>): Promise<InitOutput>;
phi-offline/build/m.js ADDED
@@ -0,0 +1,472 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ let wasm;
2
+
3
+ const cachedTextDecoder = (typeof TextDecoder !== 'undefined' ? new TextDecoder('utf-8', { ignoreBOM: true, fatal: true }) : { decode: () => { throw Error('TextDecoder not available') } } );
4
+
5
+ if (typeof TextDecoder !== 'undefined') { cachedTextDecoder.decode(); };
6
+
7
+ let cachedUint8Memory0 = null;
8
+
9
+ function getUint8Memory0() {
10
+ if (cachedUint8Memory0 === null || cachedUint8Memory0.byteLength === 0) {
11
+ cachedUint8Memory0 = new Uint8Array(wasm.memory.buffer);
12
+ }
13
+ return cachedUint8Memory0;
14
+ }
15
+
16
+ function getStringFromWasm0(ptr, len) {
17
+ ptr = ptr >>> 0;
18
+ return cachedTextDecoder.decode(getUint8Memory0().subarray(ptr, ptr + len));
19
+ }
20
+
21
+ const heap = new Array(128).fill(undefined);
22
+
23
+ heap.push(undefined, null, true, false);
24
+
25
+ let heap_next = heap.length;
26
+
27
+ function addHeapObject(obj) {
28
+ if (heap_next === heap.length) heap.push(heap.length + 1);
29
+ const idx = heap_next;
30
+ heap_next = heap[idx];
31
+
32
+ heap[idx] = obj;
33
+ return idx;
34
+ }
35
+
36
+ function getObject(idx) { return heap[idx]; }
37
+
38
+ function dropObject(idx) {
39
+ if (idx < 132) return;
40
+ heap[idx] = heap_next;
41
+ heap_next = idx;
42
+ }
43
+
44
+ function takeObject(idx) {
45
+ const ret = getObject(idx);
46
+ dropObject(idx);
47
+ return ret;
48
+ }
49
+
50
+ let WASM_VECTOR_LEN = 0;
51
+
52
+ function passArray8ToWasm0(arg, malloc) {
53
+ const ptr = malloc(arg.length * 1, 1) >>> 0;
54
+ getUint8Memory0().set(arg, ptr / 1);
55
+ WASM_VECTOR_LEN = arg.length;
56
+ return ptr;
57
+ }
58
+
59
+ let cachedInt32Memory0 = null;
60
+
61
+ function getInt32Memory0() {
62
+ if (cachedInt32Memory0 === null || cachedInt32Memory0.byteLength === 0) {
63
+ cachedInt32Memory0 = new Int32Array(wasm.memory.buffer);
64
+ }
65
+ return cachedInt32Memory0;
66
+ }
67
+
68
+ const cachedTextEncoder = (typeof TextEncoder !== 'undefined' ? new TextEncoder('utf-8') : { encode: () => { throw Error('TextEncoder not available') } } );
69
+
70
+ const encodeString = (typeof cachedTextEncoder.encodeInto === 'function'
71
+ ? function (arg, view) {
72
+ return cachedTextEncoder.encodeInto(arg, view);
73
+ }
74
+ : function (arg, view) {
75
+ const buf = cachedTextEncoder.encode(arg);
76
+ view.set(buf);
77
+ return {
78
+ read: arg.length,
79
+ written: buf.length
80
+ };
81
+ });
82
+
83
+ function passStringToWasm0(arg, malloc, realloc) {
84
+
85
+ if (realloc === undefined) {
86
+ const buf = cachedTextEncoder.encode(arg);
87
+ const ptr = malloc(buf.length, 1) >>> 0;
88
+ getUint8Memory0().subarray(ptr, ptr + buf.length).set(buf);
89
+ WASM_VECTOR_LEN = buf.length;
90
+ return ptr;
91
+ }
92
+
93
+ let len = arg.length;
94
+ let ptr = malloc(len, 1) >>> 0;
95
+
96
+ const mem = getUint8Memory0();
97
+
98
+ let offset = 0;
99
+
100
+ for (; offset < len; offset++) {
101
+ const code = arg.charCodeAt(offset);
102
+ if (code > 0x7F) break;
103
+ mem[ptr + offset] = code;
104
+ }
105
+
106
+ if (offset !== len) {
107
+ if (offset !== 0) {
108
+ arg = arg.slice(offset);
109
+ }
110
+ ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0;
111
+ const view = getUint8Memory0().subarray(ptr + offset, ptr + len);
112
+ const ret = encodeString(arg, view);
113
+
114
+ offset += ret.written;
115
+ }
116
+
117
+ WASM_VECTOR_LEN = offset;
118
+ return ptr;
119
+ }
120
+
121
+ function handleError(f, args) {
122
+ try {
123
+ return f.apply(this, args);
124
+ } catch (e) {
125
+ wasm.__wbindgen_exn_store(addHeapObject(e));
126
+ }
127
+ }
128
+ /**
129
+ */
130
+ export class Model {
131
+
132
+ __destroy_into_raw() {
133
+ const ptr = this.__wbg_ptr;
134
+ this.__wbg_ptr = 0;
135
+
136
+ return ptr;
137
+ }
138
+
139
+ free() {
140
+ const ptr = this.__destroy_into_raw();
141
+ wasm.__wbg_model_free(ptr);
142
+ }
143
+ /**
144
+ * @param {Uint8Array} weights
145
+ * @param {Uint8Array} tokenizer
146
+ * @param {Uint8Array} config
147
+ * @param {boolean} quantized
148
+ */
149
+ constructor(weights, tokenizer, config, quantized) {
150
+ try {
151
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
152
+ const ptr0 = passArray8ToWasm0(weights, wasm.__wbindgen_malloc);
153
+ const len0 = WASM_VECTOR_LEN;
154
+ const ptr1 = passArray8ToWasm0(tokenizer, wasm.__wbindgen_malloc);
155
+ const len1 = WASM_VECTOR_LEN;
156
+ const ptr2 = passArray8ToWasm0(config, wasm.__wbindgen_malloc);
157
+ const len2 = WASM_VECTOR_LEN;
158
+ wasm.model_load(retptr, ptr0, len0, ptr1, len1, ptr2, len2, quantized);
159
+ var r0 = getInt32Memory0()[retptr / 4 + 0];
160
+ var r1 = getInt32Memory0()[retptr / 4 + 1];
161
+ var r2 = getInt32Memory0()[retptr / 4 + 2];
162
+ if (r2) {
163
+ throw takeObject(r1);
164
+ }
165
+ this.__wbg_ptr = r0 >>> 0;
166
+ return this;
167
+ } finally {
168
+ wasm.__wbindgen_add_to_stack_pointer(16);
169
+ }
170
+ }
171
+ /**
172
+ * @param {string} prompt
173
+ * @param {number} temp
174
+ * @param {number} top_p
175
+ * @param {number} repeat_penalty
176
+ * @param {number} repeat_last_n
177
+ * @param {bigint} seed
178
+ * @returns {string}
179
+ */
180
+ init_with_prompt(prompt, temp, top_p, repeat_penalty, repeat_last_n, seed) {
181
+ let deferred3_0;
182
+ let deferred3_1;
183
+ try {
184
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
185
+ const ptr0 = passStringToWasm0(prompt, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
186
+ const len0 = WASM_VECTOR_LEN;
187
+ wasm.model_init_with_prompt(retptr, this.__wbg_ptr, ptr0, len0, temp, top_p, repeat_penalty, repeat_last_n, seed);
188
+ var r0 = getInt32Memory0()[retptr / 4 + 0];
189
+ var r1 = getInt32Memory0()[retptr / 4 + 1];
190
+ var r2 = getInt32Memory0()[retptr / 4 + 2];
191
+ var r3 = getInt32Memory0()[retptr / 4 + 3];
192
+ var ptr2 = r0;
193
+ var len2 = r1;
194
+ if (r3) {
195
+ ptr2 = 0; len2 = 0;
196
+ throw takeObject(r2);
197
+ }
198
+ deferred3_0 = ptr2;
199
+ deferred3_1 = len2;
200
+ return getStringFromWasm0(ptr2, len2);
201
+ } finally {
202
+ wasm.__wbindgen_add_to_stack_pointer(16);
203
+ wasm.__wbindgen_free(deferred3_0, deferred3_1, 1);
204
+ }
205
+ }
206
+ /**
207
+ * @returns {string}
208
+ */
209
+ next_token() {
210
+ let deferred2_0;
211
+ let deferred2_1;
212
+ try {
213
+ const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
214
+ wasm.model_next_token(retptr, this.__wbg_ptr);
215
+ var r0 = getInt32Memory0()[retptr / 4 + 0];
216
+ var r1 = getInt32Memory0()[retptr / 4 + 1];
217
+ var r2 = getInt32Memory0()[retptr / 4 + 2];
218
+ var r3 = getInt32Memory0()[retptr / 4 + 3];
219
+ var ptr1 = r0;
220
+ var len1 = r1;
221
+ if (r3) {
222
+ ptr1 = 0; len1 = 0;
223
+ throw takeObject(r2);
224
+ }
225
+ deferred2_0 = ptr1;
226
+ deferred2_1 = len1;
227
+ return getStringFromWasm0(ptr1, len1);
228
+ } finally {
229
+ wasm.__wbindgen_add_to_stack_pointer(16);
230
+ wasm.__wbindgen_free(deferred2_0, deferred2_1, 1);
231
+ }
232
+ }
233
+ }
234
+
235
+ async function __wbg_load(module, imports) {
236
+ if (typeof Response === 'function' && module instanceof Response) {
237
+ if (typeof WebAssembly.instantiateStreaming === 'function') {
238
+ try {
239
+ return await WebAssembly.instantiateStreaming(module, imports);
240
+
241
+ } catch (e) {
242
+ if (module.headers.get('Content-Type') != 'application/wasm') {
243
+ console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
244
+
245
+ } else {
246
+ throw e;
247
+ }
248
+ }
249
+ }
250
+
251
+ const bytes = await module.arrayBuffer();
252
+ return await WebAssembly.instantiate(bytes, imports);
253
+
254
+ } else {
255
+ const instance = await WebAssembly.instantiate(module, imports);
256
+
257
+ if (instance instanceof WebAssembly.Instance) {
258
+ return { instance, module };
259
+
260
+ } else {
261
+ return instance;
262
+ }
263
+ }
264
+ }
265
+
266
+ function __wbg_get_imports() {
267
+ const imports = {};
268
+ imports.wbg = {};
269
+ imports.wbg.__wbindgen_error_new = function(arg0, arg1) {
270
+ const ret = new Error(getStringFromWasm0(arg0, arg1));
271
+ return addHeapObject(ret);
272
+ };
273
+ imports.wbg.__wbg_new_abda76e883ba8a5f = function() {
274
+ const ret = new Error();
275
+ return addHeapObject(ret);
276
+ };
277
+ imports.wbg.__wbg_stack_658279fe44541cf6 = function(arg0, arg1) {
278
+ const ret = getObject(arg1).stack;
279
+ const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc);
280
+ const len1 = WASM_VECTOR_LEN;
281
+ getInt32Memory0()[arg0 / 4 + 1] = len1;
282
+ getInt32Memory0()[arg0 / 4 + 0] = ptr1;
283
+ };
284
+ imports.wbg.__wbg_error_f851667af71bcfc6 = function(arg0, arg1) {
285
+ let deferred0_0;
286
+ let deferred0_1;
287
+ try {
288
+ deferred0_0 = arg0;
289
+ deferred0_1 = arg1;
290
+ console.error(getStringFromWasm0(arg0, arg1));
291
+ } finally {
292
+ wasm.__wbindgen_free(deferred0_0, deferred0_1, 1);
293
+ }
294
+ };
295
+ imports.wbg.__wbindgen_object_drop_ref = function(arg0) {
296
+ takeObject(arg0);
297
+ };
298
+ imports.wbg.__wbg_log_aa756f7b1647d2ab = function(arg0, arg1) {
299
+ console.log(getStringFromWasm0(arg0, arg1));
300
+ };
301
+ imports.wbg.__wbg_crypto_58f13aa23ffcb166 = function(arg0) {
302
+ const ret = getObject(arg0).crypto;
303
+ return addHeapObject(ret);
304
+ };
305
+ imports.wbg.__wbindgen_is_object = function(arg0) {
306
+ const val = getObject(arg0);
307
+ const ret = typeof(val) === 'object' && val !== null;
308
+ return ret;
309
+ };
310
+ imports.wbg.__wbg_process_5b786e71d465a513 = function(arg0) {
311
+ const ret = getObject(arg0).process;
312
+ return addHeapObject(ret);
313
+ };
314
+ imports.wbg.__wbg_versions_c2ab80650590b6a2 = function(arg0) {
315
+ const ret = getObject(arg0).versions;
316
+ return addHeapObject(ret);
317
+ };
318
+ imports.wbg.__wbg_node_523d7bd03ef69fba = function(arg0) {
319
+ const ret = getObject(arg0).node;
320
+ return addHeapObject(ret);
321
+ };
322
+ imports.wbg.__wbindgen_is_string = function(arg0) {
323
+ const ret = typeof(getObject(arg0)) === 'string';
324
+ return ret;
325
+ };
326
+ imports.wbg.__wbg_msCrypto_abcb1295e768d1f2 = function(arg0) {
327
+ const ret = getObject(arg0).msCrypto;
328
+ return addHeapObject(ret);
329
+ };
330
+ imports.wbg.__wbg_require_2784e593a4674877 = function() { return handleError(function () {
331
+ const ret = module.require;
332
+ return addHeapObject(ret);
333
+ }, arguments) };
334
+ imports.wbg.__wbindgen_is_function = function(arg0) {
335
+ const ret = typeof(getObject(arg0)) === 'function';
336
+ return ret;
337
+ };
338
+ imports.wbg.__wbindgen_string_new = function(arg0, arg1) {
339
+ const ret = getStringFromWasm0(arg0, arg1);
340
+ return addHeapObject(ret);
341
+ };
342
+ imports.wbg.__wbg_randomFillSync_a0d98aa11c81fe89 = function() { return handleError(function (arg0, arg1) {
343
+ getObject(arg0).randomFillSync(takeObject(arg1));
344
+ }, arguments) };
345
+ imports.wbg.__wbg_getRandomValues_504510b5564925af = function() { return handleError(function (arg0, arg1) {
346
+ getObject(arg0).getRandomValues(getObject(arg1));
347
+ }, arguments) };
348
+ imports.wbg.__wbg_newnoargs_ccdcae30fd002262 = function(arg0, arg1) {
349
+ const ret = new Function(getStringFromWasm0(arg0, arg1));
350
+ return addHeapObject(ret);
351
+ };
352
+ imports.wbg.__wbg_call_669127b9d730c650 = function() { return handleError(function (arg0, arg1) {
353
+ const ret = getObject(arg0).call(getObject(arg1));
354
+ return addHeapObject(ret);
355
+ }, arguments) };
356
+ imports.wbg.__wbindgen_object_clone_ref = function(arg0) {
357
+ const ret = getObject(arg0);
358
+ return addHeapObject(ret);
359
+ };
360
+ imports.wbg.__wbg_self_3fad056edded10bd = function() { return handleError(function () {
361
+ const ret = self.self;
362
+ return addHeapObject(ret);
363
+ }, arguments) };
364
+ imports.wbg.__wbg_window_a4f46c98a61d4089 = function() { return handleError(function () {
365
+ const ret = window.window;
366
+ return addHeapObject(ret);
367
+ }, arguments) };
368
+ imports.wbg.__wbg_globalThis_17eff828815f7d84 = function() { return handleError(function () {
369
+ const ret = globalThis.globalThis;
370
+ return addHeapObject(ret);
371
+ }, arguments) };
372
+ imports.wbg.__wbg_global_46f939f6541643c5 = function() { return handleError(function () {
373
+ const ret = global.global;
374
+ return addHeapObject(ret);
375
+ }, arguments) };
376
+ imports.wbg.__wbindgen_is_undefined = function(arg0) {
377
+ const ret = getObject(arg0) === undefined;
378
+ return ret;
379
+ };
380
+ imports.wbg.__wbg_call_53fc3abd42e24ec8 = function() { return handleError(function (arg0, arg1, arg2) {
381
+ const ret = getObject(arg0).call(getObject(arg1), getObject(arg2));
382
+ return addHeapObject(ret);
383
+ }, arguments) };
384
+ imports.wbg.__wbg_now_4579335d3581594c = function() {
385
+ const ret = Date.now();
386
+ return ret;
387
+ };
388
+ imports.wbg.__wbg_buffer_344d9b41efe96da7 = function(arg0) {
389
+ const ret = getObject(arg0).buffer;
390
+ return addHeapObject(ret);
391
+ };
392
+ imports.wbg.__wbg_newwithbyteoffsetandlength_2dc04d99088b15e3 = function(arg0, arg1, arg2) {
393
+ const ret = new Uint8Array(getObject(arg0), arg1 >>> 0, arg2 >>> 0);
394
+ return addHeapObject(ret);
395
+ };
396
+ imports.wbg.__wbg_new_d8a000788389a31e = function(arg0) {
397
+ const ret = new Uint8Array(getObject(arg0));
398
+ return addHeapObject(ret);
399
+ };
400
+ imports.wbg.__wbg_set_dcfd613a3420f908 = function(arg0, arg1, arg2) {
401
+ getObject(arg0).set(getObject(arg1), arg2 >>> 0);
402
+ };
403
+ imports.wbg.__wbg_newwithlength_13b5319ab422dcf6 = function(arg0) {
404
+ const ret = new Uint8Array(arg0 >>> 0);
405
+ return addHeapObject(ret);
406
+ };
407
+ imports.wbg.__wbg_subarray_6ca5cfa7fbb9abbe = function(arg0, arg1, arg2) {
408
+ const ret = getObject(arg0).subarray(arg1 >>> 0, arg2 >>> 0);
409
+ return addHeapObject(ret);
410
+ };
411
+ imports.wbg.__wbindgen_throw = function(arg0, arg1) {
412
+ throw new Error(getStringFromWasm0(arg0, arg1));
413
+ };
414
+ imports.wbg.__wbindgen_memory = function() {
415
+ const ret = wasm.memory;
416
+ return addHeapObject(ret);
417
+ };
418
+
419
+ return imports;
420
+ }
421
+
422
+ function __wbg_init_memory(imports, maybe_memory) {
423
+
424
+ }
425
+
426
+ function __wbg_finalize_init(instance, module) {
427
+ wasm = instance.exports;
428
+ __wbg_init.__wbindgen_wasm_module = module;
429
+ cachedInt32Memory0 = null;
430
+ cachedUint8Memory0 = null;
431
+
432
+ wasm.__wbindgen_start();
433
+ return wasm;
434
+ }
435
+
436
+ function initSync(module) {
437
+ if (wasm !== undefined) return wasm;
438
+
439
+ const imports = __wbg_get_imports();
440
+
441
+ __wbg_init_memory(imports);
442
+
443
+ if (!(module instanceof WebAssembly.Module)) {
444
+ module = new WebAssembly.Module(module);
445
+ }
446
+
447
+ const instance = new WebAssembly.Instance(module, imports);
448
+
449
+ return __wbg_finalize_init(instance, module);
450
+ }
451
+
452
+ async function __wbg_init(input) {
453
+ if (wasm !== undefined) return wasm;
454
+
455
+ if (typeof input === 'undefined') {
456
+ input = new URL('m_bg.wasm', import.meta.url);
457
+ }
458
+ const imports = __wbg_get_imports();
459
+
460
+ if (typeof input === 'string' || (typeof Request === 'function' && input instanceof Request) || (typeof URL === 'function' && input instanceof URL)) {
461
+ input = fetch(input);
462
+ }
463
+
464
+ __wbg_init_memory(imports);
465
+
466
+ const { instance, module } = await __wbg_load(await input, imports);
467
+
468
+ return __wbg_finalize_init(instance, module);
469
+ }
470
+
471
+ export { initSync }
472
+ export default __wbg_init;
phi-offline/build/m_bg.wasm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a62b31d8872b8c4c1f0f98dad8f2375d05ad120060731eb02ca21f827d188ac5
3
+ size 4566571
phi-offline/build/m_bg.wasm.d.ts ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* tslint:disable */
2
+ /* eslint-disable */
3
+ export const memory: WebAssembly.Memory;
4
+ export function __wbg_model_free(a: number): void;
5
+ export function model_load(a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number): void;
6
+ export function model_init_with_prompt(a: number, b: number, c: number, d: number, e: number, f: number, g: number, h: number, i: number): void;
7
+ export function model_next_token(a: number, b: number): void;
8
+ export function main(a: number, b: number): number;
9
+ export function __wbindgen_add_to_stack_pointer(a: number): number;
10
+ export function __wbindgen_malloc(a: number, b: number): number;
11
+ export function __wbindgen_realloc(a: number, b: number, c: number, d: number): number;
12
+ export function __wbindgen_free(a: number, b: number, c: number): void;
13
+ export function __wbindgen_exn_store(a: number): void;
14
+ export function __wbindgen_start(): void;
phi-offline/candle.png ADDED
phi-offline/index.html ADDED
@@ -0,0 +1,377 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+
4
+ <head>
5
+ <meta content="text/html;charset=utf-8" http-equiv="Content-Type" />
6
+ <meta name="apple-mobile-web-app-capable" content="yes">
7
+ <link rel="shortcut icon" href="candle.png">
8
+ <link rel="apple-touch-icon" href="candle.png">
9
+ <title>Candle Phi Rust/WASM</title>
10
+ <meta charset="UTF-8" />
11
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
12
+ <link rel="stylesheet"
13
+ href="https://cdn.jsdelivr.net/gh/highlightjs/cdn-release@11.9.0/build/styles/default.min.css" />
14
+ <style>
15
+ @import url("https://fonts.googleapis.com/css2?family=Source+Code+Pro:wght@200;300;400&family=Source+Sans+3:wght@100;200;300;400;500;600;700;800;900&display=swap");
16
+
17
+ html,
18
+ body {
19
+ font-family: "Source Sans 3", sans-serif;
20
+ }
21
+
22
+ code,
23
+ output,
24
+ select,
25
+ pre {
26
+ font-family: "Source Code Pro", monospace;
27
+ }
28
+ </style>
29
+ <style type="text/tailwindcss">
30
+ .link { @apply underline hover:text-blue-500 hover:no-underline; }
31
+ </style>
32
+ <script src="https://cdn.tailwindcss.com"></script>
33
+ <script type="module">
34
+ import snarkdown from "https://cdn.skypack.dev/snarkdown"
35
+ import hljs from "https://cdn.skypack.dev/highlight.js"
36
+ // models base url
37
+ const MODELS = {
38
+ phi_1_5_q4k: {
39
+ base_url:
40
+ "https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
41
+ model: "model-q4k.gguf",
42
+ tokenizer: "tokenizer.json",
43
+ config: "phi-1_5.json",
44
+ quantized: true,
45
+ seq_len: 2048,
46
+ size: "800 MB",
47
+ },
48
+ phi_1_5_q80: {
49
+ base_url:
50
+ "https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
51
+ model: "model-q80.gguf",
52
+ tokenizer: "tokenizer.json",
53
+ config: "phi-1_5.json",
54
+ quantized: true,
55
+ seq_len: 2048,
56
+ size: "1.51 GB",
57
+ },
58
+ phi_2_0_q4k: {
59
+ base_url:
60
+ "https://huggingface.co/radames/phi-2-quantized/resolve/main/",
61
+ model: [
62
+ "model-v2-q4k.gguf_aa.part",
63
+ "model-v2-q4k.gguf_ab.part",
64
+ "model-v2-q4k.gguf_ac.part",
65
+ ],
66
+ tokenizer: "tokenizer.json",
67
+ config: "config.json",
68
+ quantized: true,
69
+ seq_len: 2048,
70
+ size: "1.57GB",
71
+ },
72
+ puffin_phi_v2_q4k: {
73
+ base_url:
74
+ "https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
75
+ model: "model-puffin-phi-v2-q4k.gguf",
76
+ tokenizer: "tokenizer-puffin-phi-v2.json",
77
+ config: "puffin-phi-v2.json",
78
+ quantized: true,
79
+ seq_len: 2048,
80
+ size: "798 MB",
81
+ },
82
+ puffin_phi_v2_q80: {
83
+ base_url:
84
+ "https://huggingface.co/lmz/candle-quantized-phi/resolve/main/",
85
+ model: "model-puffin-phi-v2-q80.gguf",
86
+ tokenizer: "tokenizer-puffin-phi-v2.json",
87
+ config: "puffin-phi-v2.json",
88
+ quantized: true,
89
+ seq_len: 2048,
90
+ size: "1.50 GB",
91
+ },
92
+ }
93
+
94
+ const phiWorker = new Worker("./phiWorker.js", {
95
+ type: "module",
96
+ })
97
/**
 * Runs one generation request on the Phi worker and mirrors its progress into the page.
 *
 * Reads the selected model and the sampling settings from the form controls, posts a
 * "start" command to `phiWorker`, and renders every status message the worker sends back.
 *
 * @param {string} prompt - Prompt text forwarded to the worker.
 * @param {AbortController} controller - Aborting its signal posts an "abort" command to the worker.
 * @returns {Promise<object>} Resolves with the worker's final message ("complete" or "aborted").
 * @throws {Error} Rejects when the selected model is unknown or the worker reports an error.
 */
async function generateSequence(prompt, controller) {
  const getValue = (id) => document.querySelector(`#${id}`).value
  const modelID = getValue("model")
  const model = MODELS[modelID]
  if (!model) {
    throw new Error(`Unknown model "${modelID}"`)
  }
  // A model may be split across several weight files; every path is relative to base_url.
  const weightsURL = Array.isArray(model.model)
    ? model.model.map((m) => model.base_url + m)
    : model.base_url + model.model
  const tokenizerURL = model.base_url + model.tokenizer
  const configURL = model.base_url + model.config

  // Form values are forwarded as the strings the inputs report.
  const temperature = getValue("temperature")
  const topP = getValue("top-p")
  const repeatPenalty = getValue("repeat_penalty")
  const seed = getValue("seed")
  const maxSeqLen = getValue("max-seq")

  // Reflects one worker status message in the output panel.
  function updateStatus(data) {
    const outStatus = document.querySelector("#output-status")
    const outGen = document.querySelector("#output-generation")
    const outCounter = document.querySelector("#output-counter")

    switch (data.status) {
      case "loading":
        outStatus.hidden = false
        outStatus.innerHTML = data.message.replaceAll("\n", "<br>\n")
        outGen.hidden = true
        outCounter.hidden = true
        break
      case "generating": {
        const { prompt, sentence, tokensSec, totalTime } = data
        outStatus.hidden = true
        outCounter.hidden = false
        outGen.hidden = false
        // NOTE(review): the generated text is rendered as HTML through snarkdown;
        // confirm whether its output needs sanitizing before it reaches innerHTML.
        outGen.innerHTML = snarkdown(prompt + sentence).replaceAll("\n", "<br>\n")
        outCounter.textContent = `${(totalTime / 1000).toFixed(2)}s (${tokensSec.toFixed(2)} tok/s)`
        hljs.highlightAll()
        break
      }
      case "complete":
        outStatus.hidden = true
        outGen.hidden = false
        break
    }
  }

  // Decodes HTML entities by letting a detached <textarea> parse the markup.
  function decodeHtml(html) {
    const txt = document.createElement("textarea")
    txt.innerHTML = html
    return txt.value
  }

  return new Promise((resolve, reject) => {
    // End tokens come from the #terminate textarea; a missing element yields an empty list.
    const terminateBox = document.querySelector("textarea#terminate")
    let endTokenText = terminateBox?.innerText ?? ""
    if (endTokenText === "") endTokenText = decodeHtml(terminateBox?.innerHTML ?? "")

    const handleAbort = () => {
      phiWorker.postMessage({ command: "abort" })
    }
    // Detach both listeners once the request settles so a later abort of this
    // controller no longer reaches the worker.
    const detach = () => {
      phiWorker.removeEventListener("message", handleMessage)
      controller.signal.removeEventListener("abort", handleAbort)
    }
    function handleMessage(event) {
      const { status, error } = event.data
      if (status) updateStatus(event.data)
      if (error) {
        detach()
        reject(error instanceof Error ? error : new Error(String(error)))
        return
      }
      if (status === "aborted" || status === "complete") {
        detach()
        resolve(event.data)
      }
    }

    controller.signal.addEventListener("abort", handleAbort)
    phiWorker.addEventListener("message", handleMessage)

    phiWorker.postMessage({
      weightsURL,
      modelID,
      tokenizerURL,
      configURL,
      quantized: model.quantized,
      prompt,
      temp: temperature,
      top_p: topP,
      repeatPenalty,
      seed,
      maxSeqLen,
      command: "start",
      stuff: endTokenText
        .split(",")
        .map((e) => e.trim())
        .filter((e) => e !== ""),
    })
  })
}
194
+
195
// Page controls shared by the event handlers.
const form = document.querySelector("#form")
const prompt = document.querySelector("#prompt")
const clearBtn = document.querySelector("#clear-btn")
const runBtn = document.querySelector("#run")
const modelSelect = document.querySelector("#model")
// Controller for the current run; stopRunning() aborts it and installs a fresh one.
let runController = new AbortController()
let isRunning = false

// Fill the model dropdown and pick the initial model once the DOM is ready.
document.addEventListener("DOMContentLoaded", () => {
  for (const [id, model] of Object.entries(MODELS)) {
    const option = document.createElement("option")
    option.value = id
    option.innerText = `${id} (${model.size})`
    modelSelect.appendChild(option)
  }
  // Honor ?model=<id> only when it names a known model; an unknown value would
  // leave the <select> without any selected option.
  const requested = new URLSearchParams(window.location.search).get("model")
  const isKnown = requested !== null && Object.keys(MODELS).includes(requested)
  modelSelect.value = isKnown ? requested : "phi_1_5_q4k"
})
218
+
219
// Stub template list: entries() yields nothing, so the loop below never runs.
const TEMPLATES = { entries: () => [] }

// Builds one radio button + label per prompt template.
// NOTE(review): `promptTemplates` is not declared in the visible code — confirm it
// exists before adding real templates.
for (const [index, template] of TEMPLATES.entries()) {
  const { title, prompt } = template
  const radioId = `templates-${index}`

  const input = document.createElement("input")
  input.type = "radio"
  input.name = "task"
  input.id = radioId
  input.classList.add("font-light", "cursor-pointer")
  input.value = prompt

  const label = document.createElement("label")
  label.htmlFor = radioId
  label.classList.add("cursor-pointer")
  label.innerText = title

  const div = document.createElement("div")
  div.appendChild(input)
  div.appendChild(label)
  promptTemplates.appendChild(div)
}
237
+
238
// When another model is chosen: record it in the URL, notify the embedding page,
// and reset the "Maximum length" control to the new model's limits.
modelSelect.addEventListener("change", (e) => {
  const query = new URLSearchParams(window.location.search)
  query.set("model", e.target.value)
  window.history.replaceState({}, "", `${window.location.pathname}?${query}`)
  window.parent.postMessage({ queryString: `?${query}` }, "*")

  const model = MODELS[e.target.value]
  const maxSeq = document.querySelector("#max-seq")
  maxSeq.max = model.seq_len
  // Reset the slider itself as well as its <output> label; resetting only the label
  // left the displayed length out of sync with the value that gets submitted.
  maxSeq.value = 200
  maxSeq.nextElementSibling.value = 200
})
251
+
252
// Submitting the form toggles generation: it stops a running generation, otherwise starts one.
form.addEventListener("submit", async (e) => {
  e.preventDefault()
  if (isRunning) {
    stopRunning()
    return
  }

  startRunning()
  try {
    await generateSequence(document.querySelector(`#prompt`).value, runController)
  } catch (err) {
    // Surface the failure instead of leaving an unhandled rejection behind.
    console.error(err)
    const outStatus = document.querySelector("#output-status")
    if (outStatus) {
      outStatus.hidden = false
      outStatus.textContent = `Error: ${err?.message ?? err}`
    }
  } finally {
    // Always restore the "Run" state, even when generation failed.
    stopRunning()
  }
})
262
+
263
/** Flags a generation as in progress and relabels the run button as "Stop". */
function startRunning() {
  runBtn.textContent = "Stop"
  isRunning = true
}
267
+
268
/**
 * Aborts the current run controller, installs a fresh one for the next run,
 * and puts the run button back into its idle "Run" state.
 */
function stopRunning() {
  runController.abort()
  runController = new AbortController()
  isRunning = false
  runBtn.textContent = "Run"
}
274
// Clear button: empties the prompt, hides itself, disables Run and stops any active run.
clearBtn.addEventListener("click", (event) => {
  event.preventDefault()
  prompt.value = ""
  runBtn.disabled = true
  clearBtn.classList.add("invisible")
  stopRunning()
})
281
// Typing in the prompt enables Run and shows the clear button only while there is text.
prompt.addEventListener("input", ({ target }) => {
  runBtn.disabled = false
  const isEmpty = target.value.length === 0
  clearBtn.classList.toggle("invisible", isEmpty)
})
289
+ </script>
290
+ </head>
291
+
292
+ <body class="container max-w-4xl mx-auto p-4 text-gray-800">
293
+ <main class="grid grid-cols-1 gap-8 relative">
294
+ <span class="absolute text-5xl -ml-[1em]"> 🕯️ </span>
295
+ <div>
296
+ <h1 class="text-5xl font-bold">Candle Phi 1.5 / Phi 2.0</h1>
297
+ </div>
298
+ <div>
299
+ <p class="text-m max-w-lg">
300
+ <b>Note:</b>
301
+ When first run, the app will download and cache the model, which could
302
+ take a few minutes. The models are <b>~800MB</b> or <b>~1.57GB</b> in
303
+ size.
304
+ </p>
305
+ </div>
306
+ <div>
307
+ <label for="model" class="font-medium">Models Options: </label>
308
+ <select id="model" class="border-2 border-gray-500 rounded-md font-light"></select>
309
+ </div>
310
+ <form id="form" class="flex text-normal px-1 py-1 border border-gray-700 rounded-md items-center">
311
+ <input type="submit" hidden />
312
+ <textarea type="text" id="prompt" class="font-light text-lg w-full px-3 py-2 mx-1 resize-none outline-none"
313
+ oninput="this.style.height = 0; this.style.height = this.scrollHeight + 'px'"
314
+ placeholder="Add your prompt here..."></textarea>
315
+ <button id="clear-btn">
316
+ <svg fill="none" xmlns="http://www.w3.org/2000/svg" width="40" viewBox="0 0 70 40">
317
+ <path opacity=".5" d="M39 .2v40.2" stroke="#1F2937" />
318
+ <path d="M1.5 11.5 19 29.1m0-17.6L1.5 29.1" opacity=".5" stroke="#1F2937" stroke-width="2" />
319
+ </svg>
320
+ </button>
321
+ <button id="run"
322
+ class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-2 w-16 rounded disabled:bg-gray-300 disabled:cursor-not-allowed">
323
+ Run
324
+ </button>
325
+ </form>
326
+ <details>
327
+ <summary class="font-medium cursor-pointer">Advanced Options</summary>
328
+ <div class="grid grid-cols-3 max-w-md items-center gap-3 py-3">
329
+ <label class="text-sm font-medium" for="max-seq">Maximum length
330
+ </label>
331
+ <input type="range" id="max-seq" name="max-seq" min="1" max="2048" step="1" value="200"
332
+ oninput="this.nextElementSibling.value = Number(this.value)" />
333
+ <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
334
+ 200</output>
335
+ <label class="text-sm font-medium" for="temperature">Temperature</label>
336
+ <input type="range" id="temperature" name="temperature" min="0" max="2" step="0.01" value="0.50"
337
+ oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)" />
338
+ <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
339
+ 0.50</output>
340
+ <label class="text-sm font-medium" for="top-p">Top-p</label>
341
+ <input type="range" id="top-p" name="top-p" min="0" max="1" step="0.01" value="1.00"
342
+ oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)" />
343
+ <output class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">
344
+ 1.00</output>
345
+ <label class="text-sm font-medium" for="repeat_penalty">Repeat Penalty</label>
346
+ <input type="range" id="repeat_penalty" name="repeat_penalty" min="1" max="2" step="0.01" value="1.10"
347
+ oninput="this.nextElementSibling.value = Number(this.value).toFixed(2)" />
348
+ <output
349
+ class="text-xs w-[50px] text-center font-light px-1 py-1 border border-gray-700 rounded-md">1.10</output>
350
+ <label class="text-sm font-medium" for="seed">Seed</label>
351
+ <input type="number" id="seed" name="seed" value="299792458"
352
+ class="font-light border border-gray-700 text-right rounded-md p-2" />
353
<!-- Uses its own id: the form's run button already owns id="run", and ids must be unique. -->
<button id="rand-seed" type="button"
  onclick="document.querySelector('#seed').value = Math.floor(Math.random() * Number.MAX_SAFE_INTEGER)"
  class="bg-gray-700 hover:bg-gray-800 text-white font-normal py-1 w-[50px] rounded disabled:bg-gray-300 disabled:cursor-not-allowed text-sm">
  Rand
</button>
358
+ <label hidden class="text-sm font-medium" for="terminate">End tokens</label>
359
+ <textarea hidden type="text" id="terminate"
360
+ class="font-light text-lg w-full px-3 py-2 mx-1 resize-none outline-none"
361
+ style="padding-left: -10px; border: 1px solid black; border-radius: 5px; width: 500px"
362
+ oninput="this.style.height = 0; this.style.height = this.scrollHeight + 'px'"
363
+ placeholder="Add your terminate tokens here, Separated by `, `"><|endoftext|>, <|user|>, <|system|>, <|assistant|></textarea>
364
+ </div>
365
+ </details>
366
+ <div>
367
+ <h3 class="font-medium">Generation:</h3>
368
+ <div class="min-h-[250px] bg-slate-100 text-gray-500 p-4 rounded-md flex flex-col gap-2">
369
+ <div id="output-counter" hidden class="ml-auto font-semibold grid-rows-1"></div>
370
+ <p hidden id="output-generation" class="grid-rows-2 text-lg"></p>
371
+ <span id="output-status" class="m-auto font-light">No output yet</span>
372
+ </div>
373
+ </div>
374
+ </main>
375
+ </body>
376
+
377
+ </html>
phi-offline/phiWorker.js ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import init, { Model } from "./build/m.js"
2
+
3
/**
 * Truncates a number to two decimal places (rounding toward negative infinity).
 *
 * @param {number} x - Value to truncate.
 * @returns {number} `x` cut off after the second decimal digit.
 */
function fixTwo(x) {
  // x * 100 can land a hair below the intended integer (1.15 * 100 is
  // 114.99999999999999); snapping to 15 significant digits first keeps such
  // values from losing a whole hundredth when floored.
  const scaled = Number((x * 100).toPrecision(15))
  return Math.floor(scaled) / 100
}
4
+
5
/**
 * Formats a byte count with a decimal (power-of-1000) unit suffix.
 *
 * @param {number} size - Size in bytes.
 * @returns {string} The size truncated to two decimals followed by b, kb, mb, gb or tb.
 */
function humanSize(size) {
  // Each entry is [exclusive upper bound, suffix]; a value is divided by the previous bound.
  const units = [
    [1e3, "b"],
    [1e6, "kb"],
    [1e9, "mb"],
    [1e12, "gb"],
  ]
  let divisor = 1
  for (const [limit, suffix] of units) {
    if (size < limit) return `${fixTwo(size / divisor)}${suffix}`
    divisor = limit
  }
  return `${fixTwo(size / 1e12)}tb`
}
12
+
13
/**
 * Formats a duration as "1 hour 2 minutes 3 seconds".
 *
 * Uses fixed-length units: a year is 365 days and a month is 30 days. Units whose
 * count is zero are omitted, fractional seconds are dropped, and the parts are
 * joined by single spaces with no trailing space.
 *
 * @param {number} seconds - Duration in seconds.
 * @returns {string} The formatted duration; "0 seconds" when it is shorter than one
 *   second, and "unknown" when the input is negative, NaN or infinite.
 */
function humanTime(seconds) {
  // A stalled download produces an infinite or NaN estimate; report that plainly
  // instead of printing "NaN seconds".
  if (!Number.isFinite(seconds) || seconds < 0) return "unknown"

  const units = [
    ["year", 31536e3],
    ["month", 2592e3],
    ["day", 864e2],
    ["hour", 36e2],
    ["minute", 60],
    ["second", 1],
  ]

  let remaining = Math.floor(seconds)
  const parts = []
  for (const [label, span] of units) {
    const count = Math.floor(remaining / span)
    remaining -= count * span
    if (count > 0) parts.push(`${count} ${label}${count === 1 ? "" : "s"}`)
  }
  return parts.length > 0 ? parts.join(" ") : "0 seconds"
}
43
+
44
+ let lastSend = 0
45
+ let lastTime = Infinity
46
+ let times = [0, 0, 0, 0]
47
+
48
/**
 * Downloads `url` into memory, using the Cache Storage API as a persistent cache.
 *
 * A cached copy is returned immediately. Otherwise the response is streamed, a
 * "loading" progress message is posted at most every 250 ms, and the finished
 * download is stored in the cache.
 *
 * @param {string} url - Resource to download.
 * @returns {Promise<Uint8Array>} The complete response body.
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function fetchArrayBuffer(url) {
  const cacheName = "phi-mixformer-candle-cache"
  const cache = await caches.open(cacheName)
  const cachedResponse = await cache.match(url)
  if (cachedResponse) {
    return new Uint8Array(await cachedResponse.arrayBuffer())
  }

  const res = await fetch(url, { cache: "force-cache" })
  // Fail before anything is cached: an error page must never be stored as a model file.
  if (!res.ok) {
    throw new Error(`Failed to download ${url}: HTTP ${res.status}`)
  }

  let bytes
  if (!res.body) {
    // No readable stream available: read the body in one go (no progress messages).
    bytes = new Uint8Array(await res.arrayBuffer())
  } else {
    const reader = res.body.getReader()
    const contentLength = Number(res.headers.get("Content-Length") ?? 0)
    const chunks = []
    let receivedLength = 0

    // Progress state is kept per download so that downloads running in parallel
    // do not distort each other's speed estimate.
    let samples = [0, 0, 0, 0] // byte counts at the last four progress reports
    let lastReport = 0
    let lastEstimate = Infinity

    while (true) {
      const { done, value } = await reader.read()
      if (done) break
      chunks.push(value)
      receivedLength += value.length

      if (Date.now() - lastReport <= 250) continue

      samples = [...samples.slice(1), receivedLength]
      // Mean bytes per report interval over the last three intervals; four
      // intervals of 250 ms make one second.
      const perInterval = (samples[3] - samples[0]) / 3
      const bytesPerSecond = perInterval * 4

      const lines = []
      if (contentLength > 0) {
        const percent = fixTwo((receivedLength / contentLength) * 100)
        lines.push(`Downloading... ${percent}% (${humanSize(receivedLength)})`)

        let remaining = "unknown"
        if (bytesPerSecond > 0) {
          let leftTime = Math.abs((contentLength - receivedLength) / bytesPerSecond)
          // Damp sudden jumps: the estimate may grow by at most 20% per report.
          if (leftTime > lastEstimate * 1.5 && lastEstimate !== 0) leftTime = lastEstimate * 1.2
          lastEstimate = leftTime
          remaining = humanTime(leftTime)
        }
        lines.push(`Estimated time remaining: ${remaining} (may be inaccurate)`)
        lines.push(`Total size: ${humanSize(contentLength)}`)
      } else {
        // Without a Content-Length header neither a percentage nor an ETA can be computed.
        lines.push(`Downloading... (${humanSize(receivedLength)})`)
      }
      lines.push(`Download URL: ${url}`)

      self.postMessage({ status: "loading", message: lines.join("\n") })
      lastReport = Date.now()
    }

    bytes = new Uint8Array(receivedLength)
    let position = 0
    for (const chunk of chunks) {
      bytes.set(chunk, position)
      position += chunk.length
    }
  }

  // Caching is best effort (it can fail, e.g. when storage is full); the
  // download itself is still returned.
  try {
    await cache.put(url, new Response(bytes))
  } catch (err) {
    console.warn(`Could not cache ${url}`, err)
  }
  return bytes
}
99
+
100
/**
 * Downloads every URL in parallel and joins the results, in order, into one buffer.
 *
 * @param {string[]} urls - Parts to download.
 * @returns {Promise<Uint8Array>} All parts laid out back to back.
 */
async function concatenateArrayBuffers(urls) {
  const parts = await Promise.all(urls.map((url) => fetchArrayBuffer(url)))

  let totalLength = 0
  for (const part of parts) {
    totalLength += part.byteLength
  }

  const joined = new Uint8Array(totalLength)
  let offset = 0
  for (const part of parts) {
    joined.set(new Uint8Array(part), offset)
    offset += part.byteLength
  }
  return joined
}
113
+
114
/** Holds one loaded `Model` per model ID so each model is only built once. */
class Phi {
  // Loaded models keyed by modelID.
  static instance = {}

  /**
   * Returns the model for `modelID`, downloading its files and constructing it on first use.
   *
   * @param {string|string[]} weightsURL - Weights file, or the list of parts to concatenate.
   * @param {string} modelID - Key under which the model is stored.
   * @param {string} tokenizerURL - Tokenizer file URL.
   * @param {string} configURL - Config file URL.
   * @param {boolean} quantized - Passed through to the `Model` constructor.
   * @returns {Promise<Model>} The stored model instance.
   */
  static async getInstance(weightsURL, modelID, tokenizerURL, configURL, quantized) {
    if (this.instance[modelID]) {
      return this.instance[modelID]
    }

    await init()
    self.postMessage({ status: "loading", message: "Loading Model" })

    // Weights, tokenizer and config are fetched concurrently.
    const weightsDownload =
      weightsURL instanceof Array
        ? concatenateArrayBuffers(weightsURL)
        : fetchArrayBuffer(weightsURL)
    const downloads = [weightsDownload, fetchArrayBuffer(tokenizerURL), fetchArrayBuffer(configURL)]
    const [weightsArrayU8, tokenizerArrayU8, configArrayU8] = await Promise.all(downloads)

    this.instance[modelID] = new Model(weightsArrayU8, tokenizerArrayU8, configArrayU8, quantized)
    return this.instance[modelID]
  }
}
146
+
147
// Abort controller of the generation started last; null until the first "start".
let controller = null

// Command dispatcher: "start" begins a generation, "abort" flags the current one to stop.
self.addEventListener("message", (event) => {
  const { command } = event.data
  if (command === "start") {
    controller = new AbortController()
    generate(event.data)
  } else if (command === "abort") {
    // An "abort" can arrive before any "start"; there is nothing to cancel then.
    controller?.abort()
  }
})
156
+
157
/**
 * Loads the requested model and streams generated tokens back to the page.
 *
 * Posts "loading" messages while the model is prepared, one "generating" message per
 * token, and finally exactly one of "complete", "aborted" or `{ error }`. Generation
 * stops at an end token, after the token budget is used up, or when the shared
 * `controller` has been aborted.
 *
 * @param {object} data - The "start" command payload sent by the page.
 * @param {string[]} [data.stuff] - End tokens; the built-in list is used when this is
 *   missing or contains no non-empty entry.
 * @returns {Promise<void>} Settles once the final message has been posted; never rejects.
 */
async function generate(data) {
  const {
    weightsURL,
    modelID,
    tokenizerURL,
    configURL,
    quantized,
    prompt,
    temp,
    top_p,
    repeatPenalty,
    seed,
    maxSeqLen,
    stuff,
  } = data

  // Built once per request instead of once per token.
  const defaultEndTokens = ["<|endoftext|>", "<|user|>", "<|system|>", "<|assistant|>"]
  const requestedEndTokens = Array.isArray(stuff)
    ? stuff.map((t) => String(t).trim()).filter((t) => t !== "")
    : []
  const terminates = new Set(requestedEndTokens.length > 0 ? requestedEndTokens : defaultEndTokens)

  try {
    self.postMessage({ status: "loading", message: "Starting Phi" })
    const model = await Phi.getInstance(weightsURL, modelID, tokenizerURL, configURL, quantized)

    self.postMessage({ status: "loading", message: "Initializing model" })
    // NOTE(review): 64 looks like the repeat-penalty window — confirm against the Model bindings.
    const firstToken = model.init_with_prompt(prompt, temp, top_p, repeatPenalty, 64, BigInt(seed))
    const seq_len = 2048

    let sentence = firstToken
    const maxTokens = maxSeqLen ? Number(maxSeqLen) : seq_len - prompt.length - 1
    const startTime = performance.now()
    let tokensCount = 0

    while (tokensCount < maxTokens) {
      if (controller && controller.signal.aborted) {
        self.postMessage({
          status: "aborted",
          message: "Aborted",
          output: prompt + sentence,
        })
        return
      }

      const token = await model.next_token()
      if (terminates.has(token)) break

      const tokensSec = ((tokensCount + 1) / (performance.now() - startTime)) * 1000
      sentence += token
      self.postMessage({
        status: "generating",
        message: "Generating token",
        token,
        sentence,
        totalTime: performance.now() - startTime,
        tokensSec,
        prompt,
      })
      tokensCount++

      // Yield to the event loop so a pending "abort" command can be delivered
      // before the next token is generated.
      await new Promise((resolve) => setTimeout(resolve, 0))
    }

    self.postMessage({
      status: "complete",
      message: "complete",
      output: prompt + sentence,
    })
  } catch (e) {
    // Send plain text: an arbitrary thrown value may not survive postMessage cloning.
    self.postMessage({ error: e instanceof Error ? e.message : String(e) })
  }
}
phi-offline/style.css ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ body {
2
+ padding: 2rem;
3
+ font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
4
+ }
5
+
6
+ h1 {
7
+ font-size: 16px;
8
+ margin-top: 0;
9
+ }
10
+
11
+ p {
12
+ color: rgb(107, 114, 128);
13
+ font-size: 15px;
14
+ margin-bottom: 10px;
15
+ margin-top: 5px;
16
+ }
17
+
18
+ .card {
19
+ max-width: 620px;
20
+ margin: 0 auto;
21
+ padding: 16px;
22
+ border: 1px solid lightgray;
23
+ border-radius: 16px;
24
+ }
25
+
26
+ .card p:last-child {
27
+ margin-bottom: 0;
28
+ }