NyxKrage committed on
Commit
bbd65b3
·
verified ·
1 Parent(s): 4fe61e8

Delete index.html

Browse files
Files changed (1) hide show
  1. index.html +0 -570
index.html DELETED
@@ -1,570 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8" />
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
- <script>
7
/**
 * Parse a string of HTML markup into a standalone document.
 *
 * @param {string} str - HTML markup to parse.
 * @returns {Document} The document DOMParser builds for the "text/html" type.
 */
function strToHtml(str) {
  const htmlDocument = new DOMParser().parseFromString(str, "text/html");
  return htmlDocument;
}
11
-
12
/**
 * Short, jQuery-independent function to read an HTML table and write its rows
 * into an Array of objects. Kudos to RobG at StackOverflow.
 *
 * The first row supplies the property names; every following row becomes one
 * object mapping those names to the trimmed text of the cell in that column.
 *
 * @param {?{rows: ArrayLike<{cells: ArrayLike<object>}>}} table - Table element
 *   (or null/undefined, e.g. when querySelector('table') found nothing).
 * @returns {Array<Object<string, string>>} One object per data row; an empty
 *   array when there is no table or no header row.
 */
function tableToObj(table) {
  // No table at all, or a table without even a header row: nothing to read.
  if (table == null || !table.rows || table.rows.length === 0) {
    return [];
  }

  // Missing cells (short rows) and cells without text both read as "".
  const cellText = (cell) => {
    if (!cell) {
      return "";
    }
    return (cell.textContent || cell.innerText || "").trim();
  };

  const rows = Array.from(table.rows);

  // Use the first row for the property names.
  const propNames = Array.from(rows[0].cells, (cell) => cellText(cell));

  // Use the remaining rows for data.
  return rows.slice(1).map((row) => {
    const obj = {};
    propNames.forEach((name, column) => {
      obj[name] = cellText(row.cells[column]);
    });
    return obj;
  });
}
46
-
47
/**
 * Turn GPU table rows into display labels of the form "<product> - <memory>".
 *
 * Only the part of the "Memory" column before the first comma is kept.
 *
 * @param {Array<Object<string, string>>} gpus - Rows with "Product Name" and
 *   "Memory" properties.
 * @returns {string[]} One label per row, in input order.
 */
function formatGpu(gpus) {
  return gpus.map((gpu) => {
    const productName = gpu["Product Name"];
    const [memorySize] = gpu["Memory"].split(",");
    return `${productName} - ${memorySize}`;
  });
}
52
-
53
// Bits per weight used for each GGUF quantization name when sizing a model.
const gguf_quants = {
  Q2_K: 3.35,
  Q3_K_S: 3.5,
  Q3_K_M: 3.91,
  Q3_K_L: 4.27,
  Q4_0: 4.55,
  Q4_K_S: 4.58,
  Q4_K_M: 4.85,
  Q5_0: 5.54,
  Q5_K_S: 5.54,
  Q5_K_M: 5.69,
  Q6_K: 6.59,
  Q8_0: 8.5,
};
67
-
68
// Parameter count derived from a weights index file: metadata.total_size
// divided by 2 (i.e. two bytes per parameter are assumed).
async function fetchIndexParameterCount(url) {
  const response = await fetch(url);
  const index = await response.json();
  return index["metadata"]["total_size"] / 2;
}

// Read safetensors.total from an element's data-props JSON. The object may sit
// at the top level or under "model"; both layouts are accepted because the
// page structure cannot be verified from this file.
function safetensorsTotalFromProps(el) {
  const props = JSON.parse(el.attributes.getNamedItem("data-props").value);
  const holder = props["safetensors"] ? props : props["model"];
  return holder["safetensors"]["total"];
}

// Last-resort lookup: load the model page through the CORS proxy and read the
// parameter total embedded in its markup.
async function scrapeParameterCount(hf_model) {
  const response = await fetch(
    "https://corsproxy.io/?" + encodeURIComponent(`https://huggingface.co/${hf_model}`)
  );
  const model_page = await response.text();
  const el = document.createElement("html");
  el.innerHTML = model_page;
  const params_el =
    el.querySelector('div[data-target="ModelSafetensorsParams"]') ||
    el.querySelector('div[data-target="ModelHeader"]');
  if (params_el === null) {
    throw new Error(`Could not determine the parameter count for ${hf_model}`);
  }
  return safetensorsTotalFromProps(params_el);
}

/**
 * Fetch a model's config.json from Hugging Face and attach its parameter count.
 *
 * The parameter count is looked up in order from:
 *   1. model.safetensors.index.json
 *   2. pytorch_model.bin.index.json
 *   3. the model page itself (via corsproxy.io)
 * Each step is only tried when the previous one fails.
 *
 * @param {string} hf_model - Repository id, e.g. "mistralai/Mistral-7B-v0.1".
 * @returns {Promise<object>} The parsed config with an added `parameters` field.
 * @throws {Error} When config.json cannot be loaded or no source yields a
 *   parameter count.
 */
async function modelConfig(hf_model) {
  const base = `https://huggingface.co/${hf_model}/raw/main`;
  const config_response = await fetch(`${base}/config.json`);
  const config = await config_response.json();

  let model_size = 0;
  try {
    model_size = await fetchIndexParameterCount(`${base}/model.safetensors.index.json`);
  } catch {
    try {
      // The response must be awaited *before* indexing into it; indexing the
      // pending Promise always threw and made this fallback unreachable.
      model_size = await fetchIndexParameterCount(`${base}/pytorch_model.bin.index.json`);
    } catch {
      model_size = await scrapeParameterCount(hf_model);
    }
  }

  config.parameters = model_size;
  return config;
}
96
-
97
/**
 * Sum the element counts of llama.cpp's input tensors.
 *
 * Mirrors the tensors allocated in github:ggerganov/llama.cpp (llama.cpp:11248):
 *   inp_tokens  1d, n_batch
 *   inp_embd    2d, n_embd x n_batch   (n_embd is the model's hidden size)
 *   inp_pos     1d, n_batch
 *   inp_KQ_mask 2d, n_ctx x n_batch
 *   inp_K_shift 1d, n_ctx
 *   inp_sum     2d, 1 x n_batch
 *
 * NOTE(review): the result is a count of elements, not bytes; the tensors
 * listed above are 32-bit types, so confirm whether callers expect bytes.
 *
 * @param {number} [context=8192] - Context length (n_ctx).
 * @param {object} model_config - Model config; reads `hidden_size`.
 * @param {number} [bsz=512] - Batch size (n_batch).
 * @returns {number} Total number of elements across the six tensors.
 */
function inputBuffer(context=8192, model_config, bsz=512) {
  const hidden_size = model_config["hidden_size"];

  let total = bsz;               // inp_tokens
  total += hidden_size * bsz;    // inp_embd
  total += bsz;                  // inp_pos
  total += context * bsz;        // inp_KQ_mask
  total += context;              // inp_K_shift
  total += bsz;                  // inp_sum
  return total;
}
117
-
118
/**
 * Estimate the compute buffer size for a given context length.
 *
 * The estimate is (context / 1024 * 2 + 0.75) * num_attention_heads MiB and
 * does not take the batch size into account: for any batch size other than 512
 * the user is warned and the same 512-based figure is returned.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config; reads `num_attention_heads`.
 * @param {number} [bsz=512] - Batch size (only used to decide whether to warn).
 * @returns {number} Estimated compute buffer size.
 */
function computeBuffer(context=8192, model_config, bsz=512) {
  if (Number(bsz) !== 512) {
    alert("Batch sizes other than 512 are currently not supported for the compute buffer; using batch size 512 for the compute buffer calculation, so the end result will be an overestimation.");
  }
  return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024;
}
124
-
125
/**
 * Estimate the size of the key/value cache.
 *
 * Element count is 2 (K and V) * n_embd_gqa * num_hidden_layers * context,
 * where n_embd_gqa = hidden_size / (num_attention_heads / num_key_value_heads).
 * Each element takes 2 units, or 1 when `fp8_cache` is set.
 *
 * Configs that omit `num_key_value_heads` are treated as plain multi-head
 * attention (one KV head per attention head) instead of yielding NaN.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config; reads `hidden_size`,
 *   `num_attention_heads`, `num_hidden_layers`, and optionally
 *   `num_key_value_heads`.
 * @param {boolean} [fp8_cache=false] - Halve the per-element size.
 * @returns {number} Estimated cache size.
 */
function kvCache(context=8192, model_config, fp8_cache=false) {
  const n_head = model_config["num_attention_heads"];
  const configured_kv_heads = model_config["num_key_value_heads"];
  const n_head_kv = configured_kv_heads == null ? n_head : configured_kv_heads;

  const n_gqa = n_head / n_head_kv;
  const n_embd_gqa = model_config["hidden_size"] / n_gqa;
  const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context);
  const size = 2 * n_elements;
  if (fp8_cache) {
    return size;
  }
  return size * 2;
}
135
-
136
/**
 * Total context-dependent footprint: input buffers + KV cache + compute
 * buffer, rounded to two decimal places.
 *
 * @param {number} [context=8192] - Context length.
 * @param {object} model_config - Model config passed through to the helpers.
 * @param {number} [bsz=512] - Batch size.
 * @param {boolean} [fp8_cache=false] - Whether the KV cache uses FP8.
 * @returns {number} Sum of the three estimates.
 */
function contextSize(context=8192, model_config, bsz=512, fp8_cache=false) {
  const input_part = inputBuffer(context, model_config, bsz);
  const cache_part = kvCache(context, model_config, fp8_cache);
  const compute_part = computeBuffer(context, model_config, bsz);
  const combined = input_part + cache_part + compute_part;
  return Number.parseFloat(combined.toFixed(2));
}
139
-
140
/**
 * Size of the quantized weights in bytes: parameters * bits-per-weight / 8,
 * rounded to two decimal places.
 *
 * @param {object} model_config - Model config; reads `parameters`.
 * @param {number} [bpw=4.5] - Bits per weight of the quantization.
 * @returns {number} Weight size in bytes.
 */
function modelSize(model_config, bpw=4.5) {
  const parameter_count = model_config["parameters"];
  const weight_bytes = parameter_count * bpw / 8;
  const rounded = weight_bytes.toFixed(2);
  return Number.parseFloat(rounded);
}
143
-
144
// Extract the VRAM amount in GB from a GPU label such as
// "GeForce RTX 3090 - 24 GB". Only the number after the LAST hyphen is used,
// so hyphens inside the product name are harmless; "MB" values are converted.
// Returns NaN when the label carries no memory amount.
function parseVramGb(gpuLabel) {
  const match = /-\s*(\d+(?:\.\d+)?)\s*(GB|MB)?\s*$/i.exec(gpuLabel);
  if (match === null) {
    return Number.NaN;
  }
  const amount = Number.parseFloat(match[1]);
  const unit = (match[2] || "GB").toUpperCase();
  return unit === "MB" ? amount / 1024 : amount;
}

/**
 * Read the form, estimate model/context/total memory, and write the results
 * (in GB, two decimals) into the result fields.
 *
 * When a GPU is selected and its VRAM can be read, the total field is
 * coloured green (more than 0.5 GB headroom), yellow (fits with less), or red
 * (does not fit). Otherwise any previous colour is cleared.
 *
 * @param {string} format - "gguf" or "exl2"; selects which option inputs are
 *   read. Any other value leaves bits-per-weight at 0.
 * @returns {Promise<void>}
 */
async function calculateSizes(format) {
  const model_config = await modelConfig(document.getElementById("modelsearch").value);
  const context = Number.parseInt(document.getElementById("contextsize").value, 10);
  let bsz = 512;
  let fp8_cache = false;
  let bpw = 0;
  if (format === "gguf") {
    bsz = Number.parseInt(document.getElementById("batchsize").value, 10);
    bpw = gguf_quants[document.getElementById("quantsize").innerText.trim()];
  } else if (format === "exl2") {
    fp8_cache = document.getElementById("fp8cache").checked;
    bpw = Number.parseFloat(document.getElementById("bpw").value);
  }

  const model_size = modelSize(model_config, bpw);
  const context_size = contextSize(context, model_config, bsz, fp8_cache);
  const total_size = (model_size + context_size) / 1e+9;
  document.getElementById("resultmodel").innerText = (model_size / 1e+9).toFixed(2);
  document.getElementById("resultcontext").innerText = (context_size / 1e+9).toFixed(2);
  const result_total_el = document.getElementById("resulttotal");
  result_total_el.innerText = total_size.toFixed(2);

  const gpu = document.getElementById("gpusearch").value;
  const vram = gpu === "" ? Number.NaN : parseVramGb(gpu);
  if (Number.isNaN(vram)) {
    // No (readable) GPU: do not leave a verdict from an earlier run on screen.
    result_total_el.style.backgroundColor = "";
    return;
  }

  const headroom = vram - total_size;
  if (headroom > 0.5) {
    result_total_el.style.backgroundColor = "#bef264";
  } else if (headroom > 0) {
    result_total_el.style.backgroundColor = "#facc15";
  } else {
    result_total_el.style.backgroundColor = "#ef4444";
  }
}
180
- </script>
181
- <link href="./styles.css" rel="stylesheet">
182
- <title>Can I run it? - LLM VRAM Calculator</title>
183
- </head>
184
- <body class="p-8">
185
- <div x-data="{ format: 'gguf' }" class="flex flex-col max-h-screen items-center mt-16 gap-10">
186
- <h1 class="text-xl font-semibold leading-6 text-gray-900">
187
- LLM Model, Can I run it?
188
- </h1>
189
- <div class="flex flex-col gap-10">
190
- <div class="w-auto flex flex-col gap-4">
191
- <!-- GPU Selector -->
192
- <div
193
- class="relative"
194
- x-data="{
195
- results: null,
196
- query: null
197
- }"
198
- >
199
- <label
200
- for="gpusearch"
201
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
202
- >GPU</label
203
- >
204
- <input
205
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
206
- placeholder="GeForce RTX 3090 - 24 GB"
207
- id="gpusearch"
208
- name="gpusearch"
209
- list="gpulist"
210
- x-model="query"
211
- @keypress.debounce.150ms="results = query === '' ? [] : formatGpu(tableToObj(strToHtml(await fetch('https://corsproxy.io/?https://www.techpowerup.com/gpu-specs/?ajaxsrch=' + query).then(r => r.text())).querySelector('table')))"
212
- />
213
- <datalist id="gpulist">
214
- <template x-for="item in results">
215
- <option :value="item" x-text="item"></option>
216
- </template>
217
- </datalist>
218
- </div>
219
- <!-- Model Selector -->
220
-
221
-
222
- <div class="flex flex-row gap-4 relative">
223
- <label
224
- for="modelsearch"
225
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
226
- >
227
- Model
228
- </label>
229
- <div
230
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
231
- x-data="{
232
- open: false,
233
- value: 'mistralai/Mistral-7B-v0.1',
234
- results: null,
235
- toggle() {
236
- if (this.open) {
237
- return this.close()
238
- }
239
-
240
- this.$refs.input.focus()
241
-
242
- this.open = true
243
- },
244
- close(focusAfter) {
245
- if (! this.open) return
246
-
247
- this.open = false
248
-
249
- focusAfter && focusAfter.focus()
250
- }
251
- }"
252
- x-on:keydown.escape.prevent.stop="close($refs.input)"
253
- x-id="['model-typeahead']"
254
- class="relative"
255
- >
256
- <!-- Input -->
257
- <input
258
- id="modelsearch"
259
- x-ref="input"
260
- x-on:click="toggle()"
261
- @keypress.debounce.150ms="results = (await
262
- fetch('https://huggingface.co/api/quicksearch?type=model&q=' +
263
- encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));"
264
- :aria-expanded="open"
265
- :aria-controls="$id('model-typeahead')"
266
- x-model="value"
267
- class="flex justify-between items-center gap-2 w-full"
268
- />
269
-
270
- <!-- Panel -->
271
- <div
272
- x-ref="panel"
273
- x-show="open"
274
- x-transition.origin.top.left
275
- x-on:click.outside="close($refs.input)"
276
- :id="$id('model-typeahead')"
277
- style="display: none"
278
- class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
279
- >
280
- <template x-for="result in results">
281
- <a
282
- @click="value = result.id; close($refs.input)"
283
- x-text="result.id"
284
- class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
285
- ></a>
286
- </template>
287
- </div>
288
- </div>
289
- </div>
290
-
291
-
292
- <!-- Context Size Selector -->
293
- <div class="relative">
294
- <label
295
- for="contextsize"
296
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
297
- >
298
- Context Size
299
- </label>
300
- <input
301
- value="8192"
302
- type="number"
303
- name="contextsize"
304
- id="contextsize"
305
- step="1024"
306
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
307
- />
308
- </div>
309
- <!-- Quant Format Selector -->
310
- <div class="relative">
311
- <label
312
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
313
- >Quant Format</label
314
- >
315
- <fieldset
316
- x-model="format"
317
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
318
- >
319
- <legend class="sr-only">Quant format</legend>
320
- <div
321
- class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0"
322
- >
323
- <div class="flex items-center">
324
- <input
325
- id="gguf-format"
326
- name="quant-format"
327
- type="radio"
328
- value="gguf"
329
- checked
330
- class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
331
- />
332
- <label
333
- for="gguf-format"
334
- class="ml-3 block text-sm font-medium leading-6 text-gray-900"
335
- >GGUF</label
336
- >
337
- </div>
338
- <div class="flex items-center">
339
- <input
340
- id="exl2-format"
341
- name="quant-format"
342
- type="radio"
343
- value="exl2"
344
- class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
345
- />
346
- <label
347
- for="exl2-format"
348
- class="ml-3 block text-sm font-medium leading-6 text-gray-900"
349
- >EXL2</label
350
- >
351
- </div>
352
- <div class="flex items-center">
353
- <input
354
- id="gptq-format"
355
- name="quant-format"
356
- type="radio"
357
- disabled
358
- value="gptq"
359
- class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
360
- />
361
- <label
362
- for="gptq-format"
363
- class="ml-3 block text-sm font-medium leading-6 text-gray-900"
364
- >GPTQ (coming soon)</label
365
- >
366
- </div>
367
- </div>
368
- </fieldset>
369
- </div>
370
- <!-- EXL2 Options -->
371
- <div x-show="format === 'exl2'" class="flex flex-row gap-4">
372
- <div class="relative flex-grow">
373
- <label
374
- for="bpw"
375
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
376
- >
377
- BPW
378
- </label>
379
- <input
380
- value="4.5"
381
- type="number"
382
- step="0.01"
383
- id="bpw"
384
- name="bpw"
385
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
386
- />
387
- </div>
388
- <div
389
- class="flex-shrink relative rounded-md"
390
- >
391
- <div
392
- class="w-fit p-3 h-full flex items-center gap-2 justify-center rounded-md border-0 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
393
- >
394
- <label
395
- for="fp8cache"
396
- class="inline-block bg-white text-xs font-medium text-gray-900"
397
- >
398
- FP8 Cache
399
- </label>
400
- <input id="fp8cache" type="checkbox">
401
- </input>
402
- </div>
403
- </div>
404
- </div>
405
- <!-- GGUF Options -->
406
- <div x-show="format === 'gguf'" class="relative">
407
- <div class="flex flex-row gap-4">
408
- <label
409
- for="quantsize"
410
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
411
- >
412
- Quantization Size
413
- </label>
414
- <div
415
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
416
- x-data="{
417
- open: false,
418
- value: '',
419
- toggle() {
420
- if (this.open) {
421
- return this.close()
422
- }
423
-
424
- this.$refs.button.focus()
425
-
426
- this.open = true
427
- },
428
- close(focusAfter) {
429
- if (! this.open) return
430
-
431
- this.open = false
432
-
433
- focusAfter && focusAfter.focus()
434
- }
435
- }"
436
- x-on:keydown.escape.prevent.stop="close($refs.button)"
437
- x-id="['dropdown-button']"
438
- class="relative"
439
- >
440
- <!-- Button -->
441
- <button
442
- x-ref="button"
443
- x-on:click="toggle()"
444
- :aria-expanded="open"
445
- :aria-controls="$id('dropdown-button')"
446
- type="button"
447
- id="quantsize"
448
- x-text="value.length === 0 ? 'Q4_K_S' : value"
449
- class="flex justify-between items-center gap-2 w-full"
450
- >
451
- Q4_K_S
452
-
453
- <!-- Heroicon: chevron-down -->
454
- <svg
455
- xmlns="http://www.w3.org/2000/svg"
456
- class="h-5 w-5 text-gray-400"
457
- viewBox="0 0 20 20"
458
- fill="currentColor"
459
- >
460
- <path
461
- fill-rule="evenodd"
462
- d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
463
- clip-rule="evenodd"
464
- />
465
- </svg>
466
- </button>
467
-
468
- <!-- Panel -->
469
- <div
470
- x-data="{ quants: [
471
- 'Q3_K_S',
472
- 'Q3_K_M',
473
- 'Q3_K_L',
474
- 'Q4_0',
475
- 'Q4_K_S',
476
- 'Q4_K_M',
477
- 'Q5_0',
478
- 'Q5_K_S',
479
- 'Q5_K_M',
480
- 'Q6_K',
481
- 'Q8_0'
482
- ]}"
483
- x-ref="panel"
484
- x-show="open"
485
- x-transition.origin.top.left
486
- x-on:click.outside="close($refs.button)"
487
- :id="$id('dropdown-button')"
488
- style="display: none"
489
- class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
490
- >
491
- <template x-for="quant in quants">
492
- <a
493
- @click="value = quant; close($refs.button)"
494
- x-text="quant"
495
- class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
496
- ></a>
497
- </template>
498
- </div>
499
- </div>
500
- <div class="relative">
501
- <label
502
- for="batchsize"
503
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
504
- >
505
- Batch Size
506
- </label>
507
- <input
508
- value="512"
509
- type="number"
510
- step="128"
511
- id="batchsize"
512
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
513
- />
514
- </div>
515
- </div>
516
- </div>
517
- <button
518
- type="button"
519
- class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600"
520
- @click="calculateSizes(format)"
521
- >
522
- Submit
523
- </button>
524
- </div>
525
- <div class="w-auto flex flex-col gap-4">
526
- <div class="relative">
527
- <label
528
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
529
- >
530
- Model Size (GB)
531
- </label>
532
- <div
533
- id="resultmodel"
534
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
535
- >4.20</div>
536
- </div>
537
- <div class="relative">
538
- <label
539
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
540
- >
541
- Context Size (GB)
542
- </label>
543
- <div
544
- id="resultcontext"
545
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
546
- >6.90</div>
547
- </div>
548
- <div class="relative">
549
- <label
550
- class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
551
- >
552
- Total Size (GB)
553
- </label>
554
- <div
555
- id="resulttotal"
556
- class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
557
- >420.69</div>
558
- </div>
559
- </div>
560
- </div>
561
- </div>
562
- <script
563
- defer
564
- src="https://cdn.jsdelivr.net/npm/[email protected]/dist/cdn.min.js"
565
- ></script>
566
- <script defer>
567
- calculateSizes("gguf")
568
- </script>
569
- </body>
570
- </html>