File size: 24,709 Bytes
fd2cf9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6551c6
 
 
 
 
 
 
fd2cf9d
 
c6551c6
 
fd2cf9d
 
c6551c6
 
fd2cf9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24da852
cb7ca95
f6519fe
 
fd2cf9d
 
24da852
cb7ca95
f6519fe
 
fd2cf9d
 
 
 
 
 
 
cbf6da2
 
fd2cf9d
 
cbf6da2
fd2cf9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107ba5f
fd2cf9d
 
 
 
107ba5f
fd2cf9d
 
107ba5f
 
fd2cf9d
 
 
 
 
 
 
0b4ceff
 
 
 
107ba5f
0b4ceff
 
 
 
 
 
107ba5f
0b4ceff
fd2cf9d
0b4ceff
 
107ba5f
 
 
 
0b4ceff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd2cf9d
 
 
 
 
 
 
 
 
 
 
595991f
 
 
fd2cf9d
 
 
 
 
 
 
 
 
 
 
 
 
5afc20b
fd2cf9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9243531
fd2cf9d
 
 
 
 
b82f52b
fd2cf9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107ba5f
fd2cf9d
 
107ba5f
fd2cf9d
107ba5f
 
 
 
 
fd2cf9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c6551c6
 
 
 
 
 
 
 
fd2cf9d
c6551c6
 
fd2cf9d
 
c6551c6
 
fd2cf9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <script>
      /**
       * Parse a string of HTML markup into a document.
       *
       * @param {string} str - HTML source text.
       * @returns {Document} The result of DOMParser#parseFromString in "text/html" mode.
       */
      function strToHtml(str) {
        return new DOMParser().parseFromString(str, "text/html");
      }

      // Short, jQuery-independent function to read an HTML table and write its
      // rows into an array of objects.
      // Kudos to RobG at StackOverflow
      /**
       * Convert a table into an array of plain objects, one per data row.
       *
       * The first row supplies the property names; every following row becomes
       * an object mapping those names to the trimmed text of the cell in the
       * same column.
       *
       * @param {?{rows: ArrayLike<{cells: ArrayLike<object>}>}} table - A table
       *   element (or anything exposing `rows` / `cells` the same way). May be
       *   null or undefined, e.g. when a `querySelector('table')` found nothing.
       * @returns {Object<string, string>[]} One object per data row; an empty
       *   array when the table is missing or has no rows.
       */
      function tableToObj(table) {
        const rows = table && table.rows ? Array.from(table.rows) : [];
        if (rows.length === 0) {
          return [];
        }

        // A row may have fewer cells than the header; a missing cell reads as "".
        const cellText = (cell) =>
          cell ? (cell.textContent || cell.innerText || "").trim() : "";

        // Use the first row for the property names. A header section could be
        // used instead, but the result is the same with a single header row.
        const [headerRow, ...dataRows] = rows;
        const propNames = Array.from(headerRow.cells, (cell) => cellText(cell));

        return dataRows.map((row) =>
          Object.fromEntries(
            propNames.map((name, column) => [name, cellText(row.cells[column])])
          )
        );
      }

      /**
       * Build the display label for each GPU record.
       *
       * @param {Object<string, string>[]} gpus - Records with "Product Name"
       *   and "Memory" entries.
       * @returns {string[]} Labels of the form "<Product Name> - <text of
       *   Memory before its first comma>".
       */
      function formatGpu(gpus) {
        return gpus.map((gpu) => {
          const productName = gpu["Product Name"];
          const [memory] = gpu["Memory"].split(",");
          return `${productName} - ${memory}`;
        });
      }

      // Lookup table from GGUF quantization name to the number that
      // calculateSizes uses as `bpw` when sizing the model.
      const gguf_quants = {
        IQ1_S: 1.56,
        IQ2_XXS: 2.06,
        IQ2_XS: 2.31,
        IQ2_S: 2.5,
        IQ2_M: 2.7,
        IQ3_XXS: 3.06,
        IQ3_XS: 3.3,
        Q2_K: 3.35,
        Q3_K_S: 3.5,
        IQ3_S: 3.5,
        IQ3_M: 3.7,
        Q3_K_M: 3.91,
        Q3_K_L: 4.27,
        IQ4_XS: 4.25,
        IQ4_NL: 4.5,
        Q4_0: 4.55,
        Q4_K_S: 4.58,
        Q4_K_M: 4.85,
        Q5_0: 5.54,
        Q5_K_S: 5.54,
        Q5_K_M: 5.69,
        Q6_K: 6.59,
        Q8_0: 8.5,
      };
  
      /**
       * Fetch a Hugging Face model's config.json and attach a `parameters`
       * value to it.
       *
       * `parameters` is resolved from the first source that works:
       *   1. `metadata.total_size / 2` from model.safetensors.index.json
       *   2. `metadata.total_size / 2` from pytorch_model.bin.index.json
       *   3. the `safetensors.total` value embedded in the model page's
       *      `data-props` attribute (fetched through corsproxy.io)
       *
       * NOTE(review): halving total_size presumably assumes 2 bytes per
       * parameter (16-bit weights) - confirm.
       *
       * @param {string} hf_model - Repository id, e.g. "org/model-name".
       * @returns {Promise<object>} The parsed config.json with `parameters` added.
       * @throws {Error} When config.json cannot be fetched or no source yields
       *   a parameter count.
       */
      async function modelConfig(hf_model) {
        const config_response = await fetch(
          `https://huggingface.co/${hf_model}/raw/main/config.json`
        )
        if (config_response.ok === false) {
          throw new Error(`could not fetch config.json for ${hf_model} (HTTP ${config_response.status})`)
        }
        const config = await config_response.json()

        // Reads metadata.total_size from a weight index file and halves it;
        // throws when the file is unavailable or carries no usable size.
        const sizeFromIndex = async (index_file, label) => {
          const index = await fetch(
            `https://huggingface.co/${hf_model}/resolve/main/${index_file}`
          ).then(r => r.json())
          const size = index["metadata"]["total_size"] / 2
          if (isNaN(size)) {
            throw new Error(`no size in ${label} metadata`)
          }
          return size
        }

        let model_size = 0
        try {
          model_size = await sizeFromIndex("model.safetensors.index.json", "safetensors")
        } catch {
          try {
            model_size = await sizeFromIndex("pytorch_model.bin.index.json", "pytorch")
          } catch {
            // Last resort: read the count out of the rendered model page.
            const model_page = await fetch(
                "https://corsproxy.io/?" + encodeURIComponent(`https://huggingface.co/${hf_model}`)
            ).then(r => r.text())
            // DOMParser builds a separate, inert document, so markup in the
            // fetched page cannot load resources or run handlers in this page.
            const page = new DOMParser().parseFromString(model_page, "text/html")
            let params_el = page.querySelector('div[data-target="ModelSafetensorsParams"]')
            if (params_el !== null) {
              model_size = JSON.parse(params_el.getAttribute("data-props"))["safetensors"]["total"]
            } else {
              params_el = page.querySelector('div[data-target="ModelHeader"]')
              if (params_el === null) {
                throw new Error(`could not determine parameter count for ${hf_model}`)
              }
              model_size = JSON.parse(params_el.getAttribute("data-props"))["model"]["safetensors"]["total"]
            }
          }
        }
        config.parameters = model_size
        return config
      }

      /**
       * Sum the element counts of the llama.cpp input tensors.
       *
       * Calculation taken from github:ggerganov/llama.cpp/llama.cpp:11248
       *   ctx->inp_tokens  = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch);
       *   ctx->inp_embd    = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, hparams.n_embd, cparams.n_batch);
       *   ctx->inp_pos     = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_batch);
       *   ctx->inp_KQ_mask = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, cparams.n_ctx, cparams.n_batch);
       *   ctx->inp_K_shift = ggml_new_tensor_1d(ctx->ctx_input, GGML_TYPE_I32, cparams.n_ctx);
       *   ctx->inp_sum     = ggml_new_tensor_2d(ctx->ctx_input, GGML_TYPE_F32, 1, cparams.n_batch);
       *
       * n_embd is hidden size (github:ggerganov/llama.cpp/convert.py:248)
       *
       * NOTE(review): the value returned is a count of tensor elements; the
       * caller adds it to byte quantities - confirm whether a per-element
       * size factor is intended.
       *
       * @param {number} [context=8192] - Context length (n_ctx).
       * @param {object} model_config - Model config; `hidden_size` is read.
       * @param {number} [bsz=512] - Batch size (n_batch).
       * @returns {number} Total number of elements across the six tensors.
       */
      function inputBuffer(context=8192, model_config, bsz=512) {
        const n_embd = model_config["hidden_size"]
        const element_counts = [
          bsz,           // inp_tokens
          n_embd * bsz,  // inp_embd
          bsz,           // inp_pos
          context * bsz, // inp_KQ_mask
          context,       // inp_K_shift
          bsz,           // inp_sum
        ]
        return element_counts.reduce((total, count) => total + count)
      }

      /**
       * Estimate the compute buffer size for a given context length.
       *
       * The formula is (context / 1024 * 2 + 0.75) * num_attention_heads * 1024 * 1024.
       * The batch size does not enter the formula; any value other than 512
       * only triggers a warning to the user.
       *
       * @param {number} [context=8192] - Context length.
       * @param {object} model_config - Model config; `num_attention_heads` is read.
       * @param {number} [bsz=512] - Batch size; only 512 is supported.
       * @returns {number} Estimated size (summed with byte sizes by contextSize).
       */
      function computeBuffer(context=8192, model_config, bsz=512) {
        if (bsz !== 512) {
          alert("batch size other than 512 is currently not supported for the compute buffer, using batchsize 512 for compute buffer calculation, end result will be an overestimation")
        }
        return (context / 1024 * 2 + 0.75) * model_config["num_attention_heads"] * 1024 * 1024
      }

      /**
       * Estimate the size in bytes of the key/value cache.
       *
       * Computes 2 (keys and values) * context * num_hidden_layers *
       * (hidden_size / n_gqa) elements, each `cache_bit` bits wide, where
       * n_gqa = num_attention_heads / num_key_value_heads.
       *
       * @param {number} [context=8192] - Context length.
       * @param {object} model_config - Model config; reads `num_attention_heads`,
       *   `num_key_value_heads`, `hidden_size` and `num_hidden_layers`.
       * @param {number} [cache_bit=16] - Bits per cached element.
       * @returns {number} Cache size in bytes.
       */
      function kvCache(context=8192, model_config, cache_bit=16) {
        const n_head = model_config["num_attention_heads"]
        // Configs without grouped-query attention may omit num_key_value_heads;
        // treat that as one KV head per attention head instead of producing NaN.
        const configured_kv_heads = model_config["num_key_value_heads"]
        const n_head_kv = configured_kv_heads == null ? n_head : configured_kv_heads
        const n_gqa = n_head / n_head_kv
        const n_embd_gqa = model_config["hidden_size"] / n_gqa
        const n_elements = n_embd_gqa * (model_config["num_hidden_layers"] * context)
        const size = 2 * n_elements
        return size * (cache_bit / 8)
      }

      /**
       * Total context-dependent memory: input buffer + KV cache + compute
       * buffer, rounded to two decimal places.
       *
       * @param {number} [context=8192] - Context length.
       * @param {object} model_config - Model config passed through to the three estimators.
       * @param {number} [bsz=512] - Batch size (input and compute buffers).
       * @param {number} [cache_bit=16] - Bits per KV cache element.
       * @returns {number} Sum of the three estimates.
       */
      function contextSize(context=8192, model_config, bsz=512, cache_bit=16) {
        const input_part = inputBuffer(context, model_config, bsz)
        const kv_part = kvCache(context, model_config, cache_bit)
        const compute_part = computeBuffer(context, model_config, bsz)
        const total = input_part + kv_part + compute_part
        return Number.parseFloat(total.toFixed(2))
      }

      /**
       * Size in bytes of the quantized weights: parameters * bpw / 8,
       * rounded to two decimal places.
       *
       * @param {object} model_config - Model config; `parameters` is read.
       * @param {number} [bpw=4.5] - Bits per weight.
       * @returns {number} Weight size in bytes.
       */
      function modelSize(model_config, bpw=4.5) {
        const { parameters } = model_config
        const bytes = parameters * bpw / 8
        return Number.parseFloat(bytes.toFixed(2))
      }

      /**
       * Extract the VRAM amount, in GB, from a GPU label such as
       * "GeForce RTX 3090 - 24 GB".
       *
       * Only the text after the LAST hyphen is parsed, so product names that
       * contain hyphens themselves are handled. Amounts given in MB are
       * converted to GB.
       *
       * @param {string} gpu_label - Label as produced by formatGpu or typed by the user.
       * @returns {number} VRAM in GB, or NaN when no number can be read.
       */
      function parseVramGb(gpu_label) {
        const memory_part = gpu_label.slice(gpu_label.lastIndexOf("-") + 1)
        const amount = Number.parseFloat(memory_part)
        return /mb/i.test(memory_part) ? amount / 1024 : amount
      }

      /**
       * Read the form inputs, estimate model / context / total memory and
       * write the results (in units of 2**30 bytes) into the result elements.
       *
       * When a GPU is selected, the total field is coloured by headroom:
       * green for more than 0.5 GB spare, yellow when it fits with less, red
       * when it does not fit. The colour is cleared when no GPU is given or
       * its VRAM cannot be parsed. Any error is reported through alert().
       *
       * @param {string} format - "gguf" or "exl2"; selects which option inputs are read.
       * @returns {Promise<void>}
       */
      async function calculateSizes(format) {
        try {
          const model_config = await modelConfig(document.getElementById("modelsearch").value)
          const context = Number.parseInt(document.getElementById("contextsize").value, 10)
          let bsz = 512
          let cache_bit = 16
          let bpw = 0
          if (format === "gguf") {
            bsz = Number.parseInt(document.getElementById("batchsize").value, 10)
            // The button text may carry surrounding whitespace from the markup.
            const quant = document.getElementById("quantsize").innerText.trim()
            bpw = gguf_quants[quant]
            if (typeof bpw !== "number") {
              // Fail loudly instead of letting NaN flow into the results.
              throw new Error(`unknown GGUF quantization "${quant}"`)
            }
          } else if (format === "exl2") {
            cache_bit = Number.parseInt(document.getElementById("kvCache").value, 10)
            bpw = Number.parseFloat(document.getElementById("bpw").value)
          }

          const model_size = modelSize(model_config, bpw)
          const context_size = contextSize(context, model_config, bsz, cache_bit)
          const total_size = (model_size + context_size) / 2**30
          document.getElementById("resultmodel").innerText = (model_size / 2**30).toFixed(2)
          document.getElementById("resultcontext").innerText = (context_size / 2**30).toFixed(2)
          const result_total_el = document.getElementById("resulttotal")
          result_total_el.innerText = total_size.toFixed(2)

          const gpu = document.getElementById("gpusearch").value
          const vram = gpu === "" ? NaN : parseVramGb(gpu)
          if (Number.isNaN(vram)) {
            // No usable GPU information: drop any colour left by an earlier run.
            result_total_el.style.backgroundColor = ""
          } else if (vram - total_size > 0.5) {
            result_total_el.style.backgroundColor = "#bef264"
          } else if (vram - total_size > 0) {
            result_total_el.style.backgroundColor = "#facc15"
          } else {
            result_total_el.style.backgroundColor = "#ef4444"
          }
        } catch(e) {
          alert(e);
        }
      }
    </script>
    <link href="./styles.css" rel="stylesheet">
    <title>Can I run it? - LLM VRAM Calculator</title>
  </head>
  <body class="p-8">
    <div x-data="{ format: 'gguf' }" class="flex flex-col max-h-screen items-center mt-16 gap-10">
      <h1 class="text-xl font-semibold leading-6 text-gray-900">
        LLM Model, Can I run it?
      </h1>
      <p>
        *This does not support gated or private repos
      </p>
      <div class="flex flex-col gap-10">
        <div class="w-auto flex flex-col gap-4">
          <!-- GPU Selector -->
          <div
            class="relative"
            x-data="{
              results: null,
              query: null
            }"
          >
            <label
              for="gpusearch"
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
              >GPU (optional)</label
            >
            <input
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
              placeholder="GeForce RTX 3090 - 24 GB"
              id="gpusearch"
              name="gpusearch"
              list="gpulist"
              x-model="query"
              @keypress.debounce.150ms="results = query === '' ? [] : formatGpu(tableToObj(strToHtml(await fetch('https://corsproxy.io/?https://www.techpowerup.com/gpu-specs/?ajaxsrch=' + query).then(r => r.text())).querySelector('table')))"
            />
            <datalist id="gpulist">
              <template x-for="item in results">
                <option :value="item" x-text="item"></option>
              </template>
            </datalist>
          </div>
          <!-- Model Selector -->


          <div class="flex flex-row gap-4 relative">
            <label
              for="modelsearch"
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
            >
              Model (unquantized)
            </label>
            <div
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
              x-data="{
                          open: false,
                          value: 'Nexusflow/Starling-LM-7B-beta',
                          results: null,
                          toggle() {
                            if (this.open) {
                              return this.close()
                            }

                            this.$refs.input.focus()
                
                            this.open = true
                          },
                          close(focusAfter) {
                            if (! this.open) return
              
                            this.open = false
              
                            focusAfter && focusAfter.focus()
                          }
                        }"
              x-on:keydown.escape.prevent.stop="close($refs.input)"
              x-id="['model-typeahead']"
              class="relative"
            >
              <!-- Input -->
              <input
                id="modelsearch"
                x-ref="input"
                x-on:click="toggle()"
                @keypress.debounce.150ms="results = (await
                    fetch('https://huggingface.co/api/quicksearch?type=model&q=' +
                    encodeURIComponent(value)).then(r => r.json())).models.filter(m => !m.id.includes('GGUF') && !m.id.includes('AWQ') && !m.id.includes('GPTQ') && !m.id.includes('exl2'));"
                :aria-expanded="open"
                :aria-controls="$id('model-typeahead')"
                x-model="value"
                class="flex justify-between items-center gap-2 w-full"
              />

              <!-- Panel -->
              <div
                x-ref="panel"
                x-show="open"
                x-transition.origin.top.left
                x-on:click.outside="close($refs.input)"
                :id="$id('model-typeahead')"
                style="display: none"
                class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
              >
                <template x-for="result in results">
                  <a
                    @click="value = result.id; close($refs.input)"
                    x-text="result.id"
                    class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
                  ></a>
                </template>
              </div>
            </div>
          </div>


          <!-- Context Size Selector -->
          <div class="relative">
            <label
              for="contextsize"
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
            >
              Context Size
            </label>
            <input
              value="8192"
              type="number"
              name="contextsize"
              id="contextsize"
              step="1024"
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
            />
          </div>
          <!-- Quant Format Selector -->
          <div class="relative">
            <label
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
              >Quant Format</label
            >
            <fieldset
              x-model="format"
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
            >
              <legend class="sr-only">Quant format</legend>
              <div
                class="space-y-4 sm:flex sm:items-center sm:space-x-10 sm:space-y-0"
              >
                <div class="flex items-center">
                  <input
                    id="gguf-format"
                    name="quant-format"
                    type="radio"
                    value="gguf"
                    checked
                    class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
                  />
                  <label
                    for="gguf-format"
                    class="ml-3 block text-sm font-medium leading-6 text-gray-900"
                    >GGUF</label
                  >
                </div>
                <div class="flex items-center">
                  <input
                    id="exl2-format"
                    name="quant-format"
                    type="radio"
                    value="exl2"
                    class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
                  />
                  <label
                    for="exl2-format"
                    class="ml-3 block text-sm font-medium leading-6 text-gray-900"
                    >EXL2</label
                  >
                </div>
                <div class="flex items-center">
                  <input
                    id="gptq-format"
                    name="quant-format"
                    type="radio"
                    disabled
                    value="gptq"
                    class="h-4 w-4 border-gray-300 text-indigo-600 focus:ring-indigo-600"
                  />
                  <label
                    for="gptq-format"
                    class="ml-3 block text-sm font-medium leading-6 text-gray-900"
                    >GPTQ (coming soon)</label
                  >
                </div>
              </div>
            </fieldset>
          </div>
          <!-- EXL2 Options -->
          <div x-show="format === 'exl2'" class="flex flex-row gap-4">
            <div class="relative flex-grow">
              <label
                for="bpw"
                class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
              >
                BPW
              </label>
              <input
                value="4.5"
                type="number"
                step="0.01"
                id="bpw"
                name="bpw"
                class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
              />
            </div>
            <div
              class="flex-shrink relative rounded-md"
            >
              <div
                class="w-fit p-3 h-full flex items-center gap-2 justify-center rounded-md border-0 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
              >
                <label
                  for="kvCache"
                  class="inline-block bg-white text-xs font-medium text-gray-900"
                >
                  KV Cache
                </label>
                <select id="kvCache" name="kvCache">
                  <option value="16">16 bit</option>
                  <option value="8">8 bit</option>
                  <option value="4">4 bit</option>
                </select>
              </div>
            </div>
          </div>
          <!-- GGUF Options -->
          <div x-show="format === 'gguf'" class="relative">
            <div class="flex flex-row gap-4">
              <label
                for="quantsize"
                class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
              >
                Quantization Size
              </label>
              <div
                class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
                x-data="{
                open: false,
                value: '',
                toggle() {
                  if (this.open) {
                    return this.close()
                  }

                  this.$refs.button.focus()
      
                  this.open = true
                },
                close(focusAfter) {
                  if (! this.open) return
    
                  this.open = false
    
                  focusAfter && focusAfter.focus()
                }
              }"
                x-on:keydown.escape.prevent.stop="close($refs.button)"
                x-id="['dropdown-button']"
                class="relative"
              >
                <!-- Button -->
                <button
                  x-ref="button"
                  x-on:click="toggle()"
                  :aria-expanded="open"
                  :aria-controls="$id('dropdown-button')"
                  type="button"
                  id="quantsize"
                  x-text="value.length === 0 ? 'Q4_K_S' : value"
                  class="flex justify-between items-center gap-2 w-full"
                >
                  Q4_K_S

                  <!-- Heroicon: chevron-down -->
                  <svg
                    xmlns="http://www.w3.org/2000/svg"
                    class="h-5 w-5 text-gray-400"
                    viewBox="0 0 20 20"
                    fill="currentColor"
                  >
                    <path
                      fill-rule="evenodd"
                      d="M5.293 7.293a1 1 0 011.414 0L10 10.586l3.293-3.293a1 1 0 111.414 1.414l-4 4a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414z"
                      clip-rule="evenodd"
                    />
                  </svg>
                </button>

                <!-- Panel -->
                <div
                  x-data="{ quants: [
                  'IQ1_S',
                  'IQ2_XXS',
                  'IQ2_XS',
                  'IQ2_S',
                  'IQ2_M',
                  'IQ3_XXS',
                  'IQ3_XS',
                  'Q2_K',
                  'Q3_K_S',
                  'IQ3_S',
                  'IQ3_M',
                  'Q3_K_M',
                  'Q3_K_L',
                  'IQ4_XS',
                  'IQ4_NL',
                  'Q4_0',
                  'Q4_K_S',
                  'Q4_K_M',
                  'Q5_0',
                  'Q5_K_S',
                  'Q5_K_M',
                  'Q6_K',
                  'Q8_0'
                ]}"
                  x-ref="panel"
                  x-show="open"
                  x-transition.origin.top.left
                  x-on:click.outside="close($refs.button)"
                  :id="$id('dropdown-button')"
                  style="display: none"
                  class="absolute left-0 mt-4 w-full rounded-md bg-white shadow-sm ring-1 ring-inset ring-gray-300 z-10"
                >
                  <template x-for="quant in quants">
                    <a
                      @click="value = quant; close($refs.button)"
                      x-text="quant"
                      class="flex cursor-pointer items-center gap-2 w-full first-of-type:rounded-t-md last-of-type:rounded-b-md px-4 py-2.5 text-left text-sm hover:bg-gray-500/5 disabled:text-gray-500"
                    ></a>
                  </template>
                </div>
              </div>
              <div class="relative">
                <label
                  for="batchsize"
                  class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
                >
                  Batch Size
                </label>
                <input
                  value="512"
                  type="number"
                  step="128"
                  id="batchsize"
                  class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
                />
              </div>
            </div>
          </div>
          <button
            type="button"
            class="rounded-md bg-slate-800 px-3 py-2 text-sm font-semibold text-white shadow-sm hover:bg-slate-700 focus-visible:outline focus-visible:outline-2 focus-visible:outline-offset-2 focus-visible:outline-indigo-600"
            @click="calculateSizes(format)"
          >
            Submit
          </button>
        </div>
        <div class="w-auto flex flex-col gap-4">
          <div class="relative">
            <label
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
            >
              Model Size (GB)
            </label>
            <div
              id="resultmodel"
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
            >4.20</div>
          </div>
          <div class="relative">
            <label
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
            >
              Context Size (GB)
            </label>
            <div
              id="resultcontext"
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
            >6.90</div>
          </div>
          <div class="relative">
            <label
              class="absolute -top-2 left-2 inline-block bg-white px-1 text-xs font-medium text-gray-900"
            >
              Total Size (GB)
            </label>
            <div
              id="resulttotal"
              class="block w-full rounded-md border-0 p-3 text-gray-900 shadow-sm ring-1 ring-inset ring-gray-300 placeholder:text-gray-400 focus:ring-2 focus:ring-inset focus:ring-indigo-600 sm:text-sm sm:leading-6"
            >420.69</div>
          </div>
        </div>
      </div>
    </div>
    <script
      src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js"
    ></script>
    <script defer>
      calculateSizes("gguf")
    </script>
  </body>
</html>