pandora-s committed
Commit 1f6b391 · verified · Parent: 4624aaa

Update app.py

Files changed (1)
  1. app.py +6 -5
app.py CHANGED
@@ -33,6 +33,8 @@ import requests
 
 from huggingface_hub import snapshot_download
 
+from tqdm import tqdm
+
 default_max_context = 16384
 default_max_output = 512
 
@@ -48,7 +50,7 @@ available_models = [
     "8.0bpw"
 ]
 dirs = {}
-for model in available_models:
+for model in tqdm(available_models):
     dirs.update({model: snapshot_download(repo_id="turboderp/pixtral-12b-exl2", revision=model)})
 
 @spaces.GPU(duration=45)
@@ -137,15 +139,14 @@ def run_inference(message, history, model_picked, context_size, max_output):
     return result
 
 description="""A demo chat interface with Pixtral 12B EXL2 Quants, deployed using **ExllamaV2**!
-The model will be loaded once the GPU is available. This space specifically will load by default Pixtral at 4bpw from the following repository: [turboderp/pixtral-12b-exl2](https://huggingface.co/turboderp/pixtral-12b-exl2). Other quantization options are available.
-The current version of ExllamaV2 running is the dev branch, not the master branch: [ExllamaV2](https://github.com/turboderp/exllamav2/tree/dev).
-
+The model will be loaded once the GPU is available. This space specifically will load by default Pixtral at 4bpw from the following repository: [turboderp/pixtral-12b-exl2](https://huggingface.co/turboderp/pixtral-12b-exl2). Other quantization options are available.
+The current version of ExllamaV2 running is the dev branch, not the master branch: [ExllamaV2](https://github.com/turboderp/exllamav2/tree/dev).
 The model at **4bpw and 16k context size fits in less than 12GB of VRAM**, and at **2.5bpw and short context can potentially fit in 8GB of VRAM**!
 
 The current default settings are:
 - Model Quant: 4.0bpw
 - Context Size: 16k tokens
-- Max Output: 512 tokens
+- Max Output: 512 tokens
 You can select other quants and experiment!
 
 Thanks, turboderp!"""
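
For context, a minimal standalone sketch of the pattern this commit introduces: wrapping the download loop in `tqdm` so the Space shows which quant revision is currently being fetched. The `available_models` list is partly assumed here, since the diff only shows its last entry, `"8.0bpw"`; the `repo_id` and the `dirs` bookkeeping mirror app.py.

```python
# Sketch of the commit's change: iterate the quant revisions through tqdm
# so a progress bar tracks which snapshot is being downloaded.
from huggingface_hub import snapshot_download
from tqdm import tqdm

# Revision list partly assumed; only "8.0bpw" is visible in the diff.
available_models = ["2.5bpw", "3.0bpw", "4.0bpw", "5.0bpw", "6.0bpw", "8.0bpw"]

dirs = {}
for model in tqdm(available_models, desc="Fetching quants"):
    # snapshot_download returns the local cache path for that revision
    dirs.update({model: snapshot_download(repo_id="turboderp/pixtral-12b-exl2",
                                          revision=model)})
```

Note that `snapshot_download` prints its own per-file progress bars; the outer `tqdm` bar only tracks which revision the loop is on, which is the startup visibility this commit adds.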
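
The description's VRAM claim follows from how EXL2 loading is budgeted: the weights' footprint scales with bits per weight, and the KV cache scales with the context length, so a 4.0bpw quant with `max_seq_len` capped at 16384 is what stays under ~12 GB. Below is a hedged outline of loading one such quant; class and method names follow ExLlamaV2's published examples, but the dev branch this Space runs (and Pixtral's vision components) may require extra setup, so treat it as an assumption-laden sketch rather than the Space's actual loader.

```python
# Assumed outline of loading one EXL2 quant with a capped context window.
# API names follow ExLlamaV2's published examples; dev-branch details and
# Pixtral's vision tower are not covered here.
from huggingface_hub import snapshot_download
from exllamav2 import (ExLlamaV2, ExLlamaV2Cache, ExLlamaV2Config,
                       ExLlamaV2Tokenizer)

model_dir = snapshot_download(repo_id="turboderp/pixtral-12b-exl2",
                              revision="4.0bpw")

config = ExLlamaV2Config(model_dir)
config.max_seq_len = 16384                # 16k context bounds the KV cache

model = ExLlamaV2(config)
cache = ExLlamaV2Cache(model, lazy=True)  # allocated while weights stream in
model.load_autosplit(cache)               # split weights across available GPUs
tokenizer = ExLlamaV2Tokenizer(config)
```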