reach-vb HF staff ggerganov committed on
Commit
f4651d4
·
verified ·
1 Parent(s): ef80b76

Minor improvement suggestions (#28)

Browse files

- app : clear trailing whitespace (054f452b5cc97f343a1e3b9406be7e48006f580f)
- app : do not add trailing whitespace in prompt using llama-cli (36c74fdb2029320a98a0b09cf47b367d1e1ee52c)
- start : add -j to make command (should be faster) (21eb7b52991f49368a9fe02c2e88cf60120b8aff)


Co-authored-by: Georgi Gerganov <[email protected]>

Files changed (2) hide show
  1. app.py +13 -14
  2. start.sh +1 -1
app.py CHANGED
@@ -26,12 +26,12 @@ def script_to_use(model_id, api):
26
  def process_model(model_id, q_method, hf_token, private_repo):
27
  model_name = model_id.split('/')[-1]
28
  fp16 = f"{model_name}/{model_name.lower()}.fp16.bin"
29
-
30
  try:
31
  api = HfApi(token=hf_token)
32
 
33
  dl_pattern = ["*.md", "*.json", "*.model"]
34
-
35
  pattern = (
36
  "*.safetensors"
37
  if any(
@@ -48,7 +48,7 @@ def process_model(model_id, q_method, hf_token, private_repo):
48
 
49
  snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, token=hf_token, allow_patterns=dl_pattern)
50
  print("Model downloaded successully!")
51
-
52
  conversion_script = script_to_use(model_id, api)
53
  fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
54
  result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
@@ -90,13 +90,13 @@ def process_model(model_id, q_method, hf_token, private_repo):
90
  Invoke the llama.cpp server or the CLI.
91
 
92
  CLI:
93
-
94
  ```bash
95
- llama-cli --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -p "The meaning to life and the universe is "
96
  ```
97
 
98
  Server:
99
-
100
  ```bash
101
  llama-server --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -c 2048
102
  ```
@@ -139,22 +139,22 @@ def process_model(model_id, q_method, hf_token, private_repo):
139
 
140
  # Create Gradio interface
141
  iface = gr.Interface(
142
- fn=process_model,
143
  inputs=[
144
  gr.Textbox(
145
- lines=1,
146
  label="Hub Model ID",
147
  info="Model repo ID",
148
  ),
149
  gr.Dropdown(
150
- ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
151
- label="Quantization Method",
152
  info="GGML quantisation type",
153
  value="Q4_K_M",
154
  filterable=False
155
  ),
156
  gr.Textbox(
157
- lines=1,
158
  label="HF Write Token",
159
  info="https://hf.co/settings/token",
160
  type="password",
@@ -164,7 +164,7 @@ iface = gr.Interface(
164
  label="Private Repo",
165
  info="Create a private repo under your username."
166
  )
167
- ],
168
  outputs=[
169
  gr.Markdown(label="output"),
170
  gr.Image(show_label=False),
@@ -172,8 +172,7 @@ iface = gr.Interface(
172
  title="Create your own GGUF Quants, blazingly fast ⚡!",
173
  description="The space takes an HF repo as an input, quantises it and creates a Public repo containing the selected quant under your HF user namespace. You need to specify a write token obtained in https://hf.co/settings/tokens.",
174
  article="<p>Find your write token at <a href='https://huggingface.co/settings/tokens' target='_blank'>token settings</a></p>",
175
-
176
  )
177
 
178
  # Launch the interface
179
- iface.launch(debug=True)
 
26
  def process_model(model_id, q_method, hf_token, private_repo):
27
  model_name = model_id.split('/')[-1]
28
  fp16 = f"{model_name}/{model_name.lower()}.fp16.bin"
29
+
30
  try:
31
  api = HfApi(token=hf_token)
32
 
33
  dl_pattern = ["*.md", "*.json", "*.model"]
34
+
35
  pattern = (
36
  "*.safetensors"
37
  if any(
 
48
 
49
  snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, token=hf_token, allow_patterns=dl_pattern)
50
  print("Model downloaded successully!")
51
+
52
  conversion_script = script_to_use(model_id, api)
53
  fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
54
  result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
 
90
  Invoke the llama.cpp server or the CLI.
91
 
92
  CLI:
93
+
94
  ```bash
95
+ llama-cli --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -p "The meaning to life and the universe is"
96
  ```
97
 
98
  Server:
99
+
100
  ```bash
101
  llama-server --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -c 2048
102
  ```
 
139
 
140
  # Create Gradio interface
141
  iface = gr.Interface(
142
+ fn=process_model,
143
  inputs=[
144
  gr.Textbox(
145
+ lines=1,
146
  label="Hub Model ID",
147
  info="Model repo ID",
148
  ),
149
  gr.Dropdown(
150
+ ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
151
+ label="Quantization Method",
152
  info="GGML quantisation type",
153
  value="Q4_K_M",
154
  filterable=False
155
  ),
156
  gr.Textbox(
157
+ lines=1,
158
  label="HF Write Token",
159
  info="https://hf.co/settings/token",
160
  type="password",
 
164
  label="Private Repo",
165
  info="Create a private repo under your username."
166
  )
167
+ ],
168
  outputs=[
169
  gr.Markdown(label="output"),
170
  gr.Image(show_label=False),
 
172
  title="Create your own GGUF Quants, blazingly fast ⚡!",
173
  description="The space takes an HF repo as an input, quantises it and creates a Public repo containing the selected quant under your HF user namespace. You need to specify a write token obtained in https://hf.co/settings/tokens.",
174
  article="<p>Find your write token at <a href='https://huggingface.co/settings/tokens' target='_blank'>token settings</a></p>",
 
175
  )
176
 
177
  # Launch the interface
178
+ iface.launch(debug=True)
start.sh CHANGED
@@ -1,4 +1,4 @@
1
  cd llama.cpp
2
- make quantize
3
  cd ..
4
  python app.py
 
1
  cd llama.cpp
2
+ make -j quantize
3
  cd ..
4
  python app.py