reach-vb HF staff committed on
Commit
31ebe9e
·
verified ·
1 Parent(s): 1504cda

Update the default model created with the space. (#74)

Browse files

- Update app.py (9b6f1ba8f84e2d08078faacad363a09c8d64f087)
- Update app.py (472bd039f0ae12bb4c305eac4ec023f14757796a)

Files changed (1) hide show
  1. app.py +32 -10
app.py CHANGED
@@ -119,31 +119,53 @@ def process_model(model_id, q_method, private_repo, split_model, split_max_tenso
119
  card.data.tags = []
120
  card.data.tags.append("llama-cpp")
121
  card.data.tags.append("gguf-my-repo")
 
 
 
122
  card.text = dedent(
123
  f"""
124
  # {new_repo_id}
125
  This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
126
  Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
 
127
  ## Use with llama.cpp
128
- Install llama.cpp through brew.
 
129
  ```bash
130
- brew install ggerganov/ggerganov/llama.cpp
 
131
  ```
132
  Invoke the llama.cpp server or the CLI.
133
- CLI:
 
134
  ```bash
135
- llama-cli --hf-repo {new_repo_id} --model {quantized_gguf_name} -p "The meaning to life and the universe is"
136
  ```
137
- Server:
 
138
  ```bash
139
- llama-server --hf-repo {new_repo_id} --model {quantized_gguf_name} -c 2048
140
  ```
 
141
  Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  ```
143
- git clone https://github.com/ggerganov/llama.cpp && \\
144
- cd llama.cpp && \\
145
- make && \\
146
- ./main -m {quantized_gguf_name} -n 128
147
  ```
148
  """
149
  )
 
119
  card.data.tags = []
120
  card.data.tags.append("llama-cpp")
121
  card.data.tags.append("gguf-my-repo")
122
+ if card.data.base_model is None:
123
+ card.data.base_model = []
124
+ card.data.base_model.append({model_id})
125
  card.text = dedent(
126
  f"""
127
  # {new_repo_id}
128
  This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
129
  Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
130
+
131
  ## Use with llama.cpp
132
+ Install llama.cpp through brew (works on Mac and Linux)
133
+
134
  ```bash
135
+ brew install llama.cpp
136
+
137
  ```
138
  Invoke the llama.cpp server or the CLI.
139
+
140
+ ### CLI:
141
  ```bash
142
+ llama --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
143
  ```
144
+
145
+ ### Server:
146
  ```bash
147
+ llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
148
  ```
149
+
150
  Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
151
+
152
+ Step 1: Clone llama.cpp from GitHub.
153
+ ```
154
+ git clone https://github.com/ggerganov/llama.cpp
155
+ ```
156
+
157
+ Step 2: Move into the llama.cpp folder and build it with `LLAMA_CURL=1` flag along with other hardware-specific flags (for ex: LLAMA_CUDA=1 for Nvidia GPUs on Linux).
158
+ ```
159
+ cd llama.cpp && LLAMA_CURL=1 make
160
+ ```
161
+
162
+ Step 3: Run inference through the main binary.
163
+ ```
164
+ ./main --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
165
+ ```
166
+ or
167
  ```
168
+ ./server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
 
 
 
169
  ```
170
  """
171
  )