add application file
20.jpg
ADDED
Dockerfile
ADDED
@@ -0,0 +1,57 @@
FROM ubuntu:24.04

ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
    git \
    git-lfs \
    wget \
    curl \
    # python build dependencies \
    build-essential \
    libssl-dev \
    zlib1g-dev \
    libbz2-dev \
    libreadline-dev \
    libsqlite3-dev \
    libncursesw5-dev \
    xz-utils \
    tk-dev \
    libxml2-dev \
    libxmlsec1-dev \
    libffi-dev \
    liblzma-dev \
    ffmpeg

RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:${PATH}
WORKDIR ${HOME}/app

RUN curl https://pyenv.run | bash
ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
ARG PYTHON_VERSION=3.11
RUN pyenv install ${PYTHON_VERSION} && \
    pyenv global ${PYTHON_VERSION} && \
    pyenv rehash && \
    pip install --no-cache-dir -U pip setuptools wheel && \
    pip install "huggingface-hub" "hf-transfer" "gradio[oauth]" "gradio_huggingfacehub_search==0.07" "APScheduler"

COPY --chown=1000 . ${HOME}/app
RUN git clone https://github.com/ggerganov/llama.cpp
RUN pip install -r llama.cpp/requirements.txt

ENV PYTHONPATH=${HOME}/app \
    PYTHONUNBUFFERED=1 \
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    TQDM_POSITION=-1 \
    TQDM_MININTERVAL=1 \
    SYSTEM=spaces

ENTRYPOINT /bin/sh start.sh
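For local testing outside of Spaces, the same image can also be built and run directly. A minimal sketch (the lora-gguf tag is an assumption; Gradio defaults to port 7860 since no GRADIO_SERVER_PORT is set):

    # build the image from the repo root and run it, exposing Gradio's default port
    docker build -t lora-gguf .
    docker run --rm -p 7860:7860 lora-gguf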
app.py
ADDED
@@ -0,0 +1,199 @@
import os
import shutil
import subprocess
import signal
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
import gradio as gr

from huggingface_hub import create_repo, HfApi
from huggingface_hub import snapshot_download
from huggingface_hub import whoami
from huggingface_hub import ModelCard

from gradio_huggingfacehub_search import HuggingfaceHubSearch

from apscheduler.schedulers.background import BackgroundScheduler

from textwrap import dedent

HF_TOKEN = os.environ.get("HF_TOKEN")

# Standalone helper that converts an already-downloaded LoRA adapter to GGUF
# from inside the llama.cpp checkout; it is not wired to the Gradio UI below.
def lora_to_gguf(model_path, lora_path, lora_gguf_path):
    transform_command = f"python convert_lora_to_gguf.py --base ../{model_path} ../{lora_path} --outfile {lora_gguf_path}"

    os.chdir("llama.cpp")
    print(f"Current working directory: {os.getcwd()}")
    print(f"Files in the current directory: {os.listdir('.')}")

    if not os.path.isfile(f"../{model_path}"):
        raise Exception(f"Model file not found: {model_path}")

    if not os.path.isfile(f"../{lora_path}"):
        raise Exception(f"LoRA adapter file not found: {lora_path}")

    print("Running transform command...")
    process = subprocess.Popen(transform_command, shell=True)

    try:
        process.wait(timeout=800)
    except subprocess.TimeoutExpired:
        print("Transform timed out")
        process.send_signal(signal.SIGINT)
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            print("Transform process still didn't stop. Forcefully stopping process...")
            process.kill()

    os.chdir("..")

def process_lora(model_id, lora_id, private_repo, oauth_token: gr.OAuthToken | None):
    if oauth_token is None or oauth_token.token is None:
        raise ValueError("You must be logged in to use this Space")
    model_name = model_id.split('/')[-1]
    lora_name = lora_id.split('/')[-1]
    fp16 = f"{lora_name}.fp16.gguf"

    try:
        api = HfApi(token=oauth_token.token)

        dl_pattern = ["*.md", "*.json", "*.model"]

        pattern = (
            "*.safetensors"
            if any(
                file.path.endswith(".safetensors")
                for file in api.list_repo_tree(
                    repo_id=model_id,
                    recursive=True,
                )
            )
            else "*.bin"
        )

        dl_pattern += [pattern]

        api.snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
        print("Model downloaded successfully!")
        print(f"Current working directory: {os.getcwd()}")
        print(f"Model directory contents: {os.listdir(model_name)}")

        api.snapshot_download(repo_id=lora_id, local_dir=lora_name, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
        print("LoRA downloaded successfully!")
        print(f"Current working directory: {os.getcwd()}")
        print(f"LoRA directory contents: {os.listdir(lora_name)}")

        conversion_script = "convert_lora_to_gguf.py"
        fp16_conversion = f"python llama.cpp/{conversion_script} --base {model_name} {lora_name} --outtype f16 --outfile {fp16}"
        result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
        print(result)
        if result.returncode != 0:
            raise Exception(f"Error converting to fp16: {result.stderr}")
        print("LoRA converted to fp16 successfully!")
        print(f"Converted LoRA-GGUF path: {fp16}")

        username = whoami(oauth_token.token)["name"]
        new_repo_url = api.create_repo(repo_id=f"{username}/{lora_name}-GGUF", exist_ok=True, private=private_repo)
        new_repo_id = new_repo_url.repo_id
        print("Repo created successfully!", new_repo_url)

        try:
            card = ModelCard.load(model_id, token=oauth_token.token)
        except Exception:
            card = ModelCard("")
        if card.data.tags is None:
            card.data.tags = []
        card.data.tags.append("llama-cpp")
        card.data.tags.append("LoRA-GGUF")
        card.data.base_model = model_id
        card.text = dedent(
            f"""
            # {new_repo_id}
            This LoRA was converted to GGUF format from [`{lora_id}`](https://huggingface.co/{lora_id}) using llama.cpp. The base model is [`{model_id}`](https://huggingface.co/{model_id}).

            ## Use with llama.cpp
            You need to merge the LoRA-GGUF into the base model.
            """
        )
        card.save("README.md")

        try:
            print(f"Uploading LoRA-GGUF: {fp16}")
            api.upload_file(
                path_or_fileobj=fp16,
                path_in_repo=fp16,
                repo_id=new_repo_id,
            )
        except Exception as e:
            raise Exception(f"Error uploading LoRA-GGUF: {e}")

        api.upload_file(
            path_or_fileobj="README.md",
            path_in_repo="README.md",
            repo_id=new_repo_id,
        )
        print("Uploaded successfully!")

        return (
            f'Find your repo <a href=\'https://huggingface.co/{new_repo_id}\' target="_blank" style="text-decoration:underline">here</a>',
            "20.jpg",
        )
    except Exception as e:
        return (f"Error: {e}", "error.png")
    finally:
        shutil.rmtree(model_name, ignore_errors=True)
        shutil.rmtree(lora_name, ignore_errors=True)
        print("Folder cleaned up successfully!")


css = """/* Custom CSS to allow scrolling */
.gradio-container {overflow-y: auto;}
"""

with gr.Blocks(css=css) as demo:
    with gr.Row():
        gr.Markdown("You must be logged in to use this Space.")
        gr.LoginButton(min_width=250)
    model_id = HuggingfaceHubSearch(
        label="Huggingface Hub Model ID",
        placeholder="Search for model id on Huggingface",
        search_type="model",
    )
    lora_id = HuggingfaceHubSearch(
        label="Huggingface Hub LoRA Model ID",
        placeholder="Search for LoRA model id on Huggingface",
        search_type="model",
    )
    private_repo = gr.Checkbox(
        value=False,
        label="Private Repo",
        info="Create a private repo under your username."
    )

    iface = gr.Interface(
        fn=process_lora,
        inputs=[
            model_id,
            lora_id,
            private_repo,
        ],
        outputs=[
            gr.Markdown(label="output"),
            gr.Image(show_label=False),
        ],
        title="Create your own LoRA-GGUF",
        description="The space takes a base model repo and a LoRA adapter repo from the Hub, converts the adapter to GGUF, and uploads it to a new repo under your username.",
        api_name=False
    )

def restart_space():
    HfApi().restart_space(repo_id="lee-ite/Merge-LoRA-into-GGUF", token=HF_TOKEN, factory_reboot=True)

scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=21600)
scheduler.start()

demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)
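The generated model card only notes that the LoRA-GGUF has to be combined with the base model. A minimal sketch of both ways to do that with llama.cpp (the .gguf file names are placeholders, and the exact binary names depend on how llama.cpp was built):

    # apply the adapter at inference time
    ./llama-cli -m base-model.fp16.gguf --lora my-lora.fp16.gguf -p "Hello"

    # or merge it once into a standalone GGUF
    ./llama-export-lora -m base-model.fp16.gguf --lora my-lora.fp16.gguf -o merged.fp16.gguf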
start.sh
ADDED
@@ -0,0 +1,5 @@
cd llama.cpp
make clean && make all -j

cd ..
python app.py
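start.sh recompiles llama.cpp on every container start, which slows cold starts. A possible variation (an assumption, not part of this commit) is to compile once at image build time, e.g. with a step such as "RUN make -C llama.cpp -j" after the clone in the Dockerfile, so that the startup script shrinks to launching the app:

    # start.sh, if llama.cpp were prebuilt in the image
    python app.py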