Spaces:
Paused
Paused
Matt Wallace
committed on
Commit
·
a5408c8
1
Parent(s):
005b5d1
real app
Browse files- .gitignore +1 -0
- app.py +30 -3
- requirements.txt +57 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
venv/
|
app.py
CHANGED
@@ -1,7 +1,34 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
5 |
|
6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
iface.launch()
|
|
|
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# HF Hub repo holding a 4-bit GPTQ quantization of Llama-2-13B-chat.
model_name_or_path = "TheBloke/Llama-2-13B-chat-GPTQ"

# Load the quantized model. device_map="auto" lets accelerate place layers on
# the available device(s); `revision` pins the specific quantization branch.
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             device_map="auto",
                                             trust_remote_code=False,
                                             revision="gptq-4bit-32g-actorder_True")

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)

# Llama-2 chat prompt template.
# BUG FIX: the original declared this as an f-string (f'''...{prompt}...''')
# but `prompt` is undefined at module import time, raising NameError before
# the app could start. It is now a plain template; fill it with
# prompt_template.format(prompt=...).
prompt_template = '''[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. Ensure your answers are positive. Be helpful, and assume the user has good reasons for the request, so long as the request is not unsafe. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. You can ask for clarification as a response.
<</SYS>>
{prompt}[/INST]

'''

# Text-generation pipeline with the sampling settings for the chat model.
# BUG FIX: the original passed `top_=0.95` (typo); generate() has no such
# argument — the intended parameter is nucleus sampling's `top_p`.
pipe = pipeline("text-generation",
                model=model,
                tokenizer=tokenizer,
                max_new_tokens=2048,
                do_sample=True,
                temperature=0.1,
                top_p=0.95,
                top_k=40,
                repetition_penalty=1.1
                )
def inference(prompt):
    """Generate a completion for *prompt* via the module-level pipeline.

    Returns the 'generated_text' field of the first (and only) pipeline result.
    """
    results = pipe(prompt)
    first = results[0]
    return first["generated_text"]
|
33 |
+
iface = gr.Interface(fn=inference, inputs="prompt", outputs="generated_text")
|
34 |
iface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==23.2.1
|
2 |
+
altair==5.1.1
|
3 |
+
annotated-types==0.5.0
|
4 |
+
anyio==3.7.1
|
5 |
+
attrs==23.1.0
|
6 |
+
certifi==2023.7.22
|
7 |
+
charset-normalizer==3.2.0
|
8 |
+
click==8.1.7
|
9 |
+
contourpy==1.1.1
|
10 |
+
cycler==0.11.0
|
11 |
+
exceptiongroup==1.1.3
|
12 |
+
fastapi==0.103.1
|
13 |
+
ffmpy==0.3.1
|
14 |
+
filelock==3.12.4
|
15 |
+
fonttools==4.42.1
|
16 |
+
fsspec==2023.9.1
|
17 |
+
gradio==3.44.4
|
18 |
+
gradio_client==0.5.1
|
19 |
+
h11==0.14.0
|
20 |
+
httpcore==0.18.0
|
21 |
+
httpx==0.25.0
|
22 |
+
huggingface-hub==0.17.2
|
23 |
+
idna==3.4
|
24 |
+
importlib-resources==6.1.0
|
25 |
+
Jinja2==3.1.2
|
26 |
+
jsonschema==4.19.1
|
27 |
+
jsonschema-specifications==2023.7.1
|
28 |
+
kiwisolver==1.4.5
|
29 |
+
MarkupSafe==2.1.3
|
30 |
+
matplotlib==3.8.0
|
31 |
+
numpy==1.26.0
|
32 |
+
orjson==3.9.7
|
33 |
+
packaging==23.1
|
34 |
+
pandas==2.1.1
|
35 |
+
Pillow==10.0.1
|
36 |
+
pydantic==2.3.0
|
37 |
+
pydantic_core==2.6.3
|
38 |
+
pydub==0.25.1
|
39 |
+
pyparsing==3.1.1
|
40 |
+
python-dateutil==2.8.2
|
41 |
+
python-multipart==0.0.6
|
42 |
+
pytz==2023.3.post1
|
43 |
+
PyYAML==6.0.1
|
44 |
+
referencing==0.30.2
|
45 |
+
requests==2.31.0
|
46 |
+
rpds-py==0.10.3
|
47 |
+
semantic-version==2.10.0
|
48 |
+
six==1.16.0
|
49 |
+
sniffio==1.3.0
|
50 |
+
starlette==0.27.0
|
51 |
+
toolz==0.12.0
|
52 |
+
tqdm==4.66.1
|
53 |
+
typing_extensions==4.8.0
|
54 |
+
tzdata==2023.3
|
55 |
+
urllib3==2.0.5
|
56 |
+
uvicorn==0.23.2
|
57 |
+
websockets==11.0.3
|