k3ybladewielder committed · 787eb2b
Parent(s): f9b4304
Add application file

- app.py +74 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,74 @@
+from langchain import HuggingFacePipeline
+from langchain import PromptTemplate, LLMChain
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+import transformers
+import os
+import torch
+import gradio as gr
+
+# check if cuda is available
+torch.cuda.is_available()
+
+# define the model id
+# model_id = "tiiuae/falcon-40b-instruct"
+model_id = "tiiuae/falcon-7b-instruct"
+
+# load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+# load the model
+## params:
+## cache_dir: path to a directory in which the downloaded pretrained model should be cached if the standard cache should not be used
+## device_map: ensures the model is moved to your GPU(s)
+cache_dir = "./workspace/"
+torch_dtype = torch.bfloat16
+trust_remote_code = True
+device_map = "auto"
+offload_folder = "offload"
+
+model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir=cache_dir, torch_dtype=torch_dtype,
+                                             trust_remote_code=trust_remote_code, device_map=device_map,
+                                             offload_folder=offload_folder)
+# set the pytorch model to inference mode
+model.eval()
+
+# build the hf transformers pipeline
+task = "text-generation"
+max_length = 400
+do_sample = True
+top_k = 10
+num_return_sequences = 1
+eos_token_id = tokenizer.eos_token_id
+
+pipeline = transformers.pipeline(task, model=model, tokenizer=tokenizer,
+                                 device_map=device_map, max_length=max_length,
+                                 do_sample=do_sample, top_k=top_k,
+                                 num_return_sequences=num_return_sequences,
+                                 eos_token_id=eos_token_id)
+
+# set up the prompt template; the user input is passed through unchanged
+template = PromptTemplate(input_variables=["input"], template="{input}")
+
+# pass the hf pipeline to the langchain wrapper class
+llm = HuggingFacePipeline(pipeline=pipeline)
+
+# build the stacked llm chain, i.e. prompt formatting + llm
+chain = LLMChain(llm=llm, prompt=template)
+
+
+# create the generate function
+def generate(prompt):
+    # the prompt is passed to the llm chain, which returns the response
+    return chain.run(prompt)
+
+title = "Falcon-7B-Instruct"
+description = "Web application using the open-source `Falcon-7B-Instruct` LLM"
+
+# build the gradio interface
+gr.Interface(fn=generate,
+             inputs=["text"],
+             outputs=["text"],
+             title=title,
+             description=description).launch()
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+langchain
+gradio==3.39.0
+transformers==4.24.0
+torch