k3ybladewielder committed
Commit
787eb2b
·
1 Parent(s): f9b4304

Add application file

Files changed (2)
  1. app.py +74 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,74 @@
+ from langchain import HuggingFacePipeline
+ from langchain import PromptTemplate, LLMChain
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ import transformers
+ import os
+ import torch
+ import gradio as gr
+
+ # check whether CUDA is available
+ print(f"CUDA available: {torch.cuda.is_available()}")
+
+ # define the model id
+ # model_id = "tiiuae/falcon-40b-instruct"
+ model_id = "tiiuae/falcon-7b-instruct"
+
+ # load the tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+ # load the model
+ ## params:
+ ## cache_dir: directory in which the downloaded pretrained model is cached if the standard cache should not be used
+ ## device_map: ensures the model is moved to your GPU(s)
+ cache_dir = "./workspace/"
+ torch_dtype = torch.bfloat16
+ trust_remote_code = True
+ device_map = "auto"
+ offload_folder = "offload"
+
+ model = AutoModelForCausalLM.from_pretrained(model_id, cache_dir=cache_dir, torch_dtype=torch_dtype,
+                                              trust_remote_code=trust_remote_code, device_map=device_map,
+                                              offload_folder=offload_folder)
+ # set the PyTorch model to inference mode
+ model.eval()
+
+ # build the HF transformers pipeline
+ task = "text-generation"
+ max_length = 400
+ do_sample = True
+ top_k = 10
+ num_return_sequences = 1
+ eos_token_id = tokenizer.eos_token_id
+
+ pipeline = transformers.pipeline(task, model=model, tokenizer=tokenizer,
+                                  device_map=device_map, max_length=max_length,
+                                  do_sample=do_sample, top_k=top_k,
+                                  num_return_sequences=num_return_sequences,
+                                  eos_token_id=eos_token_id)
+
+ # set up the prompt template (passes the user input through unchanged)
+ template = PromptTemplate(input_variables=["input"], template="{input}")
+
+ # wrap the HF pipeline in the LangChain class
+ llm = HuggingFacePipeline(pipeline=pipeline)
+
+ # build the stacked LLM chain, i.e. prompt formatting + LLM
+ chain = LLMChain(llm=llm, prompt=template)
+
+
+ # create the generate function
+ def generate(prompt):
+     # the prompt is passed to the LLM chain, which returns the response
+     return chain.run(prompt)
+
+ title = "Falcon-7B-Instruct"
+ description = "Web application using the open-source `Falcon-7b-Instruct` LLM"
+
+ # build the Gradio interface
+ gr.Interface(fn=generate,
+              inputs=["text"],
+              outputs=["text"],
+              title=title,
+              description=description).launch()
+
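As an aside (not part of the commit), the PromptTemplate / LLMChain wiring above can be sanity-checked without downloading the Falcon weights by swapping in a tiny placeholder model; `sshleifer/tiny-gpt2` below is only an example choice, and the sketch assumes the same legacy top-level `langchain` imports used in app.py:

from transformers import pipeline
from langchain import HuggingFacePipeline, PromptTemplate, LLMChain

# placeholder tiny model so the chain wiring can be exercised quickly
pipe = pipeline("text-generation", model="sshleifer/tiny-gpt2", max_length=40)
llm = HuggingFacePipeline(pipeline=pipe)
template = PromptTemplate(input_variables=["input"], template="{input}")
chain = LLMChain(llm=llm, prompt=template)
print(chain.run("Hello"))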
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ langchain
+ gradio==3.39.0
+ transformers==4.24.0
+ torch
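One caveat worth flagging (not part of this commit): `device_map="auto"` in app.py relies on the `accelerate` package, which is not pinned above, so loading the model is likely to fail on a fresh environment unless it is installed. A small guard such as the sketch below makes that failure mode explicit:

# optional guard for app.py: device_map="auto" needs accelerate installed
import importlib.util
if importlib.util.find_spec("accelerate") is None:
    raise RuntimeError("Install the `accelerate` package to use device_map='auto'")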