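# Gradio demo: classify a claim as true / false / mixture / unknown /
# not_applicable with a Falcon-7B model fine-tuned via QLoRA
# (dpaul93/falcon-7b-qlora-chat-claim-finetune).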
import os

import gradio as gr
import torch
from huggingface_hub import login
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Authenticate with the Hugging Face Hub; the token is expected in the
# HF_Token environment variable (e.g. a Space secret).
access_token = os.environ["HF_Token"]
login(token=access_token)

# 4-bit NF4 quantization settings; only the commented-out base-model loading
# path below consumes this config.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# QLoRA adapter fine-tuned for claim classification.
PEFT_MODEL = "dpaul93/falcon-7b-qlora-chat-claim-finetune"  # "/content/trained-model"

config = PeftConfig.from_pretrained(PEFT_MODEL)

# Resolve the adapter against the public Falcon-7B base checkpoint.
config.base_model_name_or_path = "tiiuae/falcon-7b"

# Alternative loading path, kept for reference: load the 4-bit quantized base
# model and attach the adapter with PeftModel.from_pretrained afterwards.
'''model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)'''

# Load the fine-tuned model directly; device_map="auto" lets accelerate place
# layers across the available devices and spill the rest to the offload folder.
model = AutoModelForCausalLM.from_pretrained(
    PEFT_MODEL, device_map="auto", offload_folder="offload"
)

tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
tokenizer.pad_token = tokenizer.eos_token

#model = PeftModel.from_pretrained(model, PEFT_MODEL)
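
# Generation settings used by generate() below. These values are assumed,
# conservative defaults rather than settings taken from the fine-tuning run.
generation_config = model.generation_config
generation_config.max_new_tokens = 30
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id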

def generate_test_prompt(text):
    # Prompt template; kept verbatim so it matches the format the adapter was
    # fine-tuned on.
    return f"""Given the following claim:
            {text}
            pick one of the following option
            (a) true
            (b) false
            (c) mixture
            (d) unknown
            (e) not_applicable?""".strip()

def generate_and_tokenize_prompt(text):
    prompt = generate_test_prompt(text)
    # Send the inputs to whichever device the (possibly offloaded) model uses.
    device = model.device
    encoding = tokenizer(prompt, return_tensors="pt").to(device)
    with torch.inference_mode():
        outputs = model.generate(
            input_ids=encoding.input_ids,
            attention_mask=encoding.attention_mask,
            generation_config=generation_config,
        )

    # The fine-tuned model answers in the form "Answer: <label>"; pull out just
    # the label portion of the decoded text.
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return decoded.split("Answer:")[1].split("\n")[0].split(".")[0]


def classifyUsingLLAMA(text):
    # Despite the historical name, this runs the Falcon-7B QLoRA model above.
    return generate_and_tokenize_prompt(text)

iface = gr.Interface(fn=classifyUsingLLAMA, inputs="text", outputs="text")
iface.launch()