Training procedure

The following bitsandbytes quantization config was used during training:

  • quant_method: bitsandbytes
  • load_in_8bit: False
  • load_in_4bit: True
  • llm_int8_threshold: 6.0
  • llm_int8_skip_modules: None
  • llm_int8_enable_fp32_cpu_offload: False
  • llm_int8_has_fp16_weight: False
  • bnb_4bit_quant_type: nf4
  • bnb_4bit_use_double_quant: False
  • bnb_4bit_compute_dtype: float16

Framework versions

  • PEFT 0.5.0

Getting started

import torch
from datasets import Dataset
from huggingface_hub import login
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, AddedToken

# Load the base model and tokenizer, then attach the PEFT adapter on top.
# Authenticate with the Hugging Face Hub; replace the placeholder with a real token.
login("[YOUR HF TOKEN HERE FOR USING LLAMA]")
# NOTE(review): `config` is not referenced again in this snippet.
config = PeftConfig.from_pretrained("ChangeIsKey/llama-7b-lexical-substitution")
base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", device_map='auto')

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", use_fast=False, trust_remote_code=True)
# Register the marker tokens: "<|answer|>" terminates every prompt and
# "<|end|>" is the separator the generated text is split on further down.
tokenizer.add_special_tokens({ "additional_special_tokens":[AddedToken("<|s|>"), AddedToken("<|answer|>"), AddedToken("<|end|>")]})
# Add a padding token only when the tokenizer does not already define one.
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# Pad on the left — presumably so every prompt ends right where generation
# starts when prompts of different lengths are batched together.
tokenizer.padding_side = 'left'
# Resize the embedding matrix to the tokenizer size now that tokens were added;
# this is done before the adapter is loaded onto the base model.
base_model.resize_token_embeddings(len(tokenizer))

model = PeftModel.from_pretrained(base_model, "ChangeIsKey/llama-7b-lexical-substitution")
# Switch to evaluation mode for inference.
model.eval()

# let's use this model
def formatting_func(records):
    """Build one prompt string per record, marking the target word.

    Args:
        records: A column-oriented batch exposing the keys ``'example'``
            (sentences), ``'start'`` and ``'end'`` (character offsets of the
            target word in each sentence). Anything that returns a sequence
            for ``records[key]`` works, e.g. a ``datasets.Dataset`` or a
            plain dict of lists.

    Returns:
        A list of prompts, one per record, in which the target span is
        wrapped in ``**`` and the text ends with the ``<|answer|>`` marker.
    """
    text_batch = []

    # Read every field by column so the same access pattern works for both
    # a Dataset and a dict of lists (no mixed column/row indexing).
    columns = zip(records['example'], records['start'], records['end'])
    for example, start, end in columns:
        target = f'**{example[start:end]}**'
        # The spaces around the target are kept exactly as in the original
        # prompt template — presumably the format seen during fine-tuning.
        input_text = f'{example[:start]} {target} {example[end:]}'
        text_batch.append(f"{input_text}<|answer|>")

    return text_batch

def tokenization(dataset):
    """Turn a batch of records into padded model inputs.

    Args:
        dataset: Batch of records accepted by ``formatting_func`` (keys
            ``'example'``, ``'start'`` and ``'end'``).

    Returns:
        The tokenizer output for the formatted prompts as PyTorch tensors,
        truncated to at most 512 tokens, padded to a common length, and
        moved to the device the model lives on.
    """
    prompts = formatting_func(dataset)
    encoded = tokenizer(prompts,
                        truncation=True,
                        max_length=512,
                        padding=True,
                        return_tensors="pt")
    # Follow the model's device instead of hard-coding "cuda": the model is
    # placed with device_map='auto', and a fixed "cuda" fails on hosts
    # without a CUDA device.
    return encoded.to(model.device)


# Two toy sentences; start/end are the character offsets of the target word.
examples = [
    {'example': 'The traffic jam on the highway made everyone late for work.', 'start': 12, 'end': 15},
    {'example': 'I spread a generous layer of strawberry jam on my toast this morning', 'start': 40, 'end': 43},
]
dataset = Dataset.from_list(examples)

batch_size = 32
output = []

# Generation settings, gathered in one place. Sampling is enabled with a
# very small temperature — presumably to make decoding near-deterministic.
generation_kwargs = dict(
    do_sample=True,
    num_return_sequences=1,
    max_new_tokens=30,
    temperature=0.00001,
    repetition_penalty=1/0.85,
    top_k=40,
    top_p=0.1,
)

with torch.no_grad():
    for batch_start in range(0, len(dataset), batch_size):
        batch_end = min(dataset.num_rows, batch_start + batch_size)
        batch = dataset.select(range(batch_start, batch_end))
        model_input = tokenization(batch)

        output_ids = model.generate(**model_input, **generation_kwargs)

        # Keep special tokens in the decoded text: the markers are needed
        # to locate the generated substitutes.
        answers = tokenizer.batch_decode(output_ids, skip_special_tokens=False)

        for decoded in answers:
            # Everything after the prompt's "<|answer|>" marker is the generation.
            generated = " ".join(decoded.split('<|answer|>')[1:])
            # Text after the last "<|end|>" is discarded; empty pieces are skipped.
            pieces = generated.split('<|end|>')[:-1]
            substitutes = [piece.strip() for piece in pieces if piece.strip() != ""]
            output.append(", ".join(substitutes))

# Attach the substitutes to the dataset and print one report per sentence.
dataset = dataset.add_column('substitutes', output)
for row in dataset:
    target = row['example'][row['start']:row['end']]
    print(f"Target: {target}\nExample: {row['example']}\nSubstitutes: {row['substitutes']}\n")
Downloads last month
3
Inference API
Unable to determine this model’s pipeline type. Check the docs.