King-Harry committed · Commit decf714 (verified) · 1 parent: 739c510

Update README.md

Files changed (1)
  1. README.md  +21 -9
README.md CHANGED
@@ -63,21 +63,27 @@ The model is designed for responsible data management, ensuring that sensitive i
To use this model, you can load it from the Hugging Face Hub and integrate it into your Python or API-based applications. Below is an example of how to load and use the model:

```python
- # Install necessary packages
+ Here's the code with comments explaining each line:
+
+ ```python
+ # Install necessary packages from the unsloth GitHub repository and others required for model handling and optimizations.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

+ # Import the necessary classes from the transformers and unsloth libraries.
from transformers import AutoModelForCausalLM, AutoTokenizer
from unsloth import FastLanguageModel

- # Load the fine-tuned model from Hugging Face Hub
+ # Specify the name of the fine-tuned model hosted on Hugging Face Hub and load it along with its tokenizer.
+ # The model is loaded in 4-bit precision to optimize memory usage and speed.
model_name = "King-Harry/Ninja-Masker-2-PII-Redaction"
model, tokenizer = FastLanguageModel.from_pretrained(model_name, load_in_4bit=True)

- # Ensure the model is ready for inference
+ # Prepare the model for inference mode, ensuring it's optimized for generating predictions.
FastLanguageModel.for_inference(model)

- # Define the Alpaca-style prompt
+ # Define a prompt template in the style of the Alpaca instruction-based format.
+ # This template will be used to format the input text for the model.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
@@ -89,7 +95,8 @@ alpaca_prompt = """Below is an instruction that describes a task, paired with an
### Response:
{}"""

- # Define the input text using the Alpaca prompt
+ # Format the input text using the Alpaca-style prompt with a specific instruction and input.
+ # The tokenizer encodes the formatted text into tensors suitable for the model to process.
inputs = tokenizer(
[
alpaca_prompt.format(
@@ -98,15 +105,20 @@ inputs = tokenizer(
"" # output - leave this blank for generation!
)
],
- return_tensors="pt"
- ).to("cuda")
+ return_tensors="pt" # Return the encoded inputs as PyTorch tensors.
+ ).to("cuda") # Move the tensors to the GPU (CUDA) for faster processing.

- # Generate the redacted output
+ # Generate the model's output based on the input prompt, limiting the output to a maximum of 64 new tokens.
+ # The use_cache parameter is set to True to utilize past key values for faster generation.
outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)

- # Decode and print the output
+ # Decode the generated output from the model, converting the tokenized output back into human-readable text.
+ # The skip_special_tokens argument ensures that special tokens used by the model (like padding or start tokens) are omitted.
redacted_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+
+ # Print the first item in the list of decoded texts, which should be the redacted version of the input text.
print(redacted_text[0])
+ ```

```
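For convenience, here is a minimal consolidated sketch of the usage flow the updated README describes, folded into a single runnable block. The actual instruction and input strings passed to `alpaca_prompt.format(...)` are not visible in the diff hunks, so the ones below are illustrative placeholders, and the `### Input:` section of the template is assumed from the standard Alpaca layout.

```python
# Minimal consolidated sketch of the README's inference flow (assumes the
# packages from the pip commands above are installed and a CUDA GPU is present).
from unsloth import FastLanguageModel

# Load the fine-tuned model and its tokenizer in 4-bit precision to save memory.
model, tokenizer = FastLanguageModel.from_pretrained(
    "King-Harry/Ninja-Masker-2-PII-Redaction",
    load_in_4bit=True,
)
FastLanguageModel.for_inference(model)  # switch the model into inference mode

# Standard Alpaca-style template; the "### Input:" section is assumed, since the
# middle of the template is elided in the visible diff hunks.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# Placeholder instruction and input (illustrative only, not the README's strings).
prompt = alpaca_prompt.format(
    "Replace all personally identifiable information in the text with masking tags.",
    "Hi, I'm John Smith and you can reach me at john.smith@example.com.",
    "",  # output - left blank so the model generates it
)

inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
```

Loading in 4-bit and calling `FastLanguageModel.for_inference(model)` keeps memory use low and enables unsloth's optimized inference path; on a machine without a GPU, the `.to("cuda")` call would need to be dropped.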