King-Harry committed
Update README.md

README.md
CHANGED
To use this model, you can load it from the Hugging Face Hub and integrate it into your Python or API-based applications. Below is an example of how to load and use the model, with comments explaining each step:

```python
# Install the necessary packages from the unsloth GitHub repository, along with the
# other libraries required for model handling and optimization.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

# Import the necessary classes from the transformers and unsloth libraries.
from transformers import AutoModelForCausalLM, AutoTokenizer
from unsloth import FastLanguageModel

# Specify the name of the fine-tuned model hosted on the Hugging Face Hub and load it
# along with its tokenizer. The model is loaded in 4-bit precision to reduce memory
# usage and speed up inference.
model_name = "King-Harry/Ninja-Masker-2-PII-Redaction"
model, tokenizer = FastLanguageModel.from_pretrained(model_name, load_in_4bit=True)

# Prepare the model for inference mode so it is optimized for generating predictions.
FastLanguageModel.for_inference(model)

# Define a prompt template in the Alpaca instruction-following format.
# This template is used to format the input text for the model.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# Format the input text using the Alpaca-style prompt with a specific instruction and input.
# The tokenizer encodes the formatted text into tensors the model can process.
inputs = tokenizer(
    [
        alpaca_prompt.format(
            # ... instruction and input strings go here ...
            ""  # output - leave this blank for generation!
        )
    ],
    return_tensors="pt"  # Return the encoded inputs as PyTorch tensors.
).to("cuda")  # Move the tensors to the GPU (CUDA) for faster processing.

# Generate the model's output for the prompt, limited to a maximum of 64 new tokens.
# use_cache=True reuses past key/value states for faster generation.
outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)

# Decode the generated output, converting the tokenized result back into human-readable text.
# skip_special_tokens=True omits special tokens (such as padding or start tokens).
redacted_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)

# Print the first item in the list of decoded texts, which should be the redacted version of the input text.
print(redacted_text[0])
```
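As a minimal sketch of filling the two blank slots in `alpaca_prompt.format(...)`: the instruction wording and sample input below are placeholders chosen for illustration, not the exact strings used to fine-tune the model.

```python
# Illustrative only: the instruction text and the sample input are placeholders,
# not the exact strings the model was fine-tuned on.
instruction = "Replace all the PII in the following text with the appropriate tags."
input_text = "Hi, I'm Jane Doe and you can reach me at jane.doe@example.com."

inputs = tokenizer(
    [alpaca_prompt.format(instruction, input_text, "")],  # output slot left blank for generation
    return_tensors="pt",
).to("cuda")

outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
```

Note that the decoded string contains the full prompt followed by the model's answer, so in practice you may want to keep only the text that appears after the `### Response:` marker.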