commit files to HF hub
Browse files- config.json +60 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +7 -0
- tokenizer.json +0 -0
- tokenizer_config.json +15 -0
- vocab.txt +0 -0
- warmup.py +137 -0
config.json
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "ProsusAI/finbert",
|
3 |
+
"architectures": [
|
4 |
+
"BertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"custom_pipelines": {
|
9 |
+
"multi-length-text-classification": {
|
10 |
+
"default": {
|
11 |
+
"model": {
|
12 |
+
"pt": [
|
13 |
+
"ProsusAI/finbert",
|
14 |
+
"54bddcea"
|
15 |
+
],
|
16 |
+
"tf": [
|
17 |
+
"ProsusAI/finbert",
|
18 |
+
"54bddcea"
|
19 |
+
]
|
20 |
+
}
|
21 |
+
},
|
22 |
+
"impl": "warmup.MultiLengthTextClassificationPipeline",
|
23 |
+
"pt": [
|
24 |
+
"AutoModelForSequenceClassification"
|
25 |
+
],
|
26 |
+
"tf": [
|
27 |
+
"TFAutoModelForSequenceClassification"
|
28 |
+
],
|
29 |
+
"type": "text"
|
30 |
+
}
|
31 |
+
},
|
32 |
+
"gradient_checkpointing": false,
|
33 |
+
"hidden_act": "gelu",
|
34 |
+
"hidden_dropout_prob": 0.1,
|
35 |
+
"hidden_size": 768,
|
36 |
+
"id2label": {
|
37 |
+
"0": "positive",
|
38 |
+
"1": "negative",
|
39 |
+
"2": "neutral"
|
40 |
+
},
|
41 |
+
"initializer_range": 0.02,
|
42 |
+
"intermediate_size": 3072,
|
43 |
+
"label2id": {
|
44 |
+
"negative": 1,
|
45 |
+
"neutral": 2,
|
46 |
+
"positive": 0
|
47 |
+
},
|
48 |
+
"layer_norm_eps": 1e-12,
|
49 |
+
"max_position_embeddings": 512,
|
50 |
+
"model_type": "bert",
|
51 |
+
"num_attention_heads": 12,
|
52 |
+
"num_hidden_layers": 12,
|
53 |
+
"pad_token_id": 0,
|
54 |
+
"position_embedding_type": "absolute",
|
55 |
+
"torch_dtype": "float32",
|
56 |
+
"transformers_version": "4.27.0.dev0",
|
57 |
+
"type_vocab_size": 2,
|
58 |
+
"use_cache": true,
|
59 |
+
"vocab_size": 30522
|
60 |
+
}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f7ddf3926a879903a62c8afb28af97b2b1d0639c6308c078442faa5cf1623ee3
|
3 |
+
size 438008181
|
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"mask_token": "[MASK]",
|
4 |
+
"pad_token": "[PAD]",
|
5 |
+
"sep_token": "[SEP]",
|
6 |
+
"unk_token": "[UNK]"
|
7 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"do_basic_tokenize": true,
|
4 |
+
"do_lower_case": true,
|
5 |
+
"mask_token": "[MASK]",
|
6 |
+
"model_max_length": 512,
|
7 |
+
"never_split": null,
|
8 |
+
"pad_token": "[PAD]",
|
9 |
+
"sep_token": "[SEP]",
|
10 |
+
"special_tokens_map_file": "/Users/aarnphm/.cache/huggingface/hub/models--ProsusAI--finbert/snapshots/54bddcea2cca580dd1df6a88d33242dcf4c61a71/special_tokens_map.json",
|
11 |
+
"strip_accents": null,
|
12 |
+
"tokenize_chinese_chars": true,
|
13 |
+
"tokenizer_class": "BertTokenizer",
|
14 |
+
"unk_token": "[UNK]"
|
15 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
warmup.py
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
|
3 |
+
import typing as t
|
4 |
+
|
5 |
+
import torch
|
6 |
+
import transformers
|
7 |
+
from transformers.utils import is_tf_available
|
8 |
+
from transformers.utils import is_torch_available
|
9 |
+
from transformers.pipelines import PIPELINE_REGISTRY
|
10 |
+
from transformers.pipelines import TextClassificationPipeline
|
11 |
+
|
12 |
+
if t.TYPE_CHECKING:
|
13 |
+
from transformers.pipelines.base import GenericTensor
|
14 |
+
|
15 |
+
# Passed as `max_length` to the summarizer and sentiment pipelines in the `__main__` demo.
MAX_LENGTH = 3000
|
16 |
+
|
17 |
+
# Sample article used as the demo input for every pipeline in the `__main__` script.
# The em dashes were previously mis-decoded as the Greek letter beta; they are restored here.
text = """\
The three words that best describe Hunter Schafer's Vanity Fair Oscars party look? Less is more.
Dressed in a bias-cut white silk skirt, a single ivory-colored feather and — crucially — nothing else, Schafer was bound to raise a few eyebrows. Google searches for the actor and model skyrocketed on Sunday night as her look hit social media. On Twitter, pictures of Schafer immediately received tens of thousands of likes, while her own Instagram post has now been liked more than 2 million times.
Look of the Week: Zendaya steals the show at Louis Vuitton in head-to-toe tiger print
But more than just creating a headline-grabbing moment, Schafer's ensemble was clearly considered. Fresh off the Fall-Winter 2023 runway, the look debuted earlier this month at fashion house Ann Demeulemeester's show in Paris. It was designed by Ludovic de Saint Sernin, the label's creative director since December.
Celebrity fashion works best when there's a story behind a look. For example, the plausible Edie Sedgwick reference in Kendall Jenner's Bottega Veneta tights, or Paul Mescal winking at traditional masculinity in a plain white tank top.
For his first Ann Demeulemeester collection, De Saint Sernin was inspired by "fashion-making as an authentic act of self-involvement." It was a love letter — almost literally — to the Belgian label's founder, with imagery of "authorship and autobiography" baked into the clothes (Sernin called his feather bandeaus "quills" in the show notes).
Hunter Schafer's barely-there Oscars after party look was more poetic than it first seemed.
These ideas of self-expression, self-love and self-definition took on new meaning when worn by Schafer. As a trans woman whose ascent to fame was inextricably linked to her gender identity — her big break was playing trans teenager Jules in HBO's "Euphoria" — Schafer's body is subjected to constant scrutiny online. The comment sections on her Instagram posts often descend into open forums, where users feel entitled (and seemingly compelled) to ask intimate questions about the trans experience or challenge Schafer's womanhood.
Fittingly, there is a long lineage of gender-defying sentiments stitched into Schafer's outfit. Founded in 1985 by Ann Demeulemeester and her husband Patrick Robyn, the brand boasts a long legacy of gender-non-conforming fashion.
"I was interested in the tension between masculine and feminine, but also the tension between masculine and feminine within one person," Demeulemeester told Vogue ahead of a retrospective exhibition of her work in Florence, Italy, last year. "That is what makes every person really interesting to me because everybody is unique."
In his latest co-ed collection, De Saint Sernin — who is renowned in the industry for his eponymous, gender-fluid label — brought his androgynous world view to Ann Demeulemeester with fitted, romantic menswear silhouettes and sensual fabrics for all (think skin-tight mesh tops, leather, and open shirts made from a translucent organza material).
Celebrity stylist Law Roach on dressing Zendaya and 'faking it 'till you make it'
A quill strapped across her chest, Schafer let us know she is still writing her narrative — and defining herself on her own terms. There's an entire story contained in those two garments. As De Saint Sernin said in the show notes: "Thirty-six looks, each one a heartfelt sentence."
The powerful ensemble may become one of Law Roach's last celebrity styling credits. Roach announced over social media on Tuesday that he would be retiring from the industry after 14 years of creating conversation-driving looks for the likes of Zendaya, Bella Hadid, Anya Taylor-Joy, Ariana Grande and Megan Thee Stallion."""
|
32 |
+
|
33 |
+
# Candidate labels handed to the zero-shot classifier in the `__main__` demo.
categories = [
    "business", "entertainment", "politics", "sport", "technology",
    "world", "healthcare", "infrastructure", "education", "economy",
    "legal", "defence", "parliament",
]
|
48 |
+
|
49 |
+
|
50 |
+
class MultiLengthTextClassificationPipeline(TextClassificationPipeline):
    """Text-classification pipeline that accepts text longer than one model window.

    ``preprocess`` tokenizes the whole input without truncation, cuts the token
    stream into chunks of ``_split_chunk_length`` tokens, wraps every chunk in the
    tokenizer's CLS/SEP tokens, and pads each chunk to a common width so the
    chunks can be stacked into one ``(num_chunks, width)`` batch.

    NOTE(review): ``postprocess`` is inherited unchanged; confirm how the parent
    class combines the per-chunk logits before relying on the scores.
    """

    # Content tokens per chunk. Two extra positions are reserved for the
    # CLS/SEP special tokens, giving a 512-wide window.
    _split_chunk_length = 510

    def preprocess(
        self, inputs: str, **tokenizer_kwargs: t.Any
    ) -> dict[str, GenericTensor]:
        """Turn one string into a stacked batch of fixed-width token chunks.

        Args:
            inputs: The raw text to classify. Only ``str`` is supported.
            **tokenizer_kwargs: Accepted for interface compatibility with the
                parent pipeline; currently ignored.

        Returns:
            A dict with ``input_ids`` (int64) and ``attention_mask`` (int32),
            each of shape ``(num_chunks, _split_chunk_length + 2)``.

        Raises:
            AssertionError: If ``inputs`` is not a string.
        """
        assert isinstance(
            inputs, str
        ), f"inputs currently only supports string as inputs (got {type(inputs)})"

        tokenizer = self.tokenizer
        encoded = tokenizer.encode_plus(
            inputs, add_special_tokens=False, return_tensors="pt"
        )
        # An empty input can come back as a float tensor; normalise the dtype
        # up front so every later concatenation stays in int64.
        token_ids = encoded["input_ids"][0].long()

        # Take special-token ids from the tokenizer rather than hard-coding
        # BERT's values; fall back to the previous behaviour when one is unset.
        pad_id = 0 if tokenizer.pad_token_id is None else tokenizer.pad_token_id
        cls_ids = [] if tokenizer.cls_token_id is None else [tokenizer.cls_token_id]
        sep_ids = [] if tokenizer.sep_token_id is None else [tokenizer.sep_token_id]
        prefix = torch.tensor(cls_ids, dtype=torch.long)
        suffix = torch.tensor(sep_ids, dtype=torch.long)
        window = self._split_chunk_length + 2

        # Guarantee at least one (possibly empty) chunk so the stack below
        # never receives an empty list.
        chunks = token_ids.split(self._split_chunk_length) or (token_ids,)

        id_rows = []
        mask_rows = []
        for chunk in chunks:
            # Sequence classifiers pool the first position, so every chunk
            # must start with CLS (and end with SEP) like a normal model input.
            row = torch.cat([prefix, chunk, suffix])
            mask = torch.ones(row.shape[0], dtype=torch.long)
            pad_len = window - row.shape[0]
            if pad_len > 0:
                # Only the last chunk is normally short; pad it to full width.
                row = torch.cat(
                    [row, torch.full((pad_len,), pad_id, dtype=torch.long)]
                )
                mask = torch.cat([mask, torch.zeros(pad_len, dtype=torch.long)])
            id_rows.append(row)
            mask_rows.append(mask)

        return {
            "input_ids": torch.stack(id_rows),
            "attention_mask": torch.stack(mask_rows).int(),
        }
|
77 |
+
|
78 |
+
|
79 |
+
# Register the custom task name so that
# `pipeline("multi-length-text-classification", ...)` builds the pipeline class
# defined in this module.
PIPELINE_REGISTRY.register_pipeline(
    "multi-length-text-classification",
    pipeline_class=MultiLengthTextClassificationPipeline,
    # Reference a framework's auto-model class only when that framework is available.
    pt_model=(
        transformers.AutoModelForSequenceClassification
        if is_torch_available()
        else None
    ),
    tf_model=(
        transformers.TFAutoModelForSequenceClassification
        if is_tf_available()
        else None
    ),
    # Default checkpoint as (repo id, revision) per framework.
    default={
        "pt": ("ProsusAI/finbert", "54bddcea"),
        "tf": ("ProsusAI/finbert", "54bddcea"),
    },
    type="text",
)
|
94 |
+
|
95 |
+
if __name__ == "__main__":
    # Demo / warm-up script: run each pipeline once on the sample article,
    # print the result, then save the pipeline to the BentoML model store.
    from transformers import pipeline

    import bentoml

    divider = "=" * 50

    # NOTE: Summarization models suggestions:
    # - sshleifer/distilbart-cnn-12-6 (default)
    # - google/pegasus-cnn_dailymail if you have a beefy GPU
    summarization_model = "sshleifer/distilbart-cnn-12-6"
    summarizer = pipeline("summarization", model=summarization_model)
    summary = summarizer(text, max_length=MAX_LENGTH)[0]["summary_text"]
    print("Summarized:", summary)
    saved_summarizer = bentoml.transformers.save_model(
        "summarizer-pipeline",
        summarizer,
        metadata={"model_name": summarization_model},
    )
    print(f"Saved summarizer model: {saved_summarizer}")

    print("\n", divider, "\n")

    # NOTE: Zero-shot classification models suggestions:
    # - facebook/bart-large-mnli (default)
    classification_model = "facebook/bart-large-mnli"
    classifier = pipeline("zero-shot-classification", model=classification_model)
    predicted = classifier(text, categories, multi_label=True)
    label_scores = dict(zip(predicted["labels"], predicted["scores"]))
    print("Categories prediction:", label_scores)
    saved_categorizer = bentoml.transformers.save_model(
        "categorizer-pipeline",
        classifier,
        metadata={"model_name": classification_model},
    )
    print(f"Saved categorizer model: {saved_categorizer}")

    print("\n", divider, "\n")

    # NOTE: Sentiment analysis models suggestions:
    # - distilbert-base-uncased-finetuned-sst-2-english (default)
    # - bhadresh-savani/distilbert-base-uncased-emotion
    # - ProsusAI/finbert
    sentiment_model = "ProsusAI/finbert"
    sentimenter = pipeline(
        "multi-length-text-classification", model=sentiment_model, top_k=None
    )
    sentiment = sentimenter(text, max_length=MAX_LENGTH)
    print("Sentiment prediction:", sentiment)
    saved_sentimenter = bentoml.transformers.save_model(
        "sentimenter-pipeline",
        sentimenter,
        metadata={"model_name": sentiment_model},
    )
    print(f"Saved sentimenter model: {saved_sentimenter}")
|