aarnphm committed on
Commit
adb681e
·
unverified ·
1 Parent(s): e45c951

commit files to HF hub

Browse files
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ProsusAI/finbert",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "custom_pipelines": {
9
+ "multi-length-text-classification": {
10
+ "default": {
11
+ "model": {
12
+ "pt": [
13
+ "ProsusAI/finbert",
14
+ "54bddcea"
15
+ ],
16
+ "tf": [
17
+ "ProsusAI/finbert",
18
+ "54bddcea"
19
+ ]
20
+ }
21
+ },
22
+ "impl": "warmup.MultiLengthTextClassificationPipeline",
23
+ "pt": [
24
+ "AutoModelForSequenceClassification"
25
+ ],
26
+ "tf": [
27
+ "TFAutoModelForSequenceClassification"
28
+ ],
29
+ "type": "text"
30
+ }
31
+ },
32
+ "gradient_checkpointing": false,
33
+ "hidden_act": "gelu",
34
+ "hidden_dropout_prob": 0.1,
35
+ "hidden_size": 768,
36
+ "id2label": {
37
+ "0": "positive",
38
+ "1": "negative",
39
+ "2": "neutral"
40
+ },
41
+ "initializer_range": 0.02,
42
+ "intermediate_size": 3072,
43
+ "label2id": {
44
+ "negative": 1,
45
+ "neutral": 2,
46
+ "positive": 0
47
+ },
48
+ "layer_norm_eps": 1e-12,
49
+ "max_position_embeddings": 512,
50
+ "model_type": "bert",
51
+ "num_attention_heads": 12,
52
+ "num_hidden_layers": 12,
53
+ "pad_token_id": 0,
54
+ "position_embedding_type": "absolute",
55
+ "torch_dtype": "float32",
56
+ "transformers_version": "4.27.0.dev0",
57
+ "type_vocab_size": 2,
58
+ "use_cache": true,
59
+ "vocab_size": 30522
60
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7ddf3926a879903a62c8afb28af97b2b1d0639c6308c078442faa5cf1623ee3
3
+ size 438008181
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_basic_tokenize": true,
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "never_split": null,
8
+ "pad_token": "[PAD]",
9
+ "sep_token": "[SEP]",
10
+ "special_tokens_map_file": "/Users/aarnphm/.cache/huggingface/hub/models--ProsusAI--finbert/snapshots/54bddcea2cca580dd1df6a88d33242dcf4c61a71/special_tokens_map.json",
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
warmup.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import typing as t
4
+
5
+ import torch
6
+ import transformers
7
+ from transformers.utils import is_tf_available
8
+ from transformers.utils import is_torch_available
9
+ from transformers.pipelines import PIPELINE_REGISTRY
10
+ from transformers.pipelines import TextClassificationPipeline
11
+
12
+ if t.TYPE_CHECKING:
13
+ from transformers.pipelines.base import GenericTensor
14
+
15
# Value passed as ``max_length`` to the summarizer and sentiment pipelines in
# the ``__main__`` warm-up script of this module.
MAX_LENGTH = 3000

# Sample news article used as the warm-up input for every pipeline in the
# ``__main__`` script. The em dashes were previously mis-encoded (UTF-8 bytes
# decoded with a Greek code page) and are restored to U+2014 here.
text = """\
The three words that best describe Hunter Schafer's Vanity Fair Oscars party look? Less is more.
Dressed in a bias-cut white silk skirt, a single ivory-colored feather and — crucially — nothing else, Schafer was bound to raise a few eyebrows. Google searches for the actor and model skyrocketed on Sunday night as her look hit social media. On Twitter, pictures of Schafer immediately received tens of thousands of likes, while her own Instagram post has now been liked more than 2 million times.
Look of the Week: Zendaya steals the show at Louis Vuitton in head-to-toe tiger print
But more than just creating a headline-grabbing moment, Schafer's ensemble was clearly considered. Fresh off the Fall-Winter 2023 runway, the look debuted earlier this month at fashion house Ann Demeulemeester's show in Paris. It was designed by Ludovic de Saint Sernin, the label's creative director since December.
Celebrity fashion works best when there's a story behind a look. For example, the plausible Edie Sedgwick reference in Kendall Jenner's Bottega Veneta tights, or Paul Mescal winking at traditional masculinity in a plain white tank top.
For his first Ann Demeulemeester collection, De Saint Sernin was inspired by "fashion-making as an authentic act of self-involvement." It was a love letter — almost literally — to the Belgian label's founder, with imagery of "authorship and autobiography" baked into the clothes (Sernin called his feather bandeaus "quills" in the show notes).
Hunter Schafer's barely-there Oscars after party look was more poetic than it first seemed.
These ideas of self-expression, self-love and self-definition took on new meaning when worn by Schafer. As a trans woman whose ascent to fame was inextricably linked to her gender identity — her big break was playing trans teenager Jules in HBO's "Euphoria" — Schafer's body is subjected to constant scrutiny online. The comment sections on her Instagram posts often descend into open forums, where users feel entitled (and seemingly compelled) to ask intimate questions about the trans experience or challenge Schafer's womanhood.
Fittingly, there is a long lineage of gender-defying sentiments stitched into Schafer's outfit. Founded in 1985 by Ann Demeulemeester and her husband Patrick Robyn, the brand boasts a long legacy of gender-non-conforming fashion.
"I was interested in the tension between masculine and feminine, but also the tension between masculine and feminine within one person," Demeulemeester told Vogue ahead of a retrospective exhibition of her work in Florence, Italy, last year. "That is what makes every person really interesting to me because everybody is unique."
In his latest co-ed collection, De Saint Sernin — who is renowned in the industry for his eponymous, gender-fluid label — brought his androgynous world view to Ann Demeulemeester with fitted, romantic menswear silhouettes and sensual fabrics for all (think skin-tight mesh tops, leather, and open shirts made from a translucent organza material).
Celebrity stylist Law Roach on dressing Zendaya and 'faking it 'till you make it'
A quill strapped across her chest, Schafer let us know she is still writing her narrative — and defining herself on her own terms. There's an entire story contained in those two garments. As De Saint Sernin said in the show notes: "Thirty-six looks, each one a heartfelt sentence."
The powerful ensemble may become one of Law Roach's last celebrity styling credits. Roach announced over social media on Tuesday that he would be retiring from the industry after 14 years of creating conversation-driving looks for the likes of Zendaya, Bella Hadid, Anya Taylor-Joy, Ariana Grande and Megan Thee Stallion."""

# Candidate labels handed to the zero-shot classifier in the ``__main__``
# warm-up script.
categories = [
    "business",
    "entertainment",
    "politics",
    "sport",
    "technology",
    "world",
    "healthcare",
    "infrastructure",
    "education",
    "economy",
    "legal",
    "defence",
    "parliament",
]
48
+
49
+
50
class MultiLengthTextClassificationPipeline(TextClassificationPipeline):
    """Text-classification pipeline that accepts text longer than one model window.

    ``preprocess`` tokenizes the whole input without truncation, cuts the token
    stream into fixed-size chunks, and returns the chunks stacked as one batch.

    NOTE(review): only ``preprocess`` is overridden here. How the inherited
    ``postprocess`` combines the per-chunk logits is not visible in this file;
    confirm it aggregates over all rows rather than reading a single one.
    """

    # Content tokens per chunk; two slots are reserved for [CLS] and [SEP].
    _split_chunk_length = 510
    # Final padded length of every chunk (the BERT window size).
    _max_chunk_length = 512

    def preprocess(
        self, inputs: str, **tokenizer_kwargs: t.Any
    ) -> dict[str, GenericTensor]:
        """Tokenize ``inputs`` and split it into model-sized, padded chunks.

        Args:
            inputs: The raw text to classify. Only ``str`` is supported.
            **tokenizer_kwargs: Accepted for signature compatibility with the
                parent pipeline; not used by this implementation.

        Returns:
            A dict with ``input_ids`` (int64) and ``attention_mask`` (int32),
            each of shape ``(num_chunks, _max_chunk_length)``.

        Raises:
            AssertionError: If ``inputs`` is not a string.
        """
        assert isinstance(
            inputs, str
        ), f"inputs currently only supports string as inputs (got {type(inputs)})"

        # Special tokens are added per chunk below, so ask for none here.
        tokens = self.tokenizer.encode_plus(
            inputs, add_special_tokens=False, return_tensors="pt"
        )
        # Cast up front so that an empty input (which may come back as a float
        # tensor) concatenates cleanly with the integer special/pad tokens.
        token_ids = tokens["input_ids"][0].long()
        token_mask = tokens["attention_mask"][0].long()

        cls_id = self.tokenizer.cls_token_id
        sep_id = self.tokenizer.sep_token_id
        pad_id = self.tokenizer.pad_token_id
        if pad_id is None:
            pad_id = 0

        id_rows = []
        mask_rows = []
        chunk_pairs = zip(
            token_ids.split(self._split_chunk_length),
            token_mask.split(self._split_chunk_length),
        )
        for id_chunk, mask_chunk in chunk_pairs:
            id_parts = [id_chunk]
            mask_parts = [mask_chunk]
            # Wrap every chunk as [CLS] ... [SEP]. Previously the two reserved
            # slots were simply filled with padding, so the model never saw
            # the special tokens it was trained with.
            if cls_id is not None:
                id_parts.insert(0, id_chunk.new_tensor([cls_id]))
                mask_parts.insert(0, mask_chunk.new_ones(1))
            if sep_id is not None:
                id_parts.append(id_chunk.new_tensor([sep_id]))
                mask_parts.append(mask_chunk.new_ones(1))
            id_row = torch.cat(id_parts)
            mask_row = torch.cat(mask_parts)

            # Right-pad short chunks (normally just the last one) so that all
            # rows can be stacked into one rectangular batch.
            pad_len = self._max_chunk_length - id_row.shape[0]
            if pad_len > 0:
                id_row = torch.cat([id_row, id_row.new_full((pad_len,), pad_id)])
                mask_row = torch.cat([mask_row, mask_row.new_zeros(pad_len)])

            id_rows.append(id_row)
            mask_rows.append(mask_row)

        input_ids = torch.stack(id_rows)
        attention_mask = torch.stack(mask_rows)
        return {"input_ids": input_ids.long(), "attention_mask": attention_mask.int()}
77
+
78
+
79
# Make the custom pipeline available under its own task name. A model class
# is only referenced for a framework that is actually installed; otherwise
# ``None`` is passed for that framework.
PIPELINE_REGISTRY.register_pipeline(
    "multi-length-text-classification",
    pipeline_class=MultiLengthTextClassificationPipeline,
    type="text",
    # Default checkpoint per framework; the second tuple element looks like a
    # short revision hash of the checkpoint (TODO confirm).
    default=dict(
        pt=("ProsusAI/finbert", "54bddcea"),
        tf=("ProsusAI/finbert", "54bddcea"),
    ),
    pt_model=(
        transformers.AutoModelForSequenceClassification
        if is_torch_available()
        else None
    ),
    tf_model=(
        transformers.TFAutoModelForSequenceClassification
        if is_tf_available()
        else None
    ),
)
94
+
95
if __name__ == "__main__":
    # Warm-up script: build each pipeline, run it once on the sample ``text``,
    # print the result, and save the pipeline with BentoML.
    from transformers import pipeline

    import bentoml

    divider = "=" * 50

    # NOTE: Summarization models suggestions:
    # - sshleifer/distilbart-cnn-12-6 (default)
    # - google/pegasus-cnn_dailymail if you have a beefy GPU
    summarization_model = "sshleifer/distilbart-cnn-12-6"
    summarizer = pipeline("summarization", model=summarization_model)
    summary = summarizer(text, max_length=MAX_LENGTH)[0]["summary_text"]
    print("Summarized:", summary)
    saved_summarizer = bentoml.transformers.save_model(
        "summarizer-pipeline",
        summarizer,
        metadata={"model_name": summarization_model},
    )
    print(
        f"Saved summarizer model: {saved_summarizer}",
    )

    print("\n", divider, "\n")

    # NOTE: Zero-shot classification models suggestions:
    # - facebook/bart-large-mnli (default)
    classification_model = "facebook/bart-large-mnli"
    classifier = pipeline("zero-shot-classification", model=classification_model)
    predicted = classifier(text, categories, multi_label=True)
    # Pair each predicted label with its score.
    label_scores = dict(zip(predicted["labels"], predicted["scores"]))
    print(
        "Categories prediction:",
        label_scores,
    )
    saved_classifier = bentoml.transformers.save_model(
        "categorizer-pipeline",
        classifier,
        metadata={"model_name": classification_model},
    )
    print(
        f"Saved categorizer model: {saved_classifier}",
    )

    print("\n", divider, "\n")

    # NOTE: Sentiment analysis models suggestions:
    # - distilbert-base-uncased-finetuned-sst-2-english (default)
    # - bhadresh-savani/distilbert-base-uncased-emotion
    # - ProsusAI/finbert
    sentiment_model = "ProsusAI/finbert"
    # Uses the custom task registered above in this module.
    sentimenter = pipeline(
        "multi-length-text-classification", model=sentiment_model, top_k=None
    )
    sentiment = sentimenter(text, max_length=MAX_LENGTH)
    print("Sentiment prediction:", sentiment)
    saved_sentimenter = bentoml.transformers.save_model(
        "sentimenter-pipeline",
        sentimenter,
        metadata={"model_name": sentiment_model},
    )
    print(
        f"Saved sentimenter model: {saved_sentimenter}",
    )