belenedgar
/

transformers_issues_topics

Model card Files Files and versions Community

belenedgar committed on Aug 9, 2023

Commit

c3c5ad5

1 Parent(s): b3ac55a

Add BERTopic model

Browse files

Files changed (4) hide show

README.md +72 -0
config.json +15 -0
topic_embeddings.safetensors +3 -0
topics.json +416 -0

README.md ADDED Viewed

	@@ -0,0 +1,72 @@

+---
+tags:
+- bertopic
+library_name: bertopic
+pipeline_tag: text-classification
+---
+# transformers_issues_topics
+This is a [BERTopic](https://github.com/MaartenGr/BERTopic) model.
+BERTopic is a flexible and modular topic modeling framework that allows for the generation of easily interpretable topics from large datasets.
+## Usage
+To use this model, please install BERTopic:
+```
+pip install -U bertopic
+```
+You can use the model as follows:
+```python
+from bertopic import BERTopic
+topic_model = BERTopic.load("belenedgar/transformers_issues_topics")
+topic_model.get_topic_info()
+```
+## Topic overview
+* Number of topics: 5
+* Number of training documents: 156
+<details>
+  <summary>Click here for an overview of all topics.</summary>
+  | Topic ID | Topic Keywords | Topic Frequency | Label |
+|----------|----------------|-----------------|-------|
+| -1 | extremism - extremist - terrorism - radical - radicalization | 17 | -1_extremism_extremist_terrorism_radical |
+| 0 | phishing - theft - scammers - security - fraud | 54 | 0_phishing_theft_scammers_security |
+| 1 | addiction - violence - cyber - content - presence | 39 | 1_addiction_violence_cyber_content |
+| 2 | cyberbullying - bullying - cyber - cyberstalking - harassment | 25 | 2_cyberbullying_bullying_cyber_cyberstalking |
+| 3 | profanity - derogatory - vulgarity - hate - offensive | 21 | 3_profanity_derogatory_vulgarity_hate |
+</details>
+## Training hyperparameters
+* calculate_probabilities: False
+* language: english
+* low_memory: False
+* min_topic_size: 10
+* n_gram_range: (1, 1)
+* nr_topics: None
+* seed_topic_list: None
+* top_n_words: 10
+* verbose: False
+## Framework versions
+* Numpy: 1.24.4
+* HDBSCAN: 0.8.33
+* UMAP: 0.5.3
+* Pandas: 2.0.3
+* Scikit-Learn: 1.3.0
+* Sentence-transformers: 2.2.2
+* Transformers: 4.31.0
+* Numba: 0.57.1
+* Plotly: 5.15.0
+* Python: 3.10.10

config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "calculate_probabilities": false,
+  "language": "english",
+  "low_memory": false,
+  "min_topic_size": 10,
+  "n_gram_range": [
+    1,
+    1
+  ],
+  "nr_topics": null,
+  "seed_topic_list": null,
+  "top_n_words": 10,
+  "verbose": false,
+  "embedding_model": "sentence-transformers/all-MiniLM-L6-v2"
+}

topic_embeddings.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52be1790bfea1860793d98a9bcefec00f5850462095903c26e3ba1152425e3b7
+size 7768

topics.json ADDED Viewed

	@@ -0,0 +1,416 @@

+{
+  "topic_representations": {
+    "-1": [
+      [
+        "extremism",
+        0.4750615656375885
+      ],
+      [
+        "extremist",
+        0.4353869557380676
+      ],
+      [
+        "terrorism",
+        0.42795926332473755
+      ],
+      [
+        "radical",
+        0.3870584964752197
+      ],
+      [
+        "radicalization",
+        0.3648340106010437
+      ],
+      [
+        "undermine",
+        0.33723917603492737
+      ],
+      [
+        "propaganda",
+        0.3277747631072998
+      ],
+      [
+        "violent",
+        0.3236936330795288
+      ],
+      [
+        "violence",
+        0.3047242760658264
+      ],
+      [
+        "political",
+        0.29089266061782837
+      ]
+    ],
+    "0": [
+      [
+        "phishing",
+        0.4283202886581421
+      ],
+      [
+        "theft",
+        0.35850489139556885
+      ],
+      [
+        "scammers",
+        0.3532814383506775
+      ],
+      [
+        "security",
+        0.34436333179473877
+      ],
+      [
+        "fraud",
+        0.3367052674293518
+      ],
+      [
+        "fraudulent",
+        0.3361189067363739
+      ],
+      [
+        "passwords",
+        0.33049651980400085
+      ],
+      [
+        "privacy",
+        0.31392160058021545
+      ],
+      [
+        "secure",
+        0.30753785371780396
+      ],
+      [
+        "breaches",
+        0.30685776472091675
+      ]
+    ],
+    "1": [
+      [
+        "addiction",
+        0.4357987940311432
+      ],
+      [
+        "violence",
+        0.3492211401462555
+      ],
+      [
+        "cyber",
+        0.34194207191467285
+      ],
+      [
+        "content",
+        0.3365675210952759
+      ],
+      [
+        "presence",
+        0.33245617151260376
+      ],
+      [
+        "violent",
+        0.3288145661354065
+      ],
+      [
+        "social",
+        0.31930094957351685
+      ],
+      [
+        "screen",
+        0.31717175245285034
+      ],
+      [
+        "persona",
+        0.31366002559661865
+      ],
+      [
+        "media",
+        0.3113996684551239
+      ]
+    ],
+    "2": [
+      [
+        "cyberbullying",
+        0.4925338923931122
+      ],
+      [
+        "bullying",
+        0.4540230333805084
+      ],
+      [
+        "cyber",
+        0.43500053882598877
+      ],
+      [
+        "cyberstalking",
+        0.4348675608634949
+      ],
+      [
+        "harassment",
+        0.4151962995529175
+      ],
+      [
+        "predators",
+        0.36431488394737244
+      ],
+      [
+        "abuse",
+        0.35895395278930664
+      ],
+      [
+        "predatory",
+        0.3434147238731384
+      ],
+      [
+        "behavior",
+        0.3418422341346741
+      ],
+      [
+        "predator",
+        0.318901389837265
+      ]
+    ],
+    "3": [
+      [
+        "profanity",
+        0.4581272602081299
+      ],
+      [
+        "derogatory",
+        0.37975403666496277
+      ],
+      [
+        "vulgarity",
+        0.3786264657974243
+      ],
+      [
+        "hate",
+        0.3713056445121765
+      ],
+      [
+        "offensive",
+        0.36522164940834045
+      ],
+      [
+        "words",
+        0.36498433351516724
+      ],
+      [
+        "vulgar",
+        0.353113055229187
+      ],
+      [
+        "civility",
+        0.3384415805339813
+      ],
+      [
+        "obscenity",
+        0.3357018828392029
+      ],
+      [
+        "speech",
+        0.3351168930530548
+      ]
+    ]
+  },
+  "topics": [
+    1,
+    1,
+    2,
+    2,
+    1,
+    -1,
+    3,
+    0,
+    2,
+    -1,
+    3,
+    2,
+    0,
+    0,
+    0,
+    -1,
+    0,
+    2,
+    0,
+    2,
+    3,
+    -1,
+    3,
+    0,
+    0,
+    0,
+    0,
+    2,
+    3,
+    1,
+    1,
+    1,
+    0,
+    -1,
+    3,
+    1,
+    2,
+    1,
+    1,
+    0,
+    0,
+    3,
+    0,
+    0,
+    0,
+    3,
+    0,
+    1,
+    3,
+    -1,
+    2,
+    0,
+    -1,
+    3,
+    0,
+    -1,
+    1,
+    2,
+    2,
+    1,
+    -1,
+    0,
+    2,
+    0,
+    2,
+    1,
+    1,
+    1,
+    -1,
+    0,
+    3,
+    0,
+    0,
+    -1,
+    2,
+    3,
+    3,
+    1,
+    2,
+    0,
+    0,
+    1,
+    2,
+    1,
+    2,
+    3,
+    0,
+    -1,
+    0,
+    2,
+    -1,
+    0,
+    1,
+    0,
+    0,
+    0,
+    0,
+    1,
+    1,
+    2,
+    1,
+    1,
+    3,
+    1,
+    2,
+    -1,
+    1,
+    1,
+    0,
+    0,
+    1,
+    0,
+    1,
+    1,
+    3,
+    1,
+    0,
+    0,
+    1,
+    1,
+    1,
+    3,
+    1,
+    2,
+    0,
+    0,
+    3,
+    0,
+    0,
+    0,
+    0,
+    3,
+    0,
+    0,
+    1,
+    3,
+    1,
+    2,
+    -1,
+    1,
+    1,
+    0,
+    0,
+    0,
+    0,
+    0,
+    -1,
+    3,
+    0,
+    0,
+    2,
+    2,
+    2,
+    1,
+    -1,
+    0
+  ],
+  "topic_sizes": {
+    "1": 39,
+    "2": 25,
+    "-1": 17,
+    "3": 21,
+    "0": 54
+  },
+  "topic_mapper": [
+    [
+      -1,
+      -1,
+      -1
+    ],
+    [
+      0,
+      0,
+      0
+    ],
+    [
+      1,
+      1,
+      3
+    ],
+    [
+      2,
+      2,
+      1
+    ],
+    [
+      3,
+      3,
+      2
+    ]
+  ],
+  "topic_labels": {
+    "-1": "-1_extremism_extremist_terrorism_radical",
+    "0": "0_phishing_theft_scammers_security",
+    "1": "1_addiction_violence_cyber_content",
+    "2": "2_cyberbullying_bullying_cyber_cyberstalking",
+    "3": "3_profanity_derogatory_vulgarity_hate"
+  },
+  "custom_labels": null,
+  "_outliers": 1,
+  "topic_aspects": {}
+}