Spaces:
Runtime error
Runtime error
Thomas De Decker
committed on
Commit
·
f6df2a0
1
Parent(s):
cf55b94
Fix truncation bug + Update description
Browse files
README.md
CHANGED
@@ -8,15 +8,6 @@ sdk_version: 1.2.0
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
-
models:
|
12 |
-
- DeDeckerThomas/keyphrase-extraction-kbir-inspec
|
13 |
-
- DeDeckerThomas/keyphrase-extraction-distilbert-openkp
|
14 |
-
- DeDeckerThomas/keyphrase-extraction-distilbert-kptimes
|
15 |
-
- DeDeckerThomas/keyphrase-extraction-distilbert-inspec
|
16 |
-
- DeDeckerThomas/keyphrase-extraction-kbir-kpcrowd
|
17 |
-
- DeDeckerThomas/keyphrase-generation-keybart-inspec
|
18 |
-
- DeDeckerThomas/keyphrase-generation-t5-small-inspec
|
19 |
-
- DeDeckerThomas/keyphrase-generation-t5-small-openkp
|
20 |
---
|
21 |
|
22 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
|
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
---
|
12 |
|
13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
|
app.py
CHANGED
@@ -35,7 +35,6 @@ def get_annotated_text(text, keyphrases, color="#d294ff"):
|
|
35 |
rf"$K:{keyphrases.index(keyphrase)}\2",
|
36 |
text,
|
37 |
flags=re.I,
|
38 |
-
count=1,
|
39 |
)
|
40 |
|
41 |
result = []
|
@@ -131,8 +130,7 @@ from a text. Since this is a time-consuming process, Artificial Intelligence is
|
|
131 |
Currently, classical machine learning methods, that use statistics and linguistics, are widely used
|
132 |
for the extraction process. The fact that these methods have been widely used in the community has
|
133 |
the advantage that there are many easy-to-use libraries. Now with the recent innovations in
|
134 |
-
|
135 |
-
keyphrase extraction can be improved. These new methods also focus on the semantics and
|
136 |
context of a document, which is quite an improvement.
|
137 |
|
138 |
This space gives you the ability to test around with some keyphrase extraction and generation models.
|
|
|
35 |
rf"$K:{keyphrases.index(keyphrase)}\2",
|
36 |
text,
|
37 |
flags=re.I,
|
|
|
38 |
)
|
39 |
|
40 |
result = []
|
|
|
130 |
Currently, classical machine learning methods, that use statistics and linguistics, are widely used
|
131 |
for the extraction process. The fact that these methods have been widely used in the community has
|
132 |
the advantage that there are many easy-to-use libraries. Now with the recent innovations in
|
133 |
+
NLP, transformers can be used to improve keyphrase extraction. Transformers also focus on the semantics and
|
|
|
134 |
context of a document, which is quite an improvement.
|
135 |
|
136 |
This space gives you the ability to test around with some keyphrase extraction and generation models.
|
pipelines/__pycache__/keyphrase_extraction_pipeline.cpython-39.pyc
CHANGED
Binary files a/pipelines/__pycache__/keyphrase_extraction_pipeline.cpython-39.pyc and b/pipelines/__pycache__/keyphrase_extraction_pipeline.cpython-39.pyc differ
|
|
pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc
CHANGED
Binary files a/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc and b/pipelines/__pycache__/keyphrase_generation_pipeline.cpython-39.pyc differ
|
|
pipelines/keyphrase_extraction_pipeline.py
CHANGED
@@ -11,7 +11,9 @@ class KeyphraseExtractionPipeline(TokenClassificationPipeline):
|
|
11 |
def __init__(self, model, *args, **kwargs):
|
12 |
super().__init__(
|
13 |
model=AutoModelForTokenClassification.from_pretrained(model),
|
14 |
-
tokenizer=AutoTokenizer.from_pretrained(
|
|
|
|
|
15 |
*args,
|
16 |
**kwargs
|
17 |
)
|
|
|
11 |
def __init__(self, model, *args, **kwargs):
|
12 |
super().__init__(
|
13 |
model=AutoModelForTokenClassification.from_pretrained(model),
|
14 |
+
tokenizer=AutoTokenizer.from_pretrained(
|
15 |
+
model, truncate=True
|
16 |
+
),
|
17 |
*args,
|
18 |
**kwargs
|
19 |
)
|
pipelines/keyphrase_generation_pipeline.py
CHANGED
@@ -8,7 +8,7 @@ class KeyphraseGenerationPipeline(Text2TextGenerationPipeline):
|
|
8 |
def __init__(self, model, keyphrase_sep_token=";", *args, **kwargs):
|
9 |
super().__init__(
|
10 |
model=AutoModelForSeq2SeqLM.from_pretrained(model),
|
11 |
-
tokenizer=AutoTokenizer.from_pretrained(model),
|
12 |
*args,
|
13 |
**kwargs
|
14 |
)
|
|
|
8 |
def __init__(self, model, keyphrase_sep_token=";", *args, **kwargs):
|
9 |
super().__init__(
|
10 |
model=AutoModelForSeq2SeqLM.from_pretrained(model),
|
11 |
+
tokenizer=AutoTokenizer.from_pretrained(model, truncate=True),
|
12 |
*args,
|
13 |
**kwargs
|
14 |
)
|