HalteroXHunter committed on
Commit
f8130b1
·
1 Parent(s): f6c494a

remove unneeded mod

Browse files
Files changed (1) hide show
  1. preprocessing.py +0 -115
preprocessing.py DELETED
@@ -1,115 +0,0 @@
1
- from itertools import chain
2
- from random import choice
3
- from typing import Any, Dict, List, Optional, Tuple
4
-
5
- from datasets import Dataset
6
-
7
-
8
def adjust_predictions(refs, preds, choices):
    """Pad each prediction list until it is at least as long as its reference.

    Args:
        refs: Sequence of reference lists; only their lengths are used.
        preds: Sequence of prediction lists, paired positionally with ``refs``.
        choices: Sequence of filler values. Each missing slot is filled with
            an independent ``random.choice(choices)``, so a one-element
            sequence acts as a fixed special token.

    Returns:
        A new list containing one new list per ``(ref, pred)`` pair. The
        inputs are never modified. Predictions that are already as long as,
        or longer than, their reference are copied unchanged (they are not
        truncated). Pairing uses ``zip``, so surplus entries in the longer of
        ``refs``/``preds`` are ignored.

    Raises:
        IndexError: If padding is required and ``choices`` is empty.
    """
    adjusted_preds = []
    for ref, pred in zip(refs, preds):
        # Work on a copy so the padding never leaks into the caller's data.
        padded = list(pred)
        missing_count = len(ref) - len(padded)
        # range() of a non-positive count is empty, so no padding happens
        # when the prediction is already long enough.
        padded.extend(choice(choices) for _ in range(missing_count))
        adjusted_preds.append(padded)
    return adjusted_preds
17
-
18
-
19
def extract_aspects(data, specific_key, specific_val):
    """Collect ``item[specific_key][specific_val]`` for every item in ``data``.

    The values are returned in iteration order, one per item.
    """
    collected = []
    for entry in data:
        section = entry[specific_key]
        collected.append(section[specific_val])
    return collected
22
-
23
-
24
def absa_term_preprocess(references, predictions, subtask_key, subtask_value):
    """Align predicted terms and polarities with the references and flatten them.

    Args:
        references: Iterable of dicts in which
            ``item[subtask_key][subtask_value]`` and
            ``item[subtask_key]["polarity"]`` are lists of gold values.
        predictions: Iterable of dicts with the same layout holding the
            predicted values.
        subtask_key: Top-level key selecting the sub-dictionary to read.
        subtask_value: Key inside that sub-dictionary holding the terms.

    Returns:
        A 4-tuple of flat lists: reference terms, padded predicted terms,
        reference polarities, padded predicted polarities. Predicted term
        lists shorter than their reference are padded with ``"NONE"``;
        predicted polarity lists are padded with a randomly chosen sentiment
        label, so that part of the output is not deterministic.
    """
    # Extract aspect terms and polarities from the references.
    truth_aspect_terms = extract_aspects(references, subtask_key, subtask_value)
    truth_polarities = extract_aspects(references, subtask_key, "polarity")

    # Copy the predicted lists before padding: the padding helper may extend
    # the lists it receives, and that must not alter the caller's
    # ``predictions`` structure.
    pred_aspect_terms = [
        list(terms)
        for terms in extract_aspects(predictions, subtask_key, subtask_value)
    ]
    pred_polarities = [
        list(polarities)
        for polarities in extract_aspects(predictions, subtask_key, "polarity")
    ]

    # Define adjustment parameters.
    special_token = "NONE"  # For missing aspect terms
    sentiment_choices = [
        "positive",
        "negative",
        "neutral",
        "conflict",
    ]  # For missing polarities

    # Adjust the predictions to match the length of references.
    adjusted_pred_terms = adjust_predictions(
        truth_aspect_terms, pred_aspect_terms, [special_token]
    )
    adjusted_pred_polarities = adjust_predictions(
        truth_polarities, pred_polarities, sentiment_choices
    )

    return (
        flatten_list(truth_aspect_terms),
        flatten_list(adjusted_pred_terms),
        flatten_list(truth_polarities),
        flatten_list(adjusted_pred_polarities),
    )
66
-
67
-
68
def flatten_list(nested_list):
    """Return a new list holding the elements of each inner iterable, in order."""
    flat = []
    for inner in nested_list:
        flat.extend(inner)
    return flat
71
-
72
-
73
def extract_pred_terms(
    all_predictions: List[Dict[str, Dict[str, str]]]
) -> List[List]:
    """Return, for each prediction, the term keys of all its inner dictionaries.

    Each prediction's inner dictionaries are visited in order and their keys
    are gathered into one list per prediction.
    """
    return [
        [term for term_map in prediction.values() for term in term_map.keys()]
        for prediction in all_predictions
    ]
82
-
83
-
84
def merge_aspects_and_categories(aspects, categories):
    """Merge per-sample aspect-term and category annotations into one record each.

    Args:
        aspects: Sequence of dicts mapping a category key to a dict of
            ``{term: polarity}``.
        categories: Sequence of dicts mapping a category key to its polarity,
            paired positionally with ``aspects``.

    Returns:
        A list with one dict per ``(aspect, category)`` pair, shaped as
        ``{"aspects": {"term": [...], "polarity": [...]},
        "category": {"category": [...], "polarity": [...]}}``.
        Terms appear in iteration order of ``aspect``. Each category key of
        ``category`` appears exactly once: first those that also occur in
        ``aspect`` (in ``aspect`` order), then the remaining ones (in
        ``category`` order). Pairing uses ``zip``, so surplus entries in the
        longer input are ignored.
    """
    result = []

    for aspect, category in zip(aspects, categories):
        terms = []
        term_polarities = []
        category_names = []
        category_polarities = []
        recorded = set()  # category keys already emitted for this sample

        for cat_key, terms_dict in aspect.items():
            # Collect every term of this category with its polarity.
            for term, polarity in terms_dict.items():
                terms.append(term)
                term_polarities.append(polarity)

            # Record the category once per key, regardless of how many terms
            # (possibly zero) it contributes.
            if cat_key in category and cat_key not in recorded:
                recorded.add(cat_key)
                category_names.append(cat_key)
                category_polarities.append(category[cat_key])

        # Ensure categories without any aspect entry are still accounted for.
        for cat_key, polarity in category.items():
            if cat_key not in recorded:
                recorded.add(cat_key)
                category_names.append(cat_key)
                category_polarities.append(polarity)

        result.append(
            {
                "aspects": {"term": terms, "polarity": term_polarities},
                "category": {
                    "category": category_names,
                    "polarity": category_polarities,
                },
            }
        )

    return result