IPA-Transcription-EN / phone_metrics.py
arunasrivastava's picture
without eval library
3a023fb
raw
history blame
4.1 kB
# phone_metrics.py
"""
This module implements phone error metrics based on the work from ginic/phone_errors.
Original implementation: https://huggingface.co/spaces/ginic/phone_errors
Citation:
@inproceedings{Mortensen-et-al:2016,
author = {David R. Mortensen and
Patrick Littell and
Akash Bharadwaj and
Kartik Goyal and
Chris Dyer and
Lori S. Levin},
title = {PanPhon: {A} Resource for Mapping {IPA} Segments to Articulatory Feature Vectors},
booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
pages = {3475--3484},
publisher = {{ACL}},
year = {2016}
}
"""
import numpy as np
import panphon.distance
from typing import List, Dict
class PhoneErrorMetrics:
    """Compute phone-level error metrics between IPA transcriptions.

    Segmentation and every distance calculation are delegated to a single
    ``panphon.distance.Distance`` instance that is created once in
    ``__init__`` and reused for all subsequent calls.
    """

    def __init__(self, feature_model: str = "segment"):
        """Initialize the phone error metrics calculator.

        Args:
            feature_model (str): panphon feature parsing model
                ("strict", "permissive", or "segment")
        """
        self.distance_computer = panphon.distance.Distance(feature_model=feature_model)

    def _phone_error_rate(self, prediction: str, reference: str) -> float:
        """Compute phone error rate between prediction and reference.

        Both strings are split into IPA segments; the rate is the unit-cost
        edit distance between the two segment sequences divided by the
        number of reference segments.

        Args:
            prediction (str): Predicted IPA string
            reference (str): Reference IPA string

        Returns:
            float: Phone error rate

        Raises:
            ValueError: If the reference is empty, or if it is non-empty but
                segments to zero phones (the rate would be undefined).
        """
        if not reference:
            raise ValueError("Reference string cannot be empty")

        segment = self.distance_computer.fm.ipa_segs
        pred_phones = segment(prediction)
        ref_phones = segment(reference)

        # A non-empty string can still yield no segments; without this guard
        # the normalisation below would fail with ZeroDivisionError.
        if not ref_phones:
            raise ValueError("Reference string contains no IPA segments")

        phone_edits = self.distance_computer.min_edit_distance(
            lambda x: 1,  # deletion cost
            lambda x: 1,  # insertion cost
            lambda x, y: 0 if x == y else 1,  # substitution cost
            [[]],  # start symbol required by panphon's min_edit_distance
            pred_phones,
            ref_phones,
        )
        return phone_edits / len(ref_phones)

    def compute(self,
                predictions: List[str],
                references: List[str],
                is_normalize_pfer: bool = False) -> Dict:
        """Compute phone error metrics between predictions and references.

        Predictions and references are compared pairwise, in order.

        Args:
            predictions (List[str]): List of predicted IPA strings
            references (List[str]): List of reference IPA strings
            is_normalize_pfer (bool): Whether to normalize phone feature
                error rates (uses panphon's
                ``hamming_feature_edit_distance_div_maxlen`` instead of
                ``hamming_feature_edit_distance``)

        Returns:
            Dict containing:
                - phone_error_rates: List of PER for each pair
                - mean_phone_error_rate: Average PER
                - phone_feature_error_rates: List of PFER for each pair
                - mean_phone_feature_error_rate: Average PFER
                - feature_error_rates: List of FER for each pair
                - mean_feature_error_rate: Average FER

            With empty inputs the per-pair lists are empty and the means
            are NaN (numpy's mean of an empty sequence).

        Raises:
            ValueError: If ``predictions`` and ``references`` differ in
                length, or if any reference is empty / has no IPA segments.
        """
        # zip() would silently drop the unmatched tail and report metrics
        # for only part of the data, so reject mismatched inputs up front.
        if len(predictions) != len(references):
            raise ValueError(
                f"Mismatch in the number of predictions ({len(predictions)}) "
                f"and references ({len(references)})"
            )

        computer = self.distance_computer
        # The normalisation flag is loop-invariant: pick the PFER function once.
        if is_normalize_pfer:
            feature_edit_distance = computer.hamming_feature_edit_distance_div_maxlen
        else:
            feature_edit_distance = computer.hamming_feature_edit_distance

        phone_error_rates = []
        feature_error_rates = []
        hamming_distances = []
        for pred, ref in zip(predictions, references):
            hamming_distances.append(feature_edit_distance(pred, ref))
            phone_error_rates.append(self._phone_error_rate(pred, ref))
            feature_error_rates.append(computer.feature_error_rate(pred, ref))

        return {
            "phone_error_rates": phone_error_rates,
            "mean_phone_error_rate": float(np.mean(phone_error_rates)),
            "phone_feature_error_rates": hamming_distances,
            "mean_phone_feature_error_rate": float(np.mean(hamming_distances)),
            "feature_error_rates": feature_error_rates,
            "mean_feature_error_rate": float(np.mean(feature_error_rates)),
        }