get_error_rate for WER

MikeSmithEU · MikeSmithEU · commit 20b95706bba6 · 2021-01-23T17:05:24.000+01:00
diff --git a/setup.py b/setup.py
@@ -62,6 +62,7 @@ def filter_requirements(line):
         'jsonrpcserver>=4.0.1',
         'gunicorn>=19.9.0',
         'docutils>=0.14',
+        'edit_distance>=1.0.4',
         'editdistance>=0.5.3',
         'Unidecode>=1.1.2',
     ],
diff --git a/src/benchmarkstt/diff/__init__.py b/src/benchmarkstt/diff/__init__.py
@@ -64,4 +64,22 @@ def get_opcodes(self):
     def get_opcode_counts(self):
         raise NotImplementedError()
 
+    @abstractmethod
+    def get_error_rate(self):
+        raise NotImplementedError()
+
+
+class Differ(DifferInterface, metaclass=ABCMeta):
+    def get_opcode_counts(self):
+        return get_opcode_counts(self.get_opcodes())
+
+    def get_error_rate(self):
+        counts = self.get_opcode_counts()
+
+        changes = counts.replace + counts.delete + counts.insert
+        total = counts.equal + counts.replace + counts.delete
+
+        return changes / total
+
+
 factory = CoreFactory(DifferInterface, False)
diff --git a/src/benchmarkstt/diff/core.py b/src/benchmarkstt/diff/core.py
@@ -5,6 +5,7 @@
 from difflib import SequenceMatcher
 from benchmarkstt.diff import Differ
 import edit_distance
+import editdistance
 
 
 class RatcliffObershelp(Differ):
@@ -55,6 +56,11 @@ def __init__(self, a, b, **kwargs):
     def get_opcodes(self):
         return self.simplify_opcodes(self._matcher.get_opcodes())
 
+    def get_error_rate(self):
+        a = self._kwargs['a']
+        b = self._kwargs['b']
+        return editdistance.eval(a, b) / len(a)
+
     @staticmethod
     def simplify_opcodes(opcodes):
         new_codes = []
diff --git a/src/benchmarkstt/metrics/core.py b/src/benchmarkstt/metrics/core.py
@@ -1,6 +1,6 @@
-from benchmarkstt.schema import Schema
+from benchmarkstt.schema import Schema, Item
 import logging
-from benchmarkstt.diff import Differ
+from benchmarkstt.diff import Differ, factory as differ_factory
 from benchmarkstt.diff.core import RatcliffObershelp
 from benchmarkstt.diff.formatter import format_diff
 from benchmarkstt.metrics import Metric
@@ -12,11 +12,13 @@
 OpcodeCounts = namedtuple('OpcodeCounts',
                           ('equal', 'replace', 'insert', 'delete'))
 
+type_schema = Union[Schema, list]
+
 
 def traversible(schema, key=None):
     if key is None:
         key = 'item'
-    return [word[key] for word in schema]
+    return [item if type(item) is str else item[key] for item in schema]
 
 
 def get_differ(a, b, differ_class: Differ):
@@ -41,7 +43,7 @@ def __init__(self, differ_class: Differ = None, dialect: str = None):
         self._differ_class = differ_class
         self._dialect = dialect
 
-    def compare(self, ref: Schema, hyp: Schema):
+    def compare(self, ref: type_schema, hyp: type_schema):
         differ = get_differ(ref, hyp, differ_class=self._differ_class)
         a = traversible(ref)
         b = traversible(hyp)
@@ -82,24 +84,17 @@ class WER(Metric):
     INS_PENALTY = 1
     SUB_PENALTY = 1
 
-    def __init__(self, mode=None, differ_class: Differ = None):
+    def __init__(self, mode=None, differ_class: Union[str, Differ, None] = None):
         self._mode = mode
 
         if differ_class is None:
             differ_class = RatcliffObershelp
         self._differ_class = differ_class
+
         if mode == self.MODE_HUNT:
             self.DEL_PENALTY = self.INS_PENALTY = .5
 
-    def compare(self, ref: Schema, hyp: Schema) -> float:
-        if self._mode == self.MODE_LEVENSHTEIN:
-            ref_list = [i['item'] for i in ref]
-            hyp_list = [i['item'] for i in hyp]
-            total_ref = len(ref_list)
-            if total_ref == 0:
-                return 1
-            return editdistance.eval(ref_list, hyp_list) / total_ref
-
+    def compare(self, ref: type_schema, hyp: type_schema) -> float:
         diffs = get_differ(ref, hyp, differ_class=self._differ_class)
 
         counts = diffs.get_opcode_counts()
@@ -141,25 +136,21 @@ class CER(Metric):
     will first be split into words, ['aa','bb','cc'], and
     then merged into a final string for evaluation: 'aabbcc'.
 
-    :param mode: 'levenshtein' (default).
-    :param differ_class: For future use.
+    :param differ_class: see :py:mod:`benchmarkstt.diff.core`
     """
 
-    # CER modes
-    MODE_LEVENSHTEIN = 'levenshtein'
+    def __init__(self, differ_class: Union[str, Differ, None] = None):
+        self._differ_class = Levenshtein if differ_class is None else differ_class
 
-    def __init__(self, mode=None, differ_class=None):
-        self._mode = mode
-
-    def compare(self, ref: Schema, hyp: Schema):
-        ref_str = ''.join([i['item'] for i in ref])
-        hyp_str = ''.join([i['item'] for i in hyp])
-        total_ref = len(ref_str)
+    def compare(self, ref: type_schema, hyp: type_schema):
+        ref_str = ''.join(traversible(ref))
+        hyp_str = ''.join(traversible(hyp))
 
-        if total_ref == 0:
+        if len(ref_str) == 0:
             return 0 if len(hyp_str) == 0 else 1
 
-        return editdistance.eval(ref_str, hyp_str) / total_ref
+        diffs = get_differ(ref_str, hyp_str, differ_class=self._differ_class)
+        return diffs.get_error_rate()
 
 
 class DiffCounts(Metric):
@@ -169,12 +160,10 @@ class DiffCounts(Metric):
     :param differ_class: see :py:mod:`benchmarkstt.Differ.core`
     """
 
-    def __init__(self, differ_class: Differ = None):
-        if differ_class is None:
-            differ_class = RatcliffObershelp
+    def __init__(self, differ_class: Union[str, Differ, None] = None):
         self._differ_class = differ_class
 
-    def compare(self, ref: Schema, hyp: Schema) -> OpcodeCounts:
+    def compare(self, ref: type_schema, hyp: type_schema) -> OpcodeCounts:
         diffs = get_differ(ref, hyp, differ_class=self._differ_class)
         return diffs.get_opcode_counts()
 
diff --git a/src/benchmarkstt/schema.py b/src/benchmarkstt/schema.py
@@ -3,7 +3,6 @@
 """
 import json
 from collections.abc import Mapping
-from typing import Union
 from collections import defaultdict
 
 
@@ -51,6 +50,9 @@ def __iter__(self):
     def __repr__(self):
         return 'Item(%s)' % (self.json(),)
 
+    def __hash__(self):
+        return hash(self._val['item'])
+
     def json(self, **kwargs):
         return Schema.dumps(self, **kwargs)
 
diff --git a/tests/benchmarkstt/test_diff.py b/tests/benchmarkstt/test_diff.py
@@ -25,8 +25,8 @@ def clean_opcodes(opcodes):
 def test_simple_levenshtein_ratcliff_similarity():
     a = list('012345')
     b = list('023x45')
-    assert clean_opcodes(Levenshtein(a, b).get_opcodes()) == \
-           clean_opcodes(RatcliffObershelp(a, b).get_opcodes())
+    assert(clean_opcodes(Levenshtein(a, b).get_opcodes()) ==
+           clean_opcodes(RatcliffObershelp(a, b).get_opcodes()))
 
 
 @differs_decorator
@@ -35,42 +35,43 @@ def test_simple(differ):
         '0123456HIJkopq',
         '0123456HIJKlmnopq'
     )
-    assert clean_opcodes(sm.get_opcodes()) == \
+    assert(clean_opcodes(sm.get_opcodes()) ==
            clean_opcodes([('equal', 0, 10, 0, 10),
                           ('replace', 10, 11, 10, 14),
-                          ('equal', 11, 14, 14, 17)])
+                          ('equal', 11, 14, 14, 17)]))
 
 
 @differs_decorator
 def test_one_insert(differ):
     sm = differ('b' * 100, 'a' + 'b' * 100)
-    assert clean_opcodes(sm.get_opcodes()) == \
+    assert(clean_opcodes(sm.get_opcodes()) ==
            clean_opcodes([('insert', 0, 0, 0, 1),
-                          ('equal', 0, 100, 1, 101)])
+                          ('equal', 0, 100, 1, 101)]))
+
     sm = differ('b' * 100, 'b' * 50 + 'a' + 'b' * 50)
-    assert clean_opcodes(sm.get_opcodes()) == \
+    assert(clean_opcodes(sm.get_opcodes()) ==
            clean_opcodes([('equal', 0, 50, 0, 50),
                           ('insert', 50, 50, 50, 51),
-                          ('equal', 50, 100, 51, 101)])
+                          ('equal', 50, 100, 51, 101)]))
 
 
 @differs_decorator
 def test_one_delete(differ):
     sm = differ('a' * 40 + 'c' + 'b' * 40, 'a' * 40 + 'b' * 40)
-    assert clean_opcodes(sm.get_opcodes()) == \
+    assert(clean_opcodes(sm.get_opcodes()) ==
            clean_opcodes([('equal', 0, 40, 0, 40),
                           ('delete', 40, 41, 40, 40),
-                          ('equal', 41, 81, 40, 80)])
+                          ('equal', 41, 81, 40, 80)]))
 
 
 def test_ratcliffobershelp():
     ref = "a b c d e f"
     hyp = "a b d e kfmod fgdjn idf giudfg diuf dufg idgiudgd"
     sm = RatcliffObershelp(ref, hyp)
-    assert clean_opcodes(sm.get_opcodes()) == \
+    assert(clean_opcodes(sm.get_opcodes()) ==
            clean_opcodes([('equal', 0, 3, 0, 3),
                           ('delete', 3, 5, 3, 3),
                           ('equal', 5, 10, 3, 8),
                           ('insert', 10, 10, 8, 9),
                           ('equal', 10, 11, 9, 10),
-                          ('insert', 11, 11, 10, 49)])
+                          ('insert', 11, 11, 10, 49)]))
diff --git a/tests/benchmarkstt/test_metrics_core.py b/tests/benchmarkstt/test_metrics_core.py
@@ -41,7 +41,6 @@ def test_wer(a, b, exp):
     assert WER(differ_class='levenshtein').compare(PlainText(a), PlainText(b)) == wer_levenshtein
 
 
-
 @pytest.mark.parametrize('a,b,entities_list,weights,exp_beer,exp_occ', [
     ['madam is here', 'adam is here', ['madam', 'here'], [100, 10], (1.0, 0.0), (1, 1)],
     ['theresa may is here', 'theresa may is there', ['theresa may', 'here'], [10, 100], (0.0, 1.0), (1, 1)],
@@ -113,5 +112,4 @@ def test_wa_beer(a, b, entities_list, weights, exp):
 def test_cer(a, b, exp):
     cer_levenshtein, = exp
 
-    assert CER(mode=CER.MODE_LEVENSHTEIN).compare(PlainText(a), PlainText(b)) == cer_levenshtein
-
+    assert CER(differ_class='levenshtein').compare(PlainText(a), PlainText(b)) == cer_levenshtein