ebu · MikeSmithEU · Nov 5, 2020 · Nov 7, 2020 · Nov 7, 2020 · Nov 7, 2020
diff --git a/docs/ext/autoclassmembersdiagram.py b/docs/ext/autoclassmembersdiagram.py
@@ -39,6 +39,7 @@ class MagicTraits(object):
         "__getattr__": "attributes",
         "__getattribute__": "attributes",
         "__len__": "len",
+        "__hash__": "hashable",
         "__subclasshook__": False,
         "__repr__": False,
         "__str__": False,

diff --git a/setup.py b/setup.py
@@ -62,6 +62,7 @@ def filter_requirements(line):
         'jsonrpcserver>=4.0.1',
         'gunicorn>=19.9.0',
         'docutils>=0.14',
+        'edit_distance>=1.0.4',
         'editdistance>=0.5.3',
         'Unidecode>=1.1.2',
     ],

diff --git a/src/benchmarkstt/diff/__init__.py b/src/benchmarkstt/diff/__init__.py
@@ -2,11 +2,39 @@
 Responsible for calculating differences.
 """
 
-from abc import ABC, abstractmethod
+from abc import ABC, ABCMeta, abstractmethod
 from benchmarkstt.factory import CoreFactory
+from collections import namedtuple
 
 
-class Differ(ABC):
+OpcodeCounts = namedtuple('OpcodeCounts',
+                          ('equal', 'replace', 'insert', 'delete'))
+
+
+def get_opcode_counts(opcodes) -> OpcodeCounts:
+    """
+    Count equal/replaced/inserted/deleted items over (tag, alo, ahi, blo, bhi) opcodes.
+    """
+    equal = replace = insert = delete = 0
+    for tag, alo, ahi, blo, bhi in opcodes:
+        len_a, len_b = ahi - alo, bhi - blo
+        if tag == 'equal':
+            equal += len_a
+        elif tag == 'insert':
+            insert += len_b
+        elif tag == 'delete':
+            delete += len_a
+        elif tag == 'replace':
+            # The overlapping part of an uneven replace is substitution; the
+            # surplus on the longer side is counted as deletes (a) or inserts (b).
+            common = min(len_a, len_b)
+            replace += common
+            delete += len_a - common
+            insert += len_b - common
+    return OpcodeCounts(equal, replace, insert, delete)
+
+
+class DifferInterface(ABC):
     @abstractmethod
     def __init__(self, a, b):
         """
@@ -32,5 +60,31 @@ def get_opcodes(self):
         """
         raise NotImplementedError()
 
+    @abstractmethod
+    def get_opcode_counts(self):
+        raise NotImplementedError()
+
+    @abstractmethod
+    def get_error_rate(self):
+        raise NotImplementedError()
+
+
+class Differ(DifferInterface, metaclass=ABCMeta):
+    """
+    Provides pre-made (probably sub-optimal) implementations of
+    get_opcode_counts() and get_error_rate(), both derived from get_opcodes()
+    """
+
+    def get_opcode_counts(self):
+        return get_opcode_counts(self.get_opcodes())  # module-level helper, not recursion
+
+    def get_error_rate(self):
+        counts = self.get_opcode_counts()
+        changes = counts.replace + counts.delete + counts.insert
+        total = counts.equal + counts.replace + counts.delete  # items taken from a
+        if total == 0:  # empty reference: avoid ZeroDivisionError
+            return 1 if changes else 0
+        return changes / total
+
 
-factory = CoreFactory(Differ, False)
+factory = CoreFactory(DifferInterface, False)
diff --git a/src/benchmarkstt/diff/core.py b/src/benchmarkstt/diff/core.py
@@ -4,6 +4,8 @@
 
 from difflib import SequenceMatcher
 from benchmarkstt.diff import Differ
+import edit_distance
+import editdistance
 
 
 class RatcliffObershelp(Differ):
@@ -12,9 +14,7 @@ class RatcliffObershelp(Differ):
 
     From difflib.SequenceMatcher_ (Copyright_ 2001-2020, Python Software Foundation.)
 
-        SequenceMatcher is a flexible class for comparing pairs of sequences of
-        any type, so long as the sequence elements are hashable.  The basic
-        algorithm predates, and is a little fancier than, an algorithm
+        The basic algorithm predates, and is a little fancier than, an algorithm
         published in the late 1980's by Ratcliff and Obershelp under the
         hyperbolic name "gestalt pattern matching".  The basic idea is to find
         the longest contiguous matching subsequence that contains no "junk"
@@ -29,11 +29,56 @@ class RatcliffObershelp(Differ):
     """
 
     def __init__(self, a, b, **kwargs):
-        if 'autojunk' not in kwargs:
-            kwargs['autojunk'] = False
         kwargs['a'] = a
         kwargs['b'] = b
-        self.matcher = SequenceMatcher(**kwargs)
+        self._kwargs = kwargs
+        self._matcher = SequenceMatcher(**self._kwargs)
 
     def get_opcodes(self):
-        return self.matcher.get_opcodes()
+        return self._matcher.get_opcodes()
+
+
+class Levenshtein(Differ):
+    """
+    Levenshtein_ distance is the minimum edit distance.
+
+    .. _Levenshtein: https://en.wikipedia.org/wiki/Levenshtein_distance
+    """
+
+    def __init__(self, a, b, **kwargs):
+        kwargs['a'] = a
+        kwargs['b'] = b
+        if 'action_function' not in kwargs:
+            kwargs['action_function'] = edit_distance.highest_match_action
+        self._kwargs = kwargs  # kept so get_error_rate() can read a and b back
+        self._matcher = edit_distance.SequenceMatcher(**self._kwargs)
+
+    def get_opcodes(self):
+        raise NotImplementedError("not supported by %r" % (self,))
+
+    def get_error_rate(self):
+        a = self._kwargs['a']
+        b = self._kwargs['b']
+        len_a = len(a)
+        if len_a == 0:  # empty reference: 0 when both are empty, otherwise 1
+            return 0 if len(b) == 0 else 1
+        return editdistance.eval(a, b) / len_a  # edit distance relative to len(a)
+
+    @staticmethod
+    def simplify_opcodes(opcodes):
+        """Merge runs of consecutive opcodes that share a tag into single tuples."""
+        new_codes = []
+        prev = None
+        for cur in opcodes:
+            if prev is None:
+                prev = list(cur)  # copy, so the caller's opcode is never mutated
+            elif cur[0] == prev[0]:
+                prev[2] = cur[2]  # same tag: take over cur's end offsets
+                prev[4] = cur[4]  # (fields 2 and 4 of the opcode)
+            else:
+                new_codes.append(tuple(prev))
+                prev = list(cur)
+        if prev is not None:
+            new_codes.append(tuple(prev))  # flush the last run
+
+        return new_codes
diff --git a/src/benchmarkstt/metrics/core.py b/src/benchmarkstt/metrics/core.py
@@ -1,8 +1,9 @@
 from benchmarkstt.schema import Schema
 import logging
-import json
-from benchmarkstt.diff import Differ
-from benchmarkstt.diff.core import RatcliffObershelp
+from collections import namedtuple
+from typing import Union
+from benchmarkstt.diff import DifferInterface, factory as differ_factory
+from benchmarkstt.diff.core import RatcliffObershelp, Levenshtein
 from benchmarkstt.diff.formatter import format_diff
 from benchmarkstt.metrics import Metric
 from collections import namedtuple
@@ -13,57 +14,39 @@
 OpcodeCounts = namedtuple('OpcodeCounts',
                           ('equal', 'replace', 'insert', 'delete'))
 
+type_schema = Union[Schema, list]
+type_differ = DifferInterface
+
 
 def traversible(schema, key=None):
     if key is None:
         key = 'item'
-    return [word[key] for word in schema]
-
-
-def get_opcode_counts(opcodes) -> OpcodeCounts:
-    counts = OpcodeCounts(0, 0, 0, 0)._asdict()
-    for tag, alo, ahi, blo, bhi in opcodes:
-        if tag == 'equal':
-            counts[tag] += ahi - alo
-        elif tag == 'insert':
-            counts[tag] += bhi - blo
-        elif tag == 'delete':
-            counts[tag] += ahi - alo
-        elif tag == 'replace':
-            ca = ahi - alo
-            cb = bhi - blo
-            if ca < cb:
-                counts['insert'] += cb - ca
-                counts['replace'] += ca
-            elif ca > cb:
-                counts['delete'] += ca - cb
-                counts['replace'] += cb
-            else:
-                counts[tag] += ahi - alo
-    return OpcodeCounts(counts['equal'], counts['replace'], counts['insert'], counts['delete'])
+    return [item if isinstance(item, str) else item[key] for item in schema]
 
 
-def get_differ(a, b, differ_class: Differ):
-    if differ_class is None:
-        # differ_class = HuntMcIlroy
+def get_differ(a, b, differ_class: Union[str, type_differ, None]):
+    if differ_class is None or differ_class == '':  # unset: fall back to the default
         differ_class = RatcliffObershelp
+    elif isinstance(differ_class, str):  # a name: resolve it through the differ factory
+        differ_class = differ_factory[differ_class]
     return differ_class(traversible(a), traversible(b))
 
 
 class WordDiffs(Metric):
     """
     Present differences on a per-word basis
 
+    :param differ_class: see :py:mod:`benchmarkstt.diff.core`
     :param dialect: Presentation format. Default is 'ansi'.
+    :example differ_class: 'levenshtein'
     :example dialect: 'html'
-    :param differ_class: For future use.
     """
 
-    def __init__(self, dialect=None, differ_class: Differ = None):
+    def __init__(self, differ_class: type_differ = None, dialect: str = None):
         self._differ_class = differ_class
         self._dialect = dialect
 
-    def compare(self, ref: Schema, hyp: Schema):
+    def compare(self, ref: type_schema, hyp: type_schema):
         differ = get_differ(ref, hyp, differ_class=self._differ_class)
         a = traversible(ref)
         b = traversible(hyp)
@@ -92,58 +75,44 @@ class WER(Metric):
 
     See https://docs.python.org/3/library/difflib.html
 
-    [Mode: 'levenshtein'] In the context of WER, Levenshtein
-    distance is the minimum edit distance computed at the
-    word level. This implementation uses the Editdistance
-    c++ implementation by Hiroyuki Tanaka:
-    https://github.com/aflc/editdistance. See:
-    https://en.wikipedia.org/wiki/Levenshtein_distance
-
-    :param mode: 'strict' (default), 'hunt' or 'levenshtein'.
+    :param mode: 'strict' (default) or 'hunt'.
-    :param differ_class: For future use.
+    :param differ_class: see :py:mod:`benchmarkstt.diff.core`
     """
 
     # WER modes
     MODE_STRICT = 'strict'
     MODE_HUNT = 'hunt'
-    MODE_LEVENSHTEIN = 'levenshtein'
 
     DEL_PENALTY = 1
     INS_PENALTY = 1
     SUB_PENALTY = 1
 
-    def __init__(self, mode=None, differ_class: Differ = None):
+    def __init__(self, mode=None, differ_class: Union[str, type_differ, None] = None):
         self._mode = mode
-        if mode == self.MODE_LEVENSHTEIN:
-            return
 
         if differ_class is None:
             differ_class = RatcliffObershelp
         self._differ_class = differ_class
+
         if mode == self.MODE_HUNT:
             self.DEL_PENALTY = self.INS_PENALTY = .5
 
-    def compare(self, ref: Schema, hyp: Schema) -> float:
-        if self._mode == self.MODE_LEVENSHTEIN:
-            ref_list = [i['item'] for i in ref]
-            hyp_list = [i['item'] for i in hyp]
-            total_ref = len(ref_list)
-            if total_ref == 0:
-                return 0 if len(hyp_list) == 0 else 1
-            return editdistance.eval(ref_list, hyp_list) / total_ref
-
+    def compare(self, ref: type_schema, hyp: type_schema) -> float:
         diffs = get_differ(ref, hyp, differ_class=self._differ_class)
 
-        counts = get_opcode_counts(diffs.get_opcodes())
+        try:
+            counts = diffs.get_opcode_counts()
+        except NotImplementedError:  # this differ cannot produce opcode counts
+            return diffs.get_error_rate()  # NOTE: the mode penalties are not applied here
 
         changes = counts.replace * self.SUB_PENALTY + \
             counts.delete * self.DEL_PENALTY + \
             counts.insert * self.INS_PENALTY
 
         total = counts.equal + counts.replace + counts.delete
         if total == 0:
             return 1 if changes else 0
         return changes / total
 
 
 class CER(Metric):
@@ -173,40 +142,36 @@ class CER(Metric):
     will first be split into words, ['aa','bb','cc'], and
     then merged into a final string for evaluation: 'aabbcc'.
 
-    :param mode: 'levenshtein' (default).
-    :param differ_class: For future use.
+    :param differ_class: see :py:mod:`benchmarkstt.diff.core`
     """
 
-    # CER modes
-    MODE_LEVENSHTEIN = 'levenshtein'
+    def __init__(self, differ_class: Union[str, type_differ, None] = None):
+        self._differ_class = Levenshtein if differ_class is None else differ_class
 
-    def __init__(self, mode=None, differ_class=None):
-        self._mode = mode
-
-    def compare(self, ref: Schema, hyp: Schema):
-        ref_str = ''.join([i['item'] for i in ref])
-        hyp_str = ''.join([i['item'] for i in hyp])
-        total_ref = len(ref_str)
+    def compare(self, ref: type_schema, hyp: type_schema):
+        ref_str = ''.join(traversible(ref))
+        hyp_str = ''.join(traversible(hyp))
 
-        if total_ref == 0:
+        if len(ref_str) == 0:
             return 0 if len(hyp_str) == 0 else 1
 
-        return editdistance.eval(ref_str, hyp_str) / total_ref
+        diffs = get_differ(ref_str, hyp_str, differ_class=self._differ_class)
+        return diffs.get_error_rate()
 
 
 class DiffCounts(Metric):
     """
     Get the amount of differences between reference and hypothesis
+
+    :param differ_class: see :py:mod:`benchmarkstt.diff.core`
     """
 
-    def __init__(self, differ_class: Differ = None):
-        if differ_class is None:
-            differ_class = RatcliffObershelp
+    def __init__(self, differ_class: Union[str, type_differ, None] = None):
         self._differ_class = differ_class
 
-    def compare(self, ref: Schema, hyp: Schema) -> OpcodeCounts:
+    def compare(self, ref: type_schema, hyp: type_schema) -> OpcodeCounts:
         diffs = get_differ(ref, hyp, differ_class=self._differ_class)
-        return get_opcode_counts(diffs.get_opcodes())
+        return diffs.get_opcode_counts()
 
 
 class BEER(Metric):