Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/ext/autoclassmembersdiagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class MagicTraits(object):
"__getattr__": "attributes",
"__getattribute__": "attributes",
"__len__": "len",
"__hash__": "hashable",
"__subclasshook__": False,
"__repr__": False,
"__str__": False,
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def filter_requirements(line):
'jsonrpcserver>=4.0.1',
'gunicorn>=19.9.0',
'docutils>=0.14',
'edit_distance>=1.0.4',
'editdistance>=0.5.3',
'Unidecode>=1.1.2',
],
Expand Down
60 changes: 57 additions & 3 deletions src/benchmarkstt/diff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,39 @@
Responsible for calculating differences.
"""

from abc import ABC, abstractmethod
from abc import ABC, ABCMeta, abstractmethod
from benchmarkstt.factory import CoreFactory
from collections import namedtuple


class Differ(ABC):
OpcodeCounts = namedtuple('OpcodeCounts',
                          ('equal', 'replace', 'insert', 'delete'))


def get_opcode_counts(opcodes) -> OpcodeCounts:
    """
    Tally the number of equal, replaced, inserted and deleted items
    described by a sequence of opcodes.

    A 'replace' opcode covering ranges of unequal length is split up: the
    overlapping part counts as replacements, the surplus on the ``b`` side
    as insertions and the surplus on the ``a`` side as deletions.
    Opcodes with any other tag are ignored.

    :param opcodes: iterable of ``(tag, alo, ahi, blo, bhi)`` entries
    :return: OpcodeCounts
    """
    equal = replace = insert = delete = 0

    for tag, a_start, a_end, b_start, b_end in opcodes:
        len_a = a_end - a_start
        len_b = b_end - b_start

        if tag == 'equal':
            equal += len_a
        elif tag == 'insert':
            insert += len_b
        elif tag == 'delete':
            delete += len_a
        elif tag == 'replace':
            # Only the part both sides have in common is a real replacement,
            # whatever is left over on either side is an insert or a delete.
            paired = min(len_a, len_b)
            replace += paired
            insert += len_b - paired
            delete += len_a - paired

    return OpcodeCounts(equal, replace, insert, delete)


class DifferInterface(ABC):
@abstractmethod
def __init__(self, a, b):
"""
Expand All @@ -32,5 +60,31 @@ def get_opcodes(self):
"""
raise NotImplementedError()

@abstractmethod
def get_opcode_counts(self):
raise NotImplementedError()

@abstractmethod
def get_error_rate(self):
raise NotImplementedError()


class Differ(DifferInterface, metaclass=ABCMeta):
    """
    Provides pre-made (probably sub-optimal) implementations of
    get_opcode_counts() and get_error_rate()
    """

    def get_opcode_counts(self):
        """
        Count the equal, replaced, inserted and deleted items.

        :return: result of the module-level ``get_opcode_counts`` applied
                 to ``self.get_opcodes()``
        """
        return get_opcode_counts(self.get_opcodes())

    def get_error_rate(self):
        """
        Compute the error rate from the opcode counts.

        The rate is the number of changes (replacements, deletions and
        insertions) divided by the number of items on the reference side
        (equal, replaced and deleted items).

        :return: ``changes / total``; when the reference side is empty,
                 ``0`` if there are no changes either, otherwise ``1``
        """
        counts = self.get_opcode_counts()

        changes = counts.replace + counts.delete + counts.insert
        total = counts.equal + counts.replace + counts.delete

        if total == 0:
            # Nothing on the reference side: dividing would raise
            # ZeroDivisionError. Only insertions can remain at this point.
            return 1 if changes else 0

        return changes / total


factory = CoreFactory(Differ, False)
factory = CoreFactory(DifferInterface, False)
59 changes: 52 additions & 7 deletions src/benchmarkstt/diff/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from difflib import SequenceMatcher
from benchmarkstt.diff import Differ
import edit_distance
import editdistance


class RatcliffObershelp(Differ):
Expand All @@ -12,9 +14,7 @@ class RatcliffObershelp(Differ):

From difflib.SequenceMatcher_ (Copyright_ 2001-2020, Python Software Foundation.)

SequenceMatcher is a flexible class for comparing pairs of sequences of
any type, so long as the sequence elements are hashable. The basic
algorithm predates, and is a little fancier than, an algorithm
The basic algorithm predates, and is a little fancier than, an algorithm
published in the late 1980's by Ratcliff and Obershelp under the
hyperbolic name "gestalt pattern matching". The basic idea is to find
the longest contiguous matching subsequence that contains no "junk"
Expand All @@ -29,11 +29,56 @@ class RatcliffObershelp(Differ):
"""

def __init__(self, a, b, **kwargs):
if 'autojunk' not in kwargs:
kwargs['autojunk'] = False
kwargs['a'] = a
kwargs['b'] = b
self.matcher = SequenceMatcher(**kwargs)
self._kwargs = kwargs
self._matcher = SequenceMatcher(**self._kwargs)

def get_opcodes(self):
return self.matcher.get_opcodes()
return self._matcher.get_opcodes()


class Levenshtein(Differ):
    """
    Levenshtein_ distance is the minimum edit distance.

    .. _Levenshtein: https://en.wikipedia.org/wiki/Levenshtein_distance
    """

    def __init__(self, a, b, **kwargs):
        """
        :param a: reference sequence
        :param b: hypothesis sequence
        :param kwargs: extra keyword arguments passed on to
                       ``edit_distance.SequenceMatcher``; ``action_function``
                       defaults to ``edit_distance.highest_match_action``
        """
        kwargs['a'] = a
        kwargs['b'] = b
        kwargs.setdefault('action_function', edit_distance.highest_match_action)
        # Kept around because get_error_rate() needs the raw sequences.
        self._kwargs = kwargs
        self._matcher = edit_distance.SequenceMatcher(**self._kwargs)

    def get_opcodes(self):
        """
        Not supported by this differ.

        :raises NotImplementedError: always
        """
        raise NotImplementedError("not supported by %r" % (self,))

    def get_error_rate(self):
        """
        Edit distance between ``a`` and ``b`` relative to the length of ``a``.

        :return: ``editdistance.eval(a, b) / len(a)``; when ``a`` is empty,
                 ``0`` if ``b`` is empty as well, otherwise ``1``
        """
        a = self._kwargs['a']
        b = self._kwargs['b']
        len_a = len(a)
        if len_a == 0:
            return 0 if len(b) == 0 else 1
        return editdistance.eval(a, b) / len_a

    @staticmethod
    def simplify_opcodes(opcodes):
        """
        Merge runs of consecutive opcodes that share the same tag.

        For each run the first entry is kept and its end positions
        (indexes 2 and 4) are replaced by those of the last entry in the run.

        :param opcodes: iterable of ``(tag, alo, ahi, blo, bhi)`` entries,
                        either tuples or lists
        :return: list of tuples, one per run
        """
        new_codes = []
        prev = None
        for cur in opcodes:
            if prev is None:
                # Work on a copy: the entry may be an immutable tuple, and
                # the caller's opcodes should not be modified.
                prev = list(cur)
            elif cur[0] == prev[0]:
                prev[2] = cur[2]
                prev[4] = cur[4]
            else:
                new_codes.append(tuple(prev))
                prev = list(cur)

        if prev is not None:
            new_codes.append(tuple(prev))

        return new_codes
125 changes: 45 additions & 80 deletions src/benchmarkstt/metrics/core.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from benchmarkstt.schema import Schema
import logging
import json
from benchmarkstt.diff import Differ
from benchmarkstt.diff.core import RatcliffObershelp
from collections import namedtuple
from typing import Union
from benchmarkstt.diff import DifferInterface, factory as differ_factory
from benchmarkstt.diff.core import RatcliffObershelp, Levenshtein
from benchmarkstt.diff.formatter import format_diff
from benchmarkstt.metrics import Metric
from collections import namedtuple
Expand All @@ -13,57 +14,39 @@
OpcodeCounts = namedtuple('OpcodeCounts',
('equal', 'replace', 'insert', 'delete'))

type_schema = Union[Schema, list]
type_differ = DifferInterface


def traversible(schema, key=None):
if key is None:
key = 'item'
return [word[key] for word in schema]


def get_opcode_counts(opcodes) -> OpcodeCounts:
counts = OpcodeCounts(0, 0, 0, 0)._asdict()
for tag, alo, ahi, blo, bhi in opcodes:
if tag == 'equal':
counts[tag] += ahi - alo
elif tag == 'insert':
counts[tag] += bhi - blo
elif tag == 'delete':
counts[tag] += ahi - alo
elif tag == 'replace':
ca = ahi - alo
cb = bhi - blo
if ca < cb:
counts['insert'] += cb - ca
counts['replace'] += ca
elif ca > cb:
counts['delete'] += ca - cb
counts['replace'] += cb
else:
counts[tag] += ahi - alo
return OpcodeCounts(counts['equal'], counts['replace'], counts['insert'], counts['delete'])
return [item if type(item) is str else item[key] for item in schema]


def get_differ(a, b, differ_class: Differ):
if differ_class is None:
# differ_class = HuntMcIlroy
def get_differ(a, b, differ_class: type_differ):
if differ_class is None or differ_class == '':
differ_class = RatcliffObershelp
elif type(differ_class) is str:
differ_class = differ_factory[differ_class]
return differ_class(traversible(a), traversible(b))


class WordDiffs(Metric):
"""
Present differences on a per-word basis

:param differ_class: see :py:mod:`benchmarkstt.diff.core`
:param dialect: Presentation format. Default is 'ansi'.
:example differ_class: 'levenshtein'
:example dialect: 'html'
:param differ_class: For future use.
"""

def __init__(self, dialect=None, differ_class: Differ = None):
def __init__(self, differ_class: type_differ = None, dialect: str = None):
self._differ_class = differ_class
self._dialect = dialect

def compare(self, ref: Schema, hyp: Schema):
def compare(self, ref: type_schema, hyp: type_schema):
differ = get_differ(ref, hyp, differ_class=self._differ_class)
a = traversible(ref)
b = traversible(hyp)
Expand Down Expand Up @@ -92,58 +75,44 @@ class WER(Metric):

See https://docs.python.org/3/library/difflib.html

[Mode: 'levenshtein'] In the context of WER, Levenshtein
distance is the minimum edit distance computed at the
word level. This implementation uses the Editdistance
c++ implementation by Hiroyuki Tanaka:
https://github.com/aflc/editdistance. See:
https://en.wikipedia.org/wiki/Levenshtein_distance

:param mode: 'strict' (default), 'hunt' or 'levenshtein'.
:param differ_class: For future use.
:param differ_class: see :py:mod:`benchmarkstt.diff.core`
"""

# WER modes
MODE_STRICT = 'strict'
MODE_HUNT = 'hunt'
MODE_LEVENSHTEIN = 'levenshtein'

DEL_PENALTY = 1
INS_PENALTY = 1
SUB_PENALTY = 1

def __init__(self, mode=None, differ_class: Differ = None):
def __init__(self, mode=None, differ_class: Union[str, type_differ, None] = None):
self._mode = mode
if mode == self.MODE_LEVENSHTEIN:
return

if differ_class is None:
differ_class = RatcliffObershelp
self._differ_class = differ_class

if mode == self.MODE_HUNT:
self.DEL_PENALTY = self.INS_PENALTY = .5

def compare(self, ref: Schema, hyp: Schema) -> float:
if self._mode == self.MODE_LEVENSHTEIN:
ref_list = [i['item'] for i in ref]
hyp_list = [i['item'] for i in hyp]
total_ref = len(ref_list)
if total_ref == 0:
return 0 if len(hyp_list) == 0 else 1
return editdistance.eval(ref_list, hyp_list) / total_ref

def compare(self, ref: type_schema, hyp: type_schema) -> float:
diffs = get_differ(ref, hyp, differ_class=self._differ_class)

counts = get_opcode_counts(diffs.get_opcodes())
try:
counts = diffs.get_opcode_counts()

changes = counts.replace * self.SUB_PENALTY + \
counts.delete * self.DEL_PENALTY + \
counts.insert * self.INS_PENALTY
changes = counts.replace * self.SUB_PENALTY + \
counts.delete * self.DEL_PENALTY + \
counts.insert * self.INS_PENALTY

total = counts.equal + counts.replace + counts.delete
if total == 0:
return 1 if changes else 0
return changes / total
total = counts.equal + counts.replace + counts.delete
if total == 0:
return 1 if changes else 0
return changes / total
except NotImplementedError:
return diffs.get_error_rate()


class CER(Metric):
Expand Down Expand Up @@ -173,40 +142,36 @@ class CER(Metric):
will first be split into words, ['aa','bb','cc'], and
then merged into a final string for evaluation: 'aabbcc'.

:param mode: 'levenshtein' (default).
:param differ_class: For future use.
:param differ_class: see :py:mod:`benchmarkstt.diff.core`
"""

# CER modes
MODE_LEVENSHTEIN = 'levenshtein'
def __init__(self, differ_class: Union[str, type_differ, None] = None):
self._differ_class = Levenshtein if differ_class is None else differ_class

def __init__(self, mode=None, differ_class=None):
self._mode = mode

def compare(self, ref: Schema, hyp: Schema):
ref_str = ''.join([i['item'] for i in ref])
hyp_str = ''.join([i['item'] for i in hyp])
total_ref = len(ref_str)
def compare(self, ref: type_schema, hyp: type_schema):
ref_str = ''.join(traversible(ref))
hyp_str = ''.join(traversible(hyp))

if total_ref == 0:
if len(ref_str) == 0:
return 0 if len(hyp_str) == 0 else 1

return editdistance.eval(ref_str, hyp_str) / total_ref
diffs = get_differ(ref_str, hyp_str, differ_class=self._differ_class)
return diffs.get_error_rate()


class DiffCounts(Metric):
"""
Get the amount of differences between reference and hypothesis

:param differ_class: see :py:mod:`benchmarkstt.diff.core`
"""

def __init__(self, differ_class: Differ = None):
if differ_class is None:
differ_class = RatcliffObershelp
def __init__(self, differ_class: Union[str, type_differ, None] = None):
self._differ_class = differ_class

def compare(self, ref: Schema, hyp: Schema) -> OpcodeCounts:
def compare(self, ref: type_schema, hyp: type_schema) -> OpcodeCounts:
diffs = get_differ(ref, hyp, differ_class=self._differ_class)
return get_opcode_counts(diffs.get_opcodes())
return diffs.get_opcode_counts()


class BEER(Metric):
Expand Down
Loading