Skip to content

Commit c23c4a4

Browse files
committed
Don't support opcodes for Levenshtein (opcode generation is missing or incorrectly implemented in all tested libraries)
1 parent ab2217f commit c23c4a4

File tree

3 files changed

+23
-17
lines changed

3 files changed

+23
-17
lines changed

src/benchmarkstt/diff/core.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,15 @@ def __init__(self, a, b, **kwargs):
5454
self._matcher = edit_distance.SequenceMatcher(**self._kwargs)
5555

5656
def get_opcodes(self):
57-
return self.simplify_opcodes(self._matcher.get_opcodes())
57+
raise NotImplementedError("not supported by %r" % (self,))
5858

5959
def get_error_rate(self):
6060
a = self._kwargs['a']
6161
b = self._kwargs['b']
62-
return editdistance.eval(a, b) / len(a)
62+
len_a = len(a)
63+
if len_a == 0:
64+
return 0 if len(b) == 0 else 1
65+
return editdistance.eval(a, b) / len_a
6366

6467
@staticmethod
6568
def simplify_opcodes(opcodes):

src/benchmarkstt/metrics/core.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -100,16 +100,19 @@ def __init__(self, mode=None, differ_class: Union[str, type_differ, None] = None
100100
def compare(self, ref: type_schema, hyp: type_schema) -> float:
101101
diffs = get_differ(ref, hyp, differ_class=self._differ_class)
102102

103-
counts = diffs.get_opcode_counts()
104-
105-
changes = counts.replace * self.SUB_PENALTY + \
106-
counts.delete * self.DEL_PENALTY + \
107-
counts.insert * self.INS_PENALTY
108-
109-
total = counts.equal + counts.replace + counts.delete
110-
if total == 0:
111-
return 1 if changes else 0
112-
return changes / total
103+
try:
104+
counts = diffs.get_opcode_counts()
105+
106+
changes = counts.replace * self.SUB_PENALTY + \
107+
counts.delete * self.DEL_PENALTY + \
108+
counts.insert * self.INS_PENALTY
109+
110+
total = counts.equal + counts.replace + counts.delete
111+
if total == 0:
112+
return 1 if changes else 0
113+
return changes / total
114+
except NotImplementedError:
115+
return diffs.get_error_rate()
113116

114117

115118
class CER(Metric):

tests/benchmarkstt/test_diff.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@ def clean_opcodes(opcodes):
2222
return list(map(clean_opcode, opcodes))
2323

2424

25-
def test_simple_levenshtein_ratcliff_similarity():
26-
a = list('012345')
27-
b = list('023x45')
28-
assert(clean_opcodes(Levenshtein(a, b).get_opcodes()) ==
29-
clean_opcodes(RatcliffObershelp(a, b).get_opcodes()))
25+
# def test_simple_levenshtein_ratcliff_similarity():
26+
# a = list('012345')
27+
# b = list('023x45')
28+
# assert(clean_opcodes(Levenshtein(a, b).get_opcodes()) ==
29+
# clean_opcodes(RatcliffObershelp(a, b).get_opcodes()))
3030

3131

3232
@differs_decorator

0 commit comments

Comments
 (0)