Skip to content

Commit 9440fa2

Browse files
committed
Fix #333: Make DMRX more compatible with the DTD
1 parent ce1d935 commit 9440fa2

File tree

3 files changed

+33
-6
lines changed

3 files changed

+33
-6
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@
1010

1111
* SimpleDMRS no longer requires `index` or `top` to be specified when
1212
decoding ([#334])
13+
* DMRX codec now conforms to the DTD regarding the upper/lower case of
14+
attributes ([#333])
1315

1416
### Changed
1517

@@ -1514,4 +1516,5 @@ information about changes, except for
15141516
[#323]: https://github.com/delph-in/pydelphin/issues/323
15151517
[#324]: https://github.com/delph-in/pydelphin/issues/324
15161518
[#331]: https://github.com/delph-in/pydelphin/issues/331
1519+
[#333]: https://github.com/delph-in/pydelphin/issues/333
15171520
[#334]: https://github.com/delph-in/pydelphin/issues/334

delphin/codecs/dmrx.py

Lines changed: 13 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -183,7 +183,7 @@ def _decode_node(elem):
183183
sortinfo = _decode_sortinfo(elem.find('sortinfo'))
184184
type = None
185185
if CVARSORT in sortinfo:
186-
type = sortinfo.pop(CVARSORT)
186+
type = sortinfo.pop(CVARSORT).lower()
187187
return Node(id=int(elem.get('nodeid')),
188188
predicate=_decode_pred(elem.find('*[1]')),
189189
type=type,
@@ -202,11 +202,12 @@ def _decode_pred(elem):
202202
# sense CDATA #IMPLIED >
203203
# <!ELEMENT gpred (#PCDATA)>
204204
if elem.tag == 'gpred':
205-
return elem.text
205+
pred = elem.text
206206
elif elem.tag == 'realpred':
207-
return predicate.create(elem.get('lemma'),
207+
pred = predicate.create(elem.get('lemma'),
208208
elem.get('pos'),
209209
elem.get('sense'))
210+
return predicate.normalize(pred)
210211

211212

212213
def _decode_sortinfo(elem):
@@ -224,7 +225,8 @@ def _decode_sortinfo(elem):
224225
# perf (plus|minus|u) #IMPLIED
225226
# ind (plus|minus|u) #IMPLIED >
226227
# note: Just accept any properties, since these are ERG-specific
227-
return elem.attrib
228+
return {(key.upper() if key != CVARSORT else key): val.lower()
229+
for key, val in elem.attrib.items()}
228230

229231

230232
def _decode_link(elem):
@@ -295,12 +297,17 @@ def _encode_node(node, properties, lnk):
295297
attributes['carg'] = node.carg
296298
e = etree.Element('node', attrib=attributes)
297299
e.append(_encode_pred(node.predicate))
298-
e.append(etree.Element('sortinfo',
299-
attrib=node.sortinfo if properties else {}))
300+
if properties:
301+
sortinfo = {key.lower(): val.lower()
302+
for key, val in node.sortinfo.items()}
303+
else:
304+
sortinfo = {}
305+
e.append(etree.Element('sortinfo', attrib=sortinfo))
300306
return e
301307

302308

303309
def _encode_pred(pred):
310+
pred = predicate.normalize(pred)
304311
if predicate.is_surface(pred):
305312
lemma, pos, sense = predicate.split(pred)
306313
attributes = {'lemma': lemma, 'pos': pos}

tests/codecs/dmrx_test.py

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,9 +26,26 @@ def test_round_trip(empty_dmrs, it_rains_dmrs):
2626
assert dmrx.decode(dmrx.encode(it_rains_dmrs)) == it_rains_dmrs
2727
assert dmrx.decode(dmrx.encode(it_rains_dmrs)) == it_rains_dmrs
2828

29+
2930
def test_no_properties(it_rains_dmrs):
3031
d = dmrx.decode(dmrx.encode(it_rains_dmrs))
3132
assert d.nodes[0].properties == {'TENSE': 'pres'}
3233
d = dmrx.decode(dmrx.encode(it_rains_dmrs, properties=False))
3334
assert d.nodes[0].properties == {}
3435

36+
37+
def test_case_sensitivity_issue_333(it_rains_dmrs):
38+
# https://github.com/delph-in/pydelphin/issues/333
39+
s = dmrx.encode(it_rains_dmrs)
40+
assert 'tense="pres"' in s
41+
d = dmrx.decode(
42+
'<dmrs-list>'
43+
'<dmrs cfrom="-1" cto="-1" top="10" index="10">'
44+
'<node nodeid="10" cfrom="-1" cto="-1">'
45+
'<realpred lemma="RAIN" pos="v" sense="1" />'
46+
'<sortinfo tense="PRES" cvarsort="E" />'
47+
'</node></dmrs></dmrs-list>'
48+
)
49+
assert d.nodes[0].predicate == '_rain_v_1'
50+
assert d.nodes[0].type == 'e'
51+
assert d.nodes[0].properties == {'TENSE': 'pres'}

0 commit comments

Comments
 (0)