3
3
"""
4
4
5
5
from pathlib import Path
6
+ from typing import Optional
6
7
7
8
from delphin .util import Lexer
8
9
from delphin import predicate
@@ -184,7 +185,7 @@ def _decode_mrs(lexer):
184
185
variables = {}
185
186
lexer .expect_type (LBRACK )
186
187
lnk = _decode_lnk (lexer )
187
- surface = lexer .accept_type (DQSTRING )
188
+ surface = _decode_dqstring ( lexer .accept_type (DQSTRING ) )
188
189
feature = lexer .accept_type (FEATURE )
189
190
while feature is not None :
190
191
feature = feature .upper ()
@@ -223,6 +224,12 @@ def _decode_lnk(lexer):
223
224
return lnk
224
225
225
226
227
def _decode_dqstring(dqstring: Optional[str]) -> Optional[str]:
    """Return *dqstring* with its backslash escapes resolved.

    ``None`` is passed through unchanged, so the result of an optional
    lexer match can be handed to this function without a separate check.

    Args:
        dqstring: the string to unescape, or ``None``.

    Returns:
        The unescaped string, or ``None`` if *dqstring* was ``None``.
    """
    if dqstring is None:
        return None
    return _unescape(dqstring)
231
+
232
+
226
233
def _decode_variable (lexer , variables ):
227
234
var = lexer .expect_type (SYMBOL ).lower ()
228
235
if var not in variables :
@@ -243,17 +250,16 @@ def _decode_rel(lexer, variables):
243
250
args = {}
244
251
surface = None
245
252
lexer .expect_type (LBRACK )
246
- pred = predicate .normalize (
247
- lexer .choice_type (DQSTRING , SQSYMBOL , PREDICATE , SYMBOL )[1 ])
253
+ pred = _decode_predicate (lexer )
248
254
lnk = _decode_lnk (lexer )
249
- surface = lexer .accept_type (DQSTRING )
255
+ surface = _decode_dqstring ( lexer .accept_type (DQSTRING ) )
250
256
_ , label = lexer .expect ((FEATURE , 'LBL' ), (SYMBOL , None ))
251
257
# any remaining are arguments or a constant
252
258
role = lexer .accept_type (FEATURE )
253
259
while role is not None :
254
260
role = role .upper ()
255
261
if role == 'CARG' :
256
- value = lexer .expect_type (DQSTRING )
262
+ value = _decode_dqstring ( lexer .expect_type (DQSTRING ) )
257
263
else :
258
264
value = _decode_variable (lexer , variables )
259
265
args [role ] = value
@@ -267,6 +273,15 @@ def _decode_rel(lexer, variables):
267
273
base = None )
268
274
269
275
276
def _decode_predicate(lexer) -> str:
    """Read a predicate from *lexer* and return its normalized form.

    A DQSTRING token is tried first so that its backslash escapes can be
    resolved. If the next token is not a DQSTRING, it must be one of
    SQSYMBOL, PREDICATE, or SYMBOL, and is used as-is.

    Args:
        lexer: the lexer positioned at the predicate token.

    Returns:
        The result of ``predicate.normalize`` on the predicate string.
    """
    predstring = lexer.accept_type(DQSTRING)
    if predstring is None:
        # Index 1 of the choice_type() result is taken as the token text
        # (presumably a (type, value) pair -- confirm against the lexer).
        predstring = lexer.choice_type(SQSYMBOL, PREDICATE, SYMBOL)[1]
    else:
        predstring = _decode_dqstring(predstring)
    return predicate.normalize(predstring)
283
+
284
+
270
285
def _decode_cons (lexer , cls , variables ):
271
286
lhs = _decode_variable (lexer , variables )
272
287
relation = lexer .expect_type (SYMBOL ).lower ()
@@ -312,7 +327,7 @@ def _encode_surface_info(m, lnk):
312
327
if m .lnk :
313
328
tokens .append (str (m .lnk ))
314
329
if m .surface is not None :
315
- tokens .append ('"{}"' .format (m .surface ))
330
+ tokens .append ('"{}"' .format (_escape ( m .surface ) ))
316
331
return tokens
317
332
318
333
@@ -351,12 +366,12 @@ def _encode_rels(rels, varprops, lnk, indent):
351
366
pred += str (rel .lnk )
352
367
reltoks = ['[' , pred ]
353
368
if lnk and rel .surface is not None :
354
- reltoks .append ('"{}"' .format (rel .surface ))
369
+ reltoks .append ('"{}"' .format (_escape ( rel .surface ) ))
355
370
reltoks .extend (('LBL:' , rel .label ))
356
371
for role in sorted (rel .args , key = role_priority ):
357
372
arg = rel .args [role ]
358
373
if role == CONSTANT_ROLE :
359
- arg = '"{}"' .format (arg )
374
+ arg = '"{}"' .format (_escape ( arg ) )
360
375
else :
361
376
arg = _encode_variable (arg , varprops )
362
377
reltoks .extend ((role + ':' , arg ))
@@ -383,3 +398,37 @@ def _encode_icons(icons, varprops):
383
398
if tokens :
384
399
tokens = ['ICONS: <' ] + [' ' .join (tokens )] + ['>' ]
385
400
return tokens
401
+
402
+
403
+ # Character Escaping
404
+
405
+
406
# Characters that must be backslash-escaped inside a double-quoted string,
# mapped to their escaped form.
_ESCAPES = {
    '\\': '\\\\',
    '"': '\\"',
}


# Inverse of _ESCAPES: two-character escape sequence -> literal character.
_UNESCAPES = {
    '\\\\': '\\',
    '\\"': '"',
}


def _escape(s: str) -> str:
    """Return *s* with backslashes and double quotes backslash-escaped.

    Characters without an entry in ``_ESCAPES`` are copied unchanged.
    """
    return "".join(_ESCAPES.get(c, c) for c in s)
420
+
421
+
422
def _unescape(s: str) -> str:
    """Return *s* with backslash escapes removed.

    Every backslash that is followed by another character is dropped and
    the following character is kept verbatim, whatever it is. A backslash
    that is the last character of *s* has nothing to escape and is kept.

    Args:
        s: the escaped string.

    Returns:
        The unescaped string; *s* itself if it contains no backslash.
    """
    if '\\' not in s:
        return s  # nothing to unescape (also covers the empty string)
    chars = []
    i, end = 0, len(s)
    while i < end:
        if s[i] == '\\' and i + 1 < end:
            i += 1  # skip the backslash; the next character is literal
        chars.append(s[i])
        i += 1
    return "".join(chars)
0 commit comments