Skip to content

Commit 85d7584

Browse files
committed
Merge branch 'develop' for v0.9.2
2 parents 363fa0f + 7bd2cd6 commit 85d7584

28 files changed

+1429
-368
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ dist/
77
PyDelphin.egg-info/
88
docs/_build/
99
docs/_templates/
10+
docs/env/
1011
tmp/
1112
# Ignore the compiled python files
1213
*.pyc

CHANGELOG.md

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,58 @@ these changes are prefixed with "**BREAKING**"
55

66
## [Unreleased][unreleased]
77

8+
## [v0.9.2][]
9+
10+
### Python Versions
11+
12+
* Added Python 3.7 support
13+
14+
### Added
15+
16+
* `delphin.interfaces.ace.AceProcessError` for unrecoverable ACE crashes (#181)
17+
* ACE command-line options are allowed with `delphin process --options=...`
18+
and in `delphin.commands.process(..., options=...)` (#180)
19+
* `delphin.itsdb.Record.from_dict()`
20+
* `delphin.itsdb.Table` (#186)
21+
- `write()`
22+
- `attach()`
23+
- `detach()`
24+
- `commit()`
25+
- `is_attached()`
26+
- `list_changes()`
27+
- Various methods to replace `list` functionality: `append()`, `extend()`,
28+
`__len__()`, `__getitem__()`, `__setitem__()`, and `__iter__()`
29+
* `delphin.itsdb.TestSuite.process()` the `gzip` and `buffer_size` parameters
30+
* `-z` / `--gzip` option to the `delphin process` command
31+
32+
### Fixed
33+
34+
* `delphin.mrs.eds` parsing of predicates and empty property lists (#203)
35+
* `delphin.commands.convert` wrap the inner step of conversion in a try-except
36+
block to more gracefully handle crashes on a single input. (#200)
37+
* `delphin.itsdb.TestSuite.write()` re-enable the `append` parameter
38+
39+
### Removed
40+
41+
* **BREAKING** `name` parameter on `delphin.itsdb.Table`. The `name` attribute
42+
is still available as it is taken from the table's schema.
43+
44+
### Changed
45+
46+
* **BREAKING** `delphin.itsdb.Table` is no longer a subtype of `list`, meaning
47+
that some list-like behavior is gone. Most relevant functionality is
48+
recreated (e.g., `append()`, `extend()`, `__getitem__()`, `__len__()`, etc.)
49+
* **BREAKING** `delphin.itsdb.Record` can no longer be instantiated with a dict
50+
with column data; use `Record.from_dict()`
51+
* **BREAKING** `delphin.itsdb.Record` stores data as strings, but retrieves it
52+
by default as cast types. Use `Record.get(..., cast=False)` to get the raw
53+
string value.
54+
55+
### Deprecated
56+
57+
* `delphin.mrs.convert()`; use `delphin.commands.convert()` instead
58+
59+
860
## [v0.9.1][]
961

1062
### Fixed

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ For bug requests, please provide the following, if possible:
2323
```python
2424
>>> from delphin.__about__ import __version__
2525
>>> __version__
26-
'0.9.0'
26+
'0.9.2'
2727
```
2828
* Python version (e.g., 2.7 or 3.4)
2929

delphin/__about__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# the warehouse project:
44
# https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
55

6-
__version__ = '0.9.1'
6+
__version__ = '0.9.2'
77
__version_info__ = __version__.replace('.', ' ').replace('-', ' ').split()
88

99
__title__ = 'PyDelphin'
@@ -16,4 +16,4 @@
1616
__maintainer__ = 'Michael Wayne Goodman and Angie McMillan-Major'
1717

1818
__license__ = 'MIT'
19-
__copyright__ = '2013--2018 %s <%s> and contributors' % (__author__, __email__)
19+
__copyright__ = '2013--2019 %s <%s> and contributors' % (__author__, __email__)

delphin/commands.py

Lines changed: 79 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,16 @@
1313
import io
1414
import json
1515
from functools import partial
16+
import logging
1617

1718
from delphin import itsdb, tsql
1819
from delphin.mrs import xmrs
1920
from delphin.util import safe_int, SExpr
21+
from delphin.exceptions import PyDelphinException
2022

2123

24+
logging.basicConfig()
25+
2226
###############################################################################
2327
### CONVERT ###################################################################
2428

@@ -91,7 +95,36 @@ def convert(path, source_fmt, target_fmt, select='result:mrs',
9195
kwargs['predicate_modifiers'] = predicate_modifiers
9296
kwargs['properties'] = properties
9397

94-
return dumps(xs, **kwargs)
98+
# this is not a great way to improve robustness when converting
99+
# many representations, but it'll do until v1.0.0. Also, it only
100+
# improves robustness on the output, not the input.
101+
# Note that all the code below is to replace the following:
102+
# return dumps(xs, **kwargs)
103+
head, joiner, tail = _get_output_details(target_fmt)
104+
parts = []
105+
if pretty_print:
106+
joiner = joiner.strip() + '\n'
107+
def _trim(s):
108+
if head and s.startswith(head):
109+
s = s[len(head):].lstrip('\n')
110+
if tail and s.endswith(tail):
111+
s = s[:-len(tail)].rstrip('\n')
112+
return s
113+
for x in xs:
114+
try:
115+
s = dumps([x], **kwargs)
116+
except (PyDelphinException, KeyError, IndexError):
117+
logging.exception('could not convert representation')
118+
else:
119+
s = _trim(s)
120+
parts.append(s)
121+
# set these after so head and tail are used correctly in _trim
122+
if pretty_print:
123+
if head:
124+
head += '\n'
125+
if tail:
126+
tail = '\n' + tail
127+
return head + joiner.join(parts) + tail
95128

96129

97130
def _get_codec(codec, load=True):
@@ -146,6 +179,21 @@ def _get_codec(codec, load=True):
146179
raise ValueError('invalid target format: ' + codec)
147180

148181

182+
def _get_output_details(codec):
183+
if codec == 'mrx':
184+
return ('<mrs-list', '', '</mrs-list>')
185+
186+
elif codec == 'dmrx':
187+
from delphin.mrs import dmrx
188+
return ('<dmrs-list>', '', '</dmrs-list>')
189+
190+
elif codec in ('mrs-json', 'dmrs-json', 'eds-json'):
191+
return ('[', ',', ']')
192+
193+
else:
194+
return ('', ' ', '')
195+
196+
149197
# simulate json codecs for MRS and DMRS
150198

151199
class _MRS_JSON(object):
@@ -329,11 +377,13 @@ def mkprof(destination, source=None, relations=None, where=None,
329377
dts = itsdb.TestSuite(path=destination, relations=relations)
330378
# input is sentences on stdin
331379
if source is None:
332-
dts.write({'item': _lines_to_rows(sys.stdin)}, gzip=gzip)
380+
dts.write({'item': _lines_to_rows(sys.stdin, dts.relations)},
381+
gzip=gzip)
333382
# input is sentence file
334383
elif os.path.isfile(source):
335384
with open(source) as fh:
336-
dts.write({'item': _lines_to_rows(fh)}, gzip=gzip)
385+
dts.write({'item': _lines_to_rows(fh, dts.relations)},
386+
gzip=gzip)
337387
# input is source testsuite
338388
elif os.path.isdir(source):
339389
sts = itsdb.TestSuite(source)
@@ -372,20 +422,32 @@ def mkprof(destination, source=None, relations=None, where=None,
372422
print(fmt.format(stat.st_size, _red(filename + '.gz')))
373423

374424

375-
def _lines_to_rows(lines):
425+
def _lines_to_rows(lines, relations):
426+
# field indices only need to be computed once, so don't use
427+
# itsdb.Record.from_dict()
428+
i_id_idx = relations['item'].index('i-id')
429+
i_wf_idx = relations['item'].index('i-wf')
430+
i_input_idx = relations['item'].index('i-input')
431+
num_fields = len(relations['item'])
432+
433+
def make_row(i_id, i_wf, i_input):
434+
row = [None] * num_fields
435+
row[i_id_idx] = i_id
436+
row[i_wf_idx] = i_wf
437+
row[i_input_idx] = i_input
438+
return itsdb.Record(relations['item'], row)
439+
376440
for i, line in enumerate(lines):
377-
i_id = i * 10
378-
i_wf = 0 if line.startswith('*') else 1
379-
i_input = line[1:].strip() if line.startswith('*') else line.strip()
380-
yield {'i-id': i_id, 'i-wf': i_wf, 'i-input': i_input}
441+
i_wf, i_input = (0, line[1:]) if line.startswith('*') else (1, line)
442+
yield make_row(i * 10, i_wf, i_input.strip())
381443

382444

383445
###############################################################################
384446
### PROCESS ###################################################################
385447

386448
def process(grammar, testsuite, source=None, select=None,
387-
generate=False, transfer=False,
388-
all_items=False, result_id=None):
449+
generate=False, transfer=False, options=None,
450+
all_items=False, result_id=None, gzip=False):
389451
"""
390452
Process (e.g., parse) a [incr tsdb()] profile.
391453
@@ -413,10 +475,15 @@ def process(grammar, testsuite, source=None, select=None,
413475
(default: `False`)
414476
transfer (bool): if `True`, transfer instead of parse
415477
(default: `False`)
478+
options (list): list of ACE command-line options to use when
479+
invoking the ACE subprocess; unsupported options will
480+
give an error message
416481
all_items (bool): if `True`, don't exclude ignored items
417482
(those with `i-wf==2`) when parsing
418483
result_id (int): if given, only keep items with the specified
419484
`result-id`
485+
gzip (bool): if `True`, non-empty tables will be compressed
486+
with gzip
420487
"""
421488
from delphin.interfaces import ace
422489

@@ -441,18 +508,14 @@ def process(grammar, testsuite, source=None, select=None,
441508
target = itsdb.TestSuite(testsuite)
442509
column, tablename, condition = _interpret_selection(select, source)
443510
table = itsdb.Table(
444-
tablename,
445511
source[tablename].fields,
446512
tsql.select(
447513
'* from {} {}'.format(tablename, condition),
448514
source,
449515
cast=False))
450516

451-
with processor(grammar) as cpu:
452-
target.process(cpu, tablename + ':' + column, source=table)
453-
454-
target.write()
455-
517+
with processor(grammar, cmdargs=options) as cpu:
518+
target.process(cpu, ':' + column, source=table, gzip=gzip)
456519

457520
def _interpret_selection(select, source):
458521
queryobj = tsql.inspect_query('select ' + select)

delphin/exceptions.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,27 +19,27 @@ class ItsdbError(PyDelphinException):
1919

2020

2121
class XmrsError(PyDelphinException):
22-
"""Raised when there is an error processing *MRS objects."""
22+
"""Raised when there is an error processing \*MRS objects."""
2323
pass
2424

2525

2626
class XmrsSerializationError(XmrsError):
27-
"""Raised when serializing *MRS objects fails."""
27+
"""Raised when serializing \*MRS objects fails."""
2828
pass
2929

3030

3131
class XmrsDeserializationError(XmrsError):
32-
"""Raised when deserializing *MRS objects fails."""
32+
"""Raised when deserializing \*MRS objects fails."""
3333
pass
3434

3535

3636
class XmrsStructureError(XmrsError):
37-
"""Raised when a *MRS object is structurally ill-formed."""
37+
"""Raised when a \*MRS object is structurally ill-formed."""
3838
pass
3939

4040

4141
class XmrsWarning(PyDelphinWarning):
42-
"""Warning class for *MRS processing."""
42+
"""Warning class for \*MRS processing."""
4343
pass
4444

4545

delphin/interfaces/ace.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@
8080
from delphin.interfaces.base import ParseResponse, Processor
8181
from delphin.util import SExpr, stringtypes
8282
from delphin.__about__ import __version__ as pydelphin_version
83+
from delphin.exceptions import PyDelphinException
84+
85+
86+
class AceProcessError(PyDelphinException):
    """
    Raised when the ACE process has crashed and cannot be recovered.

    For example, it is raised when the ACE subprocess has already
    exited with a non-zero return code right after being started.
    """
8388

8489

8590
class AceProcess(Processor):
@@ -166,6 +171,8 @@ def _open(self):
166171
'os': platform(),
167172
'start': datetime.now()
168173
})
174+
if self._p.poll() is not None and self._p.returncode != 0:
175+
raise AceProcessError('Process closed on startup; see <stderr>.')
169176

170177
def __enter__(self):
171178
return self
@@ -587,7 +594,7 @@ def error(self, message):
587594
_ace_argparser.add_argument('--ubertagging', nargs='?', type=float)
588595
_ace_argparser.add_argument('--pcfg', type=argparse.FileType())
589596
_ace_argparser.add_argument('--rooted-derivations', action='store_true')
590-
_ace_argparser.add_argument('--udx', nargs='?', choices=('all'))
597+
_ace_argparser.add_argument('--udx', nargs='?', choices=('all',))
591598
_ace_argparser.add_argument('--yy-rules', action='store_true')
592599
_ace_argparser.add_argument('--max-words', type=int)
593600

delphin/interfaces/base.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ def tokens(self, tokenset='internal'):
176176

177177
class FieldMapper(object):
178178
"""
179-
A class for mapping respsonses to [incr tsdb()] fields.
179+
A class for mapping responses to [incr tsdb()] fields.
180180
181181
This class provides two methods for mapping responses to fields:
182182
@@ -186,8 +186,19 @@ class FieldMapper(object):
186186
* cleanup() - returns any (table, data) tuples resulting from
187187
aggregated data over all runs, then clears this data
188188
189+
In addition, the :attr:`affected_tables` attribute should list
190+
the names of tables that become invalidated by using this
191+
FieldMapper to process a profile. Generally this is the list of
192+
tables that :meth:`map` and :meth:`cleanup` create records for,
193+
but it may also include those that rely on the previous set
194+
(e.g., treebanking preferences, etc.).
195+
189196
Alternative [incr tsdb()] schema can be handled by overriding
190197
these two methods and the __init__() method.
198+
199+
Attributes:
200+
affected_tables: list of tables that are affected by the
201+
processing
191202
"""
192203
def __init__(self):
193204
# the parse keys exclude some that are handled specially
@@ -211,6 +222,11 @@ def __init__(self):
211222
self._runs = {}
212223
self._last_run_id = -1
213224

225+
self.affected_tables = '''
226+
run parse result rule output edge tree decision preference
227+
update fold score
228+
'''.split()
229+
214230
def map(self, response):
215231
"""
216232
Process *response* and return a list of (table, rowdata) tuples.

delphin/interfaces/rest.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -122,8 +122,7 @@ def parse(self, sentence, params=None, headers=None):
122122
A ParseResponse containing the results, if the request was
123123
successful.
124124
Raises:
125-
A requests.HTTPError is raised if the status code was not
126-
200.
125+
requests.HTTPError: if the status code was not 200
127126
"""
128127
if params is None:
129128
params = {}
@@ -163,7 +162,7 @@ def parse(input, server=default_erg_server, params=None, headers=None):
163162
A ParseResponse containing the results, if the request was
164163
successful.
165164
Raises:
166-
A requests.HTTPError is raised if the status code was not 200.
165+
requests.HTTPError: if the status code was not 200
167166
"""
168167
return next(parse_from_iterable([input], server, params, headers), None)
169168

@@ -184,8 +183,8 @@ def parse_from_iterable(
184183
Yields:
185184
ParseResponse objects for each successful response.
186185
Raises:
187-
A requests.HTTPError is raised for the first response with
188-
a status code that is not 200.
186+
requests.HTTPError: for the first response with a status code
187+
that is not 200
189188
"""
190189
client = DelphinRestClient(server)
191190
for input in inputs:

0 commit comments

Comments
 (0)