Skip to content

Commit abc78a7

Browse files
committed
Initial version with basic match functionality
1 parent ed074f8 commit abc78a7

File tree

9 files changed

+1209
-2
lines changed

9 files changed

+1209
-2
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ __pycache__/
88
# Distribution / packaging
99
.Python
1010
env/
11+
.virtualenv/
1112
build/
1213
develop-eggs/
1314
dist/

README.md

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,69 @@
1-
# reference-server
2-
A simple illustrative reference server for the Matchmaker Exchange API
1+
# Matchmaker Exchange Reference Server
2+
A simple illustrative reference server for the Matchmaker Exchange API.
3+
4+
The server is backed by elasticsearch, and creates local indexes of the Human Phenotype Ontology, Ensembl-Entrez-HGNC gene symbol mappings, and the MME API benchmark set of 50 rare disease patients.
5+
6+
## Dependencies
7+
- Python 3.X (not yet tested on 2.7 but should be easy to get working)
8+
- elasticsearch 2.X
9+
10+
11+
## Quickstart
12+
13+
1. Start up a local elasticsearch cluster, for example:
14+
15+
```bash
16+
$ wget https://download.elasticsearch.org/elasticsearch/release/org/elasticsearch/distribution/tar/elasticsearch/2.1.1/elasticsearch-2.1.1.tar.gz
17+
$ tar -xzf elasticsearch-2.1.1.tar.gz
18+
$ cd elasticsearch-2.1.1/
19+
$ ./bin/elasticsearch
20+
```
21+
22+
1. Set up your Python virtual environment and install necessary Python packages, for example:
23+
24+
```bash
25+
$ virtualenv -p python3 --prompt="(mme-server)" .virtualenv
26+
$ source .virtualenv/bin/activate
27+
$ pip install -r requirements.txt
28+
```
29+
30+
1. Download and index vocabularies and sample data:
31+
32+
```bash
33+
$ python datastore.py
34+
```
35+
36+
1. Run tests:
37+
38+
```bash
39+
$ python test.py
40+
```
41+
42+
1. Start up MME reference server:
43+
44+
```bash
45+
$ python server.py
46+
```
47+
48+
By default, the server listens globally (`--host 0.0.0.0`) on port 8000 (`--port 8000`).
49+
50+
1. Try it out:
51+
52+
```bash
53+
$ curl -XPOST -d '{"patient":{ \
54+
"id":"1", \
55+
"contact": {"name":"Jane Doe", "href":"mailto:jane.doe@example.org"}, \
56+
"features":[{"id":"HP:0000522"}], \
57+
"genomicFeatures":[{"gene":{"id":"NGLY1"}}] \
58+
}}' localhost:8000/match
59+
```
60+
61+
62+
## TODO
63+
- Avoid costly/redundant parsing of `api.Patient` objects when generating MatchResponse objects from patients in the database
64+
- Inspect the `Accept` header for API versioning
65+
- Add `Content-Type` header to responses
66+
- Handle errors with proper HTTP statuses and JSON message bodies
67+
- Add tests for gene index
68+
- Add end-to-end API query tests
69+
- Add parser tests

api.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
"""
2+
The API module:
3+
4+
Contains API methods and classes for API objects.
5+
Handles parsing of API requests into API objects, and serializing API objects into API responses.
6+
7+
Also contains some code to help convert API objects to their database representations.
8+
"""
9+
from __future__ import with_statement, division, unicode_literals
10+
11+
import json
12+
13+
from datastore import DatastoreConnection
14+
15+
16+
class Feature:
    """A single phenotype feature taken from a patient's ``features`` list.

    The vocabulary term named by the feature's ``id`` is looked up through
    the class-level datastore connection when the feature is constructed.
    """

    # Connection to backend to validate vocabulary terms
    db = DatastoreConnection()

    def __init__(self, data):
        """Build a feature from its JSON dictionary.

        ``data['id']`` is required; ``data['observed']`` is optional and is
        treated as ``'yes'`` when absent.
        """
        observed_value = data.get('observed', 'yes')
        self._observed = (observed_value == 'yes')
        # TODO: parse ageOfOnset
        term_id = data['id']
        self.term = self.db.get_vocabulary_term(term_id)

    def _get_implied_terms(self):
        """Return the ``term_category`` entry of the looked-up term."""
        # NOTE(review): presumably the term's ancestor/category ids -- confirm
        # against the datastore schema.
        term = self.term
        return term['term_category']

    def _get_id(self):
        """Return the ``id`` entry of the looked-up term."""
        term = self.term
        return term['id']

    @property
    def observed(self):
        """True when the feature was reported with ``observed == 'yes'``."""
        return self._observed
34+
35+
36+
class GenomicFeature:
    """A single entry of a patient's ``genomicFeatures`` list.

    Only the gene identifier is parsed; when one is given, the matching
    vocabulary term is looked up through the class-level datastore
    connection.
    """

    # Connection to backend to validate vocabulary terms
    db = DatastoreConnection()

    def __init__(self, data):
        """Build a genomic feature from its JSON dictionary.

        ``self.term`` stays ``None`` when ``data`` carries no truthy
        ``gene.id`` value.
        """
        gene_json = data.get('gene', {})
        gene_id = gene_json.get('id')
        # TODO: parse additional genomicFeature fields
        self.term = self.db.get_vocabulary_term(gene_id) if gene_id else None

    def _get_gene_id(self):
        """Return the ``id`` of the looked-up gene term, or None without one."""
        if not self.term:
            return None
        return self.term['id']
50+
51+
52+
class Patient:
    """A patient record parsed from the ``patient`` object of an API request.

    Attributes set by the constructor:
        id, contact, label, age_of_onset, disorders, test: copied from the
            request dictionary (``label``/``ageOfOnset`` default to None,
            ``disorders`` to ``[]`` and ``test`` to False).
        features: list of ``Feature`` objects, one per ``features`` entry.
        genomic_features: list of ``GenomicFeature`` objects, one per
            ``genomicFeatures`` entry.
    """

    def __init__(self, data):
        """Parse and validate the patient dictionary ``data``.

        Raises:
            KeyError: if ``id``, ``contact``, ``contact['name']`` or
                ``contact['href']`` is missing.
            AssertionError: if the contact name or href is empty, or if
                neither ``features`` nor ``genomicFeatures`` has any entry.
        """
        self.id = data['id']
        self.contact = data['contact']
        # Validation failures are raised explicitly instead of using the
        # ``assert`` statement so the checks are not stripped when Python
        # runs with -O; the exception type is kept as AssertionError so
        # existing callers see the same error.
        if not (self.contact['name'] and self.contact['href']):
            raise AssertionError("Patient contact must provide a non-empty 'name' and 'href'")

        features_json = data.get('features', [])
        genomic_features_json = data.get('genomicFeatures', [])

        if not (features_json or genomic_features_json):
            raise AssertionError("At least one of 'features' or 'genomicFeatures' must be provided")

        # Each JSON entry yields exactly one parsed object, so the non-empty
        # check above also guarantees at least one parsed feature.
        # Parse phenotype terms
        self.features = [Feature(feature_json) for feature_json in features_json]

        # Parse genomic features
        self.genomic_features = [GenomicFeature(gf_json) for gf_json in genomic_features_json]

        self.disorders = data.get('disorders', [])
        self.label = data.get('label')
        self.age_of_onset = data.get('ageOfOnset')
        self.test = data.get('test', False)

    def _get_genes(self):
        """Return the set of gene ids of all genomic features that have one."""
        gene_ids = (gf._get_gene_id() for gf in self.genomic_features)
        return {gene_id for gene_id in gene_ids if gene_id}

    def _get_present_phenotypes(self):
        """Return the set of term ids of all observed features."""
        return {feature._get_id() for feature in self.features if feature.observed}

    def _get_implied_present_phenotypes(self):
        """Return the union of implied terms over all observed features."""
        terms = set()
        for feature in self.features:
            if feature.observed:
                terms.update(feature._get_implied_terms())

        return terms

    def to_json(self):
        """Serialize the patient to a JSON-compatible dictionary.

        ``id`` and ``contact`` (name and href only) are always present;
        ``label``, ``ageOfOnset``, ``features``, ``genomicFeatures``,
        ``disorders`` and ``test`` are included only when non-empty/true.
        Only observed features are emitted.  Feature and gene ids are
        emitted in sorted order so the output is deterministic.
        """
        data = {
            'id': self.id,
            'contact': {
                'name': self.contact['name'],
                'href': self.contact['href'],
            },
        }

        if self.label:
            data['label'] = self.label

        if self.age_of_onset:
            data['ageOfOnset'] = self.age_of_onset

        phenotype_ids = self._get_present_phenotypes()
        if phenotype_ids:
            data['features'] = [{'id': term_id} for term_id in sorted(phenotype_ids)]

        gene_ids = self._get_genes()
        if gene_ids:
            data['genomicFeatures'] = [{'gene': {'id': gene_id}} for gene_id in sorted(gene_ids)]

        if self.disorders:
            data['disorders'] = self.disorders

        if self.test:
            data['test'] = True

        return data
134+
135+
136+
class MatchRequest:
    """A parsed match request: the query patient plus the raw request body."""

    def __init__(self, request):
        """Parse ``request['patient']`` into a ``Patient`` and keep ``request``."""
        patient_json = request['patient']
        self.patient = Patient(patient_json)
        # The unparsed request dictionary is retained as-is.
        self._data = request
140+
141+
142+
class MatchResult:
    """One matched patient together with its similarity score."""

    def __init__(self, match, score):
        """Store the matched object (must offer ``to_json()``) and its score."""
        self.match = match
        self.score = score

    def to_json(self):
        """Return ``{'score': {'patient': score}, 'patient': <match JSON>}``."""
        return {
            'score': {'patient': self.score},
            'patient': self.match.to_json(),
        }
152+
153+
154+
def match(request, backend=None):
    """Find patients similar to the one in ``request``.

    Args:
        request: a ``MatchRequest`` whose ``patient`` is used as the query.
        backend: object providing ``find_similar_patients(patient)``, which
            yields ``(score, patient)`` pairs.  A new ``DatastoreConnection``
            is created when omitted.

    Returns:
        A ``MatchResponse`` wrapping one ``MatchResult`` per pair yielded by
        the backend, in the order the backend produced them.
    """
    assert isinstance(request, MatchRequest), "Argument to match must be MatchRequest object"

    # Compare against None so that a caller-supplied backend that happens to
    # be falsy is not silently replaced by a fresh connection.
    if backend is None:
        backend = DatastoreConnection()

    # Unpack patient and query backend
    query_patient = request.patient
    matches = [
        MatchResult(matched_patient, score)
        for score, matched_patient in backend.find_similar_patients(query_patient)
    ]

    return MatchResponse(matches)
169+
170+
171+
class MatchResponse:
    """The response to a match request: an iterable of result objects."""

    def __init__(self, response):
        """Keep ``response``, an iterable of objects offering ``to_json()``."""
        self._data = response

    def to_json(self):
        """Return ``{'results': [...]}`` with each result's JSON, in order."""
        results = []
        for result in self._data:
            results.append(result.to_json())
        return {'results': results}

0 commit comments

Comments
 (0)