Skip to content

Commit 02f3d8d

Browse files
committed
update repository abstraction
1 parent 6ec7814 commit 02f3d8d

File tree

26 files changed

+954
-247
lines changed

26 files changed

+954
-247
lines changed

docker/entrypoint.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def launch_pycsw(pycsw_config, workers=2, reload=False):
8080
except Exception as err:
8181
LOGGER.debug(err)
8282

83-
repo = Repository(database, StaticContext(), table=table)
83+
repo = Repository(database, StaticContext())
8484

8585
repo.ping()
8686

docs/repofilters.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,9 @@ The same CSW `GetRecords` filter as per above then yields the following results:
5858

5959
Another example:
6060

61-
.. code-block:: text
61+
.. code-block:: yaml
6262
63-
repository:0
63+
repository:
6464
database: sqlite:///records.db
6565
filter: "pycsw:ParentIdentifier != '33'"
6666

pycsw/core/admin.py

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,11 @@
5252
def load_records(context, database, table, xml_dirpath, recursive=False, force_update=False):
5353
"""Load metadata records from directory of files to database"""
5454

55-
repo = repository.Repository(database, context, table=table)
55+
repo_config = {
56+
'database': database,
57+
'table': table
58+
}
59+
repo = repository.Repository(repo_config, context)
5660

5761
file_list = []
5862

@@ -121,7 +125,13 @@ def load_records(context, database, table, xml_dirpath, recursive=False, force_u
121125

122126
def export_records(context, database, table, xml_dirpath):
123127
"""Export metadata records from database to directory of files"""
124-
repo = repository.Repository(database, context, table=table)
128+
129+
repo_config = {
130+
'database': database,
131+
'table': table
132+
}
133+
134+
repo = repository.Repository(repo_config, context)
125135

126136
LOGGER.info('Querying database %s, table %s ....', database, table)
127137
records = repo.session.query(repo.dataset)
@@ -190,8 +200,13 @@ def refresh_harvested_records(context, database, table, url):
190200
"""refresh / harvest all non-local records in repository"""
191201
from owslib.csw import CatalogueServiceWeb
192202

203+
repo_config = {
204+
'database': database,
205+
'table': table
206+
}
207+
193208
# get configuration and init repo connection
194-
repos = repository.Repository(database, context, table=table)
209+
repos = repository.Repository(repo_config, context)
195210

196211
# get all harvested records
197212
count, records = repos.query(constraint={'where': "mdsource != 'local'", 'values': []})
@@ -228,8 +243,13 @@ def refresh_harvested_records(context, database, table, url):
228243
def gen_sitemap(context, database, table, url, output_file):
229244
"""generate an XML sitemap from all records in repository"""
230245

246+
repo_config = {
247+
'database': database,
248+
'table': table
249+
}
250+
231251
# get configuration and init repo connection
232-
repos = repository.Repository(database, context, table=table)
252+
repos = repository.Repository(repo_config, context)
233253

234254
# write out sitemap document
235255
urlset = etree.Element(util.nspath_eval('sitemap:urlset',
@@ -274,7 +294,7 @@ def post_xml(url, xml, timeout=30):
274294
from owslib.util import http_post
275295
try:
276296
with open(xml) as f:
277-
return http_post(url=url, request=f.read(), timeout=timeout)
297+
return http_post(url=url, request=f.read(), timeout=timeout).text
278298
except Exception as err:
279299
LOGGER.exception('HTTP XML POST error')
280300
raise RuntimeError(err) from err
@@ -351,7 +371,12 @@ def delete_records(context, database, table):
351371

352372
LOGGER.info('Deleting all records')
353373

354-
repo = repository.Repository(database, context, table=table)
374+
repo_config = {
375+
'database', database,
376+
'table', table
377+
}
378+
379+
repo = repository.Repository(repo_config, context)
355380
repo.delete(constraint={'where': '', 'values': []})
356381

357382

@@ -493,7 +518,7 @@ def cli_rebuild_db_indexes(ctx, config, verbosity):
493518

494519
context = pconfig.StaticContext()
495520

496-
repo = repository.Repository(cfg['repository']['database'], context, table=cfg['repository'].get('table'))
521+
repo = repository.Repository(cfg['repository'], context)
497522
repo.rebuild_db_indexes()
498523

499524

@@ -509,7 +534,7 @@ def cli_optimize_db(ctx, config, verbosity):
509534

510535
context = pconfig.StaticContext()
511536

512-
repo = repository.Repository(cfg['repository']['database'], context, table=cfg['repository'].get('table'))
537+
repo = repository.Repository(cfg['repository'], context)
513538
repo.optimize_db()
514539

515540

pycsw/core/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
LOGGER = logging.getLogger(__name__)
3636

3737

38-
class StaticContext(object):
38+
class StaticContext:
3939
"""core configuration"""
4040
def __init__(self, prefix='csw30'):
4141
"""initializer"""

pycsw/core/pygeofilter_evaluate.py renamed to pycsw/core/pygeofilter_ext.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# Authors: Tom Kralidis <[email protected]>
44
#
5-
# Copyright (c) 2021 Tom Kralidis
5+
# Copyright (c) 2025 Tom Kralidis
66
#
77
# Permission is hereby granted, free of charge, to any person
88
# obtaining a copy of this software and associated documentation
@@ -35,6 +35,9 @@
3535
from pygeofilter.backends.evaluator import handle
3636
from pygeofilter.backends.sqlalchemy import filters
3737
from pygeofilter.backends.sqlalchemy.evaluate import SQLAlchemyFilterEvaluator
38+
from pygeofilter.parsers.fes.util import Element
39+
from pygeofilter.parsers.fes.util import handle as fhandle
40+
from pygeofilter.parsers.fes.v11 import FES11Parser
3841

3942
from pycsw.core.util import bbox2wktpolygon
4043

@@ -81,3 +84,16 @@ def ilike(self, node, lhs):
8184

8285
def to_filter(ast, dbtype, field_mapping=None):
8386
return PycswFilterEvaluator(field_mapping, dbtype).evaluate(ast)
87+
88+
class PycswCSWFES11Parser(FES11Parser):
89+
# def __init__(self):
90+
# super().__init__()
91+
92+
@fhandle('BBOX')
93+
def geometry_bbox(self, node: Element, lhs, rhs, crs=None):
94+
minx = rhs.geometry['coordinates'][0][0][1]
95+
miny = rhs.geometry['coordinates'][0][0][0]
96+
maxx = rhs.geometry['coordinates'][0][2][1]
97+
maxy = rhs.geometry['coordinates'][0][2][0]
98+
99+
return ast.BBox(lhs, minx, miny, maxx, maxy, crs)

pycsw/core/repository.py

Lines changed: 82 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# Angelos Tzotsos <[email protected]>
66
# Ricardo Garcia Silva <[email protected]>
77
#
8-
# Copyright (c) 2024 Tom Kralidis
8+
# Copyright (c) 2025 Tom Kralidis
99
# Copyright (c) 2015 Angelos Tzotsos
1010
# Copyright (c) 2017 Ricardo Garcia Silva
1111
#
@@ -34,6 +34,7 @@
3434

3535
import inspect
3636
import logging
37+
from operator import itemgetter
3738
import os
3839
from time import sleep
3940

@@ -49,11 +50,12 @@
4950
from pycsw.core import util
5051
from pycsw.core.etree import etree
5152
from pycsw.core.etree import PARSER
53+
from pycsw.core.pygeofilter_ext import to_filter
5254

5355
LOGGER = logging.getLogger(__name__)
5456

5557

56-
class Repository(object):
58+
class Repository:
5759
_engines = {}
5860

5961
@classmethod
@@ -87,14 +89,15 @@ def connect(dbapi_connection, connection_rec):
8789
return clazz._engines[url]
8890

8991
''' Class to interact with underlying repository '''
90-
def __init__(self, database, context, app_root=None, table='records', repo_filter=None):
92+
def __init__(self, repo_object, context, app_root=None):
9193
''' Initialize repository '''
9294

9395
self.context = context
94-
self.filter = repo_filter
96+
self.filter = repo_object.get('filter')
9597
self.fts = False
96-
self.database = database
97-
self.table = table
98+
self.database = repo_object.get('database')
99+
self.table = repo_object.get('table')
100+
self.facets = repo_object.get('facets', [])
98101

99102
# Don't use relative paths, this is hack to get around
100103
# most wsgi restriction...
@@ -110,7 +113,7 @@ def __init__(self, database, context, app_root=None, table='records', repo_filte
110113

111114
self.postgis_geometry_column = None
112115

113-
schema_name, table_name = table.rpartition(".")[::2]
116+
schema_name, table_name = self.table.rpartition(".")[::2]
114117

115118
default_table_args = {
116119
"autoload": True,
@@ -145,6 +148,7 @@ def __init__(self, database, context, app_root=None, table='records', repo_filte
145148
temp_dbtype = None
146149

147150
self.query_mappings = {
151+
# OGC API - Records mappings
148152
'identifier': self.dataset.identifier,
149153
'type': self.dataset.type,
150154
'typename': self.dataset.typename,
@@ -167,6 +171,10 @@ def __init__(self, database, context, app_root=None, table='records', repo_filte
167171
'off_nadir': self.dataset.illuminationelevationangle
168172
}
169173

174+
LOGGER.debug('adding OGC CSW mappings')
175+
for key, value in self.context.models['csw']['typenames']['csw:Record']['queryables']['SupportedDublinCoreQueryables'].items():
176+
self.query_mappings[key] = util.getqattr(self.dataset, value['dbcol'])
177+
170178
if self.dbtype == 'postgresql':
171179
# check if PostgreSQL is enabled with PostGIS 1.x
172180
try:
@@ -410,18 +418,34 @@ def query_source(self, source):
410418
query = self.session.query(self.dataset).filter(column == source)
411419
return self._get_repo_filter(query).all()
412420

413-
def query(self, constraint, sortby=None, typenames=None,
421+
def query(self, constraint=None, sortby=None, typenames=None,
414422
maxrecords=10, startposition=0):
415423
''' Query records from underlying repository '''
416424

417-
# run the raw query and get total
418-
if 'where' in constraint: # GetRecords with constraint
419-
LOGGER.debug('constraint detected')
420-
query = self.session.query(self.dataset).filter(
421-
text(constraint['where'])).params(self._create_values(constraint['values']))
422-
else: # GetRecords sans constraint
423-
LOGGER.debug('No constraint detected')
424-
query = self.session.query(self.dataset)
425+
if constraint.get('ast') is not None: # GetRecords with pygeofilter AST
426+
LOGGER.debug('pygeofilter AST detected')
427+
LOGGER.debug('Transforming AST into filters')
428+
try:
429+
filters = to_filter(constraint['ast'], self.dbtype, self.query_mappings)
430+
LOGGER.debug(f'Filter: {filters}')
431+
except Exception as err:
432+
msg = f'AST evaluator error: {str(err)}'
433+
LOGGER.exception(msg)
434+
raise RuntimeError(msg)
435+
436+
query = self.session.query(self.dataset).filter(filters)
437+
438+
else: # GetRecords sans pygeofilter AST
439+
LOGGER.debug('No pygeofilter AST detected')
440+
441+
# run the raw query and get total
442+
if 'where' in constraint: # GetRecords with constraint
443+
LOGGER.debug('constraint detected')
444+
query = self.session.query(self.dataset).filter(
445+
text(constraint['where'])).params(self._create_values(constraint['values']))
446+
else: # GetRecords sans constraint
447+
LOGGER.debug('No constraint detected')
448+
query = self.session.query(self.dataset)
425449

426450
total = self._get_repo_filter(query).count()
427451

@@ -452,9 +476,50 @@ def query(self, constraint, sortby=None, typenames=None,
452476
query = query.order_by(sortby_column)
453477

454478
# always apply limit and offset
455-
return [str(total), self._get_repo_filter(query).limit(
479+
return [total, self._get_repo_filter(query).limit(
456480
maxrecords).offset(startposition).all()]
457481

482+
def get_facets(self, ast=None) -> dict:
483+
"""
484+
Gets all facets for a given query
485+
486+
:returns: `dict` of facets
487+
"""
488+
489+
facets_results = {}
490+
491+
for facet in self.facets:
492+
LOGGER.debug(f'Running facet for {facet}')
493+
facetq = self.session.query(self.query_mappings[facet], self.func.count(facet)).group_by(facet)
494+
495+
if ast is not None:
496+
try:
497+
filters = to_filter(ast, self.dbtype, self.query_mappings)
498+
LOGGER.debug(f'Filter: {filters}')
499+
except Exception as err:
500+
msg = f'AST evaluator error: {str(err)}'
501+
LOGGER.exception(msg)
502+
raise RuntimeError(msg)
503+
504+
facetq = facetq.filter(filters)
505+
506+
LOGGER.debug('Writing facet query results')
507+
facets_results[facet] = {
508+
'type': 'terms',
509+
'property': facet,
510+
'buckets': []
511+
}
512+
513+
for fq in facetq.all():
514+
facets_results[facet]['buckets'].append({
515+
'value': fq[0],
516+
'count': fq[1]
517+
})
518+
519+
facets_results[facet]['buckets'].sort(key=itemgetter('count'), reverse=True)
520+
521+
return facets_results
522+
458523
def insert(self, record, source, insert_date):
459524
''' Insert a record into the repository '''
460525

pycsw/core/util.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ def transform_mappings(queryables, typename):
269269
def getqattr(obj, name):
270270
"""Get value of an object, safely"""
271271
result = None
272+
item = None
272273
try:
273274
item = getattr(obj, name)
274275
value = item()

pycsw/oaipmh.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535

3636
LOGGER = logging.getLogger(__name__)
3737

38-
class OAIPMH(object):
38+
class OAIPMH:
3939
"""OAI-PMH wrapper class"""
4040
def __init__(self, context, config):
4141
LOGGER.debug('Initializing OAI-PMH constants')

0 commit comments

Comments
 (0)