Skip to content

Commit 269d0fc

Browse files
[Fixes #13381] Allowing remote WMS datasets to be automatically connected to existing harvesters
1 parent 8771c80 commit 269d0fc

File tree

6 files changed

+257
-17
lines changed

6 files changed

+257
-17
lines changed

geonode/harvesting/tests/test_utils.py

Lines changed: 106 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,33 @@
1616
# along with this program. If not, see <http://www.gnu.org/licenses/>.
1717
#
1818
#########################################################################
19+
from django.conf import settings
1920
from lxml import etree
21+
from django.contrib.auth import get_user_model
22+
from django.utils import timezone
2023

21-
from geonode.tests.base import GeoNodeBaseSimpleTestSupport
24+
from geonode.base.populate_test_data import create_single_dataset
25+
from geonode.harvesting.models import HarvestableResource, Harvester
26+
from geonode.tests.base import GeoNodeBaseTestSupport
27+
from geonode.upload.handlers.remote.wms import create_harvestable_resource
2228
from geonode.utils import get_xpath_value
2329

2430

25-
class UtilsTestCase(GeoNodeBaseSimpleTestSupport):
31+
class UtilsTestCase(GeoNodeBaseTestSupport):
32+
33+
@classmethod
def setUpClass(cls):
    """
    Prepare the fixtures shared by the tests of this class: the admin user,
    a WMS harvester pointing at the local GeoServer OWS endpoint and a dataset
    """
    super().setUpClass()
    cls.service_url = f"{settings.GEOSERVER_LOCATION}ows"
    cls.user = get_user_model().objects.get(username="admin")
    harvester_fields = {
        "remote_url": cls.service_url,
        "name": "harvester1",
        "default_owner": cls.user,
        "harvester_type": "geonode.harvesting.harvesters.wms.OgcWmsHarvester",
    }
    # get_or_create reuses a matching harvester when one is already stored
    cls.harvester, _ = Harvester.objects.get_or_create(**harvester_fields)
    cls.dataset = create_single_dataset(name="example_harvestable_resource")
45+
2646
def test_get_xpath_value(self):
2747
fixtures = [
2848
(
@@ -48,3 +68,87 @@ def test_get_xpath_value(self):
4868
xml_el = etree.fromstring(element)
4969
result = get_xpath_value(xml_el, xpath_expr, nsmap=nsmap)
5070
self.assertEqual(result, expected)
71+
72+
def test_create_harvestable_resource(self):
    """
    Calling the helper with a dataset and the url of a known harvester
    must store a harvestable resource pointing at that dataset
    """
    # lazy queryset: every call below issues a fresh query against the DB
    linked_resources = HarvestableResource.objects.filter(geonode_resource=self.dataset)
    try:
        # precondition: nothing links the dataset to a harvester yet
        self.assertFalse(linked_resources.exists())

        # the helper is not expected to return anything
        self.assertIsNone(create_harvestable_resource(self.dataset, self.service_url))

        # a harvestable resource bound to the dataset must now be stored
        stored = linked_resources.first()
        self.assertIsNotNone(stored)
        self.assertEqual(stored.geonode_resource.pk, self.dataset.pk)
    finally:
        linked_resources.delete()
92+
93+
def test_create_harvestable_resource_different_service_url(self):
    """
    A service url that does not exist in the DB should be reported with a warning
    """
    unknown_url = "http://someurl.com"
    with self.assertLogs(level="WARNING") as captured:
        create_harvestable_resource(self.dataset, unknown_url)
    logged_messages = [record.message for record in captured.records]
    self.assertIn("The WMS layer does not belong to any known remote service", logged_messages)
100+
101+
def test_create_harvestable_resource_on_existing_harvestable_resource(self):
    """
    When a harvestable resource is already stored without a geonode resource,
    the helper should attach the given dataset to it
    """
    # lazy queryset: every call below issues a fresh query against the DB
    linked_resources = HarvestableResource.objects.filter(geonode_resource=self.dataset)
    try:
        # seed a harvestable resource that has no geonode resource attached
        self.__create_harvestable_resource()

        outcome = create_harvestable_resource(self.dataset, self.service_url)
        # the helper is not expected to return anything
        self.assertIsNone(outcome)

        # the harvestable resource must now reference the dataset
        attached = linked_resources.first()
        self.assertIsNotNone(attached)
        self.assertEqual(attached.geonode_resource.pk, self.dataset.pk)
    finally:
        linked_resources.delete()
119+
120+
def test_create_harvestable_resource_different_geonode_resource(self):
    """
    When the harvestable resource is already bound to another geonode resource,
    the helper should log the mismatch as a warning
    """
    expected_warning = (
        "The Resource assigned to the current HarvestableResource is different from the one provided, skipping..."
    )
    try:
        # seed a harvestable resource already attached to self.dataset
        self.__create_harvestable_resource(attach_resource=True)
        # a second dataset reusing the same alternate maps to the same unique identifier
        other_dataset = create_single_dataset("second dataset")
        other_dataset.alternate = self.dataset.alternate
        other_dataset.save()
        with self.assertLogs(level="WARNING") as captured:
            create_harvestable_resource(other_dataset, self.service_url)
        self.assertIn(expected_warning, [record.message for record in captured.records])
    finally:
        HarvestableResource.objects.filter(geonode_resource=self.dataset).delete()
139+
140+
def __create_harvestable_resource(self, attach_resource=False):
    """
    Store (or reuse) a harvestable resource for the test harvester, identified by
    the alternate of the test dataset. The dataset itself is attached only when
    attach_resource is True
    """
    linked_dataset = self.dataset if attach_resource else None
    extra_fields = {
        "should_be_harvested": True,
        "remote_resource_type": self.dataset.resource_type,
        "last_updated": timezone.now(),
        "last_refreshed": timezone.now(),
        "last_harvested": timezone.now(),
        "last_harvesting_succeeded": True,
    }
    HarvestableResource.objects.get_or_create(
        harvester=self.harvester,
        unique_identifier=self.dataset.alternate,
        geonode_resource=linked_dataset,
        title="Some title",
        defaults=extra_fields,
    )

geonode/harvesting/utils.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#########################################################################
2+
#
3+
# Copyright (C) 2025 OSGeo
4+
#
5+
# This program is free software: you can redistribute it and/or modify
6+
# it under the terms of the GNU General Public License as published by
7+
# the Free Software Foundation, either version 3 of the License, or
8+
# (at your option) any later version.
9+
#
10+
# This program is distributed in the hope that it will be useful,
11+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
12+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13+
# GNU General Public License for more details.
14+
#
15+
# You should have received a copy of the GNU General Public License
16+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
17+
#
18+
#########################################################################
19+
import logging
20+
from django.utils import timezone
21+
from geonode.harvesting.models import HarvestableResource, Harvester
22+
23+
24+
logger = logging.getLogger(__name__)
25+
26+
27+
def create_harvestable_resource(geonode_resource, service_url):
    """
    Connect a GeoNode resource to the harvester registered for ``service_url``.

    If no harvester has ``service_url`` as its remote url, a warning is logged
    and nothing is stored. Otherwise the harvestable resource identified by the
    resource alternate is looked up within that harvester only:

    - missing: it is created already linked to ``geonode_resource``
    - present without a GeoNode resource: ``geonode_resource`` is attached and
      the entry is flagged as to be harvested
    - present and linked to a different GeoNode resource: a warning is logged
      and the entry is left as it is

    :param geonode_resource: resource to link; its ``alternate``, ``title`` and
        ``resource_type`` attributes are read
    :param service_url: remote url used to find the harvester
    :return: always None
    """
    harvester = Harvester.objects.filter(remote_url=service_url).first()
    if not harvester:
        logger.warning("The WMS layer does not belong to any known remote service")
        return

    timestamp = timezone.now()
    # Only the harvester and the identifier take part in the lookup. The same
    # layer name may be exposed by more than one remote service, so matching on
    # the identifier alone could pick a resource owned by another harvester.
    # Title and linked resource are plain attributes: keeping them in
    # ``defaults`` prevents an attempt to insert a second row when they differ.
    hresource, created = HarvestableResource.objects.get_or_create(
        harvester=harvester,
        unique_identifier=geonode_resource.alternate,
        defaults={
            "geonode_resource": geonode_resource,
            "title": geonode_resource.title,
            "should_be_harvested": True,
            "remote_resource_type": geonode_resource.resource_type,
            "last_updated": timestamp,
            "last_refreshed": timestamp,
            "last_harvested": timestamp,
            "last_harvesting_succeeded": True,
        },
    )
    if created:
        return

    logger.info("Harvestable resource already exists, linking geonode resource...")
    # if exists, we need to be sure that the resource from geonode is getting connected
    if not hresource.geonode_resource:
        hresource.geonode_resource = geonode_resource
        hresource.should_be_harvested = True
        hresource.save()
    elif hresource.geonode_resource.pk != geonode_resource.pk:
        logger.warning(
            "The Resource assigned to the current HarvestableResource is different from the one provided, skipping..."
        )

geonode/upload/handlers/remote/serializers/wms.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@ class Meta:
2525
model = RemoteResourceSerializer.Meta.model
2626
ref_name = "RemoteWMSSerializer"
2727
fields = RemoteResourceSerializer.Meta.fields + (
28-
"lookup",
28+
"identifier",
2929
"bbox",
3030
"parse_remote_metadata",
3131
)
3232

33-
lookup = serializers.CharField(required=True)
33+
identifier = serializers.CharField(required=True)
3434
bbox = serializers.ListField(required=False)
3535
parse_remote_metadata = serializers.BooleanField(required=False, default=False)

geonode/upload/handlers/remote/tests/test_wms.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,15 @@ def setUpClass(cls):
4141
"url": "http://invalid.com",
4242
"type": "invalid",
4343
"title": "This will fail",
44-
"lookup": "abc124",
44+
"identifier": "abc124",
4545
"bbox": ["1", "2", "3", "4"],
4646
"parse_remote_metadata": False,
4747
}
4848
cls.valid_payload_with_parse_false = {
4949
"url": cls.valid_url,
5050
"type": "wms",
5151
"title": "This will fail",
52-
"lookup": "abc124",
52+
"identifier": "abc124",
5353
"bbox": ["1", "2", "3", "4"],
5454
"parse_remote_metadata": False,
5555
}
@@ -58,7 +58,7 @@ def setUpClass(cls):
5858
"url": cls.valid_url,
5959
"type": "wms",
6060
"title": "This will fail",
61-
"lookup": "abc124",
61+
"identifier": "abc124",
6262
"bbox": ["1", "2", "3", "4"],
6363
"parse_remote_metadata": True,
6464
}
@@ -123,7 +123,7 @@ def test_extract_params_from_data(self):
123123
self.assertTrue("title" in actual)
124124
self.assertTrue("url" in actual)
125125
self.assertTrue("type" in actual)
126-
self.assertTrue("lookup" in actual)
126+
self.assertTrue("identifier" in actual)
127127
self.assertTrue("parse_remote_metadata" in actual)
128128
self.assertTrue("bbox" in actual)
129129

geonode/upload/handlers/remote/wms.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#########################################################################
1919
import logging
2020
from django.conf import settings
21+
from geonode.harvesting.utils import create_harvestable_resource
2122
from geonode.layers.models import Dataset
2223
from geonode.upload.handlers.common.remote import BaseRemoteResourceHandler
2324
from geonode.services import enumerations
@@ -61,7 +62,7 @@ def extract_params_from_data(_data, action=None):
6162
"""
6263
payload, original_data = BaseRemoteResourceHandler.extract_params_from_data(_data, action=action)
6364
if action != exa.COPY.value:
64-
payload["lookup"] = original_data.pop("lookup", None)
65+
payload["identifier"] = original_data.pop("identifier", None)
6566
payload["bbox"] = original_data.pop("bbox", None)
6667
payload["parse_remote_metadata"] = original_data.pop("parse_remote_metadata", None)
6768

@@ -80,7 +81,7 @@ def prepare_import(self, files, execution_id, **kwargs):
8081
to_update = {
8182
"ows_url": ows_url,
8283
"parsed_url": parsed_url,
83-
"remote_resource_id": _exec.input_params.get("lookup", None),
84+
"remote_resource_id": _exec.input_params.get("identifier", None),
8485
}
8586
if _exec.input_params.get("parse_remote_metadata", False):
8687
try:
@@ -100,7 +101,7 @@ def prepare_import(self, files, execution_id, **kwargs):
100101

101102
def get_wms_resource(self, _exec):
102103
_, wms = WebMapService(_exec.input_params.get("url"))
103-
wms_resource = wms[_exec.input_params.get("lookup")]
104+
wms_resource = wms[_exec.input_params.get("identifier")]
104105
return wms_resource
105106

106107
def generate_alternate(
@@ -114,7 +115,7 @@ def generate_alternate(
114115
):
115116
"""
116117
For WMS we dont want to generate an alternate, otherwise we cannot use
117-
the alternate to lookup the layer in the remote service
118+
the alternate to identify the layer in the remote service
118119
"""
119120
return layer_name, payload_alternate
120121

@@ -136,6 +137,12 @@ def create_geonode_resource(
136137
if remote_bbox:
137138
resource.set_bbox_polygon(remote_bbox, "EPSG:4326")
138139
resource_manager.set_thumbnail(None, instance=resource)
140+
141+
harvester_url = _exec.input_params.get("parsed_url", None)
142+
if harvester_url:
143+
# call utils to connect harvester and resource
144+
create_harvestable_resource(resource, service_url=harvester_url)
145+
139146
return resource
140147

141148
def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace, **kwargs):

0 commit comments

Comments
 (0)