Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 106 additions & 2 deletions geonode/harvesting/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,33 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#########################################################################
from django.conf import settings
from lxml import etree
from django.contrib.auth import get_user_model
from django.utils import timezone

from geonode.tests.base import GeoNodeBaseSimpleTestSupport
from geonode.base.populate_test_data import create_single_dataset
from geonode.harvesting.models import HarvestableResource, Harvester
from geonode.tests.base import GeoNodeBaseTestSupport
from geonode.upload.handlers.remote.wms import create_harvestable_resource
from geonode.utils import get_xpath_value


class UtilsTestCase(GeoNodeBaseSimpleTestSupport):
class UtilsTestCase(GeoNodeBaseTestSupport):

@classmethod
def setUpClass(cls):
    """
    Prepare the fixtures shared by the tests of this class: the "admin"
    user, a WMS harvester whose remote url is the GeoServer OWS endpoint
    taken from the settings, and a dataset used as geonode resource.
    """
    super().setUpClass()
    cls.service_url = f"{settings.GEOSERVER_LOCATION}ows"
    user_model = get_user_model()
    cls.user = user_model.objects.get(username="admin")
    # get_or_create reuses the harvester if one with these values already exists
    harvester_lookup = {
        "remote_url": cls.service_url,
        "name": "harvester1",
        "default_owner": cls.user,
        "harvester_type": "geonode.harvesting.harvesters.wms.OgcWmsHarvester",
    }
    cls.harvester = Harvester.objects.get_or_create(**harvester_lookup)[0]
    cls.dataset = create_single_dataset(name="example_harvestable_resource")

def test_get_xpath_value(self):
fixtures = [
(
Expand All @@ -48,3 +68,87 @@ def test_get_xpath_value(self):
xml_el = etree.fromstring(element)
result = get_xpath_value(xml_el, xpath_expr, nsmap=nsmap)
self.assertEqual(result, expected)

def test_create_harvestable_resource(self):
    """
    Given a geonode resource and the url of the harvester service, a
    harvestable resource linked to that geonode resource should exist
    after the call
    """

    def linked_resources():
        # a fresh queryset on every call, so each check queries the database
        return HarvestableResource.objects.filter(geonode_resource=self.dataset)

    try:
        # precondition: the dataset is not linked to any harvestable resource
        self.assertFalse(linked_resources().exists())

        outcome = create_harvestable_resource(self.dataset, self.service_url)
        # nothing is expected to be returned
        self.assertIsNone(outcome)

        # a harvestable resource pointing at the dataset must now be present
        created = linked_resources().first()
        self.assertIsNotNone(created)
        self.assertEqual(created.geonode_resource.pk, self.dataset.pk)
    finally:
        # remove whatever was linked so the other tests start clean
        linked_resources().delete()

def test_create_harvestable_resource_different_service_url(self):
    """
    Should log a warning when the provided service url does not exist
    in the DB
    """
    expected_warning = "The WMS layer does not belong to any known remote service"
    with self.assertLogs(level="WARNING") as captured:
        create_harvestable_resource(self.dataset, "http://someurl.com")
    logged_messages = [record.message for record in captured.records]
    self.assertIn(expected_warning, logged_messages)

def test_create_harvestable_resource_on_existing_harvestable_resource(self):
    """
    If the harvestable resource already exists, the newly created geonode
    resource should be linked to it, so the harvester will not harvest
    it again
    """

    def linked_resources():
        # a fresh queryset on every call, so each check queries the database
        return HarvestableResource.objects.filter(geonode_resource=self.dataset)

    try:
        # start from a harvestable resource with no geonode resource attached
        self.__create_harvestable_resource()

        outcome = create_harvestable_resource(self.dataset, self.service_url)
        # nothing is expected to be returned
        self.assertIsNone(outcome)

        # the harvestable resource must now point at the dataset
        attached = linked_resources().first()
        self.assertIsNotNone(attached)
        self.assertEqual(attached.geonode_resource.pk, self.dataset.pk)
    finally:
        linked_resources().delete()

def test_create_harvestable_resource_different_geonode_resource(self):
    """
    If the harvestable resource already has a geonode resource attached
    and a different one is provided, the call should only log a warning
    """
    expected_warning = (
        "The Resource assigned to the current HarvestableResource is different from the one provided, skipping..."
    )
    try:
        self.__create_harvestable_resource(attach_resource=True)
        # a second dataset that shares the alternate of the attached one
        other_dataset = create_single_dataset("second dataset")
        other_dataset.alternate = self.dataset.alternate
        other_dataset.save()
        with self.assertLogs(level="WARNING") as captured:
            create_harvestable_resource(other_dataset, self.service_url)
        logged_messages = [record.message for record in captured.records]
        self.assertIn(expected_warning, logged_messages)
    finally:
        HarvestableResource.objects.filter(geonode_resource=self.dataset).delete()

def __create_harvestable_resource(self, attach_resource=False):
    """
    Test helper: make sure a harvestable resource exists for the test
    harvester, identified by the alternate of the test dataset.

    :param attach_resource: when True the harvestable resource is looked
        up/created with ``self.dataset`` as geonode resource, otherwise
        with no geonode resource at all
    """
    linked_resource = self.dataset if attach_resource else None
    # values applied only when the row has to be created
    creation_values = {
        "should_be_harvested": True,
        "remote_resource_type": self.dataset.resource_type,
        "last_updated": timezone.now(),
        "last_refreshed": timezone.now(),
        "last_harvested": timezone.now(),
        "last_harvesting_succeeded": True,
    }
    HarvestableResource.objects.get_or_create(
        harvester=self.harvester,
        unique_identifier=self.dataset.alternate,
        geonode_resource=linked_resource,
        title="Some title",
        defaults=creation_values,
    )
64 changes: 64 additions & 0 deletions geonode/harvesting/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#########################################################################
#
# Copyright (C) 2025 OSGeo
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
#########################################################################
import logging
from django.utils import timezone
from geonode.harvesting.models import HarvestableResource, Harvester


logger = logging.getLogger(__name__)


def create_harvestable_resource(geonode_resource, service_url):
    """
    Connect a GeoNode resource with the harvester registered for ``service_url``.

    The harvester is looked up by its ``remote_url``. If a HarvestableResource
    whose ``unique_identifier`` equals the resource alternate already exists,
    the GeoNode resource is attached to it when it has none yet; if it is
    already bound to a different resource a warning is logged and nothing is
    changed. When no HarvestableResource exists, a new one is created.

    :param geonode_resource: the resource to link; its ``alternate``,
        ``title``, ``resource_type`` and ``pk`` are read
    :param service_url: url of the remote service, matched against
        ``Harvester.remote_url``
    :return: always ``None``
    """
    harvester = Harvester.objects.filter(remote_url=service_url).first()
    if not harvester:
        logger.warning("The WMS layer does not belong to any known remote service")
        return

    # Per review feedback: should_be_harvested is only consumed by a scheduled
    # harvester to know what to harvest on its next run, so it must follow the
    # harvester scheduling flag instead of being forced to True.
    should_be_harvested = harvester.scheduling_enabled

    # NOTE(review): the lookup is not restricted to the harvester found above;
    # confirm that identifiers cannot collide across different harvesters.
    hresource = HarvestableResource.objects.filter(unique_identifier=geonode_resource.alternate).first()
    if hresource:
        logger.info("Harvestable resource already exists, linking geonode resource...")
        # if exists, we need to be sure that the resource from geonode is getting connected
        if not hresource.geonode_resource:
            hresource.geonode_resource = geonode_resource
            hresource.should_be_harvested = should_be_harvested
            hresource.save()
        elif hresource.geonode_resource.pk != geonode_resource.pk:
            logger.warning(
                "The Resource assigned to the current HarvestableResource is different from the one provided, skipping..."
            )
        return

    # a single timestamp keeps the three bookkeeping dates aligned
    timestamp = timezone.now()

    HarvestableResource.objects.create(
        harvester=harvester,
        unique_identifier=geonode_resource.alternate,
        geonode_resource=geonode_resource,
        title=geonode_resource.title,
        should_be_harvested=should_be_harvested,
        remote_resource_type=geonode_resource.resource_type,
        last_updated=timestamp,
        last_refreshed=timestamp,
        last_harvested=timestamp,
        last_harvesting_succeeded=True,
    )
4 changes: 2 additions & 2 deletions geonode/upload/handlers/remote/serializers/wms.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ class Meta:
model = RemoteResourceSerializer.Meta.model
ref_name = "RemoteWMSSerializer"
fields = RemoteResourceSerializer.Meta.fields + (
"lookup",
"identifier",
"bbox",
"parse_remote_metadata",
)

lookup = serializers.CharField(required=True)
identifier = serializers.CharField(required=True)
bbox = serializers.ListField(required=False)
parse_remote_metadata = serializers.BooleanField(required=False, default=False)
8 changes: 4 additions & 4 deletions geonode/upload/handlers/remote/tests/test_wms.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,15 @@ def setUpClass(cls):
"url": "http://invalid.com",
"type": "invalid",
"title": "This will fail",
"lookup": "abc124",
"identifier": "abc124",
"bbox": ["1", "2", "3", "4"],
"parse_remote_metadata": False,
}
cls.valid_payload_with_parse_false = {
"url": cls.valid_url,
"type": "wms",
"title": "This will fail",
"lookup": "abc124",
"identifier": "abc124",
"bbox": ["1", "2", "3", "4"],
"parse_remote_metadata": False,
}
Expand All @@ -58,7 +58,7 @@ def setUpClass(cls):
"url": cls.valid_url,
"type": "wms",
"title": "This will fail",
"lookup": "abc124",
"identifier": "abc124",
"bbox": ["1", "2", "3", "4"],
"parse_remote_metadata": True,
}
Expand Down Expand Up @@ -123,7 +123,7 @@ def test_extract_params_from_data(self):
self.assertTrue("title" in actual)
self.assertTrue("url" in actual)
self.assertTrue("type" in actual)
self.assertTrue("lookup" in actual)
self.assertTrue("identifier" in actual)
self.assertTrue("parse_remote_metadata" in actual)
self.assertTrue("bbox" in actual)

Expand Down
15 changes: 11 additions & 4 deletions geonode/upload/handlers/remote/wms.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#########################################################################
import logging
from django.conf import settings
from geonode.harvesting.utils import create_harvestable_resource
from geonode.layers.models import Dataset
from geonode.upload.handlers.common.remote import BaseRemoteResourceHandler
from geonode.services import enumerations
Expand Down Expand Up @@ -61,7 +62,7 @@ def extract_params_from_data(_data, action=None):
"""
payload, original_data = BaseRemoteResourceHandler.extract_params_from_data(_data, action=action)
if action != exa.COPY.value:
payload["lookup"] = original_data.pop("lookup", None)
payload["identifier"] = original_data.pop("identifier", None)
payload["bbox"] = original_data.pop("bbox", None)
payload["parse_remote_metadata"] = original_data.pop("parse_remote_metadata", None)

Expand All @@ -80,7 +81,7 @@ def prepare_import(self, files, execution_id, **kwargs):
to_update = {
"ows_url": ows_url,
"parsed_url": parsed_url,
"remote_resource_id": _exec.input_params.get("lookup", None),
"remote_resource_id": _exec.input_params.get("identifier", None),
}
if _exec.input_params.get("parse_remote_metadata", False):
try:
Expand All @@ -100,7 +101,7 @@ def prepare_import(self, files, execution_id, **kwargs):

def get_wms_resource(self, _exec):
    """
    Return the entry of the remote WMS service selected by the
    ``identifier`` input parameter of the execution request.

    :param _exec: execution request; its ``input_params`` must provide
        the ``url`` of the service and the ``identifier`` of the layer
    """
    _, wms = WebMapService(_exec.input_params.get("url"))
    # the layer is addressed through "identifier"; the former "lookup"
    # parameter is no longer part of the payload
    wms_resource = wms[_exec.input_params.get("identifier")]
    return wms_resource

def generate_alternate(
Expand All @@ -114,7 +115,7 @@ def generate_alternate(
):
"""
For WMS we dont want to generate an alternate, otherwise we cannot use
the alternate to lookup the layer in the remote service
the alternate to identify the layer in the remote service
"""
return layer_name, payload_alternate

Expand All @@ -136,6 +137,12 @@ def create_geonode_resource(
if remote_bbox:
resource.set_bbox_polygon(remote_bbox, "EPSG:4326")
resource_manager.set_thumbnail(None, instance=resource)

harvester_url = _exec.input_params.get("parsed_url", None)
if harvester_url:
# call utils to connect harvester and resource
create_harvestable_resource(resource, service_url=harvester_url)

return resource

def generate_resource_payload(self, layer_name, alternate, asset, _exec, workspace, **kwargs):
Expand Down
Loading
Loading