From be3f82424e1e2e3a8b46fbca335170f4d8069814 Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Thu, 8 May 2025 16:36:12 +0100 Subject: [PATCH 1/2] Fix current host IP address being reported as remote host --- cylc/flow/hostuserutil.py | 99 ++++++++++++------- .../19-platform_select/flow.cylc | 2 +- tests/unit/test_hostuserutil.py | 42 ++++++-- tests/unit/test_task_remote_mgr.py | 28 ++++-- 4 files changed, 121 insertions(+), 50 deletions(-) diff --git a/cylc/flow/hostuserutil.py b/cylc/flow/hostuserutil.py index 108a9b3b842..a39bfb87b94 100644 --- a/cylc/flow/hostuserutil.py +++ b/cylc/flow/hostuserutil.py @@ -50,7 +50,12 @@ import socket import sys from time import time -from typing import List, Optional, Tuple +from typing import ( + Dict, + List, + Optional, + Tuple, +) from cylc.flow.cfgspec.glbl_cfg import glbl_cfg @@ -81,13 +86,21 @@ def get_inst(cls, new=False, expire=None): cls._instance = cls(expire) return cls._instance - def __init__(self, expire): + def __init__(self, expire: float): self.expire_time = time() + expire - self._host = None # preferred name of localhost - self._host_exs = {} # host: socket.gethostbyname_ex(host), ... - self._remote_hosts = {} # host: is_remote, ... - self.user_pwent = None - self.remote_users = {} + self._host: Optional[str] = None # preferred name of localhost + self._host_exs: Dict[ # host: socket.gethostbyname_ex(host), ... + str, Tuple[str, List[str], List[str]] + ] = {} + self._remote_hosts: Dict[str, bool] = {} # host: is_remote, ... + self.user_pwent: Optional[pwd.struct_passwd] = None + self.remote_users: Dict[str, bool] = {} + + # On MacOS we have seen different results of socket.gethostbyname_ex() + # before and after calling socket.getfqdn() for the 1st time. See + # https://github.com/actions/runner-images/issues/8649#issuecomment-1855919367 + # Call it here at init to ensure we get consistent results from now on. + socket.getfqdn() @staticmethod def get_local_ip_address(target): @@ -117,23 +130,31 @@ def get_host_ip_by_name(target): def _get_host_info( self, target: Optional[str] = None ) -> Tuple[str, List[str], List[str]]: - """Return the extended info of the current host.""" + """Return the extended info of the current host. + + This should return the same result for all possible names or + IP addresses of the same host, as well as caching the results, + unlike socket.gethostbyname_ex() alone. + """ if target is None: target = socket.getfqdn() - if IS_MAC_OS and target in { - '1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.' - '0.0.0.0.0.0.ip6.arpa', - '1.0.0.127.in-addr.arpa', - }: - # Python's socket bindings don't play nicely with mac os - # so by default we get the above ip6.arpa address from - # socket.getfqdn, note this does *not* match `hostname -f`. - # https://github.com/cylc/cylc-flow/issues/2689 - # https://github.com/cylc/cylc-flow/issues/3595 - target = socket.gethostname() if target not in self._host_exs: try: - self._host_exs[target] = socket.gethostbyname_ex(target) + if IS_MAC_OS and target in { + '1.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.0.' + '0.0.0.0.0.0.ip6.arpa', + '1.0.0.127.in-addr.arpa', + }: + # Python's socket bindings don't play nicely with mac os + # so by default we get the above ip6.arpa address from + # socket.getfqdn, note this does *not* match `hostname -f`. + # https://github.com/cylc/cylc-flow/issues/2689 + # https://github.com/cylc/cylc-flow/issues/3595 + name = socket.gethostname() + else: + # Normalise the name or IP address to a FQDN + name = socket.getfqdn(socket.gethostbyaddr(target)[0]) + self._host_exs[target] = socket.gethostbyname_ex(name) except IOError as exc: if exc.filename is None: exc.filename = target @@ -190,26 +211,32 @@ def _get_user_pwent(self): self.remote_users.update(((self.user_pwent.pw_name, False),)) return self.user_pwent - def is_remote_host(self, name): - """Return True if name has different IP address than the current host. + def is_remote_host(self, host: Optional[str]) -> bool: + """Return True if the host is not the current host. + + If the given host's primary name does not match the current host's or + 'localhost', the host is considered remote. + + Args: + host: Either a host name or an IP address. Return False if name is None. Return True if host is unknown. """ - if name not in self._remote_hosts: - if not name or name.startswith("localhost"): - # e.g. localhost4.localdomain4 - self._remote_hosts[name] = False + if not host: + return False + if host not in self._remote_hosts: + try: + host_name = self._get_host_info(host)[0].lower() + except IOError: + self._remote_hosts[host] = True else: - try: - host_info = self._get_host_info(name) - except IOError: - self._remote_hosts[name] = True - else: - self._remote_hosts[name] = ( - host_info != self._get_host_info()) - return self._remote_hosts[name] + this_name = self._get_host_info()[0].lower() + self._remote_hosts[host] = ( + host_name not in {this_name, 'localhost'} + ) + return self._remote_hosts[host] def is_remote_user(self, name): """Return True if name is not a name of the current user. @@ -281,9 +308,9 @@ def is_remote_platform(platform): return HostUtil.get_inst()._is_remote_platform(platform) -def is_remote_host(name): +def is_remote_host(host: Optional[str]) -> bool: """Shorthand for HostUtil.get_inst().is_remote_host(name).""" - return HostUtil.get_inst().is_remote_host(name) + return HostUtil.get_inst().is_remote_host(host) def is_remote_user(name): diff --git a/tests/functional/job-submission/19-platform_select/flow.cylc b/tests/functional/job-submission/19-platform_select/flow.cylc index 5211df39699..20cd2c92cc6 100644 --- a/tests/functional/job-submission/19-platform_select/flow.cylc +++ b/tests/functional/job-submission/19-platform_select/flow.cylc @@ -45,7 +45,7 @@ purpose = """ [[localhost_subshell]] [[[remote]]] - host = $(echo "localhost4.localdomain4") + host = $(echo "localhost") [[fin_platform]] script = cylc remove "${CYLC_WORKFLOW_ID}//1/platform_*" diff --git a/tests/unit/test_hostuserutil.py b/tests/unit/test_hostuserutil.py index aa21632530a..61fe282b525 100644 --- a/tests/unit/test_hostuserutil.py +++ b/tests/unit/test_hostuserutil.py @@ -16,32 +16,47 @@ import os import re +from secrets import token_hex +import socket import pytest from cylc.flow.hostuserutil import ( + HostUtil, get_fqdn_by_host, get_host, + get_host_ip_by_name, get_user, get_user_home, is_remote_host, - is_remote_user + is_remote_user, ) +LOCALHOST_ALIASES = socket.gethostbyname_ex('localhost')[1] + + def test_is_remote_user_on_current_user(): """is_remote_user with current user.""" assert not is_remote_user(None) assert not is_remote_user(os.getenv('USER')) -def test_is_remote_host_on_localhost(monkeypatch): +@pytest.mark.parametrize( + 'host', + [ + None, + 'localhost', + pytest.param(os.getenv('HOSTNAME'), id="HOSTNAME-env-var"), + pytest.param(get_host(), id="get_host()"), + pytest.param(get_host_ip_by_name('localhost'), id="localhost-ip"), + pytest.param(get_host_ip_by_name(get_host()), id="get_host-ip"), + *LOCALHOST_ALIASES, + ], +) +def test_is_remote_host__localhost(host): """is_remote_host with localhost.""" - assert not is_remote_host(None) - assert not is_remote_host('localhost') - assert not is_remote_host('localhost4.localhost42') - assert not is_remote_host(os.getenv('HOSTNAME')) - assert not is_remote_host(get_host()) + assert not is_remote_host(host) def test_get_fqdn_by_host_on_bad_host(): @@ -73,3 +88,16 @@ def test_get_user(): def test_get_user_home(): """get_user_home.""" assert os.getenv('HOME') == get_user_home() + + +def test_get_host_info__basic(): + hu = HostUtil(expire=3600) + assert hu._get_host_info() == socket.gethostbyname_ex(socket.getfqdn()) + # Check it handles IP address: + ip = get_host_ip_by_name('localhost') + assert hu._get_host_info(ip) == socket.gethostbyname_ex('localhost') + # Check raised exception for bad host: + bad_host = f'nonexist{token_hex(8)}.com' + with pytest.raises(IOError) as exc: + hu._get_host_info(bad_host) + assert bad_host in str(exc.value) diff --git a/tests/unit/test_task_remote_mgr.py b/tests/unit/test_task_remote_mgr.py index 115d3a1c729..3e196212800 100644 --- a/tests/unit/test_task_remote_mgr.py +++ b/tests/unit/test_task_remote_mgr.py @@ -17,15 +17,30 @@ from contextlib import suppress from pathlib import Path from time import sleep +from typing import ( + Any, + Optional, +) +from unittest.mock import ( + MagicMock, + Mock, +) + import pytest -from typing import (Any, Optional) -from unittest.mock import MagicMock, Mock from cylc.flow.exceptions import PlatformError from cylc.flow.network.client_factory import CommsMeth from cylc.flow.task_remote_mgr import ( - REMOTE_FILE_INSTALL_DONE, REMOTE_INIT_IN_PROGRESS, TaskRemoteMgr) -from cylc.flow.workflow_files import WorkflowFiles, get_workflow_srv_dir + REMOTE_FILE_INSTALL_DONE, + REMOTE_INIT_IN_PROGRESS, + TaskRemoteMgr, +) +from cylc.flow.workflow_files import ( + WorkflowFiles, + get_workflow_srv_dir, +) + +from .test_hostuserutil import LOCALHOST_ALIASES Fixture = Any @@ -325,8 +340,9 @@ def test_eval_platform_bad(task_remote_mgr_eval): 'eval_str, remote_cmd_map, expected', [ *shared_eval_params, - pytest.param( - 'localhost4.localdomain4', {}, 'localhost', id="localhost_variant" + *( + pytest.param(alias, {}, 'localhost', id=alias) + for alias in LOCALHOST_ALIASES ), pytest.param( '`other cmd`', {'other cmd': 'nicole brennan'}, 'nicole brennan', From b7d5fd1ad8312407b6d77cdffbc2099b027121ae Mon Sep 17 00:00:00 2001 From: Ronnie Dutta <61982285+MetRonnie@users.noreply.github.com> Date: Thu, 8 May 2025 16:46:58 +0100 Subject: [PATCH 2/2] Test `HostUtil._get_host_info()` using reference implementation of `socket` module --- tests/unit/test_hostuserutil.py | 116 ++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) diff --git a/tests/unit/test_hostuserutil.py b/tests/unit/test_hostuserutil.py index 61fe282b525..ed052117cfd 100644 --- a/tests/unit/test_hostuserutil.py +++ b/tests/unit/test_hostuserutil.py @@ -18,6 +18,9 @@ import re from secrets import token_hex import socket +from types import SimpleNamespace +from typing import Optional +from unittest.mock import Mock import pytest @@ -36,6 +39,92 @@ LOCALHOST_ALIASES = socket.gethostbyname_ex('localhost')[1] +@pytest.fixture +def mock_socket(monkeypatch: pytest.MonkeyPatch): + """Reference implementation of socket functions, with some of their real + observed quirks. + + Yes, it is horribly quirky. It is based on one linux system, and other + systems or different /etc/hosts setup will result in different behaviour. + """ + this_name = 'NCC1701' + this_fqdn = f'{this_name}.starfleet.gov' + this_ip = '12.345.67.89' + this_ex = (this_fqdn, [this_name], [this_ip]) + + localhost = 'localhost' + localhost_fqdn = f'{localhost}.localdomain' + localhost_ip = '127.0.0.1' + localhost_aliases_v4 = [ + localhost_fqdn, + f'{localhost}4', + f'{localhost}4.localdomain4', + ] + localhost_aliases_v6 = [ + localhost_fqdn, + f'{localhost}6', + f'{localhost}6.localdomain6', + ] + localhost_ex = ( + localhost, + [*localhost_aliases_v4, *localhost_aliases_v6], + [localhost_ip, localhost_ip], + ) + + def _getfqdn(x: Optional[str] = None): + if x: + x = x.lower() + if not x or this_fqdn.lower().startswith(x) or x == this_ip: + return this_fqdn + if x in {localhost, localhost_fqdn, localhost_ip}: + return localhost_fqdn + return x + + def _gethostbyaddr(x: str): + x = x.lower() + if this_fqdn.lower().startswith(x) or x == this_ip: + return this_ex + if x in {localhost, localhost_fqdn, '::1', *localhost_aliases_v6}: + return (localhost, localhost_aliases_v6, ['::1']) + if x in {localhost_ip, *localhost_aliases_v4}: + return (localhost, localhost_aliases_v4, [localhost_ip]) + raise socket.gaierror("oopsie") + + def _gethostbyname_ex(x: str): + x = x.lower() + if x in {this_fqdn.lower(), this_name.lower()}: + return this_ex + if this_fqdn.lower().startswith(x): + return (this_fqdn, [], [this_ip]) + if x in {localhost, localhost_fqdn}: + return localhost_ex + if x in localhost_aliases_v6: + return (localhost, localhost_aliases_v6, ['::1']) + if x in localhost_aliases_v4: + return (localhost, localhost_aliases_v4, [localhost_ip]) + raise socket.gaierror("oopsie") + + mock_getfqdn = Mock(side_effect=_getfqdn) + monkeypatch.setattr('cylc.flow.hostuserutil.socket.getfqdn', mock_getfqdn) + mock_gethostbyaddr = Mock(side_effect=_gethostbyaddr) + monkeypatch.setattr( + 'cylc.flow.hostuserutil.socket.gethostbyaddr', mock_gethostbyaddr + ) + mock_gethostbyname_ex = Mock(side_effect=_gethostbyname_ex) + monkeypatch.setattr( + 'cylc.flow.hostuserutil.socket.gethostbyname_ex', mock_gethostbyname_ex + ) + return SimpleNamespace( + this_fqdn=this_fqdn, + this_ip=this_ip, + this_ex=this_ex, + localhost_ex=localhost_ex, + getfqdn=mock_getfqdn, + gethostbyaddr=mock_gethostbyaddr, + gethostbyname_ex=mock_gethostbyname_ex, + ) + + def test_is_remote_user_on_current_user(): """is_remote_user with current user.""" assert not is_remote_user(None) @@ -101,3 +190,30 @@ def test_get_host_info__basic(): with pytest.raises(IOError) as exc: hu._get_host_info(bad_host) assert bad_host in str(exc.value) + + +def test_get_host_info__advanced(mock_socket): + hu = HostUtil(expire=3600) + assert mock_socket.gethostbyname_ex.call_count == 0 + assert hu._get_host_info() == mock_socket.this_ex + assert mock_socket.gethostbyname_ex.call_count == 1 + # Test caching: + hu._get_host_info() + assert mock_socket.gethostbyname_ex.call_count == 1 + # Test variations of host name: + assert hu._get_host_info('NCC1701') == mock_socket.this_ex + assert hu._get_host_info('ncc1701.starfleet') == mock_socket.this_ex + # (Note:) + assert ( + mock_socket.gethostbyname_ex('ncc1701.starfleet') + != mock_socket.this_ex + ) + assert hu._get_host_info('localhost4') == mock_socket.localhost_ex + assert hu._get_host_info('localhost6') == mock_socket.localhost_ex + # Test IP address: + assert hu._get_host_info(mock_socket.this_ip) == mock_socket.this_ex + assert hu._get_host_info('127.0.0.1') == mock_socket.localhost_ex + # Test error: + with pytest.raises(IOError): + hu._get_host_info('nonexist') + assert 'nonexist' not in hu._host_exs