Skip to content

Commit 65e8d4d

Browse files
authored
feat: Add Python version check and warning (#114)
1 parent 3a6a218 commit 65e8d4d

File tree

2 files changed

+143
-0
lines changed

2 files changed

+143
-0
lines changed

google/cloud/dataproc_spark_connect/session.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,11 @@ def _get_dataproc_config(self):
501501
dataproc_config.runtime_config.version = (
502502
DataprocSparkSession._DEFAULT_RUNTIME_VERSION
503503
)
504+
505+
# Check for Python version mismatch with runtime for UDF compatibility
506+
self._check_python_version_compatibility(
507+
dataproc_config.runtime_config.version
508+
)
504509
if (
505510
not dataproc_config.environment_config.execution_config.authentication_config.user_workload_authentication_type
506511
and "DATAPROC_SPARK_CONNECT_AUTH_TYPE" in os.environ
@@ -586,6 +591,32 @@ def _get_dataproc_config(self):
586591
)
587592
return dataproc_config
588593

594+
def _check_python_version_compatibility(self, runtime_version):
    """Warn when the client Python version differs from the runtime's Python.

    The lookup is keyed on the ``major.minor`` part of the runtime version,
    so a full version such as ``"2.2.5"`` is checked the same way as
    ``"2.2"``. Runtime versions that are not in the mapping (or are not
    strings) are ignored silently.

    Args:
        runtime_version: Dataproc runtime version string, e.g. ``"2.2"``
            or ``"2.2.5"``.

    Returns:
        None. A warning is emitted via ``warnings.warn`` on mismatch.
    """
    # Imported locally so that tests patching ``sys.version_info`` and
    # ``warnings.warn`` affect the names looked up here.
    import sys
    import warnings

    # Runtime version (major.minor) to server Python version mapping
    RUNTIME_PYTHON_MAP = {
        "1.2": (3, 12),
        "2.2": (3, 12),
        "2.3": (3, 11),
    }

    if not isinstance(runtime_version, str):
        return

    # Reduce a full version like "2.2.5" to its "2.2" lookup key; an exact
    # "major.minor" string is left unchanged by this normalization.
    runtime_key = ".".join(runtime_version.strip().split(".")[:2])
    server_python = RUNTIME_PYTHON_MAP.get(runtime_key)
    if server_python is None:
        return

    client_python = sys.version_info[:2]  # (major, minor)
    if client_python == server_python:
        return

    # Message and call shape (positional message, keyword stacklevel) are
    # kept exactly as before; the runtime is reported as the caller gave it.
    warnings.warn(
        f"Python version mismatch detected: Client is using Python {client_python[0]}.{client_python[1]}, "
        f"but Dataproc runtime {runtime_version} uses Python {server_python[0]}.{server_python[1]}. "
        "This mismatch may cause issues with Python UDF (User Defined Function) compatibility. "
        f"Consider using Python {server_python[0]}.{server_python[1]} for optimal UDF execution.",
        stacklevel=3,
    )
619+
589620
def _display_view_session_details_button(self, session_id):
590621
try:
591622
session_url = f"https://console.cloud.google.com/dataproc/interactive/sessions/{session_id}/locations/{self._region}?project={self._project_id}"

tests/unit/test_init.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import sys
15+
import unittest
16+
from unittest import mock
17+
18+
from google.cloud.dataproc_spark_connect.session import DataprocSparkSession
19+
20+
21+
class TestPythonVersionCheck(unittest.TestCase):
    """Tests for ``_check_python_version_compatibility`` on the session builder."""

    @staticmethod
    def _run_check(client_version, runtime_version):
        """Run the check with a patched interpreter version.

        ``sys.version_info`` is replaced by ``client_version`` and
        ``warnings.warn`` by a mock for the duration of the call; the mock
        is returned so the caller can assert on how it was used.
        """
        with mock.patch("sys.version_info", client_version), mock.patch(
            "warnings.warn"
        ) as warn_mock:
            builder = DataprocSparkSession.Builder()
            builder._check_python_version_compatibility(runtime_version)
        return warn_mock

    def test_python_version_mismatch_warning_for_runtime_12(self):
        """Client 3.11 against runtime 1.2 (Python 3.12) must warn."""
        warn_mock = self._run_check((3, 11, 0), "1.2")
        warn_mock.assert_called_once_with(
            "Python version mismatch detected: Client is using Python 3.11, "
            "but Dataproc runtime 1.2 uses Python 3.12. "
            "This mismatch may cause issues with Python UDF (User Defined Function) compatibility. "
            "Consider using Python 3.12 for optimal UDF execution.",
            stacklevel=3,
        )

    def test_python_version_mismatch_warning_for_runtime_22(self):
        """Client 3.11 against runtime 2.2 (Python 3.12) must warn."""
        warn_mock = self._run_check((3, 11, 0), "2.2")
        warn_mock.assert_called_once_with(
            "Python version mismatch detected: Client is using Python 3.11, "
            "but Dataproc runtime 2.2 uses Python 3.12. "
            "This mismatch may cause issues with Python UDF (User Defined Function) compatibility. "
            "Consider using Python 3.12 for optimal UDF execution.",
            stacklevel=3,
        )

    def test_python_version_mismatch_warning_for_runtime_23(self):
        """Client 3.12 against runtime 2.3 (Python 3.11) must warn."""
        warn_mock = self._run_check((3, 12, 0), "2.3")
        warn_mock.assert_called_once_with(
            "Python version mismatch detected: Client is using Python 3.12, "
            "but Dataproc runtime 2.3 uses Python 3.11. "
            "This mismatch may cause issues with Python UDF (User Defined Function) compatibility. "
            "Consider using Python 3.11 for optimal UDF execution.",
            stacklevel=3,
        )

    def test_no_warning_when_python_versions_match_runtime_12(self):
        """Client 3.12 against runtime 1.2 (Python 3.12) stays silent."""
        self._run_check((3, 12, 0), "1.2").assert_not_called()

    def test_no_warning_when_python_versions_match_runtime_22(self):
        """Client 3.12 against runtime 2.2 (Python 3.12) stays silent."""
        self._run_check((3, 12, 0), "2.2").assert_not_called()

    def test_no_warning_when_python_versions_match_runtime_23(self):
        """Client 3.11 against runtime 2.3 (Python 3.11) stays silent."""
        self._run_check((3, 11, 0), "2.3").assert_not_called()

    def test_no_warning_for_unknown_runtime_version(self):
        """A runtime version absent from the mapping stays silent."""
        self._run_check((3, 10, 0), "unknown").assert_not_called()
109+
110+
111+
# Script entry point: when this file is executed directly (not imported),
# hand control to unittest's command-line runner.
if __name__ == "__main__":
112+
unittest.main()

0 commit comments

Comments
 (0)