Skip to content

Commit b40e4e8

Browse files
author
Tom McCormick
committed
use catalog env configs and update to use default scheme and netloc from properties
1 parent ae22e64 commit b40e4e8

File tree

2 files changed

+24
-30
lines changed

2 files changed

+24
-30
lines changed

pyiceberg/io/pyarrow.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ def __init__(self, properties: Properties = EMPTY_DICT):
387387
super().__init__(properties=properties)
388388

389389
@staticmethod
390-
def parse_location(location: str) -> Tuple[str, str, str]:
390+
def parse_location(location: str, properties: Properties=EMPTY_DICT) -> Tuple[str, str, str]:
391391
"""Return (scheme, netloc, path) for the given location.
392392
393393
Uses the DEFAULT_SCHEME and DEFAULT_NETLOC entries of `properties`
@@ -396,8 +396,8 @@ def parse_location(location: str) -> Tuple[str, str, str]:
396396
uri = urlparse(location)
397397

398398
# Load defaults from the supplied properties
399-
default_scheme = os.getenv("DEFAULT_SCHEME", "file")
400-
default_netloc = os.getenv("DEFAULT_NETLOC", "")
399+
default_scheme = properties.get("DEFAULT_SCHEME", "file")
400+
default_netloc = properties.get("DEFAULT_NETLOC", "")
401401

402402
# Apply logic
403403
scheme = uri.scheme or default_scheme
@@ -599,7 +599,7 @@ def new_input(self, location: str) -> PyArrowFile:
599599
Returns:
600600
PyArrowFile: A PyArrowFile instance for the given location.
601601
"""
602-
scheme, netloc, path = self.parse_location(location)
602+
scheme, netloc, path = self.parse_location(location, self.properties)
603603
return PyArrowFile(
604604
fs=self.fs_by_scheme(scheme, netloc),
605605
location=location,
@@ -616,7 +616,7 @@ def new_output(self, location: str) -> PyArrowFile:
616616
Returns:
617617
PyArrowFile: A PyArrowFile instance for the given location.
618618
"""
619-
scheme, netloc, path = self.parse_location(location)
619+
scheme, netloc, path = self.parse_location(location, self.properties)
620620
return PyArrowFile(
621621
fs=self.fs_by_scheme(scheme, netloc),
622622
location=location,
@@ -637,7 +637,7 @@ def delete(self, location: Union[str, InputFile, OutputFile]) -> None:
637637
an AWS error code 15.
638638
"""
639639
str_location = location.location if isinstance(location, (InputFile, OutputFile)) else location
640-
scheme, netloc, path = self.parse_location(str_location)
640+
scheme, netloc, path = self.parse_location(str_location, self.properties)
641641
fs = self.fs_by_scheme(scheme, netloc)
642642

643643
try:

tests/io/test_pyarrow.py

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2652,27 +2652,21 @@ def test_parse_location_environment_defaults() -> None:
26522652
assert netloc == ""
26532653
assert path == "/foo/bar"
26542654

2655-
try:
2656-
# Test with environment variables set
2657-
os.environ["DEFAULT_SCHEME"] = "scheme"
2658-
os.environ["DEFAULT_NETLOC"] = "netloc:8000"
2659-
2660-
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar")
2661-
assert scheme == "scheme"
2662-
assert netloc == "netloc:8000"
2663-
assert path == "netloc:8000/foo/bar"
2664-
2665-
# Set environment variables
2666-
os.environ["DEFAULT_SCHEME"] = "hdfs"
2667-
os.environ["DEFAULT_NETLOC"] = "netloc:8000"
2668-
2669-
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar")
2670-
assert scheme == "hdfs"
2671-
assert netloc == "netloc:8000"
2672-
assert path == "/foo/bar"
2673-
finally:
2674-
# Clean up environment variables
2675-
if "DEFAULT_SCHEME" in os.environ:
2676-
del os.environ["DEFAULT_SCHEME"]
2677-
if "DEFAULT_NETLOC" in os.environ:
2678-
del os.environ["DEFAULT_NETLOC"]
2655+
# Test with properties set
2656+
properties = dict()
2657+
properties["DEFAULT_SCHEME"] = "scheme"
2658+
properties["DEFAULT_NETLOC"] = "netloc:8000"
2659+
2660+
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar", properties=properties)
2661+
assert scheme == "scheme"
2662+
assert netloc == "netloc:8000"
2663+
assert path == "netloc:8000/foo/bar"
2664+
2665+
# Set properties
2666+
properties["DEFAULT_SCHEME"] = "hdfs"
2667+
properties["DEFAULT_NETLOC"] = "netloc:8000"
2668+
2669+
scheme, netloc, path = PyArrowFileIO.parse_location("/foo/bar", properties=properties)
2670+
assert scheme == "hdfs"
2671+
assert netloc == "netloc:8000"
2672+
assert path == "/foo/bar"

0 commit comments

Comments
 (0)