Skip to content

Commit c7f3140

Browse files
staintetron
authored andcommitted
Refactor Windows relative path support (#131)
* Refactor Windows relative path support
1 parent 2d94392 commit c7f3140

File tree

2 files changed

+157
-13
lines changed

2 files changed

+157
-13
lines changed

schema_salad/ref_resolver.py

Lines changed: 53 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@
4141
DocumentOrStrType = TypeVar(
4242
'DocumentOrStrType', CommentedSeq, CommentedMap, six.text_type)
4343

44+
_re_drive = re.compile(r"/([a-zA-Z]):")
45+
4446
def file_uri(path, split_frag=False): # type: (str, bool) -> str
4547
if path.startswith("file://"):
4648
return path
@@ -177,23 +179,62 @@ def check_exists(self, url): # type: (Text) -> bool
177179
raise ValueError('Unsupported scheme in url: %s' % url)
178180

179181
def urljoin(self, base_url, url): # type: (Text, Text) -> Text
182+
basesplit = urllib.parse.urlsplit(base_url)
183+
split = urllib.parse.urlsplit(url)
184+
if (basesplit.scheme and basesplit.scheme != "file" and split.scheme == "file"):
185+
raise ValueError("Not resolving potential remote exploit %s from base %s" % (url, base_url))
180186

181-
# On windows urljoin consider drive name as scheme and forces it over base url's scheme,
182-
# here we are forcing base url's scheme over url
183187
if sys.platform == 'win32':
184188
if (base_url == url):
185189
return url
186190
basesplit = urllib.parse.urlsplit(base_url)
187-
if basesplit.scheme:
188-
split = urllib.parse.urlsplit(url)
189-
if split.scheme:
190-
if split.scheme in ['http','https','file']:
191-
url = urllib.parse.urlunsplit(('', split.netloc, split.path, split.query, split.fragment))
192-
else:
193-
url= urllib.parse.urlunsplit((basesplit.scheme, split.netloc, urllib.parse.urlunsplit((split.scheme, '', split.path,'', '')), split.query, split.fragment))
194-
return urllib.parse.urljoin(base_url, url)
195-
else:
196-
return urllib.parse.urljoin(base_url, url)
191+
# note that below might split
192+
# "C:" with "C" as URI scheme
193+
split = urllib.parse.urlsplit(url)
194+
195+
has_drive = split.scheme and len(split.scheme) == 1
196+
197+
if basesplit.scheme == "file":
198+
# Special handling of relative file references on Windows
199+
# as urllib seems to not be quite up to the job
200+
201+
# netloc MIGHT appear in equivalents of UNC Strings
202+
# \\server1.example.com\path as
203+
# file:///server1.example.com/path
204+
# https://tools.ietf.org/html/rfc8089#appendix-E.3.2
205+
# (TODO: test this)
206+
netloc = split.netloc or basesplit.netloc
207+
208+
# Check if url is a local path like "C:/Users/fred"
209+
# or actually an absolute URI like http://example.com/fred
210+
if has_drive:
211+
# Assume split.scheme is actually a drive, e.g. "C:"
212+
# so we'll recombine into a path
213+
path_with_drive = urllib.parse.urlunsplit((split.scheme, '', split.path,'', ''))
214+
# Compose new file:/// URI with path_with_drive
215+
# .. carrying over any #fragment (?query just in case..)
216+
return urllib.parse.urlunsplit(("file", netloc,
217+
path_with_drive, split.query, split.fragment))
218+
if (not split.scheme and not netloc and
219+
split.path and split.path.startswith("/")):
220+
# Relative - but does it have a drive?
221+
base_drive = _re_drive.match(basesplit.path)
222+
drive = _re_drive.match(split.path)
223+
if base_drive and not drive:
224+
# Keep drive letter from base_url
225+
# https://tools.ietf.org/html/rfc8089#appendix-E.2.1
226+
# e.g. urljoin("file:///D:/bar/a.txt", "/foo/b.txt") == file:///D:/foo/b.txt
227+
path_with_drive = "/%s:%s" % (base_drive.group(1), split.path)
228+
return urllib.parse.urlunsplit(("file", netloc, path_with_drive,
229+
split.query, split.fragment))
230+
231+
# else: fall-through to resolve as relative URI
232+
elif has_drive:
233+
# Base is http://something but url is C:/something - which urllib would wrongly
234+
# resolve as an absolute path that could later be used to access local files
235+
raise ValueError("Not resolving potential remote exploit %s from base %s" % (url, base_url))
236+
237+
return urllib.parse.urljoin(base_url, url)
197238

198239
class Loader(object):
199240
def __init__(self,
@@ -256,7 +297,6 @@ def __init__(self,
256297
else:
257298
self.fetcher_constructor = DefaultFetcher
258299
self.fetcher = self.fetcher_constructor(self.cache, self.session)
259-
260300
self.fetch_text = self.fetcher.fetch_text
261301
self.check_exists = self.fetcher.check_exists
262302

schema_salad/tests/test_ref_resolver.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,3 +53,107 @@ def test_Loader_initialisation_with_neither_TMP_HOME_set(tmp_dir_fixture):
5353

5454
loader = Loader(ctx={})
5555
assert isinstance(loader.session, Session)
56+
57+
def test_DefaultFetcher_urljoin_win32(tmp_dir_fixture):
58+
import os
59+
import sys
60+
from schema_salad.ref_resolver import DefaultFetcher
61+
from requests import Session
62+
63+
# Ensure HOME is set.
64+
os.environ["HOME"] = tmp_dir_fixture
65+
66+
actual_platform = sys.platform
67+
try:
68+
# For this test always pretend we're on Windows
69+
sys.platform = "win32"
70+
fetcher = DefaultFetcher({}, None)
71+
# Relative path, same folder
72+
url = fetcher.urljoin("file:///C:/Users/fred/foo.cwl", "soup.cwl")
73+
assert url == "file:///C:/Users/fred/soup.cwl"
74+
# Relative path, sub folder
75+
url = fetcher.urljoin("file:///C:/Users/fred/foo.cwl", "foo/soup.cwl")
76+
assert url == "file:///C:/Users/fred/foo/soup.cwl"
77+
# relative climb-up path
78+
url = fetcher.urljoin("file:///C:/Users/fred/foo.cwl", "../alice/soup.cwl")
79+
assert url == "file:///C:/Users/alice/soup.cwl"
80+
81+
# Path with drive: should not be treated as relative to directory
82+
# Note: \ would already have been converted to / by resolve_ref()
83+
url = fetcher.urljoin("file:///C:/Users/fred/foo.cwl", "c:/bar/soup.cwl")
84+
assert url == "file:///c:/bar/soup.cwl"
85+
# /C:/ (regular URI absolute path)
86+
url = fetcher.urljoin("file:///C:/Users/fred/foo.cwl", "/c:/bar/soup.cwl")
87+
assert url == "file:///c:/bar/soup.cwl"
88+
# Relative, change drive
89+
url = fetcher.urljoin("file:///C:/Users/fred/foo.cwl", "D:/baz/soup.cwl")
90+
assert url == "file:///d:/baz/soup.cwl"
91+
# Relative from root of base's D: drive
92+
url = fetcher.urljoin("file:///d:/baz/soup.cwl", "/foo/soup.cwl")
93+
assert url == "file:///d:/foo/soup.cwl"
94+
95+
# resolving absolute non-drive URIs still works
96+
url = fetcher.urljoin("file:///C:/Users/fred/foo.cwl", "http://example.com/bar/soup.cwl")
97+
assert url == "http://example.com/bar/soup.cwl"
98+
# and of course relative paths from http://
99+
url = fetcher.urljoin("http://example.com/fred/foo.cwl", "soup.cwl")
100+
assert url == "http://example.com/fred/soup.cwl"
101+
102+
# Stay on http:// and same host
103+
url = fetcher.urljoin("http://example.com/fred/foo.cwl", "/bar/soup.cwl")
104+
assert url == "http://example.com/bar/soup.cwl"
105+
106+
107+
# Security concern - can't resolve file: from http:
108+
with pytest.raises(ValueError):
109+
url = fetcher.urljoin("http://example.com/fred/foo.cwl", "file:///c:/bar/soup.cwl")
110+
# Drive-relative -- should NOT return "absolute" URI c:/bar/soup.cwl"
111+
# as that is a potential remote exploit
112+
with pytest.raises(ValueError):
113+
url = fetcher.urljoin("http://example.com/fred/foo.cwl", "c:/bar/soup.cwl")
114+
115+
finally:
116+
sys.platform = actual_platform
117+
118+
def test_DefaultFetcher_urljoin_linux(tmp_dir_fixture):
119+
import os
120+
import sys
121+
from schema_salad.ref_resolver import DefaultFetcher
122+
from requests import Session
123+
124+
# Ensure HOME is set.
125+
os.environ["HOME"] = tmp_dir_fixture
126+
127+
actual_platform = sys.platform
128+
try:
129+
# Pretend it's Linux (e.g. not win32)
130+
sys.platform = "linux2"
131+
fetcher = DefaultFetcher({}, None)
132+
url = fetcher.urljoin("file:///home/fred/foo.cwl", "soup.cwl")
133+
assert url == "file:///home/fred/soup.cwl"
134+
135+
url = fetcher.urljoin("file:///home/fred/foo.cwl", "../alice/soup.cwl")
136+
assert url == "file:///home/alice/soup.cwl"
137+
# relative from root
138+
url = fetcher.urljoin("file:///home/fred/foo.cwl", "/baz/soup.cwl")
139+
assert url == "file:///baz/soup.cwl"
140+
141+
url = fetcher.urljoin("file:///home/fred/foo.cwl", "http://example.com/bar/soup.cwl")
142+
assert url == "http://example.com/bar/soup.cwl"
143+
144+
url = fetcher.urljoin("http://example.com/fred/foo.cwl", "soup.cwl")
145+
assert url == "http://example.com/fred/soup.cwl"
146+
147+
# Root-relative -- here relative to http host, not file:///
148+
url = fetcher.urljoin("http://example.com/fred/foo.cwl", "/bar/soup.cwl")
149+
assert url == "http://example.com/bar/soup.cwl"
150+
151+
# Security concern - can't resolve file: from http:
152+
with pytest.raises(ValueError):
153+
url = fetcher.urljoin("http://example.com/fred/foo.cwl", "file:///bar/soup.cwl")
154+
155+
# But this one is not "dangerous" on Linux
156+
fetcher.urljoin("http://example.com/fred/foo.cwl", "c:/bar/soup.cwl")
157+
158+
finally:
159+
sys.platform = actual_platform

0 commit comments

Comments
 (0)