Commit 9a85bb2

apply patches from duckdb v1.4-andium
1 parent 0518838 commit 9a85bb2

6 files changed: +39 additions, -19 deletions

src/httpfs.cpp

Lines changed: 1 addition & 1 deletion
@@ -729,7 +729,7 @@ void HTTPFileHandle::LoadFileInfo() {
 			return;
 		} else {
 			// HEAD request fail, use Range request for another try (read only one byte)
-			if (flags.OpenForReading() && res->status != HTTPStatusCode::NotFound_404) {
+			if (flags.OpenForReading() && res->status != HTTPStatusCode::NotFound_404 && res->status != HTTPStatusCode::MovedPermanently_301) {
 				auto range_res = hfs.GetRangeRequest(*this, path, {}, 0, nullptr, 2);
 				if (range_res->status != HTTPStatusCode::PartialContent_206 &&
 				    range_res->status != HTTPStatusCode::Accepted_202 && range_res->status != HTTPStatusCode::OK_200) {
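For context: a 301 from S3 means the bucket lives in a different region (and the response names the correct one in a header), so retrying the failed HEAD with a one-byte Range GET would only mask the redirect. A minimal standalone sketch of the decision, with a hypothetical ShouldRetryWithRange helper and a trimmed-down enum standing in for DuckDB's HTTPStatusCode:

#include <cassert>
#include <cstdint>

// Trimmed-down stand-in for DuckDB's HTTPStatusCode enum.
enum class HTTPStatusCode : uint16_t {
	OK_200 = 200,
	MovedPermanently_301 = 301,
	Forbidden_403 = 403,
	NotFound_404 = 404,
};

// Hypothetical helper mirroring the inline check above: a 404 means the object
// is genuinely absent and a 301 means the bucket is in another region, so in
// both cases the 1-byte Range GET fallback is skipped and the status propagates.
static bool ShouldRetryWithRange(bool open_for_reading, HTTPStatusCode head_status) {
	return open_for_reading && head_status != HTTPStatusCode::NotFound_404 &&
	       head_status != HTTPStatusCode::MovedPermanently_301;
}

int main() {
	assert(ShouldRetryWithRange(true, HTTPStatusCode::Forbidden_403));          // still retried
	assert(!ShouldRetryWithRange(true, HTTPStatusCode::NotFound_404));          // never retried
	assert(!ShouldRetryWithRange(true, HTTPStatusCode::MovedPermanently_301));  // new in this patch
	return 0;
}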

src/httpfs_extension.cpp

Lines changed: 1 addition & 1 deletion
@@ -70,7 +70,7 @@ static void LoadInternal(ExtensionLoader &loader) {
 	config.AddExtensionOption("ca_cert_file", "Path to a custom certificate file for self-signed certificates.",
 	                          LogicalType::VARCHAR, Value(""));
 	// Global S3 config
-	config.AddExtensionOption("s3_region", "S3 Region", LogicalType::VARCHAR, Value("us-east-1"));
+	config.AddExtensionOption("s3_region", "S3 Region", LogicalType::VARCHAR);
 	config.AddExtensionOption("s3_access_key_id", "S3 Access Key ID", LogicalType::VARCHAR);
 	config.AddExtensionOption("s3_secret_access_key", "S3 Access Key", LogicalType::VARCHAR);
 	config.AddExtensionOption("s3_session_token", "S3 Session Token", LogicalType::VARCHAR);

src/include/s3fs.hpp

Lines changed: 1 addition & 1 deletion
@@ -231,7 +231,7 @@ class S3FileSystem : public HTTPFileSystem {
 		return true;
 	}

-	static string GetS3BadRequestError(S3AuthParams &s3_auth_params);
+	static string GetS3BadRequestError(S3AuthParams &s3_auth_params, string correct_region = "");
 	static string GetS3AuthError(S3AuthParams &s3_auth_params);
 	static string GetGCSAuthError(S3AuthParams &s3_auth_params);
 	static HTTPException GetS3Error(S3AuthParams &s3_auth_params, const HTTPResponse &response, const string &url);

src/s3fs.cpp

Lines changed: 13 additions & 5 deletions
@@ -872,6 +872,7 @@ void S3FileHandle::Initialize(optional_ptr<FileOpener> opener) {
 		ErrorData error(ex);
 		bool refreshed_secret = false;
 		if (error.Type() == ExceptionType::IO || error.Type() == ExceptionType::HTTP) {
+			// legacy endpoint (no region) returns 400
 			auto context = opener->TryGetClientContext();
 			if (context) {
 				auto transaction = CatalogTransaction::GetSystemCatalogTransaction(*context);
@@ -887,9 +888,13 @@ void S3FileHandle::Initialize(optional_ptr<FileOpener> opener) {
 		auto &extra_info = error.ExtraInfo();
 		auto entry = extra_info.find("status_code");
 		if (entry != extra_info.end()) {
-			if (entry->second == "400") {
-				// 400: BAD REQUEST
-				auto extra_text = S3FileSystem::GetS3BadRequestError(auth_params);
+			if (entry->second == "301" || entry->second == "400") {
+				auto new_region = extra_info.find("header_x-amz-bucket-region");
+				string correct_region = "";
+				if (new_region != extra_info.end()) {
+					correct_region = new_region->second;
+				}
+				auto extra_text = S3FileSystem::GetS3BadRequestError(auth_params, correct_region);
 				throw Exception(error.Type(), error.RawMessage() + extra_text, extra_info);
 			}
 			if (entry->second == "403") {
@@ -1138,12 +1143,15 @@ bool S3FileSystem::ListFiles(const string &directory, const std::function<void(c
 	return true;
 }

-string S3FileSystem::GetS3BadRequestError(S3AuthParams &s3_auth_params) {
+string S3FileSystem::GetS3BadRequestError(S3AuthParams &s3_auth_params, string correct_region) {
 	string extra_text = "\n\nBad Request - this can be caused by the S3 region being set incorrectly.";
 	if (s3_auth_params.region.empty()) {
 		extra_text += "\n* No region is provided.";
 	} else {
-		extra_text += "\n* Provided region is \"" + s3_auth_params.region + "\"";
+		extra_text += "\n* Provided region is: \"" + s3_auth_params.region + "\"";
+	}
+	if (!correct_region.empty()) {
+		extra_text += "\n* Correct region is: \"" + correct_region + "\"";
 	}
 	return extra_text;
 }
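Putting the two hunks together: when a request fails with 301 or 400, the handler looks for the x-amz-bucket-region response header that S3 attaches to wrong-region redirects and threads it into the error text. A standalone sketch of that path, with std::map standing in for DuckDB's ErrorData::ExtraInfo() and a hypothetical BadRequestHint helper:

#include <iostream>
#include <map>
#include <string>

// Hypothetical helper combining the extraction and message-building steps above.
static std::string BadRequestHint(const std::string &provided_region,
                                  const std::map<std::string, std::string> &extra_info) {
	std::string text = "\n\nBad Request - this can be caused by the S3 region being set incorrectly.";
	if (provided_region.empty()) {
		text += "\n* No region is provided.";
	} else {
		text += "\n* Provided region is: \"" + provided_region + "\"";
	}
	// S3's wrong-region response carries the correct region in this header.
	auto hint = extra_info.find("header_x-amz-bucket-region");
	if (hint != extra_info.end() && !hint->second.empty()) {
		text += "\n* Correct region is: \"" + hint->second + "\"";
	}
	return text;
}

int main() {
	std::map<std::string, std::string> extra_info = {
	    {"status_code", "301"}, {"header_x-amz-bucket-region", "us-east-1"}};
	// Prints the region hint appended to the raw HTTP error message.
	std::cout << BadRequestHint("eu-west-1", extra_info) << "\n";
	return 0;
}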

test/sql/copy/csv/test_csv_remote.test

Lines changed: 13 additions & 9 deletions
@@ -7,15 +7,26 @@ require httpfs
 statement ok
 PRAGMA enable_verification

+# Test load from url with query string
+query IIIIIIIIIIII
+FROM sniff_csv('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv?v=1')
+----
+, " (empty) \n (empty) 0 0 [{'name': column00, 'type': BIGINT}, {'name': column01, 'type': VARCHAR}, {'name': column02, 'type': BIGINT}, {'name': column03, 'type': BIGINT}, {'name': column04, 'type': BIGINT}, {'name': column05, 'type': BIGINT}, {'name': column06, 'type': BIGINT}, {'name': column07, 'type': VARCHAR}, {'name': column08, 'type': VARCHAR}, {'name': column09, 'type': VARCHAR}, {'name': column10, 'type': VARCHAR}, {'name': column11, 'type': BIGINT}, {'name': column12, 'type': BIGINT}, {'name': column13, 'type': BIGINT}, {'name': column14, 'type': VARCHAR}, {'name': column15, 'type': VARCHAR}, {'name': column16, 'type': VARCHAR}, {'name': column17, 'type': BIGINT}] NULL NULL NULL FROM read_csv('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv?v=1', auto_detect=false, delim=',', quote='"', escape='', new_line='\n', skip=0, comment='', header=false, columns={'column00': 'BIGINT', 'column01': 'VARCHAR', 'column02': 'BIGINT', 'column03': 'BIGINT', 'column04': 'BIGINT', 'column05': 'BIGINT', 'column06': 'BIGINT', 'column07': 'VARCHAR', 'column08': 'VARCHAR', 'column09': 'VARCHAR', 'column10': 'VARCHAR', 'column11': 'BIGINT', 'column12': 'BIGINT', 'column13': 'BIGINT', 'column14': 'VARCHAR', 'column15': 'VARCHAR', 'column16': 'VARCHAR', 'column17': 'BIGINT'});
+
+
+# This test abuses the LOCAL_EXTENSION_REPO env to make sure tests are only run when running extension tests
+# in duckdb/duckdb. Otherwise you need to pass a data dir when exex
+
+require-env LOCAL_EXTENSION_REPO

 # regular csv file
 query ITTTIITITTIIII nosort webpagecsv
-SELECT * FROM read_csv_auto('duckdb/data/csv/real/web_page.csv') ORDER BY 1;
+SELECT * FROM read_csv_auto('data/csv/real/web_page.csv') ORDER BY 1;
 ----

 # file with gzip
 query IIIIIIIIIIIIIII nosort lineitemcsv
-SELECT * FROM read_csv_auto('duckdb/data/csv/lineitem1k.tbl.gz') ORDER BY ALL;
+SELECT * FROM read_csv_auto('data/csv/lineitem1k.tbl.gz') ORDER BY ALL;
 ----

 query ITTTIITITTIIII nosort webpagecsv
@@ -25,10 +36,3 @@ SELECT * FROM read_csv_auto('https://raw.githubusercontent.com/duckdb/duckdb/mai
 query IIIIIIIIIIIIIII nosort lineitemcsv
 select * from read_csv_auto('https://raw.githubusercontent.com/duckdb/duckdb/main/data/csv/lineitem1k.tbl.gz') ORDER BY ALL;
 ----
-
-
-# Test load from url with query string
-query IIIIIIIIIIII
-FROM sniff_csv('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv?v=1')
-----
-, " (empty) \n (empty) 0 0 [{'name': column00, 'type': BIGINT}, {'name': column01, 'type': VARCHAR}, {'name': column02, 'type': BIGINT}, {'name': column03, 'type': BIGINT}, {'name': column04, 'type': BIGINT}, {'name': column05, 'type': BIGINT}, {'name': column06, 'type': BIGINT}, {'name': column07, 'type': VARCHAR}, {'name': column08, 'type': VARCHAR}, {'name': column09, 'type': VARCHAR}, {'name': column10, 'type': VARCHAR}, {'name': column11, 'type': BIGINT}, {'name': column12, 'type': BIGINT}, {'name': column13, 'type': BIGINT}, {'name': column14, 'type': VARCHAR}, {'name': column15, 'type': VARCHAR}, {'name': column16, 'type': VARCHAR}, {'name': column17, 'type': BIGINT}] NULL NULL NULL FROM read_csv('https://github.com/duckdb/duckdb/raw/main/data/csv/customer.csv?v=1', auto_detect=false, delim=',', quote='"', escape='', new_line='\n', skip=0, comment='', header=false, columns={'column00': 'BIGINT', 'column01': 'VARCHAR', 'column02': 'BIGINT', 'column03': 'BIGINT', 'column04': 'BIGINT', 'column05': 'BIGINT', 'column06': 'BIGINT', 'column07': 'VARCHAR', 'column08': 'VARCHAR', 'column09': 'VARCHAR', 'column10': 'VARCHAR', 'column11': 'BIGINT', 'column12': 'BIGINT', 'column13': 'BIGINT', 'column14': 'VARCHAR', 'column15': 'VARCHAR', 'column16': 'VARCHAR', 'column17': 'BIGINT'});

test/sql/copy/s3/url_encode.test

Lines changed: 10 additions & 2 deletions
@@ -132,12 +132,20 @@ set s3_endpoint='';
 statement error
 SELECT * FROM 's3://test-bucket/whatever.parquet';
 ----
-<REGEX>:.*Unknown error for HTTP HEAD to 'http://test-bucket.s3.eu-west-1.amazonaws.com/whatever.parquet'.*
+<REGEX>:.*HTTP Error: Unable to connect to URL .*http://test-bucket.s3.eu-west-1.amazonaws.com/whatever.parquet.*: 301 .Moved Permanently..*
+.*
+.*Bad Request - this can be caused by the S3 region being set incorrectly.*
+.*Provided region is: .eu-west-1.*
+.*Correct region is: .us-east-1.*

 statement error
 SELECT * FROM 'r2://test-bucket/whatever.parquet';
 ----
-<REGEX>:.*Unknown error for HTTP HEAD to 'http://test-bucket.s3.eu-west-1.amazonaws.com/whatever.parquet'.*
+<REGEX>:.*HTTP Error: Unable to connect to URL .*http://test-bucket.s3.eu-west-1.amazonaws.com/whatever.parquet.*: 301 .Moved Permanently..*
+.*
+.*Bad Request - this can be caused by the S3 region being set incorrectly.*
+.*Provided region is: .eu-west-1.*
+.*Correct region is: .us-east-1.*

 statement error
 SELECT * FROM 'gcs://test-bucket/whatever.parquet';
