Skip to content

Commit b1e851e

Browse files
authored
Merge branch 'main' into add_clustering_gbq
2 parents 36e1b9e + 4340f53 commit b1e851e

File tree

3 files changed, with 35 additions and 14 deletions.

3 files changed

+35
-14
lines changed

pandas_gbq/schema/pandas_to_bigquery.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -139,14 +139,16 @@ def dataframe_to_bigquery_fields(
139139
bq_schema_out.append(bq_field)
140140
unknown_type_fields.append(bq_field)
141141

142-
# Catch any schema mismatch. The developer explicitly asked to serialize a
143-
# column, but it was not found.
142+
# Append any fields from the BigQuery schema that are not in the
143+
# DataFrame.
144144
if override_fields_unused:
145-
raise ValueError(
146-
"Provided BigQuery fields contain field(s) not present in DataFrame: {}".format(
147-
override_fields_unused
148-
)
145+
warnings.warn(
146+
"Provided BigQuery fields contain field(s) not present in "
147+
"DataFrame: {}".format(sorted(override_fields_unused)),
148+
UserWarning,
149149
)
150+
for field_name in sorted(override_fields_unused):
151+
bq_schema_out.append(override_fields_by_name[field_name])
150152

151153
# If schema detection was not successful for all columns, also try with
152154
# pyarrow, if available.

samples/snippets/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,4 +2,5 @@ google-cloud-bigquery-storage==2.33.1
22
google-cloud-bigquery==3.38.0
33
pandas-gbq==0.29.2
44
pandas==2.3.3
5-
pyarrow==21.0.0
5+
pyarrow==21.0.0; python_version <= "3.9"
6+
pyarrow==22.0.0; python_version > "3.9"

tests/unit/schema/test_pandas_to_bigquery.py

Lines changed: 25 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -179,16 +179,34 @@ def test_dataframe_to_bigquery_fields_fallback_needed_w_pyarrow(module_under_tes
179179

180180

181181
def test_dataframe_to_bigquery_fields_w_extra_fields(module_under_test):
182-
with pytest.raises(ValueError) as exc_context:
183-
module_under_test.dataframe_to_bigquery_fields(
184-
pandas.DataFrame(),
185-
override_bigquery_fields=(schema.SchemaField("not_in_df", "STRING"),),
182+
dataframe = pandas.DataFrame({"in_df": [1, 2, 3]})
183+
bq_schema = (
184+
schema.SchemaField("in_df", "INTEGER"),
185+
schema.SchemaField("not_in_df", "STRING"),
186+
schema.SchemaField("also_not_in_df", "INTEGER"),
187+
)
188+
189+
with pytest.warns(UserWarning) as record:
190+
returned_schema = module_under_test.dataframe_to_bigquery_fields(
191+
dataframe, override_bigquery_fields=bq_schema
186192
)
187-
message = str(exc_context.value)
193+
194+
assert len(record) == 1
195+
message = str(record[0].message)
188196
assert (
189-
"Provided BigQuery fields contain field(s) not present in DataFrame:" in message
197+
"Provided BigQuery fields contain field(s) not present in DataFrame" in message
190198
)
191-
assert "not_in_df" in message
199+
# Note: The field names are sorted in the warning message.
200+
assert "['also_not_in_df', 'not_in_df']" in message
201+
202+
expected_schema = (
203+
schema.SchemaField("in_df", "INTEGER"),
204+
# Note: The fields are sorted by name as they are added from the set of
205+
# unused fields.
206+
schema.SchemaField("also_not_in_df", "INTEGER"),
207+
schema.SchemaField("not_in_df", "STRING"),
208+
)
209+
assert returned_schema == expected_schema
192210

193211

194212
def test_dataframe_to_bigquery_fields_geography(module_under_test):

0 commit comments

Comments
 (0)