Skip to content

Commit 5c9796a

Browse files
committed
finish refactor
1 parent 284e084 commit 5c9796a

File tree

2 files changed

+73
-71
lines changed

2 files changed

+73
-71
lines changed

bindings/python/pymongoarrow/lib.pyx

Lines changed: 66 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -180,7 +180,9 @@ cdef class BuilderManager:
180180
builder.append_nulls(count - builder.length())
181181

182182
# Append the next value.
183-
builder.append_raw(doc_iter, value_t)
183+
status = builder.append_raw(doc_iter, value_t)
184+
if not status.ok():
185+
raise ValueError("Could not append raw value")
184186

185187
# Recurse into documents.
186188
if value_t == BSON_TYPE_DOCUMENT:
@@ -271,9 +273,11 @@ cdef class _ArrayBuilderBase:
271273
while bson_iter_next(&doc_iter):
272274
bson_iter_key(&doc_iter)
273275
value_t = bson_iter_type(&doc_iter)
274-
self.append_raw(&doc_iter, value_t)
276+
status = self.append_raw(&doc_iter, value_t)
277+
if not status.ok():
278+
raise ValueError("Could not append raw value of type", value_t)
275279

276-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
280+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
277281
pass
278282

279283
cdef shared_ptr[CArrayBuilder] get_builder(self):
@@ -309,14 +313,13 @@ cdef class StringBuilder(_ArrayBuilderBase):
309313
self.builder.reset(new CStringBuilder(pool))
310314
self.type_marker = BSON_TYPE_UTF8
311315

312-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
316+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
313317
cdef const char* value
314318
cdef uint32_t str_len
315319
if value_t == BSON_TYPE_UTF8:
316320
value = bson_iter_utf8(doc_iter, &str_len)
317-
self.builder.get().Append(value, str_len)
318-
else:
319-
self.builder.get().AppendNull()
321+
return self.builder.get().Append(value, str_len)
322+
return self.builder.get().AppendNull()
320323

321324
cdef shared_ptr[CArrayBuilder] get_builder(self):
322325
return <shared_ptr[CArrayBuilder]>self.builder
@@ -328,14 +331,13 @@ cdef class CodeBuilder(StringBuilder):
328331
self.builder.reset(new CStringBuilder(pool))
329332
self.type_marker = BSON_TYPE_CODE
330333

331-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
334+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
332335
cdef const char * bson_str
333336
cdef uint32_t str_len
334337
if value_t == BSON_TYPE_CODE:
335338
bson_str = bson_iter_code(doc_iter, &str_len)
336-
self.builder.get().Append(bson_str, str_len)
337-
else:
338-
self.builder.get().AppendNull()
339+
return self.builder.get().Append(bson_str, str_len)
340+
return self.builder.get().AppendNull()
339341

340342
cdef shared_ptr[CArrayBuilder] get_builder(self):
341343
return <shared_ptr[CArrayBuilder]>self.builder
@@ -353,11 +355,10 @@ cdef class ObjectIdBuilder(_ArrayBuilderBase):
353355
self.builder.reset(new CFixedSizeBinaryBuilder(dtype, pool))
354356
self.type_marker = BSON_TYPE_OID
355357

356-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
358+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
357359
if value_t == BSON_TYPE_OID:
358-
self.builder.get().Append(bson_iter_oid(doc_iter).bytes)
359-
else:
360-
self.builder.get().AppendNull()
360+
return self.builder.get().Append(bson_iter_oid(doc_iter).bytes)
361+
return self.builder.get().AppendNull()
361362

362363
cdef shared_ptr[CArrayBuilder] get_builder(self):
363364
return <shared_ptr[CArrayBuilder]>self.builder
@@ -374,22 +375,27 @@ cdef class Int32Builder(_ArrayBuilderBase):
374375
self.builder.reset(new CInt32Builder(pool))
375376
self.type_marker = BSON_TYPE_INT32
376377

377-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
378+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
378379
cdef double dvalue
380+
cdef int64_t ivalue
379381

380382
if (value_t == BSON_TYPE_INT32 or value_t == BSON_TYPE_BOOL or value_t == BSON_TYPE_INT64):
381-
# The builder will surface overflow errors.
382-
self.builder.get().Append(<int32_t>bson_iter_as_int64(doc_iter))
383-
elif value_t == BSON_TYPE_DOUBLE:
383+
# Check for overflow errors.
384+
ivalue = bson_iter_as_int64(doc_iter)
385+
if ivalue > INT_MAX or ivalue < INT_MIN:
386+
raise OverflowError("Overflowed Int32 value")
387+
return self.builder.get().Append(ivalue)
388+
if value_t == BSON_TYPE_DOUBLE:
384389
# Treat nan as null.
385390
dvalue = bson_iter_as_double(doc_iter)
386391
if isnan(dvalue):
387-
self.builder.get().AppendNull()
388-
else:
389-
# The builder will surface overflow errors.
390-
self.builder.get().Append(<int32_t>bson_iter_as_int64(doc_iter))
391-
else:
392-
self.builder.get().AppendNull()
392+
return self.builder.get().AppendNull()
393+
# Check for overflow errors.
394+
ivalue = bson_iter_as_int64(doc_iter)
395+
if ivalue > INT_MAX or ivalue < INT_MIN:
396+
raise OverflowError("Overflowed Int32 value")
397+
return self.builder.get().Append(ivalue)
398+
return self.builder.get().AppendNull()
393399

394400
cdef shared_ptr[CArrayBuilder] get_builder(self):
395401
return <shared_ptr[CArrayBuilder]>self.builder
@@ -403,22 +409,20 @@ cdef class Int64Builder(_ArrayBuilderBase):
403409
self.builder.reset(new CInt64Builder(pool))
404410
self.type_marker = BSON_TYPE_INT64
405411

406-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
412+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
407413
cdef double dvalue
408414

409415
if (value_t == BSON_TYPE_INT64 or
410416
value_t == BSON_TYPE_BOOL or
411417
value_t == BSON_TYPE_INT32):
412-
self.builder.get().Append(bson_iter_as_int64(doc_iter))
413-
elif value_t == BSON_TYPE_DOUBLE:
418+
return self.builder.get().Append(bson_iter_as_int64(doc_iter))
419+
if value_t == BSON_TYPE_DOUBLE:
414420
# Treat nan as null.
415421
dvalue = bson_iter_as_double(doc_iter)
416422
if isnan(dvalue):
417-
self.builder.get().AppendNull()
418-
else:
419-
self.builder.get().Append(bson_iter_as_int64(doc_iter))
420-
else:
421-
self.builder.get().AppendNull()
423+
return self.builder.get().AppendNull()
424+
return self.builder.get().Append(bson_iter_as_int64(doc_iter))
425+
return self.builder.get().AppendNull()
422426

423427
cdef shared_ptr[CArrayBuilder] get_builder(self):
424428
return <shared_ptr[CArrayBuilder]>self.builder
@@ -432,14 +436,13 @@ cdef class DoubleBuilder(_ArrayBuilderBase):
432436
self.builder.reset(new CDoubleBuilder(pool))
433437
self.type_marker = BSON_TYPE_DOUBLE
434438

435-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
439+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
436440
if (value_t == BSON_TYPE_DOUBLE or
437441
value_t == BSON_TYPE_BOOL or
438442
value_t == BSON_TYPE_INT32 or
439443
value_t == BSON_TYPE_INT64):
440-
self.builder.get().Append(bson_iter_as_double(doc_iter))
441-
else:
442-
self.builder.get().AppendNull()
444+
return self.builder.get().Append(bson_iter_as_double(doc_iter))
445+
return self.builder.get().AppendNull()
443446

444447
cdef shared_ptr[CArrayBuilder] get_builder(self):
445448
return <shared_ptr[CArrayBuilder]>self.builder
@@ -466,11 +469,10 @@ cdef class DatetimeBuilder(_ArrayBuilderBase):
466469
def unit(self):
467470
return self.dtype
468471

469-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
472+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
470473
if value_t == BSON_TYPE_DATE_TIME:
471-
self.builder.get().Append(bson_iter_date_time(doc_iter))
472-
else:
473-
self.builder.get().AppendNull()
474+
return self.builder.get().Append(bson_iter_date_time(doc_iter))
475+
return self.builder.get().AppendNull()
474476

475477
cdef shared_ptr[CArrayBuilder] get_builder(self):
476478
return <shared_ptr[CArrayBuilder]>self.builder
@@ -485,11 +487,10 @@ cdef class Date64Builder(_ArrayBuilderBase):
485487
self.builder.reset(new CDate64Builder(pool))
486488
self.type_marker = ARROW_TYPE_DATE64
487489

488-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
490+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
489491
if value_t == BSON_TYPE_DATE_TIME:
490-
self.builder.get().Append(bson_iter_date_time(doc_iter))
491-
else:
492-
self.builder.get().AppendNull()
492+
return self.builder.get().Append(bson_iter_date_time(doc_iter))
493+
return self.builder.get().AppendNull()
493494

494495
@property
495496
def unit(self):
@@ -509,17 +510,16 @@ cdef class Date32Builder(_ArrayBuilderBase):
509510
self.builder.reset(new CDate32Builder(pool))
510511
self.type_marker = ARROW_TYPE_DATE32
511512

512-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
513+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
513514
cdef int64_t value
514515
cdef int32_t seconds_val
515516

516517
if value_t == BSON_TYPE_DATE_TIME:
517518
value = bson_iter_date_time(doc_iter)
518519
# Convert from milliseconds to days (1000*60*60*24)
519520
seconds_val = value // 86400000
520-
self.builder.get().Append(seconds_val)
521-
else:
522-
self.builder.get().AppendNull()
521+
return self.builder.get().Append(seconds_val)
522+
return self.builder.get().AppendNull()
523523

524524
@property
525525
def unit(self):
@@ -539,8 +539,8 @@ cdef class NullBuilder(_ArrayBuilderBase):
539539
for i in range(count):
540540
self.append_null()
541541

542-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
543-
self.builder.get().AppendNull()
542+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
543+
return self.builder.get().AppendNull()
544544

545545
cdef shared_ptr[CArrayBuilder] get_builder(self):
546546
return <shared_ptr[CArrayBuilder]>self.builder
@@ -554,11 +554,10 @@ cdef class BoolBuilder(_ArrayBuilderBase):
554554
self.builder.reset(new CBooleanBuilder(pool))
555555
self.type_marker = BSON_TYPE_BOOL
556556

557-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
557+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
558558
if value_t == BSON_TYPE_BOOL:
559-
self.builder.get().Append(bson_iter_bool(doc_iter))
560-
else:
561-
self.builder.get().AppendNull()
559+
return self.builder.get().Append(bson_iter_bool(doc_iter))
560+
return self.builder.get().AppendNull()
562561

563562
cdef shared_ptr[CArrayBuilder] get_builder(self):
564563
return <shared_ptr[CArrayBuilder]>self.builder
@@ -577,22 +576,20 @@ cdef class Decimal128Builder(_ArrayBuilderBase):
577576
else:
578577
self.supported = 0
579578

580-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
579+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
581580
cdef uint8_t dec128_buf[16]
582581
cdef bson_decimal128_t dec128
583582

584583
if self.supported == 0:
585584
# We do not support big-endian systems.
586-
self.builder.get().AppendNull()
587-
return
585+
return self.builder.get().AppendNull()
588586

589587
if value_t == BSON_TYPE_DECIMAL128:
590588
bson_iter_decimal128(doc_iter, &dec128)
591589
memcpy(dec128_buf, &dec128.low, 8);
592590
memcpy(dec128_buf + 8, &dec128.high, 8)
593-
self.builder.get().Append(dec128_buf)
594-
else:
595-
self.builder.get().AppendNull()
591+
return self.builder.get().Append(dec128_buf)
592+
return self.builder.get().AppendNull()
596593

597594
cdef shared_ptr[CArrayBuilder] get_builder(self):
598595
return <shared_ptr[CArrayBuilder]>self.builder
@@ -615,19 +612,17 @@ cdef class BinaryBuilder(_ArrayBuilderBase):
615612
def subtype(self):
616613
return self._subtype
617614

618-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
615+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
619616
cdef const char * val_buf
620617
cdef uint32_t val_buf_len
621618
cdef bson_subtype_t subtype
622619

623620
if value_t == BSON_TYPE_BINARY:
624621
bson_iter_binary(doc_iter, &subtype, &val_buf_len, <const uint8_t **>&val_buf)
625622
if subtype != self._subtype:
626-
self.builder.get().AppendNull()
627-
else:
628-
self.builder.get().Append(val_buf, val_buf_len)
629-
else:
630-
self.builder.get().AppendNull()
623+
return self.builder.get().AppendNull()
624+
return self.builder.get().Append(val_buf, val_buf_len)
625+
return self.builder.get().AppendNull()
631626

632627
cdef shared_ptr[CArrayBuilder] get_builder(self):
633628
return <shared_ptr[CArrayBuilder]>self.builder
@@ -646,8 +641,9 @@ cdef class DocumentBuilder(_ArrayBuilderBase):
646641
self.type_marker = BSON_TYPE_DOCUMENT
647642
self.field_map = dict()
648643

649-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
644+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
650645
self.count += 1
646+
return CStatus_OK()
651647

652648
cpdef uint64_t length(self):
653649
return self.count
@@ -675,8 +671,8 @@ cdef class ListBuilder(_ArrayBuilderBase):
675671
self.count = 0
676672
self.type_marker = BSON_TYPE_ARRAY
677673

678-
cdef void append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t) except *:
679-
self.builder.get().Append(self.count)
674+
cdef CStatus append_raw(self, bson_iter_t * doc_iter, bson_type_t value_t):
675+
return self.builder.get().Append(self.count)
680676

681677
cpdef void append_count(self):
682678
self.count += 1

bindings/python/pymongoarrow/libarrow.pxd

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
from libcpp.vector cimport vector
1919
from libc.stdint cimport int32_t, uint8_t
2020
from pyarrow.lib cimport *
21-
from pyarrow.includes.libarrow cimport (CStatus, CMemoryPool) # noqa: E211
21+
from pyarrow.includes.libarrow cimport (CStatus, CStatus_OK, CMemoryPool) # noqa: E211
2222

2323

2424
# libarrow type wrappings
@@ -51,3 +51,9 @@ cdef extern from "arrow/builder.h" namespace "arrow" nogil:
5151

5252
cdef extern from "arrow/type_fwd.h" namespace "arrow" nogil:
5353
shared_ptr[CDataType] fixed_size_binary(int32_t byte_width)
54+
55+
56+
# Values used to check for overflow errors.
57+
cdef extern from "limits.h":
58+
cdef int INT_MAX
59+
cdef int INT_MIN

0 commit comments

Comments
 (0)