Skip to content

Commit 9aabe6b

Browse files
committed
(improvement) query, serializers: address PR review feedback
- Replace malloc/free with PyBytes_FromStringAndSize(NULL, ...) pattern in vector fast-paths, eliminating extra buffer copy and malloc(0) edge case - Change _check_int32_range to raise OverflowError instead of struct.error, consistent with _check_float_range - Remove unused imports (struct, malloc, free) - Differentiate error messages in _raise_bind_serialize_error: 'invalid type' for TypeError vs 'value out of range' for OverflowError/struct.error - Replace unused loop variable with while loop in UNSET_VALUE fill - Expand __getitem__ docstrings explaining performance rationale - Fix copyright header in test_parameter_binding.py
1 parent 5d684d1 commit 9aabe6b

3 files changed

Lines changed: 103 additions & 92 deletions

File tree

cassandra/query.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,9 @@ def _serializers(self):
547547
548548
The column_encryption_policy check is performed on every access (not
549549
cached) so that serializers are correctly bypassed if a policy is set
550-
after construction.
550+
after construction. This means the cache never goes stale: once a CE
551+
policy is present, we always return None and fall through to the
552+
encryption-aware bind path.
551553
"""
552554
if self.column_encryption_policy:
553555
return None
@@ -667,15 +669,25 @@ def _raise_bind_serialize_error(col_spec, value, exc):
667669
668670
Called from all three bind loop paths (CE, Cython, plain Python) to
669671
provide a uniform error message that includes the column name and
670-
expected type. struct.error arises from int32 out-of-range values;
671-
OverflowError from float out-of-range values. Other exception types
672-
(e.g. ValueError from VectorType dimension mismatch) propagate
673-
without wrapping.
672+
expected type. The message distinguishes between wrong-type and
673+
out-of-range scenarios:
674+
675+
- TypeError -> "invalid type"
676+
- OverflowError -> "value out of range"
677+
- struct.error -> "value out of range"
678+
679+
Other exception types (e.g. ValueError from VectorType dimension
680+
mismatch) propagate without wrapping.
674681
"""
675682
actual_type = type(value)
683+
if isinstance(exc, (OverflowError, struct.error)):
684+
reason = "value out of range"
685+
else:
686+
reason = "invalid type"
676687
message = (
677-
'Received an argument of invalid type for column "%s". '
678-
"Expected: %s, Got: %s; (%s)" % (col_spec.name, col_spec.type, actual_type, exc)
688+
'Received an argument with %s for column "%s". '
689+
"Expected: %s, Got: %s; (%s)"
690+
% (reason, col_spec.name, col_spec.type, actual_type, exc)
679691
)
680692
raise TypeError(message) from exc
681693

@@ -888,7 +900,7 @@ def bind(self, values):
888900
# Fill remaining unbound columns with UNSET_VALUE (v4+ feature).
889901
# The pre-allocated list already has slots for these, so index
890902
# assignment works directly without trimming first.
891-
for i in range(idx, col_meta_len):
903+
while idx < col_meta_len:
892904
idx = self._append_unset_value(idx)
893905
elif idx < col_meta_len:
894906
# Pre-v4: trim trailing unused slots (no UNSET_VALUE support)

cassandra/serializers.pyx

Lines changed: 79 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,12 @@ cqltype.serialize() classmethod.
2828

2929
from libc.stdint cimport int32_t
3030
from libc.string cimport memcpy
31-
from libc.stdlib cimport malloc, free
3231
from libc.float cimport FLT_MAX
3332
from libc.math cimport isinf, isnan
34-
from cpython.bytes cimport PyBytes_FromStringAndSize
33+
from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_AS_STRING
3534

3635
from cassandra import cqltypes
3736
import io
38-
import struct
3937
from cassandra.marshal import uvint_pack
4038

4139
cdef bint is_little_endian
@@ -79,15 +77,15 @@ cdef inline void _check_float_range(double value) except *:
7977
# ---------------------------------------------------------------------------
8078

8179
cdef inline void _check_int32_range(object value) except *:
82-
"""Raise struct.error for values outside the signed int32 range.
80+
"""Raise OverflowError for values outside the signed int32 range.
8381
84-
Matches the behaviour of struct.pack('>i', value), which raises
85-
struct.error for out-of-range values. The check must be done on the
86-
Python int *before* the C-level <int32_t> cast, which would silently
87-
truncate.
82+
Mirrors ``_check_float_range``: we intentionally raise OverflowError
83+
(not struct.error) so callers only need to catch one exception type
84+
for out-of-range values. The check must be done on the Python int
85+
*before* the C-level <int32_t> cast, which would silently truncate.
8886
"""
8987
if value > 2147483647 or value < -2147483648:
90-
raise struct.error(
88+
raise OverflowError(
9189
"'i' format requires -2147483648 <= number <= 2147483647"
9290
)
9391

@@ -226,116 +224,116 @@ cdef class SerVectorType(Serializer):
226224
cdef inline bytes _serialize_float(self, object values):
227225
"""Serialize a sequence of floats into a contiguous big-endian buffer.
228226
229-
Note: uses index-based access (values[i]) rather than iteration, so
230-
the input must support __getitem__ (e.g. list or tuple).
227+
Uses index-based access (values[i]) rather than iteration for
228+
performance — the input must support ``__getitem__`` (list, tuple,
229+
etc.). This is intentional: index access lets Cython emit a single
230+
``PyObject_GetItem`` call per element instead of iterator protocol
231+
overhead.
231232
"""
232233
cdef Py_ssize_t i
233234
cdef Py_ssize_t buf_size = self.vector_size * 4
234235
if buf_size == 0:
235236
return b""
236-
cdef char *buf = <char *>malloc(buf_size)
237-
if buf == NULL:
238-
raise MemoryError("Failed to allocate %d bytes for vector serialization" % buf_size)
237+
238+
cdef object result = PyBytes_FromStringAndSize(NULL, buf_size)
239+
cdef char *buf = PyBytes_AS_STRING(result)
239240

240241
cdef float val
241242
cdef char *src
242243
cdef char *dst
243244

244-
try:
245-
for i in range(self.vector_size):
246-
_check_float_range(<double>values[i])
247-
val = <float>values[i]
248-
src = <char *>&val
249-
dst = buf + i * 4
250-
251-
if is_little_endian:
252-
dst[0] = src[3]
253-
dst[1] = src[2]
254-
dst[2] = src[1]
255-
dst[3] = src[0]
256-
else:
257-
memcpy(dst, src, 4)
258-
259-
return PyBytes_FromStringAndSize(buf, buf_size)
260-
finally:
261-
free(buf)
245+
for i in range(self.vector_size):
246+
_check_float_range(<double>values[i])
247+
val = <float>values[i]
248+
src = <char *>&val
249+
dst = buf + i * 4
250+
251+
if is_little_endian:
252+
dst[0] = src[3]
253+
dst[1] = src[2]
254+
dst[2] = src[1]
255+
dst[3] = src[0]
256+
else:
257+
memcpy(dst, src, 4)
258+
259+
return result
262260

263261
cdef inline bytes _serialize_double(self, object values):
264262
"""Serialize a sequence of doubles into a contiguous big-endian buffer.
265263
266-
Note: uses index-based access (values[i]) rather than iteration, so
267-
the input must support __getitem__ (e.g. list or tuple).
264+
Uses index-based access (values[i]) rather than iteration for
265+
performance — the input must support ``__getitem__`` (list, tuple,
266+
etc.). This is intentional: index access lets Cython emit a single
267+
``PyObject_GetItem`` call per element instead of iterator protocol
268+
overhead.
268269
"""
269270
cdef Py_ssize_t i
270271
cdef Py_ssize_t buf_size = self.vector_size * 8
271272
if buf_size == 0:
272273
return b""
273-
cdef char *buf = <char *>malloc(buf_size)
274-
if buf == NULL:
275-
raise MemoryError("Failed to allocate %d bytes for vector serialization" % buf_size)
274+
275+
cdef object result = PyBytes_FromStringAndSize(NULL, buf_size)
276+
cdef char *buf = PyBytes_AS_STRING(result)
276277

277278
cdef double val
278279
cdef char *src
279280
cdef char *dst
280281

281-
try:
282-
for i in range(self.vector_size):
283-
val = <double>values[i]
284-
src = <char *>&val
285-
dst = buf + i * 8
286-
287-
if is_little_endian:
288-
dst[0] = src[7]
289-
dst[1] = src[6]
290-
dst[2] = src[5]
291-
dst[3] = src[4]
292-
dst[4] = src[3]
293-
dst[5] = src[2]
294-
dst[6] = src[1]
295-
dst[7] = src[0]
296-
else:
297-
memcpy(dst, src, 8)
298-
299-
return PyBytes_FromStringAndSize(buf, buf_size)
300-
finally:
301-
free(buf)
282+
for i in range(self.vector_size):
283+
val = <double>values[i]
284+
src = <char *>&val
285+
dst = buf + i * 8
286+
287+
if is_little_endian:
288+
dst[0] = src[7]
289+
dst[1] = src[6]
290+
dst[2] = src[5]
291+
dst[3] = src[4]
292+
dst[4] = src[3]
293+
dst[5] = src[2]
294+
dst[6] = src[1]
295+
dst[7] = src[0]
296+
else:
297+
memcpy(dst, src, 8)
298+
299+
return result
302300

303301
cdef inline bytes _serialize_int32(self, object values):
304302
"""Serialize a sequence of int32 values into a contiguous big-endian buffer.
305303
306-
Note: uses index-based access (values[i]) rather than iteration, so
307-
the input must support __getitem__ (e.g. list or tuple).
304+
Uses index-based access (values[i]) rather than iteration for
305+
performance — the input must support ``__getitem__`` (list, tuple,
306+
etc.). This is intentional: index access lets Cython emit a single
307+
``PyObject_GetItem`` call per element instead of iterator protocol
308+
overhead.
308309
"""
309310
cdef Py_ssize_t i
310311
cdef Py_ssize_t buf_size = self.vector_size * 4
311312
if buf_size == 0:
312313
return b""
313-
cdef char *buf = <char *>malloc(buf_size)
314-
if buf == NULL:
315-
raise MemoryError("Failed to allocate %d bytes for vector serialization" % buf_size)
314+
315+
cdef object result = PyBytes_FromStringAndSize(NULL, buf_size)
316+
cdef char *buf = PyBytes_AS_STRING(result)
316317

317318
cdef int32_t val
318319
cdef char *src
319320
cdef char *dst
320321

321-
try:
322-
for i in range(self.vector_size):
323-
_check_int32_range(values[i])
324-
val = <int32_t>values[i]
325-
src = <char *>&val
326-
dst = buf + i * 4
327-
328-
if is_little_endian:
329-
dst[0] = src[3]
330-
dst[1] = src[2]
331-
dst[2] = src[1]
332-
dst[3] = src[0]
333-
else:
334-
memcpy(dst, src, 4)
335-
336-
return PyBytes_FromStringAndSize(buf, buf_size)
337-
finally:
338-
free(buf)
322+
for i in range(self.vector_size):
323+
_check_int32_range(values[i])
324+
val = <int32_t>values[i]
325+
src = <char *>&val
326+
dst = buf + i * 4
327+
328+
if is_little_endian:
329+
dst[0] = src[3]
330+
dst[1] = src[2]
331+
dst[2] = src[1]
332+
dst[3] = src[0]
333+
else:
334+
memcpy(dst, src, 4)
335+
336+
return result
339337

340338
cdef inline bytes _serialize_generic(self, object values, int protocol_version):
341339
"""Fallback: element-by-element Python serialization for non-optimized types."""

tests/unit/test_parameter_binding.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright DataStax, Inc.
1+
# Copyright ScyllaDB, Inc.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -349,8 +349,9 @@ def test_cython_path_type_error_wrapped(self):
349349
assert "Int32Type" in msg
350350

351351
def test_plain_path_overflow_error_wrapped(self):
352-
"""Out-of-range int in the plain Python path raises struct.error (caught
353-
alongside OverflowError) and is wrapped with column context."""
352+
"""Out-of-range int in the plain Python path raises OverflowError (from
353+
the Cython serializer) or struct.error (from the pure-Python fallback)
354+
and is wrapped with column context."""
354355
column_metadata = [ColumnMetadata("keyspace", "cf", "v0", Int32Type)]
355356
# Force the plain Python path (no Cython serializers)
356357
prepared = self._make_prepared(column_metadata, serializers=None)

0 commit comments

Comments
 (0)