From bd899744b8aa43fb2c45a570cc600b4e7f407eb1 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 17 Mar 2026 16:29:17 -0400 Subject: [PATCH 01/19] PatchedArray: basics and wiring Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/mod.rs | 4 + vortex-array/src/arrays/patched/array.rs | 256 ++++++++++++ .../src/arrays/patched/compute/compare.rs | 273 ++++++++++++ .../src/arrays/patched/compute/filter.rs | 145 +++++++ .../src/arrays/patched/compute/mod.rs | 6 + .../src/arrays/patched/compute/rules.rs | 12 + vortex-array/src/arrays/patched/mod.rs | 75 ++++ .../src/arrays/patched/vtable/kernels.rs | 9 + vortex-array/src/arrays/patched/vtable/mod.rs | 389 ++++++++++++++++++ .../src/arrays/patched/vtable/operations.rs | 39 ++ .../src/arrays/patched/vtable/slice.rs | 183 ++++++++ vortex-buffer/src/buffer.rs | 28 ++ 12 files changed, 1419 insertions(+) create mode 100644 vortex-array/src/arrays/patched/array.rs create mode 100644 vortex-array/src/arrays/patched/compute/compare.rs create mode 100644 vortex-array/src/arrays/patched/compute/filter.rs create mode 100644 vortex-array/src/arrays/patched/compute/mod.rs create mode 100644 vortex-array/src/arrays/patched/compute/rules.rs create mode 100644 vortex-array/src/arrays/patched/mod.rs create mode 100644 vortex-array/src/arrays/patched/vtable/kernels.rs create mode 100644 vortex-array/src/arrays/patched/vtable/mod.rs create mode 100644 vortex-array/src/arrays/patched/vtable/operations.rs create mode 100644 vortex-array/src/arrays/patched/vtable/slice.rs diff --git a/vortex-array/src/arrays/mod.rs b/vortex-array/src/arrays/mod.rs index 43f8a84d49e..5abbcb84b85 100644 --- a/vortex-array/src/arrays/mod.rs +++ b/vortex-array/src/arrays/mod.rs @@ -66,6 +66,10 @@ pub mod null; pub use null::Null; pub use null::NullArray; +pub mod patched; +pub use patched::Patched; +pub use patched::PatchedArray; + pub mod primitive; pub use primitive::Primitive; pub use primitive::PrimitiveArray; diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs new file mode 100644 index 00000000000..9f96f3c5c9f --- /dev/null +++ b/vortex-array/src/arrays/patched/array.rs @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Range; + +use vortex_buffer::Buffer; +use vortex_buffer::BufferMut; +use vortex_error::VortexResult; +use vortex_error::vortex_ensure; + +use crate::ArrayRef; +use crate::Canonical; +use crate::DynArray; +use crate::ExecutionCtx; +use crate::arrays::patched::PatchAccessor; +use crate::arrays::patched::TransposedPatches; +use crate::arrays::patched::patch_lanes; +use crate::buffer::BufferHandle; +use crate::dtype::IntegerPType; +use crate::dtype::NativePType; +use crate::dtype::PType; +use crate::match_each_native_ptype; +use crate::match_each_unsigned_integer_ptype; +use crate::patches::Patches; +use crate::stats::ArrayStats; + +/// An array that partially "patches" another array with new values. +/// +/// Patched arrays implement the set of nodes that do this instead here...I think? +#[derive(Debug, Clone)] +pub struct PatchedArray { + /// The inner array that is being patched. This is the zeroth child. + pub(super) inner: ArrayRef, + + /// Number of 1024-element chunks. Pre-computed for convenience. + pub(super) n_chunks: usize, + + /// Number of lanes the patch indices and values have been split into. Each of the `n_chunks` + /// of 1024 values is split into `n_lanes` lanes horizontally, each lane having 1024 / n_lanes + /// values that might be patched. + pub(super) n_lanes: usize, + + /// Offset into the first chunk + pub(super) offset: usize, + /// Total length. + pub(super) len: usize, + + /// lane offsets. The PType of these MUST be u32 + pub(super) lane_offsets: BufferHandle, + /// indices within a 1024-element chunk. The PType of these MUST be u16 + pub(super) indices: BufferHandle, + /// patch values corresponding to the indices. The ptype is specified by `values_ptype`. + pub(super) values: BufferHandle, + /// PType of the scalars in `values`. Can be any native type. + pub(super) values_ptype: PType, + + pub(super) stats_set: ArrayStats, +} + +impl PatchedArray { + pub fn from_array_and_patches( + inner: ArrayRef, + patches: &Patches, + ctx: &mut ExecutionCtx, + ) -> VortexResult { + vortex_ensure!( + inner.dtype().eq_with_nullability_superset(patches.dtype()), + "array DType must match patches DType" + ); + + let values_ptype = patches.dtype().as_ptype(); + + let TransposedPatches { + n_chunks, + n_lanes, + lane_offsets, + indices, + values, + } = transpose_patches(patches, ctx)?; + + let len = inner.len(); + + Ok(Self { + inner, + n_chunks, + n_lanes, + values_ptype, + offset: 0, + len, + lane_offsets: BufferHandle::new_host(lane_offsets), + indices: BufferHandle::new_host(indices), + values: BufferHandle::new_host(values), + stats_set: ArrayStats::default(), + }) + } + + /// Get an accessor, which allows ranged access to patches by chunk/lane. + pub fn accessor(&self) -> PatchAccessor<'_, V> { + PatchAccessor { + n_lanes: self.n_lanes, + lane_offsets: self.lane_offsets.as_host().reinterpret::(), + indices: self.indices.as_host().reinterpret::(), + values: self.values.as_host().reinterpret::(), + } + } + + /// Slice the array to just the patches and inner values that are within the chunk range. + pub(crate) fn slice_chunks(&self, chunks: Range) -> VortexResult { + let lane_offsets_start = chunks.start * self.n_lanes; + let lane_offsets_stop = chunks.end * self.n_lanes + 1; + + let sliced_lane_offsets = self + .lane_offsets + .slice_typed::(lane_offsets_start..lane_offsets_stop); + let indices = self.indices.clone(); + let values = self.values.clone(); + + let begin = (chunks.start * 1024).max(self.offset); + let end = (chunks.end * 1024).min(self.len); + + let offset = begin % 1024; + + let inner = self.inner.slice(begin..end)?; + + let len = end - begin; + let n_chunks = (end - begin).div_ceil(1024); + + Ok(PatchedArray { + inner, + n_chunks, + n_lanes: self.n_lanes, + offset, + len, + indices, + values, + values_ptype: self.values_ptype, + lane_offsets: sliced_lane_offsets, + stats_set: ArrayStats::default(), + }) + } +} + +/// Transpose a set of patches from the default sorted layout into the data parallel layout. +#[allow(clippy::cognitive_complexity)] +fn transpose_patches(patches: &Patches, ctx: &mut ExecutionCtx) -> VortexResult { + let array_len = patches.array_len(); + let offset = patches.offset(); + + let indices = patches + .indices() + .clone() + .execute::(ctx)? + .into_primitive(); + + let values = patches + .values() + .clone() + .execute::(ctx)? + .into_primitive(); + + let indices_ptype = indices.ptype(); + let values_ptype = values.ptype(); + + let indices = indices.buffer_handle().clone().unwrap_host(); + let values = values.buffer_handle().clone().unwrap_host(); + + match_each_unsigned_integer_ptype!(indices_ptype, |I| { + match_each_native_ptype!(values_ptype, |V| { + let indices: Buffer = Buffer::from_byte_buffer(indices); + let values: Buffer = Buffer::from_byte_buffer(values); + + Ok(transpose( + indices.as_slice(), + values.as_slice(), + offset, + array_len, + )) + }) + }) +} + +#[allow(clippy::cast_possible_truncation)] +fn transpose( + indices_in: &[I], + values_in: &[V], + offset: usize, + array_len: usize, +) -> TransposedPatches { + // Total number of slots is number of chunks times number of lanes. + let n_chunks = array_len.div_ceil(1024); + assert!( + n_chunks <= u32::MAX as usize, + "Cannot transpose patches for array with >= 4 trillion elements" + ); + + let n_lanes = patch_lanes::(); + + // We know upfront how many indices and values we'll have. + let mut indices_buffer = BufferMut::with_capacity(indices_in.len()); + let mut values_buffer = BufferMut::with_capacity(values_in.len()); + + // number of patches in each chunk. + let mut lane_offsets: BufferMut = BufferMut::zeroed(n_chunks * n_lanes + 1); + + // Scan the index/values once to get chunk/lane counts + for index in indices_in { + let index = index.as_() - offset; + let chunk = index / 1024; + let lane = index % n_lanes; + + lane_offsets[chunk * n_lanes + lane + 1] += 1; + } + + // Prefix-sum sizes -> offsets + for index in 1..lane_offsets.len() { + lane_offsets[index] += lane_offsets[index - 1]; + } + + // Loop over patches, writing thme to final positions + let indices_out = indices_buffer.spare_capacity_mut(); + let values_out = values_buffer.spare_capacity_mut(); + for (index, &value) in std::iter::zip(indices_in, values_in) { + let index = index.as_() - offset; + let chunk = index / 1024; + let lane = index % n_lanes; + + let position = &mut lane_offsets[chunk * n_lanes + lane]; + indices_out[*position as usize].write((index % 1024) as u16); + values_out[*position as usize].write(value); + *position += 1; + } + + // SAFETY: we know there are exactly indices_in.len() indices/values, and we just + // set them to the appropriate values in the loop above. + unsafe { + indices_buffer.set_len(indices_in.len()); + values_buffer.set_len(values_in.len()); + } + + // Now, pass over all the indices and values again and subtract out the position increments. + for index in indices_in { + let index = index.as_() - offset; + let chunk = index / 1024; + let lane = index % n_lanes; + + lane_offsets[chunk * n_lanes + lane] -= 1; + } + + TransposedPatches { + n_chunks, + n_lanes, + lane_offsets: lane_offsets.freeze().into_byte_buffer(), + indices: indices_buffer.freeze().into_byte_buffer(), + values: values_buffer.freeze().into_byte_buffer(), + } +} diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs new file mode 100644 index 00000000000..d1932ed1e44 --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BitBufferMut; +use vortex_error::VortexExpect; +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::Canonical; +use crate::ExecutionCtx; +use crate::IntoArray; +use crate::arrays::BoolArray; +use crate::arrays::ConstantArray; +use crate::arrays::Patched; +use crate::arrays::bool::BoolArrayParts; +use crate::arrays::patched::patch_lanes; +use crate::arrays::primitive::NativeValue; +use crate::builtins::ArrayBuiltins; +use crate::dtype::NativePType; +use crate::match_each_native_ptype; +use crate::scalar_fn::fns::binary::CompareKernel; +use crate::scalar_fn::fns::operators::CompareOperator; + +impl CompareKernel for Patched { + fn compare( + lhs: &Self::Array, + rhs: &ArrayRef, + operator: CompareOperator, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + let Some(constant) = rhs.as_constant() else { + return Ok(None); + }; + + let result = lhs + .inner + .binary( + ConstantArray::new(constant.clone(), lhs.len()).into_array(), + operator.into(), + )? + .execute::(ctx)? + .into_bool(); + + let BoolArrayParts { + bits, + offset, + len, + validity, + } = result.into_parts(); + + let mut bits = BitBufferMut::from_buffer(bits.unwrap_host().into_mut(), offset, len); + + fn apply( + bits: &mut BitBufferMut, + lane_offsets: &[u32], + indices: &[u16], + values: &[V], + constant: V, + cmp: F, + ) -> VortexResult<()> + where + F: Fn(V, V) -> bool, + { + let n_lanes = patch_lanes::(); + + for index in 0..(lane_offsets.len() - 1) { + let chunk = index / n_lanes; + + let lane_start = lane_offsets[index] as usize; + let lane_end = lane_offsets[index + 1] as usize; + + for (&patch_index, &patch_value) in std::iter::zip( + &indices[lane_start..lane_end], + &values[lane_start..lane_end], + ) { + let bit_index = chunk * 1024 + patch_index as usize; + if cmp(patch_value, constant) { + bits.set(bit_index) + } else { + bits.unset(bit_index) + } + } + } + + Ok(()) + } + + let lane_offsets = lhs.lane_offsets.as_host().reinterpret::(); + let indices = lhs.indices.as_host().reinterpret::(); + + match_each_native_ptype!(lhs.values_ptype, |V| { + let values = lhs.values.as_host().reinterpret::(); + let constant = constant + .as_primitive() + .as_::() + .vortex_expect("compare constant not null"); + + match operator { + CompareOperator::Eq => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) == NativeValue(r), + )?; + } + CompareOperator::NotEq => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) != NativeValue(r), + )?; + } + CompareOperator::Gt => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) > NativeValue(r), + )?; + } + CompareOperator::Gte => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) >= NativeValue(r), + )?; + } + CompareOperator::Lt => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) < NativeValue(r), + )?; + } + CompareOperator::Lte => { + apply::( + &mut bits, + lane_offsets, + indices, + values, + constant, + |l, r| NativeValue(l) <= NativeValue(r), + )?; + } + } + }); + + // SAFETY: thing + let result = unsafe { BoolArray::new_unchecked(bits.freeze(), validity) }; + Ok(Some(result.into_array())) + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_error::VortexResult; + + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::arrays::BoolArray; + use crate::arrays::ConstantArray; + use crate::arrays::Patched; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::patches::Patches; + use crate::scalar_fn::fns::binary::CompareKernel; + use crate::scalar_fn::fns::operators::CompareOperator; + use crate::validity::Validity; + + #[test] + fn test_basic() { + let lhs = PrimitiveArray::from_iter(0u32..512).into_array(); + let patches = Patches::new( + 512, + 0, + buffer![509u16, 510, 511].into_array(), + buffer![u32::MAX; 3].into_array(), + None, + ) + .unwrap(); + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx).unwrap(); + + let rhs = ConstantArray::new(u32::MAX, 512).into_array(); + + let result = ::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx) + .unwrap() + .unwrap(); + + let expected = + BoolArray::from_indices(512, [509, 510, 511], Validity::NonNullable).into_array(); + + assert_arrays_eq!(expected, result); + } + + #[test] + fn test_subnormal_f32() -> VortexResult<()> { + // Subnormal f32 values are smaller than f32::MIN_POSITIVE but greater than 0 + let subnormal: f32 = f32::MIN_POSITIVE / 2.0; + assert!(subnormal > 0.0 && subnormal < f32::MIN_POSITIVE); + + let lhs = PrimitiveArray::from_iter((0..512).map(|i| i as f32)).into_array(); + + let patches = Patches::new( + 512, + 0, + buffer![509u16, 510, 511].into_array(), + buffer![f32::NAN, subnormal, f32::NEG_INFINITY].into_array(), + None, + )?; + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx)?; + + let rhs = ConstantArray::new(subnormal, 512).into_array(); + + let result = + ::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx)? + .unwrap(); + + let expected = BoolArray::from_indices(512, [510], Validity::NonNullable).into_array(); + + assert_arrays_eq!(expected, result); + Ok(()) + } + + #[test] + fn test_pos_neg_zero() -> VortexResult<()> { + let lhs = PrimitiveArray::from_iter([-0.0f32; 10]).into_array(); + + let patches = Patches::new( + 10, + 0, + buffer![5u16, 6, 7, 8, 9].into_array(), + buffer![f32::NAN, f32::NEG_INFINITY, 0f32, -0.0f32, f32::INFINITY].into_array(), + None, + )?; + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + let lhs = PatchedArray::from_array_and_patches(lhs, &patches, &mut ctx)?; + + let rhs = ConstantArray::new(0.0f32, 10).into_array(); + + let result = + ::compare(&lhs, &rhs, CompareOperator::Eq, &mut ctx)? + .unwrap(); + + let expected = BoolArray::from_indices(10, [7], Validity::NonNullable).into_array(); + + assert_arrays_eq!(expected, result); + + Ok(()) + } +} diff --git a/vortex-array/src/arrays/patched/compute/filter.rs b/vortex-array/src/arrays/patched/compute/filter.rs new file mode 100644 index 00000000000..d045a79c38f --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/filter.rs @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; +use vortex_mask::AllOr; +use vortex_mask::Mask; + +use crate::ArrayRef; +use crate::IntoArray; +use crate::arrays::FilterArray; +use crate::arrays::Patched; +use crate::arrays::filter::FilterReduce; + +impl FilterReduce for Patched { + fn filter(array: &Self::Array, mask: &Mask) -> VortexResult> { + // Find the contiguous chunk range that the mask covers. We use this to slice the inner + // components, then wrap the rest up with another FilterArray. + // + // This is helpful when we have a very selective filter that is clustered to a small + // range. + let (chunk_start, chunk_stop) = match mask.indices() { + AllOr::All | AllOr::None => { + // This is handled as the precondition to this method, see the FilterReduce + // documentation. + unreachable!("mask must be a MaskValues here") + } + AllOr::Some(indices) => { + let first = indices[0]; + let last = indices[indices.len() - 1]; + + (first / 1024, last.div_ceil(1024)) + } + }; + + // If all chunks already covered, there is nothing to do. + if chunk_start == 0 && chunk_stop == array.n_chunks { + return Ok(None); + } + + let sliced = array.slice_chunks(chunk_start..chunk_stop)?; + + let slice_start = chunk_start * 1024; + let slice_end = (chunk_stop * 1024).min(array.len()); + let remainder = mask.slice(slice_start..slice_end); + + Ok(Some( + FilterArray::new(sliced.into_array(), remainder).into_array(), + )) + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_error::VortexResult; + use vortex_mask::Mask; + + use crate::DynArray; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::arrays::FilterArray; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::patches::Patches; + + #[test] + fn test_filter_noop() -> VortexResult<()> { + let array = buffer![u16::MIN; 5].into_array(); + let patched_indices = buffer![3u8, 4].into_array(); + let patched_values = buffer![u16::MAX; 2].into_array(); + + let patches = Patches::new(5, 0, patched_indices, patched_values, None)?; + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let array = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array(); + + let filtered = FilterArray::new( + array.clone(), + Mask::from_iter([true, false, false, false, true]), + ) + .into_array(); + + let reduced = array.vtable().reduce_parent(&array, &filtered, 0)?; + + // Filter does not get pushed through to child because it does not prune any chunks. + assert!(reduced.is_none()); + + Ok(()) + } + + #[test] + fn test_filter_basic() -> VortexResult<()> { + // Basic test: filter with mask that crosses boundaries. + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let array = buffer![u16::MIN; 4096].into_array(); + let patched_indices = buffer![1024u16, 1025].into_array(); + let patched_values = buffer![u16::MAX, u16::MAX].into_array(); + + let patches = Patches::new(4096, 0, patched_indices, patched_values, None)?; + + let array = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array(); + + // Filter that only touches the middle 2 chunks + let mask = Mask::from_indices(4096, vec![1024, 1025, 3000]); + + let filtered = FilterArray::new(array.clone(), mask).into_array(); + let reduced = array.vtable().reduce_parent(&array, &filtered, 0)?; + + let expected = PrimitiveArray::from_iter([u16::MAX, u16::MAX, u16::MIN]).into_array(); + + assert_arrays_eq!(expected, reduced.unwrap()); + + Ok(()) + } + + #[test] + fn test_filter_complex() -> VortexResult<()> { + // Basic test: filter with mask that crosses boundaries. + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let array = buffer![u16::MIN; 4096].into_array(); + let patched_indices = buffer![1024u16, 1025].into_array(); + let patched_values = buffer![u16::MAX, u16::MAX].into_array(); + + let patches = Patches::new(4096, 1, patched_indices, patched_values, None)?; + + let array = PatchedArray::from_array_and_patches(array, &patches, &mut ctx)?.into_array(); + + // Filter that only touches the middle 2 chunks + let mask = Mask::from_indices(4096, vec![1024, 1025, 3000]); + + let filtered = FilterArray::new(array.clone(), mask).into_array(); + let reduced = array.vtable().reduce_parent(&array, &filtered, 0)?; + + let expected = PrimitiveArray::from_iter([u16::MAX, u16::MIN, u16::MIN]).into_array(); + + assert_arrays_eq!(expected, reduced.unwrap()); + + Ok(()) + } +} diff --git a/vortex-array/src/arrays/patched/compute/mod.rs b/vortex-array/src/arrays/patched/compute/mod.rs new file mode 100644 index 00000000000..aa8b18199b2 --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/mod.rs @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod compare; +mod filter; +pub(crate) mod rules; diff --git a/vortex-array/src/arrays/patched/compute/rules.rs b/vortex-array/src/arrays/patched/compute/rules.rs new file mode 100644 index 00000000000..3ecb25c1efa --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/rules.rs @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::arrays::Patched; +use crate::arrays::filter::FilterReduceAdaptor; +use crate::arrays::slice::SliceReduceAdaptor; +use crate::optimizer::rules::ParentRuleSet; + +pub(crate) const PARENT_RULES: ParentRuleSet = ParentRuleSet::new(&[ + ParentRuleSet::lift(&FilterReduceAdaptor(Patched)), + ParentRuleSet::lift(&SliceReduceAdaptor(Patched)), +]); diff --git a/vortex-array/src/arrays/patched/mod.rs b/vortex-array/src/arrays/patched/mod.rs new file mode 100644 index 00000000000..f035204c188 --- /dev/null +++ b/vortex-array/src/arrays/patched/mod.rs @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod array; +mod compute; +mod vtable; + +pub use array::*; +use vortex_buffer::ByteBuffer; +pub use vtable::*; + +/// Patches that have been transposed into GPU format. +struct TransposedPatches { + n_chunks: usize, + n_lanes: usize, + lane_offsets: ByteBuffer, + indices: ByteBuffer, + values: ByteBuffer, +} + +/// Number of lanes used at patch time for a value of type `V`. +/// +/// This is *NOT* equal to the number of FastLanes lanes for the type `V`, rather this is going to +/// correspond to how many "lanes" we will end up copying data on. +/// +/// When applied on the CPU, this configuration doesn't really matter. On the GPU, it is based +/// on the number of patches involved here. +const fn patch_lanes() -> usize { + // For types 32-bits or smaller, we use a 32 lane configuration, and for 64-bit we use 16 lanes. + // This matches up with the number of lanes we use to execute copying results from bit-unpacking + // from shared to global memory. + if size_of::() < 8 { 32 } else { 16 } +} + +pub struct PatchAccessor<'a, V> { + n_lanes: usize, + lane_offsets: &'a [u32], + indices: &'a [u16], + values: &'a [V], +} + +impl<'a, V: Sized> PatchAccessor<'a, V> { + /// Access the patches for a particular lane + pub fn access(&'a self, chunk: usize, lane: usize) -> LanePatches<'a, V> { + let start = self.lane_offsets[chunk * self.n_lanes + lane] as usize; + let stop = self.lane_offsets[chunk * self.n_lanes + lane + 1] as usize; + + LanePatches { + indices: &self.indices[start..stop], + values: &self.values[start..stop], + } + } +} + +pub struct LanePatches<'a, V> { + pub indices: &'a [u16], + pub values: &'a [V], +} + +impl<'a, V: Copy> LanePatches<'a, V> { + pub fn len(&self) -> usize { + self.indices.len() + } + + pub fn is_empty(&self) -> bool { + self.indices.is_empty() + } + + pub fn iter(&self) -> impl Iterator { + self.indices + .iter() + .copied() + .zip(self.values.iter().copied()) + } +} diff --git a/vortex-array/src/arrays/patched/vtable/kernels.rs b/vortex-array/src/arrays/patched/vtable/kernels.rs new file mode 100644 index 00000000000..2c60ec4a20f --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/kernels.rs @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::arrays::Patched; +use crate::kernel::ParentKernelSet; +use crate::scalar_fn::fns::binary::CompareExecuteAdaptor; + +pub(super) const PARENT_KERNELS: ParentKernelSet = + ParentKernelSet::new(&[ParentKernelSet::lift(&CompareExecuteAdaptor(Patched))]); diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs new file mode 100644 index 00000000000..bcc87d869ce --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod kernels; +mod operations; +mod slice; + +use std::hash::Hash; +use std::hash::Hasher; + +use vortex_buffer::Buffer; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; +use vortex_error::vortex_panic; +use vortex_session::VortexSession; + +use crate::ArrayEq; +use crate::ArrayHash; +use crate::ArrayRef; +use crate::Canonical; +use crate::DeserializeMetadata; +use crate::DynArray; +use crate::ExecutionCtx; +use crate::ExecutionStep; +use crate::IntoArray; +use crate::Precision; +use crate::ProstMetadata; +use crate::arrays::PrimitiveArray; +use crate::arrays::patched::PatchedArray; +use crate::arrays::patched::compute::rules::PARENT_RULES; +use crate::arrays::patched::patch_lanes; +use crate::arrays::patched::vtable::kernels::PARENT_KERNELS; +use crate::arrays::primitive::PrimitiveArrayParts; +use crate::buffer::BufferHandle; +use crate::dtype::DType; +use crate::dtype::NativePType; +use crate::match_each_native_ptype; +use crate::serde::ArrayChildren; +use crate::stats::ArrayStats; +use crate::stats::StatsSetRef; +use crate::vtable; +use crate::vtable::ArrayId; +use crate::vtable::VTable; +use crate::vtable::ValidityChild; +use crate::vtable::ValidityVTableFromChild; + +vtable!(Patched); + +#[derive(Debug)] +pub struct Patched; + +impl ValidityChild for Patched { + fn validity_child(array: &PatchedArray) -> &ArrayRef { + &array.inner + } +} + +#[derive(Clone, prost::Message)] +pub struct PatchedMetadata { + #[prost(uint32, tag = "1")] + pub(crate) offset: u32, +} + +impl VTable for Patched { + type Array = PatchedArray; + type Metadata = ProstMetadata; + type OperationsVTable = Self; + type ValidityVTable = ValidityVTableFromChild; + + fn id(_array: &Self::Array) -> ArrayId { + ArrayId::new_ref("vortex.patched") + } + + fn len(array: &Self::Array) -> usize { + array.len + } + + fn dtype(array: &Self::Array) -> &DType { + array.inner.dtype() + } + + fn stats(array: &Self::Array) -> StatsSetRef<'_> { + array.stats_set.to_ref(array.as_ref()) + } + + fn array_hash(array: &Self::Array, state: &mut H, precision: Precision) { + array.inner.array_hash(state, precision); + array.values_ptype.hash(state); + array.n_chunks.hash(state); + array.n_lanes.hash(state); + array.lane_offsets.array_hash(state, precision); + array.indices.array_hash(state, precision); + array.values.array_hash(state, precision); + } + + fn array_eq(array: &Self::Array, other: &Self::Array, precision: Precision) -> bool { + array.n_chunks == other.n_chunks + && array.n_lanes == other.n_lanes + && array.values_ptype == other.values_ptype + && array.inner.array_eq(&other.inner, precision) + && array.lane_offsets.array_eq(&other.lane_offsets, precision) + && array.indices.array_eq(&other.indices, precision) + && array.values.array_eq(&other.values, precision) + } + + fn nbuffers(_array: &Self::Array) -> usize { + 3 + } + + fn buffer(array: &Self::Array, idx: usize) -> BufferHandle { + match idx { + 0 => array.lane_offsets.clone(), + 1 => array.indices.clone(), + 2 => array.values.clone(), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), + } + } + + fn buffer_name(_array: &Self::Array, idx: usize) -> Option { + match idx { + 0 => Some("lane_offsets".to_string()), + 1 => Some("patch_indices".to_string()), + 2 => Some("patch_values".to_string()), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), + } + } + + fn nchildren(_array: &Self::Array) -> usize { + 1 + } + + fn child(array: &Self::Array, idx: usize) -> ArrayRef { + if idx == 0 { + array.inner.clone() + } else { + vortex_panic!("invalid child index for PatchedArray: {idx}"); + } + } + + fn child_name(_array: &Self::Array, idx: usize) -> String { + if idx == 0 { + "inner".to_string() + } else { + vortex_panic!("invalid child index for PatchedArray: {idx}"); + } + } + + #[allow(clippy::cast_possible_truncation)] + fn metadata(array: &Self::Array) -> VortexResult { + Ok(ProstMetadata(PatchedMetadata { + offset: array.offset as u32, + })) + } + + fn serialize(_metadata: Self::Metadata) -> VortexResult>> { + Ok(Some(vec![])) + } + + fn deserialize( + bytes: &[u8], + _dtype: &DType, + _len: usize, + _buffers: &[BufferHandle], + _session: &VortexSession, + ) -> VortexResult { + let inner = as DeserializeMetadata>::deserialize(bytes)?; + Ok(ProstMetadata(inner)) + } + + fn build( + dtype: &DType, + len: usize, + metadata: &Self::Metadata, + buffers: &[BufferHandle], + children: &dyn ArrayChildren, + ) -> VortexResult { + let inner = children.get(0, dtype, len)?; + + let n_chunks = len.div_ceil(1024); + + let n_lanes = match_each_native_ptype!(dtype.as_ptype(), |P| { patch_lanes::

() }); + + let &[lane_offsets, indices, values] = &buffers else { + vortex_bail!("invalid buffer count for PatchedArray"); + }; + + Ok(PatchedArray { + inner, + n_chunks, + n_lanes, + offset: metadata.offset as usize, + len, + lane_offsets: lane_offsets.clone(), + indices: indices.clone(), + values: values.clone(), + values_ptype: dtype.as_ptype(), + stats_set: ArrayStats::default(), + }) + } + + fn with_children(array: &mut Self::Array, mut children: Vec) -> VortexResult<()> { + vortex_ensure!( + children.len() == 1, + "PatchedArray must have exactly 1 child" + ); + + array.inner = children.remove(0); + + Ok(()) + } + + fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult { + let inner = array + .inner + .clone() + .execute::(ctx)? + .into_primitive(); + + let PrimitiveArrayParts { + buffer, + ptype, + validity, + } = inner.into_parts(); + + let lane_offsets: Buffer = + Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); + let indices: Buffer = Buffer::from_byte_buffer(array.indices.clone().unwrap_host()); + + let patched_values = match_each_native_ptype!(array.values_ptype, |V| { + let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); + let values: Buffer = Buffer::from_byte_buffer(array.values.clone().unwrap_host()); + + let offset = array.offset; + let len = array.len; + + apply::( + &mut output, + offset, + len, + array.n_chunks, + array.n_lanes, + &lane_offsets, + &indices, + &values, + ); + + // The output will always be aligned to a chunk boundary, we apply the offset/len + // at the end to slice to only the in-bounds values. + let _output = output.as_slice(); + let output = output.freeze().slice(offset..offset + len); + + PrimitiveArray::from_byte_buffer(output.into_byte_buffer(), ptype, validity) + }); + + Ok(ExecutionStep::done(patched_values.into_array())) + } + + fn execute_parent( + array: &Self::Array, + parent: &ArrayRef, + child_idx: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + PARENT_KERNELS.execute(array, parent, child_idx, ctx) + } + + fn reduce_parent( + array: &Self::Array, + parent: &ArrayRef, + child_idx: usize, + ) -> VortexResult> { + PARENT_RULES.evaluate(array, parent, child_idx) + } +} + +/// Apply patches on top of the existing value types. +#[allow(clippy::too_many_arguments)] +fn apply( + output: &mut [V], + offset: usize, + len: usize, + n_chunks: usize, + n_lanes: usize, + lane_offsets: &[u32], + indices: &[u16], + values: &[V], +) { + for chunk in 0..n_chunks { + let start = lane_offsets[chunk * n_lanes] as usize; + let stop = lane_offsets[chunk * n_lanes + n_lanes] as usize; + + for idx in start..stop { + // the indices slice is measured as an offset into the 1024-value chunk. + let index = chunk * 1024 + indices[idx] as usize; + if index < offset || index >= offset + len { + continue; + } + + let value = values[idx]; + output[index] = value; + } + } +} + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + use vortex_buffer::buffer_mut; + use vortex_session::VortexSession; + + use crate::Canonical; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::arrays::PatchedArray; + use crate::dtype::Nullability; + use crate::patches::Patches; + use crate::scalar::Scalar; + + #[test] + fn test_execute() { + let values = buffer![0u16; 1024].into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + let executed = array + .execute::(&mut ctx) + .unwrap() + .into_primitive() + .into_buffer::(); + + let mut expected = buffer_mut![0u16; 1024]; + expected[1] = 1; + expected[2] = 1; + expected[3] = 1; + + assert_eq!(executed, expected.freeze()); + } + + #[test] + fn test_scalar_at() { + let values = buffer![0u16; 1024].into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![1u16; 3].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + assert_eq!( + array.scalar_at(0).unwrap(), + Scalar::primitive(0u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(1).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(2).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + assert_eq!( + array.scalar_at(3).unwrap(), + Scalar::primitive(1u16, Nullability::NonNullable) + ); + } +} diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs new file mode 100644 index 00000000000..ddf5dcec590 --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::DynArray; +use crate::arrays::patched::Patched; +use crate::arrays::patched::PatchedArray; +use crate::arrays::patched::patch_lanes; +use crate::match_each_native_ptype; +use crate::scalar::Scalar; +use crate::vtable::OperationsVTable; + +impl OperationsVTable for Patched { + fn scalar_at(array: &PatchedArray, index: usize) -> VortexResult { + // First check the patches + let chunk = index / 1024; + #[allow(clippy::cast_possible_truncation)] + let chunk_index = (index % 1024) as u16; + match_each_native_ptype!(array.values_ptype, |V| { + let lane = index % patch_lanes::(); + let accessor = array.accessor::(); + let patches = accessor.access(chunk, lane); + // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely + // be slower. + for (patch_index, patch_value) in patches.iter() { + if patch_index == chunk_index { + return Ok(Scalar::primitive( + patch_value, + array.inner.dtype().nullability(), + )); + } + } + }); + + // Otherwise, access the underlying value. + array.inner.scalar_at(index) + } +} diff --git a/vortex-array/src/arrays/patched/vtable/slice.rs b/vortex-array/src/arrays/patched/vtable/slice.rs new file mode 100644 index 00000000000..99d04666d5f --- /dev/null +++ b/vortex-array/src/arrays/patched/vtable/slice.rs @@ -0,0 +1,183 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use std::ops::Range; + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::DynArray; +use crate::IntoArray; +use crate::arrays::Patched; +use crate::arrays::PatchedArray; +use crate::arrays::slice::SliceReduce; +use crate::stats::ArrayStats; + +/// Is this something that uses a SliceKernel or a SliceReduce +impl SliceReduce for Patched { + fn slice(array: &Self::Array, range: Range) -> VortexResult> { + // We **always** slice at 1024-element chunk boundaries. We keep the offset + len + // around so that when we execute we know how much to chop off. + let new_offset = (range.start + array.offset) % 1024; + let new_len = range.end - range.start; + + let chunk_start = (range.start + array.offset) / 1024; + let chunk_stop = (range.end + array.offset).div_ceil(1024); + + // Slice the inner to chunk boundaries + let inner_start = chunk_start * 1024; + let inner_stop = (chunk_stop * 1024).min(array.inner.len()); + let inner = array.inner.slice(inner_start..inner_stop)?; + + // Slice to only maintain offsets to the sliced chunks + let sliced_lane_offsets = array + .lane_offsets + .slice_typed::((chunk_start * array.n_lanes)..(chunk_stop * array.n_lanes) + 1); + + Ok(Some( + PatchedArray { + inner, + n_chunks: chunk_stop - chunk_start, + n_lanes: array.n_lanes, + + offset: new_offset, + len: new_len, + lane_offsets: sliced_lane_offsets, + indices: array.indices.clone(), + values: array.values.clone(), + values_ptype: array.values_ptype, + stats_set: ArrayStats::default(), + } + .into_array(), + )) + } +} + +#[cfg(test)] +mod tests { + use std::ops::Range; + + use rstest::rstest; + use vortex_buffer::Buffer; + use vortex_buffer::BufferMut; + use vortex_buffer::buffer; + use vortex_error::VortexResult; + + use crate::Canonical; + use crate::DynArray; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::LEGACY_SESSION; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::dtype::NativePType; + use crate::patches::Patches; + + #[test] + fn test_reduce() -> VortexResult<()> { + let values = buffer![0u16; 512].into_array(); + let patch_indices = buffer![1u32, 8, 30].into_array(); + let patch_values = buffer![u16::MAX; 3].into_array(); + let patches = Patches::new(512, 0, patch_indices, patch_values, None).unwrap(); + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let patched_array = + PatchedArray::from_array_and_patches(values, &patches, &mut ctx).unwrap(); + + let sliced = patched_array.slice(1..10)?; + + insta::assert_snapshot!( + sliced.display_tree_encodings_only(), + @r#" + root: vortex.patched(u16, len=9) + inner: vortex.primitive(u16, len=512) + "#); + + let executed = sliced.execute::(&mut ctx)?.into_primitive(); + + assert_eq!( + &[u16::MAX, 0, 0, 0, 0, 0, 0, u16::MAX, 0], + executed.as_slice::() + ); + + Ok(()) + } + + #[rstest] + #[case::trivial(buffer![1u64; 2], buffer![1u32], buffer![u64::MAX], 1..2)] + #[case::one_chunk(buffer![0u64; 1024], buffer![1u32, 8, 30], buffer![u64::MAX; 3], 1..10)] + #[case::multichunk(buffer![1u64; 10_000], buffer![0u32, 1, 2, 3, 4, 16, 17, 18, 19, 1024, 2048, 2049], buffer![u64::MAX; 12], 1024..5000)] + fn test_cases( + #[case] inner: Buffer, + #[case] patch_indices: Buffer, + #[case] patch_values: Buffer, + #[case] range: Range, + ) { + // Create patched array. + let patches = Patches::new( + inner.len(), + 0, + patch_indices.into_array(), + patch_values.into_array(), + None, + ) + .unwrap(); + + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let patched_array = + PatchedArray::from_array_and_patches(inner.into_array(), &patches, &mut ctx).unwrap(); + + // Verify that applying slice first yields same result as applying slice at end. + let slice_first = patched_array + .slice(range.clone()) + .unwrap() + .execute::(&mut ctx) + .unwrap() + .into_array(); + + let slice_last = patched_array + .into_array() + .execute::(&mut ctx) + .unwrap() + .into_primitive() + .slice(range) + .unwrap(); + + assert_arrays_eq!(slice_first, slice_last); + } + + #[test] + fn test_stacked_slices() { + let values = PrimitiveArray::from_iter(0u64..10_000).into_array(); + + let patched_indices = buffer![1u32, 2, 1024, 2048, 3072, 3088].into_array(); + let patched_values = buffer![0u64, 1, 2, 3, 4, 5].into_array(); + + let patches = Patches::new(10_000, 0, patched_indices, patched_values, None).unwrap(); + let mut ctx = ExecutionCtx::new(LEGACY_SESSION.clone()); + + let patched_array = + PatchedArray::from_array_and_patches(values, &patches, &mut ctx).unwrap(); + + let sliced = patched_array + .slice(1024..5000) + .unwrap() + .slice(1..2065) + .unwrap() + .execute::(&mut ctx) + .unwrap() + .into_array(); + + let mut expected = BufferMut::from_iter(1025u64..=3088); + expected[1023] = 3; + expected[2047] = 4; + expected[2063] = 5; + + let expected = expected.into_array(); + + assert_arrays_eq!(expected, sliced); + } +} diff --git a/vortex-buffer/src/buffer.rs b/vortex-buffer/src/buffer.rs index 053cb5baee5..11c360f21e8 100644 --- a/vortex-buffer/src/buffer.rs +++ b/vortex-buffer/src/buffer.rs @@ -523,6 +523,34 @@ impl Buffer { } } +impl ByteBuffer { + /// Reinterpret the byte buffer as a slice of values of type `V`. + /// + /// # Panics + /// + /// This method will only work if the buffer has the proper size and alignment to be viewed + /// as a buffer of `V` values. + pub fn reinterpret(&self) -> &[V] { + assert!( + self.is_aligned(Alignment::of::()), + "ByteBuffer not properly aligned to {}", + type_name::() + ); + + assert_eq!( + self.length % size_of::(), + 0, + "ByteBuffer length not a multiple of the value length" + ); + + let v_len = self.length / size_of::(); + let v_ptr = self.bytes.as_ptr().cast::(); + + // SAFETY: we checked that alignment and length are suitable to treat this as a &[V]. + unsafe { std::slice::from_raw_parts(v_ptr, v_len) } + } +} + /// An iterator over Buffer elements. /// /// This is an analog to the `std::slice::Iter` type. From afd70ef836a6386c73ce8ce6fc35f8a550188e0d Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 16:18:19 -0400 Subject: [PATCH 02/19] take Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/mod.rs | 1 + .../src/arrays/patched/compute/take.rs | 110 ++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 vortex-array/src/arrays/patched/compute/take.rs diff --git a/vortex-array/src/arrays/patched/compute/mod.rs b/vortex-array/src/arrays/patched/compute/mod.rs index aa8b18199b2..8634a22f90b 100644 --- a/vortex-array/src/arrays/patched/compute/mod.rs +++ b/vortex-array/src/arrays/patched/compute/mod.rs @@ -4,3 +4,4 @@ mod compare; mod filter; pub(crate) mod rules; +mod take; diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs new file mode 100644 index 00000000000..59a483c83f0 --- /dev/null +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use rustc_hash::FxHashMap; +use vortex_buffer::Buffer; +use vortex_error::VortexResult; + +use crate::arrays::dict::TakeExecute; +use crate::arrays::primitive::PrimitiveArrayParts; +use crate::arrays::{Patched, PrimitiveArray}; +use crate::dtype::{IntegerPType, NativePType}; +use crate::{ArrayRef, DynArray, IntoArray, match_each_native_ptype}; +use crate::{ExecutionCtx, match_each_unsigned_integer_ptype}; + +impl TakeExecute for Patched { + fn take( + array: &Self::Array, + indices: &ArrayRef, + ctx: &mut ExecutionCtx, + ) -> VortexResult> { + // Perform take on the inner array, including the placeholders. + let inner = array + .inner + .take(indices.clone())? + .execute::(ctx)?; + + let PrimitiveArrayParts { + buffer, + validity, + ptype, + } = inner.into_parts(); + + let indices_ptype = indices.dtype().as_ptype(); + + match_each_unsigned_integer_ptype!(indices_ptype, |I| { + match_each_native_ptype!(ptype, |V| { + let indices = indices.clone().execute::(ctx)?; + let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); + take_map( + output.as_mut(), + indices.as_slice::(), + array.offset, + array.len, + array.n_chunks, + array.n_lanes, + array.lane_offsets.as_host().reinterpret::(), + array.indices.as_host().reinterpret::(), + array.values.as_host().reinterpret::(), + ); + + // SAFETY: output and validity still have same length after take_map returns. + unsafe { + return Ok(Some( + PrimitiveArray::new_unchecked(output.freeze(), validity).into_array(), + )); + } + }) + }); + } +} + +/// Take patches for the given `indices` and apply them onto an `output` using a hash map. +/// +/// First, builds a hashmap from index to patch value, then uses the hashmap in a loop to collect +/// the values. +fn take_map( + output: &mut [V], + indices: &[I], + offset: usize, + len: usize, + n_chunks: usize, + n_lanes: usize, + lane_offsets: &[u32], + patch_index: &[u16], + patch_value: &[V], +) { + // Build a hashmap of patch_index -> values. + let mut index_map = FxHashMap::with_capacity(indices.len()); + for chunk in 0..n_chunks { + for lane in 0..n_lanes { + let [lane_start, lane_end] = lane_offsets[chunk * n_lanes + lane..][..2]; + for i in lane_start..lane_end { + let patch_idx = patch_index[i as usize]; + let patch_value = patch_value[i as usize]; + + let index = chunk * 1024 + patch_idx as usize; + if index >= offset && index < offset + len { + index_map.insert(index, patch_value); + } + } + } + } + + // Now, iterate the take indices using the prebuilt hashmap. + // Undefined/null indices will miss the hash map, which we can ignore. + for index in indices { + let index = index.as_(); + if let Some(&patch_value) = index_map.get(&index) { + output[index] = patch_value; + } + } +} + +#[cfg(test)] +mod tests { + #[test] + fn test_take() { + // Patch some values here instead. + } +} From f6f3a0e6196e0c84e6c3b5f14de246fb02ceeb67 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 16:53:11 -0400 Subject: [PATCH 03/19] add unit tests Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/take.rs | 142 ++++++++++++++++-- 1 file changed, 131 insertions(+), 11 deletions(-) diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs index 59a483c83f0..3db64313be4 100644 --- a/vortex-array/src/arrays/patched/compute/take.rs +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -5,12 +5,18 @@ use rustc_hash::FxHashMap; use vortex_buffer::Buffer; use vortex_error::VortexResult; +use crate::ArrayRef; +use crate::DynArray; +use crate::ExecutionCtx; +use crate::IntoArray; +use crate::arrays::Patched; +use crate::arrays::PrimitiveArray; use crate::arrays::dict::TakeExecute; use crate::arrays::primitive::PrimitiveArrayParts; -use crate::arrays::{Patched, PrimitiveArray}; -use crate::dtype::{IntegerPType, NativePType}; -use crate::{ArrayRef, DynArray, IntoArray, match_each_native_ptype}; -use crate::{ExecutionCtx, match_each_unsigned_integer_ptype}; +use crate::dtype::IntegerPType; +use crate::dtype::NativePType; +use crate::match_each_native_ptype; +use crate::match_each_unsigned_integer_ptype; impl TakeExecute for Patched { fn take( @@ -50,12 +56,12 @@ impl TakeExecute for Patched { // SAFETY: output and validity still have same length after take_map returns. unsafe { - return Ok(Some( + Ok(Some( PrimitiveArray::new_unchecked(output.freeze(), validity).into_array(), - )); + )) } }) - }); + }) } } @@ -63,6 +69,7 @@ impl TakeExecute for Patched { /// /// First, builds a hashmap from index to patch value, then uses the hashmap in a loop to collect /// the values. +#[allow(clippy::too_many_arguments)] fn take_map( output: &mut [V], indices: &[I], @@ -75,10 +82,11 @@ fn take_map( patch_value: &[V], ) { // Build a hashmap of patch_index -> values. - let mut index_map = FxHashMap::with_capacity(indices.len()); + let mut index_map = FxHashMap::with_capacity_and_hasher(indices.len(), Default::default()); for chunk in 0..n_chunks { for lane in 0..n_lanes { - let [lane_start, lane_end] = lane_offsets[chunk * n_lanes + lane..][..2]; + let lane_start = lane_offsets[chunk * n_lanes + lane]; + let lane_end = lane_offsets[chunk * n_lanes + lane + 1]; for i in lane_start..lane_end { let patch_idx = patch_index[i as usize]; let patch_value = patch_value[i as usize]; @@ -103,8 +111,120 @@ fn take_map( #[cfg(test)] mod tests { + use vortex_buffer::buffer; + use vortex_error::VortexResult; + use vortex_session::VortexSession; + + use crate::DynArray; + use crate::ExecutionCtx; + use crate::IntoArray; + use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::patches::Patches; + + fn make_patched_array( + base: &[u16], + patch_indices: &[u32], + patch_values: &[u16], + ) -> VortexResult { + let values = PrimitiveArray::from_iter(base.iter().copied()).into_array(); + let patches = Patches::new( + base.len(), + 0, + PrimitiveArray::from_iter(patch_indices.iter().copied()).into_array(), + PrimitiveArray::from_iter(patch_values.iter().copied()).into_array(), + None, + )?; + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + } + + #[test] + fn test_take_basic() -> VortexResult<()> { + // Array with base values [0, 0, 0, 0, 0] patched at indices [1, 3] with values [10, 30] + let array = make_patched_array(&[0; 5], &[1, 3], &[10, 30])?.into_array(); + + // Take indices [0, 1, 2, 3, 4] - should get [0, 10, 0, 30, 0] + let indices = buffer![0u32, 1, 2, 3, 4].into_array(); + let result = array.take(indices)?; + + let expected = PrimitiveArray::from_iter([0u16, 10, 0, 30, 0]).into_array(); + assert_arrays_eq!(expected, result); + + Ok(()) + } + + #[test] + fn test_take_out_of_order() -> VortexResult<()> { + // Array with base values [0, 0, 0, 0, 0] patched at indices [1, 3] with values [10, 30] + let array = make_patched_array(&[0; 5], &[1, 3], &[10, 30])?.into_array(); + + // Take indices in reverse order + let indices = buffer![4u32, 3, 2, 1, 0].into_array(); + let result = array.take(indices)?; + + let expected = PrimitiveArray::from_iter([0u16, 30, 0, 10, 0]).into_array(); + assert_arrays_eq!(expected, result); + + Ok(()) + } + + #[test] + fn test_take_duplicates() -> VortexResult<()> { + // Array with base values [0, 0, 0, 0, 0] patched at index [2] with value [99] + let array = make_patched_array(&[0; 5], &[2], &[99])?.into_array(); + + // Take the same patched index multiple times + let indices = buffer![2u32, 2, 0, 2].into_array(); + let result = array.take(indices)?; + + let expected = PrimitiveArray::from_iter([99u16, 99, 0, 99]).into_array(); + assert_arrays_eq!(expected, result); + + Ok(()) + } + #[test] - fn test_take() { - // Patch some values here instead. + fn test_take_with_null_indices() -> VortexResult<()> { + use crate::arrays::BoolArray; + use crate::validity::Validity; + + // Array: 10 elements, base value 0, patches at indices 2, 5, 8 with values 20, 50, 80 + let array = make_patched_array(&[0; 10], &[2, 5, 8], &[20, 50, 80])?.into_array(); + + // Take 10 indices, with nulls at positions 1, 4, 7 + // Indices: [0, 2, 2, 5, 8, 0, 5, 8, 3, 1] + // Nulls: [ , , N, , , N, , , N, ] + // Position 2 (index=2, patched) is null + // Position 5 (index=0, unpatched) is null + // Position 8 (index=3, unpatched) is null + let indices = PrimitiveArray::new( + buffer![0u32, 2, 2, 5, 8, 0, 5, 8, 3, 1], + Validity::Array( + BoolArray::from_iter([ + true, true, false, true, true, false, true, true, false, true, + ]) + .into_array(), + ), + ); + let result = array.take(indices.into_array())?; + + // Expected: [0, 20, null, 50, 80, null, 50, 80, null, 0] + let expected = PrimitiveArray::new( + buffer![0u16, 20, 0, 50, 80, 0, 50, 80, 0, 0], + Validity::Array( + BoolArray::from_iter([ + true, true, false, true, true, false, true, true, false, true, + ]) + .into_array(), + ), + ); + assert_arrays_eq!(expected.into_array(), result); + + Ok(()) } } From b4860b42ad6635fdafcd566912f2a126866e9071 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 24 Mar 2026 15:17:07 -0400 Subject: [PATCH 04/19] final Signed-off-by: Andrew Duffy --- vortex-array/public-api.lock | 382 ++++++++++++++++++ vortex-array/src/arrays/patched/array.rs | 2 +- vortex-array/src/arrays/patched/vtable/mod.rs | 15 +- vortex-buffer/public-api.lock | 4 + 4 files changed, 397 insertions(+), 6 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 6eb168876d8..83b379b9deb 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -2044,6 +2044,10 @@ impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::dict::Dic pub fn vortex_array::arrays::dict::Dict::take(array: &vortex_array::arrays::dict::DictArray, indices: &vortex_array::ArrayRef, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::take(array: &Self::Array, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + pub trait vortex_array::arrays::dict::TakeReduce: vortex_array::vtable::VTable pub fn vortex_array::arrays::dict::TakeReduce::take(array: &Self::Array, indices: &vortex_array::ArrayRef) -> vortex_error::VortexResult> @@ -2442,6 +2446,10 @@ impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::null:: pub fn vortex_array::arrays::null::Null::filter(_array: &vortex_array::arrays::null::NullArray, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> +impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::filter(array: &Self::Array, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> + pub mod vortex_array::arrays::fixed_size_list pub struct vortex_array::arrays::fixed_size_list::FixedSizeList @@ -3234,6 +3242,176 @@ impl vortex_array::IntoArray for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array::ArrayRef +pub mod vortex_array::arrays::patched + +pub struct vortex_array::arrays::patched::LanePatches<'a, V> + +pub vortex_array::arrays::patched::LanePatches::indices: &'a [u16] + +pub vortex_array::arrays::patched::LanePatches::values: &'a [V] + +impl<'a, V: core::marker::Copy> vortex_array::arrays::patched::LanePatches<'a, V> + +pub fn vortex_array::arrays::patched::LanePatches<'a, V>::is_empty(&self) -> bool + +pub fn vortex_array::arrays::patched::LanePatches<'a, V>::iter(&self) -> impl core::iter::traits::iterator::Iterator + +pub fn vortex_array::arrays::patched::LanePatches<'a, V>::len(&self) -> usize + +pub struct vortex_array::arrays::patched::PatchAccessor<'a, V> + +impl<'a, V: core::marker::Sized> vortex_array::arrays::patched::PatchAccessor<'a, V> + +pub fn vortex_array::arrays::patched::PatchAccessor<'a, V>::access(&'a self, chunk: usize, lane: usize) -> vortex_array::arrays::patched::LanePatches<'a, V> + +pub struct vortex_array::arrays::patched::Patched + +impl core::clone::Clone for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::clone(&self) -> vortex_array::arrays::patched::Patched + +impl core::fmt::Debug for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::take(array: &Self::Array, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::filter(array: &Self::Array, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> + +impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> + +impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult + +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::vtable(_array: &Self::Array) -> &Self + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + +impl vortex_array::vtable::ValidityChild for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::validity_child(array: &vortex_array::arrays::patched::PatchedArray) -> &vortex_array::ArrayRef + +pub struct vortex_array::arrays::patched::PatchedArray + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::accessor(&self) -> vortex_array::arrays::patched::PatchAccessor<'_, V> + +pub fn vortex_array::arrays::patched::PatchedArray::from_array_and_patches(inner: vortex_array::ArrayRef, patches: &vortex_array::patches::Patches, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::to_array(&self) -> vortex_array::ArrayRef + +impl core::clone::Clone for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::clone(&self) -> vortex_array::arrays::patched::PatchedArray + +impl core::convert::AsRef for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::as_ref(&self) -> &dyn vortex_array::DynArray + +impl core::convert::From for vortex_array::ArrayRef + +pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::patched::PatchedArray) -> vortex_array::ArrayRef + +impl core::fmt::Debug for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::PatchedArray::Target = dyn vortex_array::DynArray + +pub fn vortex_array::arrays::patched::PatchedArray::deref(&self) -> &Self::Target + +impl vortex_array::IntoArray for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::into_array(self) -> vortex_array::ArrayRef + +pub struct vortex_array::arrays::patched::PatchedMetadata + +impl core::clone::Clone for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::clone(&self) -> vortex_array::arrays::patched::PatchedMetadata + +impl core::default::Default for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::default() -> Self + +impl core::fmt::Debug for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl prost::message::Message for vortex_array::arrays::patched::PatchedMetadata + +pub fn vortex_array::arrays::patched::PatchedMetadata::clear(&mut self) + +pub fn vortex_array::arrays::patched::PatchedMetadata::encoded_len(&self) -> usize + pub mod vortex_array::arrays::primitive #[repr(transparent)] pub struct vortex_array::arrays::primitive::NativeValue(pub T) @@ -4084,6 +4262,10 @@ impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::null::Nu pub fn vortex_array::arrays::null::Null::slice(_array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> +impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> + impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::slice::Slice pub fn vortex_array::arrays::slice::Slice::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> @@ -6932,6 +7114,134 @@ impl vortex_array::IntoArray for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array::ArrayRef +pub struct vortex_array::arrays::Patched + +impl core::clone::Clone for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::clone(&self) -> vortex_array::arrays::patched::Patched + +impl core::fmt::Debug for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl vortex_array::arrays::dict::TakeExecute for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::take(array: &Self::Array, indices: &vortex_array::ArrayRef, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::arrays::filter::FilterReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::filter(array: &Self::Array, mask: &vortex_mask::Mask) -> vortex_error::VortexResult> + +impl vortex_array::arrays::slice::SliceReduce for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::slice(array: &Self::Array, range: core::ops::range::Range) -> vortex_error::VortexResult> + +impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult + +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::vtable(_array: &Self::Array) -> &Self + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + +impl vortex_array::vtable::ValidityChild for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::validity_child(array: &vortex_array::arrays::patched::PatchedArray) -> &vortex_array::ArrayRef + +pub struct vortex_array::arrays::PatchedArray + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::accessor(&self) -> vortex_array::arrays::patched::PatchAccessor<'_, V> + +pub fn vortex_array::arrays::patched::PatchedArray::from_array_and_patches(inner: vortex_array::ArrayRef, patches: &vortex_array::patches::Patches, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +impl vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::to_array(&self) -> vortex_array::ArrayRef + +impl core::clone::Clone for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::clone(&self) -> vortex_array::arrays::patched::PatchedArray + +impl core::convert::AsRef for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::as_ref(&self) -> &dyn vortex_array::DynArray + +impl core::convert::From for vortex_array::ArrayRef + +pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::patched::PatchedArray) -> vortex_array::ArrayRef + +impl core::fmt::Debug for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result + +impl core::ops::deref::Deref for vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::PatchedArray::Target = dyn vortex_array::DynArray + +pub fn vortex_array::arrays::patched::PatchedArray::deref(&self) -> &Self::Target + +impl vortex_array::IntoArray for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::into_array(self) -> vortex_array::ArrayRef + pub struct vortex_array::arrays::Primitive impl vortex_array::arrays::Primitive @@ -17262,6 +17572,10 @@ impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::array pub fn vortex_array::arrays::dict::Dict::compare(lhs: &vortex_array::arrays::dict::DictArray, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> +impl vortex_array::scalar_fn::fns::binary::CompareKernel for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: &vortex_array::ArrayRef, operator: vortex_array::scalar_fn::fns::operators::CompareOperator, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + pub fn vortex_array::scalar_fn::fns::binary::and_kleene(lhs: &vortex_array::ArrayRef, rhs: &vortex_array::ArrayRef) -> vortex_error::VortexResult pub fn vortex_array::scalar_fn::fns::binary::compare_nested_arrow_arrays(lhs: &dyn arrow_array::array::Array, rhs: &dyn arrow_array::array::Array, operator: vortex_array::scalar_fn::fns::operators::CompareOperator) -> vortex_error::VortexResult @@ -22064,6 +22378,10 @@ impl vortex_array::vtable::OperationsVTable fo pub fn vortex_array::arrays::null::Null::scalar_at(_array: &vortex_array::arrays::null::NullArray, _index: usize, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult +impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult + impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::scalar_fn::ScalarFnVTable pub fn vortex_array::arrays::scalar_fn::ScalarFnVTable::scalar_at(array: &vortex_array::arrays::scalar_fn::ScalarFnArray, index: usize, _ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult @@ -23140,6 +23458,62 @@ pub fn vortex_array::arrays::null::Null::vtable(_array: &Self::Array) -> &Self pub fn vortex_array::arrays::null::Null::with_children(_array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::vtable(_array: &Self::Array) -> &Self + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + impl vortex_array::vtable::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::Array = vortex_array::arrays::scalar_fn::ScalarFnArray @@ -23260,6 +23634,10 @@ impl vortex_array::vtable::ValidityChild for vo pub fn vortex_array::arrays::Extension::validity_child(array: &vortex_array::arrays::ExtensionArray) -> &vortex_array::ArrayRef +impl vortex_array::vtable::ValidityChild for vortex_array::arrays::patched::Patched + +pub fn vortex_array::arrays::patched::Patched::validity_child(array: &vortex_array::arrays::patched::PatchedArray) -> &vortex_array::ArrayRef + pub trait vortex_array::vtable::ValidityChildSliceHelper pub fn vortex_array::vtable::ValidityChildSliceHelper::sliced_child_array(&self) -> vortex_error::VortexResult @@ -24512,6 +24890,10 @@ impl vortex_array::IntoArray for vortex_array::arrays::null::NullArray pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array::ArrayRef +impl vortex_array::IntoArray for vortex_array::arrays::patched::PatchedArray + +pub fn vortex_array::arrays::patched::PatchedArray::into_array(self) -> vortex_array::ArrayRef + impl vortex_array::IntoArray for vortex_array::arrays::scalar_fn::ScalarFnArray pub fn vortex_array::arrays::scalar_fn::ScalarFnArray::into_array(self) -> vortex_array::ArrayRef diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 9f96f3c5c9f..8a6c3e9de17 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -216,7 +216,7 @@ fn transpose( lane_offsets[index] += lane_offsets[index - 1]; } - // Loop over patches, writing thme to final positions + // Loop over patches, writing them to final positions let indices_out = indices_buffer.spare_capacity_mut(); let values_out = values_buffer.spare_capacity_mut(); for (index, &value) in std::iter::zip(indices_in, values_in) { diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index bcc87d869ce..edfb01fc101 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -7,6 +7,7 @@ mod slice; use std::hash::Hash; use std::hash::Hasher; +use std::sync::Arc; use vortex_buffer::Buffer; use vortex_error::VortexResult; @@ -22,7 +23,7 @@ use crate::Canonical; use crate::DeserializeMetadata; use crate::DynArray; use crate::ExecutionCtx; -use crate::ExecutionStep; +use crate::ExecutionResult; use crate::IntoArray; use crate::Precision; use crate::ProstMetadata; @@ -47,7 +48,7 @@ use crate::vtable::ValidityVTableFromChild; vtable!(Patched); -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct Patched; impl ValidityChild for Patched { @@ -68,7 +69,11 @@ impl VTable for Patched { type OperationsVTable = Self; type ValidityVTable = ValidityVTableFromChild; - fn id(_array: &Self::Array) -> ArrayId { + fn vtable(_array: &Self::Array) -> &Self { + &Patched + } + + fn id(&self) -> ArrayId { ArrayId::new_ref("vortex.patched") } @@ -210,7 +215,7 @@ impl VTable for Patched { Ok(()) } - fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult { + fn execute(array: Arc, ctx: &mut ExecutionCtx) -> VortexResult { let inner = array .inner .clone() @@ -253,7 +258,7 @@ impl VTable for Patched { PrimitiveArray::from_byte_buffer(output.into_byte_buffer(), ptype, validity) }); - Ok(ExecutionStep::done(patched_values.into_array())) + Ok(ExecutionResult::done(patched_values.into_array())) } fn execute_parent( diff --git a/vortex-buffer/public-api.lock b/vortex-buffer/public-api.lock index 29b941c77d4..f3b52183e39 100644 --- a/vortex-buffer/public-api.lock +++ b/vortex-buffer/public-api.lock @@ -550,6 +550,10 @@ pub fn vortex_buffer::Buffer::from_arrow_buffer(arrow: arrow_buffer::buffer: pub fn vortex_buffer::Buffer::into_arrow_buffer(self) -> arrow_buffer::buffer::immutable::Buffer +impl vortex_buffer::Buffer + +pub fn vortex_buffer::Buffer::reinterpret(&self) -> &[V] + impl vortex_buffer::Buffer pub fn vortex_buffer::Buffer::from_arrow_scalar_buffer(arrow: arrow_buffer::buffer::scalar::ScalarBuffer) -> Self From 1a9d90e0651b665e364268db3c3be2d6fc40a9f1 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 17:36:19 -0400 Subject: [PATCH 05/19] actually make the kernel get used Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/vtable/kernels.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/vortex-array/src/arrays/patched/vtable/kernels.rs b/vortex-array/src/arrays/patched/vtable/kernels.rs index 2c60ec4a20f..7994b19e02e 100644 --- a/vortex-array/src/arrays/patched/vtable/kernels.rs +++ b/vortex-array/src/arrays/patched/vtable/kernels.rs @@ -2,8 +2,11 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use crate::arrays::Patched; +use crate::arrays::dict::TakeExecuteAdaptor; use crate::kernel::ParentKernelSet; use crate::scalar_fn::fns::binary::CompareExecuteAdaptor; -pub(super) const PARENT_KERNELS: ParentKernelSet = - ParentKernelSet::new(&[ParentKernelSet::lift(&CompareExecuteAdaptor(Patched))]); +pub(super) const PARENT_KERNELS: ParentKernelSet = ParentKernelSet::new(&[ + ParentKernelSet::lift(&CompareExecuteAdaptor(Patched)), + ParentKernelSet::lift(&TakeExecuteAdaptor(Patched)), +]); From 5dcae7d01416d1e0c39ae55372f463ec339c7e84 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 21:14:21 -0400 Subject: [PATCH 06/19] fix tests Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/take.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs index 3db64313be4..92b81f81c02 100644 --- a/vortex-array/src/arrays/patched/compute/take.rs +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -101,10 +101,10 @@ fn take_map( // Now, iterate the take indices using the prebuilt hashmap. // Undefined/null indices will miss the hash map, which we can ignore. - for index in indices { + for (output_index, index) in indices.iter().enumerate() { let index = index.as_(); if let Some(&patch_value) = index_map.get(&index) { - output[index] = patch_value; + output[output_index] = patch_value; } } } @@ -150,7 +150,7 @@ mod tests { // Take indices [0, 1, 2, 3, 4] - should get [0, 10, 0, 30, 0] let indices = buffer![0u32, 1, 2, 3, 4].into_array(); - let result = array.take(indices)?; + let result = array.take(indices)?.to_canonical()?.into_array(); let expected = PrimitiveArray::from_iter([0u16, 10, 0, 30, 0]).into_array(); assert_arrays_eq!(expected, result); @@ -165,7 +165,7 @@ mod tests { // Take indices in reverse order let indices = buffer![4u32, 3, 2, 1, 0].into_array(); - let result = array.take(indices)?; + let result = array.take(indices)?.to_canonical()?.into_array(); let expected = PrimitiveArray::from_iter([0u16, 30, 0, 10, 0]).into_array(); assert_arrays_eq!(expected, result); @@ -180,7 +180,10 @@ mod tests { // Take the same patched index multiple times let indices = buffer![2u32, 2, 0, 2].into_array(); - let result = array.take(indices)?; + let result = array.take(indices)?.to_canonical()?.into_array(); + + // execute the array. + let _canonical = result.to_canonical()?.into_primitive(); let expected = PrimitiveArray::from_iter([99u16, 99, 0, 99]).into_array(); assert_arrays_eq!(expected, result); @@ -211,7 +214,10 @@ mod tests { .into_array(), ), ); - let result = array.take(indices.into_array())?; + let result = array + .take(indices.into_array())? + .to_canonical()? + .into_array(); // Expected: [0, 20, null, 50, 80, null, 50, 80, null, 0] let expected = PrimitiveArray::new( From b8431cb21041e7bec2a64afeb953f951c0edd94e Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 19 Mar 2026 11:28:29 -0400 Subject: [PATCH 07/19] use child for values instead of buffer Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/array.rs | 32 +++++++++---- .../src/arrays/patched/compute/compare.rs | 12 ++++- .../src/arrays/patched/compute/take.rs | 8 +++- vortex-array/src/arrays/patched/mod.rs | 29 +++++++---- vortex-array/src/arrays/patched/vtable/mod.rs | 48 ++++++++++--------- .../src/arrays/patched/vtable/operations.rs | 27 +++++------ .../src/arrays/patched/vtable/slice.rs | 2 +- 7 files changed, 99 insertions(+), 59 deletions(-) diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 8a6c3e9de17..4bf26dafc17 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -12,17 +12,19 @@ use crate::ArrayRef; use crate::Canonical; use crate::DynArray; use crate::ExecutionCtx; +use crate::IntoArray; +use crate::arrays::PrimitiveArray; use crate::arrays::patched::PatchAccessor; use crate::arrays::patched::TransposedPatches; use crate::arrays::patched::patch_lanes; use crate::buffer::BufferHandle; use crate::dtype::IntegerPType; use crate::dtype::NativePType; -use crate::dtype::PType; use crate::match_each_native_ptype; use crate::match_each_unsigned_integer_ptype; use crate::patches::Patches; use crate::stats::ArrayStats; +use crate::validity::Validity; /// An array that partially "patches" another array with new values. /// @@ -50,14 +52,17 @@ pub struct PatchedArray { /// indices within a 1024-element chunk. The PType of these MUST be u16 pub(super) indices: BufferHandle, /// patch values corresponding to the indices. The ptype is specified by `values_ptype`. - pub(super) values: BufferHandle, - /// PType of the scalars in `values`. Can be any native type. - pub(super) values_ptype: PType, + pub(super) values: ArrayRef, pub(super) stats_set: ArrayStats, } impl PatchedArray { + /// Create a new `PatchedArray` from a child array and a set of [`Patches`]. + /// + /// # Errors + /// + /// The `inner` array must be primitive type, and it must have the same DType as the patches. pub fn from_array_and_patches( inner: ArrayRef, patches: &Patches, @@ -68,6 +73,11 @@ impl PatchedArray { "array DType must match patches DType" ); + vortex_ensure!( + inner.dtype().is_primitive(), + "Creating PatchedArray from Patches only supported for primitive arrays" + ); + let values_ptype = patches.dtype().as_ptype(); let TransposedPatches { @@ -80,27 +90,32 @@ impl PatchedArray { let len = inner.len(); + let values = PrimitiveArray::from_buffer_handle( + BufferHandle::new_host(values), + values_ptype, + Validity::NonNullable, + ) + .into_array(); + Ok(Self { inner, n_chunks, n_lanes, - values_ptype, offset: 0, len, lane_offsets: BufferHandle::new_host(lane_offsets), indices: BufferHandle::new_host(indices), - values: BufferHandle::new_host(values), + values, stats_set: ArrayStats::default(), }) } /// Get an accessor, which allows ranged access to patches by chunk/lane. - pub fn accessor(&self) -> PatchAccessor<'_, V> { + pub fn accessor(&self) -> PatchAccessor<'_> { PatchAccessor { n_lanes: self.n_lanes, lane_offsets: self.lane_offsets.as_host().reinterpret::(), indices: self.indices.as_host().reinterpret::(), - values: self.values.as_host().reinterpret::(), } } @@ -133,7 +148,6 @@ impl PatchedArray { len, indices, values, - values_ptype: self.values_ptype, lane_offsets: sliced_lane_offsets, stats_set: ArrayStats::default(), }) diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs index d1932ed1e44..51eeb63a432 100644 --- a/vortex-array/src/arrays/patched/compute/compare.rs +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -12,6 +12,7 @@ use crate::IntoArray; use crate::arrays::BoolArray; use crate::arrays::ConstantArray; use crate::arrays::Patched; +use crate::arrays::PrimitiveArray; use crate::arrays::bool::BoolArrayParts; use crate::arrays::patched::patch_lanes; use crate::arrays::primitive::NativeValue; @@ -28,6 +29,12 @@ impl CompareKernel for Patched { operator: CompareOperator, ctx: &mut ExecutionCtx, ) -> VortexResult> { + // We only accelerate comparisons for primitives + if !lhs.dtype().is_primitive() { + return Ok(None); + } + + // We only accelerate comparisons against constants let Some(constant) = rhs.as_constant() else { return Ok(None); }; @@ -87,9 +94,10 @@ impl CompareKernel for Patched { let lane_offsets = lhs.lane_offsets.as_host().reinterpret::(); let indices = lhs.indices.as_host().reinterpret::(); + let values = lhs.values.clone().execute::(ctx)?; - match_each_native_ptype!(lhs.values_ptype, |V| { - let values = lhs.values.as_host().reinterpret::(); + match_each_native_ptype!(values.ptype(), |V| { + let values = values.as_slice::(); let constant = constant .as_primitive() .as_::() diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs index 92b81f81c02..eee1a4c898a 100644 --- a/vortex-array/src/arrays/patched/compute/take.rs +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -24,6 +24,11 @@ impl TakeExecute for Patched { indices: &ArrayRef, ctx: &mut ExecutionCtx, ) -> VortexResult> { + // Only pushdown take when we have primitive types. + if !array.dtype().is_primitive() { + return Ok(None); + } + // Perform take on the inner array, including the placeholders. let inner = array .inner @@ -41,6 +46,7 @@ impl TakeExecute for Patched { match_each_unsigned_integer_ptype!(indices_ptype, |I| { match_each_native_ptype!(ptype, |V| { let indices = indices.clone().execute::(ctx)?; + let values = array.values.clone().execute::(ctx)?; let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); take_map( output.as_mut(), @@ -51,7 +57,7 @@ impl TakeExecute for Patched { array.n_lanes, array.lane_offsets.as_host().reinterpret::(), array.indices.as_host().reinterpret::(), - array.values.as_host().reinterpret::(), + values.as_slice::(), ); // SAFETY: output and validity still have same length after take_map returns. diff --git a/vortex-array/src/arrays/patched/mod.rs b/vortex-array/src/arrays/patched/mod.rs index f035204c188..d37f15419b9 100644 --- a/vortex-array/src/arrays/patched/mod.rs +++ b/vortex-array/src/arrays/patched/mod.rs @@ -32,23 +32,34 @@ const fn patch_lanes() -> usize { if size_of::() < 8 { 32 } else { 16 } } -pub struct PatchAccessor<'a, V> { +pub struct PatchAccessor<'a> { n_lanes: usize, lane_offsets: &'a [u32], indices: &'a [u16], - values: &'a [V], } -impl<'a, V: Sized> PatchAccessor<'a, V> { - /// Access the patches for a particular lane - pub fn access(&'a self, chunk: usize, lane: usize) -> LanePatches<'a, V> { +pub struct PatchOffset { + /// Global offset into the list of patches. These are some of the + pub index: usize, + /// This is the value stored in the `indices` buffer, which encodes the offset of the `index`-th + /// patch + pub chunk_offset: u16, +} + +impl<'a> PatchAccessor<'a> { + /// Get an iterator over indices and values offsets. + /// + /// The first component is the index into the `indices` and `values`, and the second component + /// is the set of values instead here...I think? + pub fn offsets_iter( + &self, + chunk: usize, + lane: usize, + ) -> impl Iterator + '_ { let start = self.lane_offsets[chunk * self.n_lanes + lane] as usize; let stop = self.lane_offsets[chunk * self.n_lanes + lane + 1] as usize; - LanePatches { - indices: &self.indices[start..stop], - values: &self.values[start..stop], - } + std::iter::zip(start..stop, self.indices[start..stop].iter().copied()) } } diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index edfb01fc101..3219e3733d1 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -91,7 +91,6 @@ impl VTable for Patched { fn array_hash(array: &Self::Array, state: &mut H, precision: Precision) { array.inner.array_hash(state, precision); - array.values_ptype.hash(state); array.n_chunks.hash(state); array.n_lanes.hash(state); array.lane_offsets.array_hash(state, precision); @@ -102,7 +101,6 @@ impl VTable for Patched { fn array_eq(array: &Self::Array, other: &Self::Array, precision: Precision) -> bool { array.n_chunks == other.n_chunks && array.n_lanes == other.n_lanes - && array.values_ptype == other.values_ptype && array.inner.array_eq(&other.inner, precision) && array.lane_offsets.array_eq(&other.lane_offsets, precision) && array.indices.array_eq(&other.indices, precision) @@ -117,7 +115,6 @@ impl VTable for Patched { match idx { 0 => array.lane_offsets.clone(), 1 => array.indices.clone(), - 2 => array.values.clone(), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } @@ -126,28 +123,27 @@ impl VTable for Patched { match idx { 0 => Some("lane_offsets".to_string()), 1 => Some("patch_indices".to_string()), - 2 => Some("patch_values".to_string()), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } fn nchildren(_array: &Self::Array) -> usize { - 1 + 2 } fn child(array: &Self::Array, idx: usize) -> ArrayRef { - if idx == 0 { - array.inner.clone() - } else { - vortex_panic!("invalid child index for PatchedArray: {idx}"); + match idx { + 0 => array.inner.clone(), + 1 => array.values.clone(), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } fn child_name(_array: &Self::Array, idx: usize) -> String { - if idx == 0 { - "inner".to_string() - } else { - vortex_panic!("invalid child index for PatchedArray: {idx}"); + match idx { + 0 => "inner".to_string(), + 1 => "patch_values".to_string(), + _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } @@ -186,10 +182,14 @@ impl VTable for Patched { let n_lanes = match_each_native_ptype!(dtype.as_ptype(), |P| { patch_lanes::

() }); - let &[lane_offsets, indices, values] = &buffers else { + let &[lane_offsets, indices] = &buffers else { vortex_bail!("invalid buffer count for PatchedArray"); }; + // values and indices should have same len. + let expected_len = indices.as_host().reinterpret::().len(); + let values = children.get(1, dtype, expected_len)?; + Ok(PatchedArray { inner, n_chunks, @@ -198,19 +198,19 @@ impl VTable for Patched { len, lane_offsets: lane_offsets.clone(), indices: indices.clone(), - values: values.clone(), - values_ptype: dtype.as_ptype(), + values, stats_set: ArrayStats::default(), }) } fn with_children(array: &mut Self::Array, mut children: Vec) -> VortexResult<()> { vortex_ensure!( - children.len() == 1, - "PatchedArray must have exactly 1 child" + children.len() == 2, + "PatchedArray must have exactly 2 children" ); array.inner = children.remove(0); + array.values = children.remove(0); Ok(()) } @@ -231,15 +231,17 @@ impl VTable for Patched { let lane_offsets: Buffer = Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); let indices: Buffer = Buffer::from_byte_buffer(array.indices.clone().unwrap_host()); + let values = array.values.clone().execute::(ctx)?; + + // TODO(aduffy): add support for non-primitive PatchedArray patches application. - let patched_values = match_each_native_ptype!(array.values_ptype, |V| { + let patched_values = match_each_native_ptype!(values.ptype(), |V| { let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); - let values: Buffer = Buffer::from_byte_buffer(array.values.clone().unwrap_host()); let offset = array.offset; let len = array.len; - apply::( + apply_patches_primitive::( &mut output, offset, len, @@ -247,7 +249,7 @@ impl VTable for Patched { array.n_lanes, &lane_offsets, &indices, - &values, + values.as_slice::(), ); // The output will always be aligned to a chunk boundary, we apply the offset/len @@ -281,7 +283,7 @@ impl VTable for Patched { /// Apply patches on top of the existing value types. #[allow(clippy::too_many_arguments)] -fn apply( +fn apply_patches_primitive( output: &mut [V], offset: usize, len: usize, diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs index ddf5dcec590..7f24e5fed9f 100644 --- a/vortex-array/src/arrays/patched/vtable/operations.rs +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -7,6 +7,7 @@ use crate::DynArray; use crate::arrays::patched::Patched; use crate::arrays::patched::PatchedArray; use crate::arrays::patched::patch_lanes; +use crate::dtype::PType; use crate::match_each_native_ptype; use crate::scalar::Scalar; use crate::vtable::OperationsVTable; @@ -17,21 +18,19 @@ impl OperationsVTable for Patched { let chunk = index / 1024; #[allow(clippy::cast_possible_truncation)] let chunk_index = (index % 1024) as u16; - match_each_native_ptype!(array.values_ptype, |V| { - let lane = index % patch_lanes::(); - let accessor = array.accessor::(); - let patches = accessor.access(chunk, lane); - // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely - // be slower. - for (patch_index, patch_value) in patches.iter() { - if patch_index == chunk_index { - return Ok(Scalar::primitive( - patch_value, - array.inner.dtype().nullability(), - )); - } + + let values_ptype = PType::try_from(array.dtype())?; + + let lane = match_each_native_ptype!(values_ptype, |V| { index % patch_lanes::() }); + let accessor = array.accessor(); + + // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely + // be slower. + for (index, patch_index) in accessor.offsets_iter(chunk, lane) { + if patch_index == chunk_index { + return array.values.scalar_at(index); } - }); + } // Otherwise, access the underlying value. array.inner.scalar_at(index) diff --git a/vortex-array/src/arrays/patched/vtable/slice.rs b/vortex-array/src/arrays/patched/vtable/slice.rs index 99d04666d5f..b67b68ffe0f 100644 --- a/vortex-array/src/arrays/patched/vtable/slice.rs +++ b/vortex-array/src/arrays/patched/vtable/slice.rs @@ -45,7 +45,6 @@ impl SliceReduce for Patched { lane_offsets: sliced_lane_offsets, indices: array.indices.clone(), values: array.values.clone(), - values_ptype: array.values_ptype, stats_set: ArrayStats::default(), } .into_array(), @@ -93,6 +92,7 @@ mod tests { @r#" root: vortex.patched(u16, len=9) inner: vortex.primitive(u16, len=512) + patch_values: vortex.primitive(u16, len=3) "#); let executed = sliced.execute::(&mut ctx)?.into_primitive(); From e46c161a21417e68da335eb8ae7dc261cb6d8e96 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Mon, 23 Mar 2026 16:22:27 -0400 Subject: [PATCH 08/19] cast in scalar_at Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/array.rs | 2 -- vortex-array/src/arrays/patched/vtable/operations.rs | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 4bf26dafc17..ca6d7205515 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -27,8 +27,6 @@ use crate::stats::ArrayStats; use crate::validity::Validity; /// An array that partially "patches" another array with new values. -/// -/// Patched arrays implement the set of nodes that do this instead here...I think? #[derive(Debug, Clone)] pub struct PatchedArray { /// The inner array that is being patched. This is the zeroth child. diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs index 7f24e5fed9f..72e4186f111 100644 --- a/vortex-array/src/arrays/patched/vtable/operations.rs +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -28,7 +28,7 @@ impl OperationsVTable for Patched { // be slower. for (index, patch_index) in accessor.offsets_iter(chunk, lane) { if patch_index == chunk_index { - return array.values.scalar_at(index); + return array.values.scalar_at(index)?.cast(array.dtype()); } } From 33ecbeb1b5e188c49eaa57e9d2935c956e0e038d Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 24 Mar 2026 14:57:51 -0400 Subject: [PATCH 09/19] implement append_to_builder for Patched We call append_to_builder on the inner first, then just do a single pass and overwrite what it just wrote. Signed-off-by: Andrew Duffy --- .../src/arrays/patched/compute/compare.rs | 3 +- vortex-array/src/arrays/patched/vtable/mod.rs | 141 ++++++++++++++++++ vortex-array/src/builders/primitive.rs | 5 + 3 files changed, 147 insertions(+), 2 deletions(-) diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs index 51eeb63a432..7f7aa8541ff 100644 --- a/vortex-array/src/arrays/patched/compute/compare.rs +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -167,8 +167,7 @@ impl CompareKernel for Patched { } }); - // SAFETY: thing - let result = unsafe { BoolArray::new_unchecked(bits.freeze(), validity) }; + let result = BoolArray::new(bits.freeze(), validity); Ok(Some(result.into_array())) } } diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index 3219e3733d1..a609c4cb8bc 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -10,6 +10,7 @@ use std::hash::Hasher; use std::sync::Arc; use vortex_buffer::Buffer; +use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_ensure; @@ -34,6 +35,8 @@ use crate::arrays::patched::patch_lanes; use crate::arrays::patched::vtable::kernels::PARENT_KERNELS; use crate::arrays::primitive::PrimitiveArrayParts; use crate::buffer::BufferHandle; +use crate::builders::ArrayBuilder; +use crate::builders::PrimitiveBuilder; use crate::dtype::DType; use crate::dtype::NativePType; use crate::match_each_native_ptype; @@ -169,6 +172,61 @@ impl VTable for Patched { Ok(ProstMetadata(inner)) } + fn append_to_builder( + array: &Self::Array, + builder: &mut dyn ArrayBuilder, + ctx: &mut ExecutionCtx, + ) -> VortexResult<()> { + let dtype = array.dtype(); + + if !dtype.is_primitive() { + // Default pathway: canonicalize and propagate. + let canonical = array + .clone() + .into_array() + .execute::(ctx)? + .into_array(); + builder.extend_from_array(&canonical); + return Ok(()); + } + + let ptype = dtype.as_ptype(); + + let len = array.len(); + array.inner.append_to_builder(builder, ctx)?; + + let offset = array.offset; + let lane_offsets: Buffer = + Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); + let indices: Buffer = Buffer::from_byte_buffer(array.indices.clone().unwrap_host()); + let values = array.values.clone().execute::(ctx)?; + + match_each_native_ptype!(ptype, |V| { + let typed_builder = builder + .as_any_mut() + .downcast_mut::>() + .vortex_expect("correctly typed builder"); + + // Overwrite the last `len` elements of the builder. These would have been + // populated by the inner.append_to_builder() call above. + let output = typed_builder.values_mut(); + let trailer = output.len() - len; + + apply_patches_primitive::( + &mut output[trailer..], + offset, + len, + array.n_chunks, + array.n_lanes, + &lane_offsets, + &indices, + values.as_slice::(), + ); + }); + + Ok(()) + } + fn build( dtype: &DType, len: usize, @@ -320,9 +378,13 @@ mod tests { use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::PatchedArray; + use crate::arrays::PrimitiveArray; + use crate::assert_arrays_eq; + use crate::builders::builder_with_capacity; use crate::dtype::Nullability; use crate::patches::Patches; use crate::scalar::Scalar; + use crate::validity::Validity; #[test] fn test_execute() { @@ -393,4 +455,83 @@ mod tests { Scalar::primitive(1u16, Nullability::NonNullable) ); } + + #[test] + fn test_append_to_builder_non_nullable() { + let values = PrimitiveArray::new(buffer![0u16; 1024], Validity::NonNullable).into_array(); + let patches = Patches::new( + 1024, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![10u16, 20, 30].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + let mut builder = builder_with_capacity(array.dtype(), array.len()); + array.append_to_builder(builder.as_mut(), &mut ctx).unwrap(); + + let result = builder.finish(); + + let mut expected = buffer_mut![0u16; 1024]; + expected[1] = 10; + expected[2] = 20; + expected[3] = 30; + let expected = expected.into_array(); + + assert_arrays_eq!(expected, result); + } + + #[test] + fn test_append_to_builder_with_validity() { + // Create inner array with nulls at indices 0 and 5. + let validity = Validity::from_iter((0..10).map(|i| i != 0 && i != 5)); + let values = PrimitiveArray::new(buffer![0u16; 10], validity).into_array(); + + // Apply patches at indices 1, 2, 3. + let patches = Patches::new( + 10, + 0, + buffer![1u32, 2, 3].into_array(), + buffer![10u16, 20, 30].into_array(), + None, + ) + .unwrap(); + + let session = VortexSession::empty(); + let mut ctx = ExecutionCtx::new(session); + + let array = PatchedArray::from_array_and_patches(values, &patches, &mut ctx) + .unwrap() + .into_array(); + + let mut builder = builder_with_capacity(array.dtype(), array.len()); + array.append_to_builder(builder.as_mut(), &mut ctx).unwrap(); + + let result = builder.finish(); + + // Expected: null at 0, patched 10/20/30 at 1/2/3, zero at 4, null at 5, zeros at 6-9. + let expected = PrimitiveArray::from_option_iter([ + None, + Some(10u16), + Some(20), + Some(30), + Some(0), + None, + Some(0), + Some(0), + Some(0), + Some(0), + ]) + .into_array(); + + assert_arrays_eq!(expected, result); + } } diff --git a/vortex-array/src/builders/primitive.rs b/vortex-array/src/builders/primitive.rs index 4070a22e70d..f486cadc83f 100644 --- a/vortex-array/src/builders/primitive.rs +++ b/vortex-array/src/builders/primitive.rs @@ -62,6 +62,11 @@ impl PrimitiveBuilder { self.values.as_ref() } + /// Returns the raw primitive values in this builder as a mutable slice. + pub fn values_mut(&mut self) -> &mut [T] { + self.values.as_mut() + } + /// Create a new handle to the next `len` uninitialized values in the builder. /// /// All reads/writes through the handle to the values buffer or the validity buffer will operate From 60bf2528a5080d85c39b1f110cf063d5603abbf5 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 25 Mar 2026 16:06:38 -0400 Subject: [PATCH 10/19] cleanup Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/array.rs | 4 +-- vortex-array/src/arrays/patched/mod.rs | 35 ++---------------------- 2 files changed, 5 insertions(+), 34 deletions(-) diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index ca6d7205515..6384d10e13c 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -86,8 +86,6 @@ impl PatchedArray { values, } = transpose_patches(patches, ctx)?; - let len = inner.len(); - let values = PrimitiveArray::from_buffer_handle( BufferHandle::new_host(values), values_ptype, @@ -95,6 +93,8 @@ impl PatchedArray { ) .into_array(); + let len = inner.len(); + Ok(Self { inner, n_chunks, diff --git a/vortex-array/src/arrays/patched/mod.rs b/vortex-array/src/arrays/patched/mod.rs index d37f15419b9..654254d9c93 100644 --- a/vortex-array/src/arrays/patched/mod.rs +++ b/vortex-array/src/arrays/patched/mod.rs @@ -32,25 +32,18 @@ const fn patch_lanes() -> usize { if size_of::() < 8 { 32 } else { 16 } } +/// A cached accessor to the patch values. pub struct PatchAccessor<'a> { n_lanes: usize, lane_offsets: &'a [u32], indices: &'a [u16], } -pub struct PatchOffset { - /// Global offset into the list of patches. These are some of the - pub index: usize, - /// This is the value stored in the `indices` buffer, which encodes the offset of the `index`-th - /// patch - pub chunk_offset: u16, -} - impl<'a> PatchAccessor<'a> { /// Get an iterator over indices and values offsets. /// - /// The first component is the index into the `indices` and `values`, and the second component - /// is the set of values instead here...I think? + /// The first component is the index into the `indices` and `values`, and the second is + /// the datum from `indices[index]` already prefetched. pub fn offsets_iter( &self, chunk: usize, @@ -62,25 +55,3 @@ impl<'a> PatchAccessor<'a> { std::iter::zip(start..stop, self.indices[start..stop].iter().copied()) } } - -pub struct LanePatches<'a, V> { - pub indices: &'a [u16], - pub values: &'a [V], -} - -impl<'a, V: Copy> LanePatches<'a, V> { - pub fn len(&self) -> usize { - self.indices.len() - } - - pub fn is_empty(&self) -> bool { - self.indices.is_empty() - } - - pub fn iter(&self) -> impl Iterator { - self.indices - .iter() - .copied() - .zip(self.values.iter().copied()) - } -} From 862d8143ea464a311c2ba42693db77911c23fcbd Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 26 Mar 2026 15:49:17 -0400 Subject: [PATCH 11/19] indices as child instead of buffer Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/array.rs | 31 +++++++++----- .../src/arrays/patched/compute/compare.rs | 3 +- .../src/arrays/patched/compute/take.rs | 7 ++-- vortex-array/src/arrays/patched/mod.rs | 24 ----------- vortex-array/src/arrays/patched/vtable/mod.rs | 41 ++++++++++--------- .../src/arrays/patched/vtable/operations.rs | 22 ++++++++-- .../src/arrays/patched/vtable/slice.rs | 1 + 7 files changed, 68 insertions(+), 61 deletions(-) diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 6384d10e13c..5a2aa9fa03e 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -14,12 +14,12 @@ use crate::DynArray; use crate::ExecutionCtx; use crate::IntoArray; use crate::arrays::PrimitiveArray; -use crate::arrays::patched::PatchAccessor; use crate::arrays::patched::TransposedPatches; use crate::arrays::patched::patch_lanes; use crate::buffer::BufferHandle; use crate::dtype::IntegerPType; use crate::dtype::NativePType; +use crate::dtype::PType; use crate::match_each_native_ptype; use crate::match_each_unsigned_integer_ptype; use crate::patches::Patches; @@ -48,7 +48,7 @@ pub struct PatchedArray { /// lane offsets. The PType of these MUST be u32 pub(super) lane_offsets: BufferHandle, /// indices within a 1024-element chunk. The PType of these MUST be u16 - pub(super) indices: BufferHandle, + pub(super) indices: ArrayRef, /// patch values corresponding to the indices. The ptype is specified by `values_ptype`. pub(super) values: ArrayRef, @@ -86,6 +86,12 @@ impl PatchedArray { values, } = transpose_patches(patches, ctx)?; + let indices = PrimitiveArray::from_buffer_handle( + BufferHandle::new_host(indices), + PType::U16, + Validity::NonNullable, + ) + .into_array(); let values = PrimitiveArray::from_buffer_handle( BufferHandle::new_host(values), values_ptype, @@ -102,19 +108,24 @@ impl PatchedArray { offset: 0, len, lane_offsets: BufferHandle::new_host(lane_offsets), - indices: BufferHandle::new_host(indices), + indices, values, stats_set: ArrayStats::default(), }) } +} - /// Get an accessor, which allows ranged access to patches by chunk/lane. - pub fn accessor(&self) -> PatchAccessor<'_> { - PatchAccessor { - n_lanes: self.n_lanes, - lane_offsets: self.lane_offsets.as_host().reinterpret::(), - indices: self.indices.as_host().reinterpret::(), - } +impl PatchedArray { + pub(crate) fn seek_to_lane(&self, chunk: usize, lane: usize) -> Range { + assert!(chunk < self.n_chunks); + assert!(lane < self.n_lanes); + + let lane_offsets = self.lane_offsets.as_host().reinterpret::(); + + let start = lane_offsets[chunk * self.n_lanes + lane] as usize; + let stop = lane_offsets[chunk * self.n_lanes + lane + 1] as usize; + + start..stop } /// Slice the array to just the patches and inner values that are within the chunk range. diff --git a/vortex-array/src/arrays/patched/compute/compare.rs b/vortex-array/src/arrays/patched/compute/compare.rs index 7f7aa8541ff..9b9f8f938f6 100644 --- a/vortex-array/src/arrays/patched/compute/compare.rs +++ b/vortex-array/src/arrays/patched/compute/compare.rs @@ -93,10 +93,11 @@ impl CompareKernel for Patched { } let lane_offsets = lhs.lane_offsets.as_host().reinterpret::(); - let indices = lhs.indices.as_host().reinterpret::(); + let indices = lhs.indices.clone().execute::(ctx)?; let values = lhs.values.clone().execute::(ctx)?; match_each_native_ptype!(values.ptype(), |V| { + let indices = indices.as_slice::(); let values = values.as_slice::(); let constant = constant .as_primitive() diff --git a/vortex-array/src/arrays/patched/compute/take.rs b/vortex-array/src/arrays/patched/compute/take.rs index eee1a4c898a..a40881ab843 100644 --- a/vortex-array/src/arrays/patched/compute/take.rs +++ b/vortex-array/src/arrays/patched/compute/take.rs @@ -46,7 +46,8 @@ impl TakeExecute for Patched { match_each_unsigned_integer_ptype!(indices_ptype, |I| { match_each_native_ptype!(ptype, |V| { let indices = indices.clone().execute::(ctx)?; - let values = array.values.clone().execute::(ctx)?; + let patch_indices = array.indices.clone().execute::(ctx)?; + let patch_values = array.values.clone().execute::(ctx)?; let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); take_map( output.as_mut(), @@ -56,8 +57,8 @@ impl TakeExecute for Patched { array.n_chunks, array.n_lanes, array.lane_offsets.as_host().reinterpret::(), - array.indices.as_host().reinterpret::(), - values.as_slice::(), + patch_indices.as_slice::(), + patch_values.as_slice::(), ); // SAFETY: output and validity still have same length after take_map returns. diff --git a/vortex-array/src/arrays/patched/mod.rs b/vortex-array/src/arrays/patched/mod.rs index 654254d9c93..32edda880c8 100644 --- a/vortex-array/src/arrays/patched/mod.rs +++ b/vortex-array/src/arrays/patched/mod.rs @@ -31,27 +31,3 @@ const fn patch_lanes() -> usize { // from shared to global memory. if size_of::() < 8 { 32 } else { 16 } } - -/// A cached accessor to the patch values. -pub struct PatchAccessor<'a> { - n_lanes: usize, - lane_offsets: &'a [u32], - indices: &'a [u16], -} - -impl<'a> PatchAccessor<'a> { - /// Get an iterator over indices and values offsets. - /// - /// The first component is the index into the `indices` and `values`, and the second is - /// the datum from `indices[index]` already prefetched. - pub fn offsets_iter( - &self, - chunk: usize, - lane: usize, - ) -> impl Iterator + '_ { - let start = self.lane_offsets[chunk * self.n_lanes + lane] as usize; - let stop = self.lane_offsets[chunk * self.n_lanes + lane + 1] as usize; - - std::iter::zip(start..stop, self.indices[start..stop].iter().copied()) - } -} diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index a609c4cb8bc..d21e0ed889b 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -39,6 +39,7 @@ use crate::builders::ArrayBuilder; use crate::builders::PrimitiveBuilder; use crate::dtype::DType; use crate::dtype::NativePType; +use crate::dtype::PType; use crate::match_each_native_ptype; use crate::serde::ArrayChildren; use crate::stats::ArrayStats; @@ -64,6 +65,8 @@ impl ValidityChild for Patched { pub struct PatchedMetadata { #[prost(uint32, tag = "1")] pub(crate) offset: u32, + #[prost(uint32, tag = "2")] + pub(crate) n_patches: u32, } impl VTable for Patched { @@ -111,13 +114,12 @@ impl VTable for Patched { } fn nbuffers(_array: &Self::Array) -> usize { - 3 + 1 } fn buffer(array: &Self::Array, idx: usize) -> BufferHandle { match idx { 0 => array.lane_offsets.clone(), - 1 => array.indices.clone(), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } @@ -125,19 +127,19 @@ impl VTable for Patched { fn buffer_name(_array: &Self::Array, idx: usize) -> Option { match idx { 0 => Some("lane_offsets".to_string()), - 1 => Some("patch_indices".to_string()), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } fn nchildren(_array: &Self::Array) -> usize { - 2 + 3 } fn child(array: &Self::Array, idx: usize) -> ArrayRef { match idx { 0 => array.inner.clone(), - 1 => array.values.clone(), + 1 => array.indices.clone(), + 2 => array.values.clone(), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } @@ -145,7 +147,8 @@ impl VTable for Patched { fn child_name(_array: &Self::Array, idx: usize) -> String { match idx { 0 => "inner".to_string(), - 1 => "patch_values".to_string(), + 1 => "patch_indices".to_string(), + 2 => "patch_values".to_string(), _ => vortex_panic!("invalid buffer index for PatchedArray: {idx}"), } } @@ -154,6 +157,7 @@ impl VTable for Patched { fn metadata(array: &Self::Array) -> VortexResult { Ok(ProstMetadata(PatchedMetadata { offset: array.offset as u32, + n_patches: array.indices.len() as u32, })) } @@ -198,7 +202,7 @@ impl VTable for Patched { let offset = array.offset; let lane_offsets: Buffer = Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); - let indices: Buffer = Buffer::from_byte_buffer(array.indices.clone().unwrap_host()); + let indices = array.indices.clone().execute::(ctx)?; let values = array.values.clone().execute::(ctx)?; match_each_native_ptype!(ptype, |V| { @@ -219,7 +223,7 @@ impl VTable for Patched { array.n_chunks, array.n_lanes, &lane_offsets, - &indices, + indices.as_slice::(), values.as_slice::(), ); }); @@ -234,19 +238,16 @@ impl VTable for Patched { buffers: &[BufferHandle], children: &dyn ArrayChildren, ) -> VortexResult { - let inner = children.get(0, dtype, len)?; - let n_chunks = len.div_ceil(1024); - let n_lanes = match_each_native_ptype!(dtype.as_ptype(), |P| { patch_lanes::

() }); - let &[lane_offsets, indices] = &buffers else { + let &[lane_offsets] = &buffers else { vortex_bail!("invalid buffer count for PatchedArray"); }; - // values and indices should have same len. - let expected_len = indices.as_host().reinterpret::().len(); - let values = children.get(1, dtype, expected_len)?; + let inner = children.get(0, dtype, len)?; + let indices = children.get(1, PType::U16.into(), metadata.n_patches as usize)?; + let values = children.get(1, dtype, metadata.n_patches as usize)?; Ok(PatchedArray { inner, @@ -255,7 +256,7 @@ impl VTable for Patched { offset: metadata.offset as usize, len, lane_offsets: lane_offsets.clone(), - indices: indices.clone(), + indices, values, stats_set: ArrayStats::default(), }) @@ -288,10 +289,10 @@ impl VTable for Patched { let lane_offsets: Buffer = Buffer::from_byte_buffer(array.lane_offsets.clone().unwrap_host()); - let indices: Buffer = Buffer::from_byte_buffer(array.indices.clone().unwrap_host()); - let values = array.values.clone().execute::(ctx)?; + let indices = array.indices.clone().execute::(ctx)?; - // TODO(aduffy): add support for non-primitive PatchedArray patches application. + // TODO(aduffy): add support for non-primitive PatchedArray patches application (?) + let values = array.values.clone().execute::(ctx)?; let patched_values = match_each_native_ptype!(values.ptype(), |V| { let mut output = Buffer::::from_byte_buffer(buffer.unwrap_host()).into_mut(); @@ -306,7 +307,7 @@ impl VTable for Patched { array.n_chunks, array.n_lanes, &lane_offsets, - &indices, + indices.as_slice::(), values.as_slice::(), ); diff --git a/vortex-array/src/arrays/patched/vtable/operations.rs b/vortex-array/src/arrays/patched/vtable/operations.rs index 72e4186f111..23b46867fe5 100644 --- a/vortex-array/src/arrays/patched/vtable/operations.rs +++ b/vortex-array/src/arrays/patched/vtable/operations.rs @@ -4,16 +4,23 @@ use vortex_error::VortexResult; use crate::DynArray; +use crate::ExecutionCtx; +use crate::arrays::PrimitiveArray; use crate::arrays::patched::Patched; use crate::arrays::patched::PatchedArray; use crate::arrays::patched::patch_lanes; use crate::dtype::PType; use crate::match_each_native_ptype; +use crate::optimizer::ArrayOptimizer; use crate::scalar::Scalar; use crate::vtable::OperationsVTable; impl OperationsVTable for Patched { - fn scalar_at(array: &PatchedArray, index: usize) -> VortexResult { + fn scalar_at( + array: &PatchedArray, + index: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult { // First check the patches let chunk = index / 1024; #[allow(clippy::cast_possible_truncation)] @@ -22,11 +29,20 @@ impl OperationsVTable for Patched { let values_ptype = PType::try_from(array.dtype())?; let lane = match_each_native_ptype!(values_ptype, |V| { index % patch_lanes::() }); - let accessor = array.accessor(); + + let range = array.seek_to_lane(chunk, lane); + + // Get the range of indices corresponding to the lane, potentially decoding them to avoid + // the overhead of repeated scalar_at calls. + let patch_indices = array + .indices + .slice(range.clone())? + .optimize()? + .execute::(ctx)?; // NOTE: we do linear scan as lane has <= 32 patches, binary search would likely // be slower. - for (index, patch_index) in accessor.offsets_iter(chunk, lane) { + for (&patch_index, index) in std::iter::zip(patch_indices.as_slice::(), range) { if patch_index == chunk_index { return array.values.scalar_at(index)?.cast(array.dtype()); } diff --git a/vortex-array/src/arrays/patched/vtable/slice.rs b/vortex-array/src/arrays/patched/vtable/slice.rs index b67b68ffe0f..98be8f0264e 100644 --- a/vortex-array/src/arrays/patched/vtable/slice.rs +++ b/vortex-array/src/arrays/patched/vtable/slice.rs @@ -92,6 +92,7 @@ mod tests { @r#" root: vortex.patched(u16, len=9) inner: vortex.primitive(u16, len=512) + patch_indices: vortex.primitive(u16, len=3) patch_values: vortex.primitive(u16, len=3) "#); From e372d988428303bb649aa287a9e2888676109945 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Thu, 26 Mar 2026 15:58:39 -0400 Subject: [PATCH 12/19] lockfiles Signed-off-by: Andrew Duffy --- vortex-array/public-api.lock | 88 +++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 27 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index 83b379b9deb..c1db8298260 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -3244,26 +3244,6 @@ pub fn vortex_array::arrays::null::NullArray::into_array(self) -> vortex_array:: pub mod vortex_array::arrays::patched -pub struct vortex_array::arrays::patched::LanePatches<'a, V> - -pub vortex_array::arrays::patched::LanePatches::indices: &'a [u16] - -pub vortex_array::arrays::patched::LanePatches::values: &'a [V] - -impl<'a, V: core::marker::Copy> vortex_array::arrays::patched::LanePatches<'a, V> - -pub fn vortex_array::arrays::patched::LanePatches<'a, V>::is_empty(&self) -> bool - -pub fn vortex_array::arrays::patched::LanePatches<'a, V>::iter(&self) -> impl core::iter::traits::iterator::Iterator - -pub fn vortex_array::arrays::patched::LanePatches<'a, V>::len(&self) -> usize - -pub struct vortex_array::arrays::patched::PatchAccessor<'a, V> - -impl<'a, V: core::marker::Sized> vortex_array::arrays::patched::PatchAccessor<'a, V> - -pub fn vortex_array::arrays::patched::PatchAccessor<'a, V>::access(&'a self, chunk: usize, lane: usize) -> vortex_array::arrays::patched::LanePatches<'a, V> - pub struct vortex_array::arrays::patched::Patched impl core::clone::Clone for vortex_array::arrays::patched::Patched @@ -3292,7 +3272,7 @@ pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: & impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched -pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched @@ -3358,8 +3338,6 @@ pub struct vortex_array::arrays::patched::PatchedArray impl vortex_array::arrays::patched::PatchedArray -pub fn vortex_array::arrays::patched::PatchedArray::accessor(&self) -> vortex_array::arrays::patched::PatchAccessor<'_, V> - pub fn vortex_array::arrays::patched::PatchedArray::from_array_and_patches(inner: vortex_array::ArrayRef, patches: &vortex_array::patches::Patches, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult impl vortex_array::arrays::patched::PatchedArray @@ -7142,7 +7120,7 @@ pub fn vortex_array::arrays::patched::Patched::compare(lhs: &Self::Array, rhs: & impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched -pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched @@ -7208,8 +7186,6 @@ pub struct vortex_array::arrays::PatchedArray impl vortex_array::arrays::patched::PatchedArray -pub fn vortex_array::arrays::patched::PatchedArray::accessor(&self) -> vortex_array::arrays::patched::PatchAccessor<'_, V> - pub fn vortex_array::arrays::patched::PatchedArray::from_array_and_patches(inner: vortex_array::ArrayRef, patches: &vortex_array::patches::Patches, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult impl vortex_array::arrays::patched::PatchedArray @@ -9450,6 +9426,8 @@ pub fn vortex_array::builders::PrimitiveBuilder::uninit_range(&mut self, len: pub fn vortex_array::builders::PrimitiveBuilder::values(&self) -> &[T] +pub fn vortex_array::builders::PrimitiveBuilder::values_mut(&mut self) -> &mut [T] + pub fn vortex_array::builders::PrimitiveBuilder::with_capacity(nullability: vortex_array::dtype::Nullability, capacity: usize) -> Self impl vortex_array::builders::ArrayBuilder for vortex_array::builders::PrimitiveBuilder @@ -22158,6 +22136,62 @@ pub fn vortex_array::arrays::null::Null::vtable(_array: &Self::Array) -> &Self pub fn vortex_array::arrays::null::Null::with_children(_array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> +impl vortex_array::vtable::VTable for vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::Array = vortex_array::arrays::patched::PatchedArray + +pub type vortex_array::arrays::patched::Patched::Metadata = vortex_array::ProstMetadata + +pub type vortex_array::arrays::patched::Patched::OperationsVTable = vortex_array::arrays::patched::Patched + +pub type vortex_array::arrays::patched::Patched::ValidityVTable = vortex_array::vtable::ValidityVTableFromChild + +pub fn vortex_array::arrays::patched::Patched::append_to_builder(array: &Self::Array, builder: &mut dyn vortex_array::builders::ArrayBuilder, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult<()> + +pub fn vortex_array::arrays::patched::Patched::array_eq(array: &Self::Array, other: &Self::Array, precision: vortex_array::Precision) -> bool + +pub fn vortex_array::arrays::patched::Patched::array_hash(array: &Self::Array, state: &mut H, precision: vortex_array::Precision) + +pub fn vortex_array::arrays::patched::Patched::buffer(array: &Self::Array, idx: usize) -> vortex_array::buffer::BufferHandle + +pub fn vortex_array::arrays::patched::Patched::buffer_name(_array: &Self::Array, idx: usize) -> core::option::Option + +pub fn vortex_array::arrays::patched::Patched::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::child(array: &Self::Array, idx: usize) -> vortex_array::ArrayRef + +pub fn vortex_array::arrays::patched::Patched::child_name(_array: &Self::Array, idx: usize) -> alloc::string::String + +pub fn vortex_array::arrays::patched::Patched::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::dtype(array: &Self::Array) -> &vortex_array::dtype::DType + +pub fn vortex_array::arrays::patched::Patched::execute(array: alloc::sync::Arc, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::execute_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::id(&self) -> vortex_array::vtable::ArrayId + +pub fn vortex_array::arrays::patched::Patched::len(array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::metadata(array: &Self::Array) -> vortex_error::VortexResult + +pub fn vortex_array::arrays::patched::Patched::nbuffers(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::nchildren(_array: &Self::Array) -> usize + +pub fn vortex_array::arrays::patched::Patched::reduce(array: &Self::Array) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::reduce_parent(array: &Self::Array, parent: &vortex_array::ArrayRef, child_idx: usize) -> vortex_error::VortexResult> + +pub fn vortex_array::arrays::patched::Patched::serialize(_metadata: Self::Metadata) -> vortex_error::VortexResult>> + +pub fn vortex_array::arrays::patched::Patched::stats(array: &Self::Array) -> vortex_array::stats::StatsSetRef<'_> + +pub fn vortex_array::arrays::patched::Patched::vtable(_array: &Self::Array) -> &Self + +pub fn vortex_array::arrays::patched::Patched::with_children(array: &mut Self::Array, children: alloc::vec::Vec) -> vortex_error::VortexResult<()> + impl vortex_array::vtable::VTable for vortex_array::arrays::scalar_fn::ScalarFnVTable pub type vortex_array::arrays::scalar_fn::ScalarFnVTable::Array = vortex_array::arrays::scalar_fn::ScalarFnArray @@ -22380,7 +22414,7 @@ pub fn vortex_array::arrays::null::Null::scalar_at(_array: &vortex_array::arrays impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::patched::Patched -pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize) -> vortex_error::VortexResult +pub fn vortex_array::arrays::patched::Patched::scalar_at(array: &vortex_array::arrays::patched::PatchedArray, index: usize, ctx: &mut vortex_array::ExecutionCtx) -> vortex_error::VortexResult impl vortex_array::vtable::OperationsVTable for vortex_array::arrays::scalar_fn::ScalarFnVTable From 05c67441b57911f018766c3df125bdbf6224a37a Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Wed, 18 Mar 2026 17:21:22 -0400 Subject: [PATCH 13/19] final Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/vtable/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index d21e0ed889b..008619a23c0 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -216,6 +216,8 @@ impl VTable for Patched { let output = typed_builder.values_mut(); let trailer = output.len() - len; + let trailer = values.len() - len; + apply_patches_primitive::( &mut output[trailer..], offset, From f6375f83e824ca31100c124e4bec1c4cbf0fef36 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Mon, 23 Mar 2026 16:39:01 -0400 Subject: [PATCH 14/19] add a LazyPatchedArray this lets us deserialize BPArray with Patches without eagerly transposing Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/lazy_patched/mod.rs | 6 + .../src/arrays/lazy_patched/vtable/mod.rs | 195 ++++++++++++++++++ .../arrays/lazy_patched/vtable/operations.rs | 20 ++ .../arrays/lazy_patched/vtable/validity.rs | 13 ++ vortex-array/src/arrays/mod.rs | 1 + 5 files changed, 235 insertions(+) create mode 100644 vortex-array/src/arrays/lazy_patched/mod.rs create mode 100644 vortex-array/src/arrays/lazy_patched/vtable/mod.rs create mode 100644 vortex-array/src/arrays/lazy_patched/vtable/operations.rs create mode 100644 vortex-array/src/arrays/lazy_patched/vtable/validity.rs diff --git a/vortex-array/src/arrays/lazy_patched/mod.rs b/vortex-array/src/arrays/lazy_patched/mod.rs new file mode 100644 index 00000000000..7f2d1d29cf2 --- /dev/null +++ b/vortex-array/src/arrays/lazy_patched/mod.rs @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod vtable; + +pub use vtable::*; diff --git a/vortex-array/src/arrays/lazy_patched/vtable/mod.rs b/vortex-array/src/arrays/lazy_patched/vtable/mod.rs new file mode 100644 index 00000000000..330100cdf2f --- /dev/null +++ b/vortex-array/src/arrays/lazy_patched/vtable/mod.rs @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +mod operations; +mod validity; + +use std::hash::Hasher; + +use vortex_error::VortexResult; +use vortex_error::vortex_ensure; +use vortex_error::vortex_ensure_eq; +use vortex_error::vortex_panic; +use vortex_session::VortexSession; + +use crate::ArrayEq; +use crate::ArrayHash; +use crate::ArrayRef; +use crate::DeserializeMetadata; +use crate::DynArray; +use crate::ExecutionCtx; +use crate::ExecutionResult; +use crate::IntoArray; +use crate::Precision; +use crate::ProstMetadata; +use crate::SerializeMetadata; +use crate::arrays::PatchedArray; +use crate::buffer::BufferHandle; +use crate::dtype::DType; +use crate::patches::Patches; +use crate::serde::ArrayChildren; +use crate::stats::StatsSetRef; +use crate::vtable; +use crate::vtable::ArrayId; +use crate::vtable::VTable; +use crate::vtable::ValidityVTableFromChild; + +#[derive(Clone, Debug)] +pub struct LazyPatched; + +vtable!(LazyPatched); + +#[derive(Clone, prost::Message)] +pub struct LazyPatchedMetadata { + #[prost(uint32, tag = "1")] + pub(crate) num_patches: u32, +} + +impl VTable for LazyPatched { + type Array = LazyPatchedArray; + type Metadata = ProstMetadata; + + type OperationsVTable = Self; + type ValidityVTable = ValidityVTableFromChild; + + fn vtable(_array: &Self::Array) -> &Self { + &LazyPatched + } + + fn id(&self) -> ArrayId { + ArrayId::new_ref("vortex.patched_lazy") + } + + fn len(array: &Self::Array) -> usize { + array.inner.len() + } + + fn dtype(array: &Self::Array) -> &DType { + array.inner.dtype() + } + + fn stats(_array: &Self::Array) -> StatsSetRef<'_> { + todo!() + } + + fn array_hash(array: &Self::Array, state: &mut H, precision: Precision) { + array.inner.array_hash(state, precision); + array.patches.array_hash(state, precision); + } + + fn array_eq(array: &Self::Array, other: &Self::Array, precision: Precision) -> bool { + array.inner.array_eq(&other.inner, precision) + && array.patches.array_eq(&other.patches, precision) + } + + fn nbuffers(_array: &Self::Array) -> usize { + 0 + } + + fn buffer(_array: &Self::Array, _idx: usize) -> BufferHandle { + vortex_panic!("LazyPatched array holds no buffers") + } + + fn buffer_name(_array: &Self::Array, _idx: usize) -> Option { + vortex_panic!("LazyPatched array holds no buffers") + } + + fn nchildren(_array: &Self::Array) -> usize { + 3 + } + + fn child(array: &Self::Array, idx: usize) -> ArrayRef { + match idx { + 0 => array.inner.clone(), + 1 => array.patches.indices().clone(), + 2 => array.patches.values().clone(), + _ => unreachable!("invalid LazyPatched child index {}", idx), + } + } + + fn child_name(_array: &Self::Array, idx: usize) -> String { + match idx { + 0 => "inner".to_string(), + 1 => "patch_indices".to_string(), + 2 => "patch_values".to_string(), + _ => unreachable!("invalid LazyPatched child index {}", idx), + } + } + + fn metadata(array: &Self::Array) -> VortexResult { + let num_patches = u32::try_from(array.patches.num_patches())?; + + Ok(ProstMetadata(LazyPatchedMetadata { num_patches })) + } + + fn serialize(metadata: Self::Metadata) -> VortexResult>> { + Ok(Some(metadata.serialize())) + } + + fn deserialize( + bytes: &[u8], + _dtype: &DType, + _len: usize, + _buffers: &[BufferHandle], + _session: &VortexSession, + ) -> VortexResult { + let deserialized = ::deserialize(bytes)?; + Ok(ProstMetadata(deserialized)) + } + + fn build( + dtype: &DType, + len: usize, + metadata: &Self::Metadata, + _buffers: &[BufferHandle], + children: &dyn ArrayChildren, + ) -> VortexResult { + // There should be 3 children + // 1. inner + // 2. patch_indices + // 3. patch_values + vortex_ensure!( + children.len() == 3, + "expected exactly 3 children from LazyPatched, found {}", + children.len() + ); + + let inner = children.get(0, dtype, len)?; + + let num_patches = metadata.num_patches as usize; + let patch_indices = children.get(1, dtype, num_patches)?; + let patch_values = children.get(2, dtype, num_patches)?; + + let patches = Patches::new(len, 0, patch_indices, patch_values, None)?; + + Ok(LazyPatchedArray { inner, patches }) + } + + fn with_children(array: &mut Self::Array, mut children: Vec) -> VortexResult<()> { + vortex_ensure_eq!(children.len(), 3); + + array.inner = children.remove(0); + + let patch_indices = children.remove(0); + let patch_values = children.remove(0); + + array.patches = Patches::new(array.inner.len(), 0, patch_indices, patch_values, None)?; + + Ok(()) + } + + fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult { + // Execution => actually transpose the patches, get back a `PatchedArray`. + let patched = + PatchedArray::from_array_and_patches(array.inner.clone(), &array.patches, ctx)? + .into_array(); + + Ok(ExecutionResult::done(patched)) + } +} + +#[derive(Debug, Clone)] +pub struct LazyPatchedArray { + inner: ArrayRef, + patches: Patches, +} diff --git a/vortex-array/src/arrays/lazy_patched/vtable/operations.rs b/vortex-array/src/arrays/lazy_patched/vtable/operations.rs new file mode 100644 index 00000000000..d782960af2b --- /dev/null +++ b/vortex-array/src/arrays/lazy_patched/vtable/operations.rs @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::DynArray; +use crate::arrays::lazy_patched::LazyPatched; +use crate::arrays::lazy_patched::LazyPatchedArray; +use crate::scalar::Scalar; +use crate::vtable::OperationsVTable; + +impl OperationsVTable for LazyPatched { + fn scalar_at(array: &LazyPatchedArray, index: usize) -> VortexResult { + Ok(if let Some(scalar) = array.patches.get_patched(index)? { + scalar + } else { + array.inner.scalar_at(index)? + }) + } +} diff --git a/vortex-array/src/arrays/lazy_patched/vtable/validity.rs b/vortex-array/src/arrays/lazy_patched/vtable/validity.rs new file mode 100644 index 00000000000..234ae791c58 --- /dev/null +++ b/vortex-array/src/arrays/lazy_patched/vtable/validity.rs @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use crate::ArrayRef; +use crate::arrays::lazy_patched::LazyPatched; +use crate::arrays::lazy_patched::LazyPatchedArray; +use crate::vtable::ValidityChild; + +impl ValidityChild for LazyPatched { + fn validity_child(array: &LazyPatchedArray) -> &ArrayRef { + &array.inner + } +} diff --git a/vortex-array/src/arrays/mod.rs b/vortex-array/src/arrays/mod.rs index 5abbcb84b85..cab46ca576c 100644 --- a/vortex-array/src/arrays/mod.rs +++ b/vortex-array/src/arrays/mod.rs @@ -104,3 +104,4 @@ pub use variant::VariantArray; #[cfg(feature = "arbitrary")] pub mod arbitrary; +pub mod lazy_patched; From a92a9718d68feeaaa7001feddd20aa88c4bf6f10 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Mon, 23 Mar 2026 16:39:42 -0400 Subject: [PATCH 15/19] update VTable::build to return ArrayRef This lets us return something other than the original array encoding at read time. Currently we'll want this so that BitPacked::build returns a LazyPatched, but this is applicable for pretty much any back-compat preserving encoding rewrites. Signed-off-by: Andrew Duffy --- encodings/alp/src/alp/array.rs | 7 ++-- encodings/alp/src/alp_rd/array.rs | 7 ++-- encodings/bytebool/src/array.rs | 4 +- encodings/datetime-parts/src/array.rs | 4 +- .../src/decimal_byte_parts/mod.rs | 4 +- .../fastlanes/src/bitpacking/vtable/mod.rs | 41 +++++++++++-------- encodings/fastlanes/src/delta/vtable/mod.rs | 4 +- encodings/fastlanes/src/for/vtable/mod.rs | 4 +- encodings/fastlanes/src/rle/vtable/mod.rs | 7 ++-- encodings/fsst/src/array.rs | 12 +++--- encodings/pco/src/array.rs | 5 ++- encodings/runend/src/array.rs | 7 ++-- encodings/sequence/src/array.rs | 8 ++-- encodings/sparse/src/lib.rs | 17 ++++---- encodings/zigzag/src/array.rs | 4 +- encodings/zstd/src/array.rs | 5 ++- encodings/zstd/src/zstd_buffers.rs | 5 ++- vortex-array/src/arrays/bool/vtable/mod.rs | 8 +++- vortex-array/src/arrays/chunked/vtable/mod.rs | 5 ++- .../src/arrays/constant/vtable/mod.rs | 4 +- vortex-array/src/arrays/decimal/vtable/mod.rs | 11 ++++- vortex-array/src/arrays/dict/vtable/mod.rs | 6 ++- .../src/arrays/extension/vtable/mod.rs | 5 ++- vortex-array/src/arrays/filter/vtable.rs | 5 ++- .../src/arrays/fixed_size_list/vtable/mod.rs | 5 ++- .../src/arrays/lazy_patched/vtable/mod.rs | 25 +++++++++-- vortex-array/src/arrays/list/vtable/mod.rs | 4 +- .../src/arrays/listview/vtable/mod.rs | 5 ++- vortex-array/src/arrays/masked/vtable/mod.rs | 4 +- vortex-array/src/arrays/null/mod.rs | 5 ++- vortex-array/src/arrays/patched/vtable/mod.rs | 5 ++- .../src/arrays/primitive/vtable/mod.rs | 7 ++-- .../src/arrays/scalar_fn/vtable/mod.rs | 5 ++- vortex-array/src/arrays/shared/vtable.rs | 5 ++- vortex-array/src/arrays/slice/vtable.rs | 6 ++- vortex-array/src/arrays/struct_/vtable/mod.rs | 8 +++- vortex-array/src/arrays/varbin/vtable/mod.rs | 4 +- .../src/arrays/varbinview/vtable/mod.rs | 13 ++++-- vortex-array/src/arrays/variant/vtable/mod.rs | 5 ++- vortex-array/src/vtable/dyn_.rs | 22 ++-------- vortex-array/src/vtable/mod.rs | 2 +- vortex-python/src/arrays/py/vtable.rs | 2 +- 42 files changed, 186 insertions(+), 135 deletions(-) diff --git a/encodings/alp/src/alp/array.rs b/encodings/alp/src/alp/array.rs index 086b699bf54..9e0ff67ac93 100644 --- a/encodings/alp/src/alp/array.rs +++ b/encodings/alp/src/alp/array.rs @@ -163,7 +163,7 @@ impl VTable for ALP { metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let encoded_ptype = match &dtype { DType::Primitive(PType::F32, n) => DType::Primitive(PType::I32, *n), DType::Primitive(PType::F64, n) => DType::Primitive(PType::I64, *n), @@ -185,14 +185,15 @@ impl VTable for ALP { }) .transpose()?; - ALPArray::try_new( + Ok(ALPArray::try_new( encoded, Exponents { e: u8::try_from(metadata.exp_e)?, f: u8::try_from(metadata.exp_f)?, }, patches, - ) + )? + .into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/encodings/alp/src/alp_rd/array.rs b/encodings/alp/src/alp_rd/array.rs index a02b1bdde59..f6807bed621 100644 --- a/encodings/alp/src/alp_rd/array.rs +++ b/encodings/alp/src/alp_rd/array.rs @@ -199,7 +199,7 @@ impl VTable for ALPRD { metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { if children.len() < 2 { vortex_bail!( "Expected at least 2 children for ALPRD encoding, found {}", @@ -247,7 +247,7 @@ impl VTable for ALPRD { }) .transpose()?; - ALPRDArray::try_new( + Ok(ALPRDArray::try_new( dtype.clone(), left_parts, left_parts_dictionary, @@ -259,7 +259,8 @@ impl VTable for ALPRD { ) })?, left_parts_patches, - ) + )? + .into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/encodings/bytebool/src/array.rs b/encodings/bytebool/src/array.rs index 8e9a7939fd8..02f784d4513 100644 --- a/encodings/bytebool/src/array.rs +++ b/encodings/bytebool/src/array.rs @@ -146,7 +146,7 @@ impl VTable for ByteBool { _metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let validity = if children.is_empty() { Validity::from(dtype.nullability()) } else if children.len() == 1 { @@ -161,7 +161,7 @@ impl VTable for ByteBool { } let buffer = buffers[0].clone(); - Ok(ByteBoolArray::new(buffer, validity)) + Ok(ByteBoolArray::new(buffer, validity).into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/encodings/datetime-parts/src/array.rs b/encodings/datetime-parts/src/array.rs index 58750254e9f..850daac04e6 100644 --- a/encodings/datetime-parts/src/array.rs +++ b/encodings/datetime-parts/src/array.rs @@ -185,7 +185,7 @@ impl VTable for DateTimeParts { metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { if children.len() != 3 { vortex_bail!( "Expected 3 children for datetime-parts encoding, found {}", @@ -209,7 +209,7 @@ impl VTable for DateTimeParts { len, )?; - DateTimePartsArray::try_new(dtype.clone(), days, seconds, subseconds) + Ok(DateTimePartsArray::try_new(dtype.clone(), days, seconds, subseconds)?.into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs index 281b9a131d2..b19d7d9f68a 100644 --- a/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs +++ b/encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs @@ -160,7 +160,7 @@ impl VTable for DecimalByteParts { metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let Some(decimal_dtype) = dtype.as_decimal_opt() else { vortex_bail!("decoding decimal but given non decimal dtype {}", dtype) }; @@ -174,7 +174,7 @@ impl VTable for DecimalByteParts { "lower_part_count > 0 not currently supported" ); - DecimalBytePartsArray::try_new(msp, *decimal_dtype) + Ok(DecimalBytePartsArray::try_new(msp, *decimal_dtype)?.into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs index dd286a4e43d..b748423c79f 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs @@ -14,6 +14,7 @@ use vortex_array::IntoArray; use vortex_array::Precision; use vortex_array::ProstMetadata; use vortex_array::SerializeMetadata; +use vortex_array::arrays::lazy_patched::LazyPatchedArray; use vortex_array::buffer::BufferHandle; use vortex_array::builders::ArrayBuilder; use vortex_array::dtype::DType; @@ -46,6 +47,7 @@ use crate::bitpack_decompress::unpack_array; use crate::bitpack_decompress::unpack_into_primitive_builder; use crate::bitpacking::vtable::kernels::PARENT_KERNELS; use crate::bitpacking::vtable::rules::RULES; + mod kernels; mod operations; mod rules; @@ -277,7 +279,7 @@ impl VTable for BitPacked { metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { if buffers.len() != 1 { vortex_bail!("Expected 1 buffer, got {}", buffers.len()); } @@ -307,25 +309,11 @@ impl VTable for BitPacked { let validity = load_validity(validity_idx)?; - let patches = metadata - .patches - .map(|p| { - let indices = children.get(0, &p.indices_dtype()?, p.len()?)?; - let values = children.get(1, dtype, p.len()?)?; - let chunk_offsets = p - .chunk_offsets_dtype()? - .map(|dtype| children.get(2, &dtype, p.chunk_offsets_len() as usize)) - .transpose()?; - - Patches::new(len, p.offset()?, indices, values, chunk_offsets) - }) - .transpose()?; - - BitPackedArray::try_new( + let bitpacked = BitPackedArray::try_new( packed, PType::try_from(dtype)?, validity, - patches, + None, u8::try_from(metadata.bit_width).map_err(|_| { vortex_err!( "BitPackedMetadata bit_width {} does not fit in u8", @@ -339,7 +327,24 @@ impl VTable for BitPacked { metadata.offset ) })?, - ) + )? + .into_array(); + + match metadata.patches { + Some(p) => { + let indices = children.get(0, &p.indices_dtype()?, p.len()?)?; + let values = children.get(1, dtype, p.len()?)?; + let chunk_offsets = p + .chunk_offsets_dtype()? + .map(|dtype| children.get(2, &dtype, p.chunk_offsets_len() as usize)) + .transpose()?; + + let patches = Patches::new(len, p.offset()?, indices, values, chunk_offsets)?; + + Ok(LazyPatchedArray::try_new(bitpacked, patches)?.into_array()) + } + None => Ok(bitpacked), + } } fn append_to_builder( diff --git a/encodings/fastlanes/src/delta/vtable/mod.rs b/encodings/fastlanes/src/delta/vtable/mod.rs index fc4dc9b9b20..416b23fafaa 100644 --- a/encodings/fastlanes/src/delta/vtable/mod.rs +++ b/encodings/fastlanes/src/delta/vtable/mod.rs @@ -176,7 +176,7 @@ impl VTable for Delta { metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { assert_eq!(children.len(), 2); let ptype = PType::try_from(dtype)?; let lanes = match_each_unsigned_integer_ptype!(ptype, |T| { ::LANES }); @@ -191,7 +191,7 @@ impl VTable for Delta { let bases = children.get(0, dtype, bases_len)?; let deltas = children.get(1, dtype, deltas_len)?; - DeltaArray::try_new(bases, deltas, metadata.0.offset as usize, len) + Ok(DeltaArray::try_new(bases, deltas, metadata.0.offset as usize, len)?.into_array()) } fn execute(array: Arc, ctx: &mut ExecutionCtx) -> VortexResult { diff --git a/encodings/fastlanes/src/for/vtable/mod.rs b/encodings/fastlanes/src/for/vtable/mod.rs index fb40d6d93da..7674786cc9c 100644 --- a/encodings/fastlanes/src/for/vtable/mod.rs +++ b/encodings/fastlanes/src/for/vtable/mod.rs @@ -150,7 +150,7 @@ impl VTable for FoR { metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { if children.len() != 1 { vortex_bail!( "Expected 1 child for FoR encoding, found {}", @@ -160,7 +160,7 @@ impl VTable for FoR { let encoded = children.get(0, dtype, len)?; - FoRArray::try_new(encoded, metadata.clone()) + Ok(FoRArray::try_new(encoded, metadata.clone())?.into_array()) } fn reduce_parent( diff --git a/encodings/fastlanes/src/rle/vtable/mod.rs b/encodings/fastlanes/src/rle/vtable/mod.rs index b4f3eaa0c93..951a72e009c 100644 --- a/encodings/fastlanes/src/rle/vtable/mod.rs +++ b/encodings/fastlanes/src/rle/vtable/mod.rs @@ -195,7 +195,7 @@ impl VTable for RLE { metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let metadata = &metadata.0; let values = children.get( 0, @@ -218,13 +218,14 @@ impl VTable for RLE { usize::try_from(metadata.values_idx_offsets_len)?, )?; - RLEArray::try_new( + Ok(RLEArray::try_new( values, indices, values_idx_offsets, metadata.offset as usize, len, - ) + )? + .into_array()) } fn execute_parent( diff --git a/encodings/fsst/src/array.rs b/encodings/fsst/src/array.rs index c7685665c66..c1619fda852 100644 --- a/encodings/fsst/src/array.rs +++ b/encodings/fsst/src/array.rs @@ -222,7 +222,7 @@ impl VTable for FSST { metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let symbols = Buffer::::from_byte_buffer(buffers[0].clone().try_to_host_sync()?); let symbol_lengths = Buffer::::from_byte_buffer(buffers[1].clone().try_to_host_sync()?); @@ -250,13 +250,14 @@ impl VTable for FSST { len, )?; - return FSSTArray::try_new( + return Ok(FSSTArray::try_new( dtype.clone(), symbols, symbol_lengths, codes, uncompressed_lengths, - ); + )? + .into_array()); } // Check for the current deserialization path. @@ -297,13 +298,14 @@ impl VTable for FSST { codes_validity, )?; - return FSSTArray::try_new( + return Ok(FSSTArray::try_new( dtype.clone(), symbols, symbol_lengths, codes, uncompressed_lengths, - ); + )? + .into_array()); } vortex_bail!( diff --git a/encodings/pco/src/array.rs b/encodings/pco/src/array.rs index e2156eb106a..8a1f2ec8348 100644 --- a/encodings/pco/src/array.rs +++ b/encodings/pco/src/array.rs @@ -215,7 +215,7 @@ impl VTable for Pco { metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let validity = if children.is_empty() { Validity::from(dtype.nullability()) } else if children.len() == 1 { @@ -250,7 +250,8 @@ impl VTable for Pco { metadata.0.clone(), len, validity, - )) + ) + .into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/encodings/runend/src/array.rs b/encodings/runend/src/array.rs index 58fe7ee5476..1de8f5d14b2 100644 --- a/encodings/runend/src/array.rs +++ b/encodings/runend/src/array.rs @@ -162,19 +162,20 @@ impl VTable for RunEnd { metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let ends_dtype = DType::Primitive(metadata.ends_ptype(), Nullability::NonNullable); let runs = usize::try_from(metadata.num_runs).vortex_expect("Must be a valid usize"); let ends = children.get(0, &ends_dtype, runs)?; let values = children.get(1, dtype, runs)?; - RunEndArray::try_new_offset_length( + Ok(RunEndArray::try_new_offset_length( ends, values, usize::try_from(metadata.offset).vortex_expect("Offset must be a valid usize"), len, - ) + )? + .into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/encodings/sequence/src/array.rs b/encodings/sequence/src/array.rs index f39ca5d39b0..ea748332bcb 100644 --- a/encodings/sequence/src/array.rs +++ b/encodings/sequence/src/array.rs @@ -9,6 +9,7 @@ use vortex_array::ArrayRef; use vortex_array::DeserializeMetadata; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; +use vortex_array::IntoArray; use vortex_array::Precision; use vortex_array::ProstMetadata; use vortex_array::SerializeMetadata; @@ -367,14 +368,15 @@ impl VTable for Sequence { metadata: &Self::Metadata, _buffers: &[BufferHandle], _children: &dyn ArrayChildren, - ) -> VortexResult { - SequenceArray::try_new( + ) -> VortexResult { + Ok(SequenceArray::try_new( metadata.base, metadata.multiplier, dtype.as_ptype(), dtype.nullability(), len, - ) + )? + .into_array()) } fn with_children(_array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/encodings/sparse/src/lib.rs b/encodings/sparse/src/lib.rs index 3b656abb3db..6f994500dde 100644 --- a/encodings/sparse/src/lib.rs +++ b/encodings/sparse/src/lib.rs @@ -193,7 +193,7 @@ impl VTable for Sparse { metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { vortex_ensure_eq!( children.len(), 2, @@ -208,16 +208,13 @@ impl VTable for Sparse { )?; let patch_values = children.get(1, dtype, metadata.patches.len()?)?; - SparseArray::try_new_from_patches( - Patches::new( - len, - metadata.patches.offset()?, - patch_indices, - patch_values, - None, - )?, + Ok(SparseArray::try_new( + patch_indices, + patch_values, + len, metadata.fill_value.clone(), - ) + )? + .into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/encodings/zigzag/src/array.rs b/encodings/zigzag/src/array.rs index 92ee5ebf41e..dea06ac1288 100644 --- a/encodings/zigzag/src/array.rs +++ b/encodings/zigzag/src/array.rs @@ -132,7 +132,7 @@ impl VTable for ZigZag { _metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { if children.len() != 1 { vortex_bail!("Expected 1 child, got {}", children.len()); } @@ -141,7 +141,7 @@ impl VTable for ZigZag { let encoded_type = DType::Primitive(ptype.to_unsigned(), dtype.nullability()); let encoded = children.get(0, &encoded_type, len)?; - ZigZagArray::try_new(encoded) + Ok(ZigZagArray::try_new(encoded)?.into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/encodings/zstd/src/array.rs b/encodings/zstd/src/array.rs index 579f0bc7a23..6b4d0374244 100644 --- a/encodings/zstd/src/array.rs +++ b/encodings/zstd/src/array.rs @@ -223,7 +223,7 @@ impl VTable for Zstd { metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let validity = if children.is_empty() { Validity::from(dtype.nullability()) } else if children.len() == 1 { @@ -260,7 +260,8 @@ impl VTable for Zstd { metadata.0.clone(), len, validity, - )) + ) + .into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/encodings/zstd/src/zstd_buffers.rs b/encodings/zstd/src/zstd_buffers.rs index 138c3ae285f..3826b78a95a 100644 --- a/encodings/zstd/src/zstd_buffers.rs +++ b/encodings/zstd/src/zstd_buffers.rs @@ -11,6 +11,7 @@ use vortex_array::ArrayHash; use vortex_array::ArrayRef; use vortex_array::ExecutionCtx; use vortex_array::ExecutionResult; +use vortex_array::IntoArray; use vortex_array::Precision; use vortex_array::ProstMetadata; use vortex_array::buffer::BufferHandle; @@ -443,7 +444,7 @@ impl VTable for ZstdBuffers { metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let compressed_buffers: Vec = buffers.to_vec(); let child_arrays: Vec = (0..children.len()) @@ -463,7 +464,7 @@ impl VTable for ZstdBuffers { }; array.validate()?; - Ok(array) + Ok(array.into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/bool/vtable/mod.rs b/vortex-array/src/arrays/bool/vtable/mod.rs index 5442ba3b3f7..826b1d20149 100644 --- a/vortex-array/src/arrays/bool/vtable/mod.rs +++ b/vortex-array/src/arrays/bool/vtable/mod.rs @@ -13,6 +13,7 @@ use crate::ArrayRef; use crate::DeserializeMetadata; use crate::ExecutionCtx; use crate::ExecutionResult; +use crate::IntoArray; use crate::ProstMetadata; use crate::SerializeMetadata; use crate::arrays::BoolArray; @@ -154,7 +155,7 @@ impl VTable for Bool { metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { if buffers.len() != 1 { vortex_bail!("Expected 1 buffer, got {}", buffers.len()); } @@ -170,7 +171,10 @@ impl VTable for Bool { let buffer = buffers[0].clone(); - BoolArray::try_new_from_handle(buffer, metadata.offset as usize, len, validity) + Ok( + BoolArray::try_new_from_handle(buffer, metadata.offset as usize, len, validity)? + .into_array(), + ) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/chunked/vtable/mod.rs b/vortex-array/src/arrays/chunked/vtable/mod.rs index c7b4a9e22fc..c2f30fbcd46 100644 --- a/vortex-array/src/arrays/chunked/vtable/mod.rs +++ b/vortex-array/src/arrays/chunked/vtable/mod.rs @@ -154,7 +154,7 @@ impl VTable for Chunked { _metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { if children.is_empty() { vortex_bail!("Chunked array needs at least one child"); } @@ -200,7 +200,8 @@ impl VTable for Chunked { chunk_offsets, chunks, stats_set: Default::default(), - }) + } + .into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/constant/vtable/mod.rs b/vortex-array/src/arrays/constant/vtable/mod.rs index a5a6d80f53c..fbb1f5ddd8c 100644 --- a/vortex-array/src/arrays/constant/vtable/mod.rs +++ b/vortex-array/src/arrays/constant/vtable/mod.rs @@ -162,8 +162,8 @@ impl VTable for Constant { metadata: &Self::Metadata, _buffers: &[BufferHandle], _children: &dyn ArrayChildren, - ) -> VortexResult { - Ok(ConstantArray::new(metadata.clone(), len)) + ) -> VortexResult { + Ok(ConstantArray::new(metadata.clone(), len).into_array()) } fn with_children(_array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/decimal/vtable/mod.rs b/vortex-array/src/arrays/decimal/vtable/mod.rs index 4c9451cd97b..5be78612317 100644 --- a/vortex-array/src/arrays/decimal/vtable/mod.rs +++ b/vortex-array/src/arrays/decimal/vtable/mod.rs @@ -14,6 +14,7 @@ use crate::ArrayRef; use crate::DeserializeMetadata; use crate::ExecutionCtx; use crate::ExecutionResult; +use crate::IntoArray; use crate::ProstMetadata; use crate::SerializeMetadata; use crate::arrays::DecimalArray; @@ -161,7 +162,7 @@ impl VTable for Decimal { metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { if buffers.len() != 1 { vortex_bail!("Expected 1 buffer, got {}", buffers.len()); } @@ -187,7 +188,13 @@ impl VTable for Decimal { "DecimalArray buffer not aligned for values type {:?}", D::DECIMAL_TYPE ); - DecimalArray::try_new_handle(values, metadata.values_type(), *decimal_dtype, validity) + Ok(DecimalArray::try_new_handle( + values, + metadata.values_type(), + *decimal_dtype, + validity, + )? + .into_array()) }) } diff --git a/vortex-array/src/arrays/dict/vtable/mod.rs b/vortex-array/src/arrays/dict/vtable/mod.rs index 0e07b7b7846..84fa30e7940 100644 --- a/vortex-array/src/arrays/dict/vtable/mod.rs +++ b/vortex-array/src/arrays/dict/vtable/mod.rs @@ -163,7 +163,7 @@ impl VTable for Dict { metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { if children.len() != 2 { vortex_bail!( "Expected 2 children for dict encoding, found {}", @@ -183,7 +183,9 @@ impl VTable for Dict { // SAFETY: We've validated the metadata and children. Ok(unsafe { - DictArray::new_unchecked(codes, values).set_all_values_referenced(all_values_referenced) + DictArray::new_unchecked(codes, values) + .set_all_values_referenced(all_values_referenced) + .into_array() }) } diff --git a/vortex-array/src/arrays/extension/vtable/mod.rs b/vortex-array/src/arrays/extension/vtable/mod.rs index 13a4bc1e093..2b7b7937832 100644 --- a/vortex-array/src/arrays/extension/vtable/mod.rs +++ b/vortex-array/src/arrays/extension/vtable/mod.rs @@ -20,6 +20,7 @@ use crate::ArrayRef; use crate::EmptyMetadata; use crate::ExecutionCtx; use crate::ExecutionResult; +use crate::IntoArray; use crate::Precision; use crate::arrays::ExtensionArray; use crate::arrays::extension::compute::rules::PARENT_RULES; @@ -133,7 +134,7 @@ impl VTable for Extension { _metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let DType::Extension(ext_dtype) = dtype else { vortex_bail!("Not an extension DType"); }; @@ -141,7 +142,7 @@ impl VTable for Extension { vortex_bail!("Expected 1 child, got {}", children.len()); } let storage = children.get(0, ext_dtype.storage_dtype(), len)?; - Ok(ExtensionArray::new(ext_dtype.clone(), storage)) + Ok(ExtensionArray::new(ext_dtype.clone(), storage).into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/filter/vtable.rs b/vortex-array/src/arrays/filter/vtable.rs index a0b23e00cfd..47f1b24f6b6 100644 --- a/vortex-array/src/arrays/filter/vtable.rs +++ b/vortex-array/src/arrays/filter/vtable.rs @@ -137,14 +137,15 @@ impl VTable for Filter { metadata: &FilterMetadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { assert_eq!(len, metadata.0.true_count()); let child = children.get(0, dtype, metadata.0.len())?; Ok(FilterArray { child, mask: metadata.0.clone(), stats: Default::default(), - }) + } + .into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs index 2b0e3544872..f5e1cd316b9 100644 --- a/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs +++ b/vortex-array/src/arrays/fixed_size_list/vtable/mod.rs @@ -15,6 +15,7 @@ use crate::ArrayRef; use crate::EmptyMetadata; use crate::ExecutionCtx; use crate::ExecutionResult; +use crate::IntoArray; use crate::Precision; use crate::arrays::FixedSizeListArray; use crate::arrays::fixed_size_list::compute::rules::PARENT_RULES; @@ -171,7 +172,7 @@ impl VTable for FixedSizeList { _metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { vortex_ensure!( buffers.is_empty(), "`FixedSizeList::build` expects no buffers" @@ -198,7 +199,7 @@ impl VTable for FixedSizeList { let num_elements = len * (*list_size as usize); let elements = children.get(0, element_dtype.as_ref(), num_elements)?; - FixedSizeListArray::try_new(elements, *list_size, validity, len) + Ok(FixedSizeListArray::try_new(elements, *list_size, validity, len)?.into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/lazy_patched/vtable/mod.rs b/vortex-array/src/arrays/lazy_patched/vtable/mod.rs index 330100cdf2f..0e29a8c3ad8 100644 --- a/vortex-array/src/arrays/lazy_patched/vtable/mod.rs +++ b/vortex-array/src/arrays/lazy_patched/vtable/mod.rs @@ -5,6 +5,7 @@ mod operations; mod validity; use std::hash::Hasher; +use std::sync::Arc; use vortex_error::VortexResult; use vortex_error::vortex_ensure; @@ -143,7 +144,7 @@ impl VTable for LazyPatched { metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { // There should be 3 children // 1. inner // 2. patch_indices @@ -162,7 +163,7 @@ impl VTable for LazyPatched { let patches = Patches::new(len, 0, patch_indices, patch_values, None)?; - Ok(LazyPatchedArray { inner, patches }) + Ok(LazyPatchedArray { inner, patches }.into_array()) } fn with_children(array: &mut Self::Array, mut children: Vec) -> VortexResult<()> { @@ -178,7 +179,7 @@ impl VTable for LazyPatched { Ok(()) } - fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult { + fn execute(array: Arc, ctx: &mut ExecutionCtx) -> VortexResult { // Execution => actually transpose the patches, get back a `PatchedArray`. let patched = PatchedArray::from_array_and_patches(array.inner.clone(), &array.patches, ctx)? @@ -193,3 +194,21 @@ pub struct LazyPatchedArray { inner: ArrayRef, patches: Patches, } + +impl LazyPatchedArray { + /// Create a new `LazyPatchedArray` from an inner array and an aligned set of [`Patches`]. + /// + /// # Errors + /// + /// Returns an error if the patches are not aligned to the array, i.e. the `array_len` of + /// the patches does not equal the length of the inner array. + pub fn try_new(inner: ArrayRef, patches: Patches) -> VortexResult { + vortex_ensure_eq!( + inner.len(), + patches.array_len(), + "Patches array_len does not match array len" + ); + + Ok(Self { inner, patches }) + } +} diff --git a/vortex-array/src/arrays/list/vtable/mod.rs b/vortex-array/src/arrays/list/vtable/mod.rs index 33cf835c596..a6a3ba9592e 100644 --- a/vortex-array/src/arrays/list/vtable/mod.rs +++ b/vortex-array/src/arrays/list/vtable/mod.rs @@ -163,7 +163,7 @@ impl VTable for List { metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let validity = if children.len() == 2 { Validity::from(dtype.nullability()) } else if children.len() == 3 { @@ -188,7 +188,7 @@ impl VTable for List { len + 1, )?; - ListArray::try_new(elements, offsets, validity) + Ok(ListArray::try_new(elements, offsets, validity)?.into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/listview/vtable/mod.rs b/vortex-array/src/arrays/listview/vtable/mod.rs index 84e50ff072c..d7009fcb1e6 100644 --- a/vortex-array/src/arrays/listview/vtable/mod.rs +++ b/vortex-array/src/arrays/listview/vtable/mod.rs @@ -15,6 +15,7 @@ use crate::ArrayRef; use crate::DeserializeMetadata; use crate::ExecutionCtx; use crate::ExecutionResult; +use crate::IntoArray; use crate::Precision; use crate::ProstMetadata; use crate::SerializeMetadata; @@ -169,7 +170,7 @@ impl VTable for ListView { metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { vortex_ensure!( buffers.is_empty(), "`ListViewArray::build` expects no buffers" @@ -212,7 +213,7 @@ impl VTable for ListView { len, )?; - ListViewArray::try_new(elements, offsets, sizes, validity) + Ok(ListViewArray::try_new(elements, offsets, sizes, validity)?.into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/masked/vtable/mod.rs b/vortex-array/src/arrays/masked/vtable/mod.rs index 263c73cd4c5..98e63767bac 100644 --- a/vortex-array/src/arrays/masked/vtable/mod.rs +++ b/vortex-array/src/arrays/masked/vtable/mod.rs @@ -144,7 +144,7 @@ impl VTable for Masked { _metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { if !buffers.is_empty() { vortex_bail!("Expected 0 buffer, got {}", buffers.len()); } @@ -163,7 +163,7 @@ impl VTable for Masked { ); }; - MaskedArray::try_new(child, validity) + Ok(MaskedArray::try_new(child, validity)?.into_array()) } fn execute(array: Arc, ctx: &mut ExecutionCtx) -> VortexResult { diff --git a/vortex-array/src/arrays/null/mod.rs b/vortex-array/src/arrays/null/mod.rs index b5e34050ea1..306a83eba11 100644 --- a/vortex-array/src/arrays/null/mod.rs +++ b/vortex-array/src/arrays/null/mod.rs @@ -13,6 +13,7 @@ use crate::ArrayRef; use crate::EmptyMetadata; use crate::ExecutionCtx; use crate::ExecutionResult; +use crate::IntoArray; use crate::Precision; use crate::arrays::null::compute::rules::PARENT_RULES; use crate::buffer::BufferHandle; @@ -115,8 +116,8 @@ impl VTable for Null { _metadata: &Self::Metadata, _buffers: &[BufferHandle], _children: &dyn ArrayChildren, - ) -> VortexResult { - Ok(NullArray::new(len)) + ) -> VortexResult { + Ok(NullArray::new(len).into_array()) } fn with_children(_array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index 008619a23c0..093083a1748 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -239,7 +239,7 @@ impl VTable for Patched { metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let n_chunks = len.div_ceil(1024); let n_lanes = match_each_native_ptype!(dtype.as_ptype(), |P| { patch_lanes::

() }); @@ -261,7 +261,8 @@ impl VTable for Patched { indices, values, stats_set: ArrayStats::default(), - }) + } + .into_array()) } fn with_children(array: &mut Self::Array, mut children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/primitive/vtable/mod.rs b/vortex-array/src/arrays/primitive/vtable/mod.rs index 0eab4f5dece..c0671e025d3 100644 --- a/vortex-array/src/arrays/primitive/vtable/mod.rs +++ b/vortex-array/src/arrays/primitive/vtable/mod.rs @@ -12,6 +12,7 @@ use crate::ArrayRef; use crate::EmptyMetadata; use crate::ExecutionCtx; use crate::ExecutionResult; +use crate::IntoArray; use crate::arrays::PrimitiveArray; use crate::buffer::BufferHandle; use crate::dtype::DType; @@ -140,7 +141,7 @@ impl VTable for Primitive { _metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { if buffers.len() != 1 { vortex_bail!("Expected 1 buffer, got {}", buffers.len()); } @@ -181,9 +182,7 @@ impl VTable for Primitive { // SAFETY: checked ahead of time unsafe { - Ok(PrimitiveArray::new_unchecked_from_handle( - buffer, ptype, validity, - )) + Ok(PrimitiveArray::new_unchecked_from_handle(buffer, ptype, validity).into_array()) } } diff --git a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs index 28619eb778d..3d50b18254d 100644 --- a/vortex-array/src/arrays/scalar_fn/vtable/mod.rs +++ b/vortex-array/src/arrays/scalar_fn/vtable/mod.rs @@ -165,7 +165,7 @@ impl VTable for ScalarFnVTable { metadata: &ScalarFnMetadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let children: Vec<_> = metadata .child_dtypes .iter() @@ -190,7 +190,8 @@ impl VTable for ScalarFnVTable { len, children, stats: Default::default(), - }) + } + .into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/shared/vtable.rs b/vortex-array/src/arrays/shared/vtable.rs index 63cdea7f734..2696a6b79af 100644 --- a/vortex-array/src/arrays/shared/vtable.rs +++ b/vortex-array/src/arrays/shared/vtable.rs @@ -14,6 +14,7 @@ use crate::Canonical; use crate::EmptyMetadata; use crate::ExecutionCtx; use crate::ExecutionResult; +use crate::IntoArray; use crate::Precision; use crate::arrays::SharedArray; use crate::buffer::BufferHandle; @@ -131,9 +132,9 @@ impl VTable for Shared { _metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn crate::serde::ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let child = children.get(0, dtype, len)?; - Ok(SharedArray::new(child)) + Ok(SharedArray::new(child).into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/slice/vtable.rs b/vortex-array/src/arrays/slice/vtable.rs index 74c33760f16..eeff90fc199 100644 --- a/vortex-array/src/arrays/slice/vtable.rs +++ b/vortex-array/src/arrays/slice/vtable.rs @@ -21,6 +21,7 @@ use crate::ArrayHash; use crate::ArrayRef; use crate::Canonical; use crate::DynArray; +use crate::IntoArray; use crate::Precision; use crate::arrays::slice::array::SliceArray; use crate::arrays::slice::rules::PARENT_RULES; @@ -137,14 +138,15 @@ impl VTable for Slice { metadata: &SliceMetadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { assert_eq!(len, metadata.0.len()); let child = children.get(0, dtype, metadata.0.end)?; Ok(SliceArray { child, range: metadata.0.clone(), stats: Default::default(), - }) + } + .into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/struct_/vtable/mod.rs b/vortex-array/src/arrays/struct_/vtable/mod.rs index cb0b089d650..c671ce00c90 100644 --- a/vortex-array/src/arrays/struct_/vtable/mod.rs +++ b/vortex-array/src/arrays/struct_/vtable/mod.rs @@ -16,6 +16,7 @@ use crate::ArrayRef; use crate::EmptyMetadata; use crate::ExecutionCtx; use crate::ExecutionResult; +use crate::IntoArray; use crate::arrays::StructArray; use crate::arrays::struct_::compute::rules::PARENT_RULES; use crate::buffer::BufferHandle; @@ -146,7 +147,7 @@ impl VTable for Struct { _metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let DType::Struct(struct_dtype, nullability) = dtype else { vortex_bail!("Expected struct dtype, found {:?}", dtype) }; @@ -175,7 +176,10 @@ impl VTable for Struct { }) .try_collect()?; - StructArray::try_new_with_dtype(children, struct_dtype.clone(), len, validity) + Ok( + StructArray::try_new_with_dtype(children, struct_dtype.clone(), len, validity)? + .into_array(), + ) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/varbin/vtable/mod.rs b/vortex-array/src/arrays/varbin/vtable/mod.rs index 8adf733a34a..621e213b0c8 100644 --- a/vortex-array/src/arrays/varbin/vtable/mod.rs +++ b/vortex-array/src/arrays/varbin/vtable/mod.rs @@ -160,7 +160,7 @@ impl VTable for VarBin { metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let validity = if children.len() == 1 { Validity::from(dtype.nullability()) } else if children.len() == 2 { @@ -181,7 +181,7 @@ impl VTable for VarBin { } let bytes = buffers[0].clone().try_to_host_sync()?; - VarBinArray::try_new(offsets, bytes, dtype.clone(), validity) + Ok(VarBinArray::try_new(offsets, bytes, dtype.clone(), validity)?.into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/varbinview/vtable/mod.rs b/vortex-array/src/arrays/varbinview/vtable/mod.rs index 9794c733009..7da6e53d01b 100644 --- a/vortex-array/src/arrays/varbinview/vtable/mod.rs +++ b/vortex-array/src/arrays/varbinview/vtable/mod.rs @@ -18,6 +18,7 @@ use crate::ArrayRef; use crate::EmptyMetadata; use crate::ExecutionCtx; use crate::ExecutionResult; +use crate::IntoArray; use crate::Precision; use crate::arrays::VarBinViewArray; use crate::arrays::varbinview::BinaryView; @@ -167,7 +168,7 @@ impl VTable for VarBinView { _metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { let Some((views_handle, data_handles)) = buffers.split_last() else { vortex_bail!("Expected at least 1 buffer, got 0"); }; @@ -195,12 +196,13 @@ impl VTable for VarBinView { // If any buffer is on device, skip host validation and use try_new_handle. if buffers.iter().any(|b| b.is_on_device()) { - return VarBinViewArray::try_new_handle( + return Ok(VarBinViewArray::try_new_handle( views_handle.clone(), Arc::from(data_handles.to_vec()), dtype.clone(), validity, - ); + )? + .into_array()); } let data_buffers = data_handles @@ -209,7 +211,10 @@ impl VTable for VarBinView { .collect::>(); let views = Buffer::::from_byte_buffer(views_handle.clone().as_host().clone()); - VarBinViewArray::try_new(views, Arc::from(data_buffers), dtype.clone(), validity) + Ok( + VarBinViewArray::try_new(views, Arc::from(data_buffers), dtype.clone(), validity)? + .into_array(), + ) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/arrays/variant/vtable/mod.rs b/vortex-array/src/arrays/variant/vtable/mod.rs index eb412d4bd87..b780e5ab37b 100644 --- a/vortex-array/src/arrays/variant/vtable/mod.rs +++ b/vortex-array/src/arrays/variant/vtable/mod.rs @@ -18,6 +18,7 @@ use crate::ArrayRef; use crate::EmptyMetadata; use crate::ExecutionCtx; use crate::ExecutionResult; +use crate::IntoArray; use crate::Precision; use crate::arrays::VariantArray; use crate::buffer::BufferHandle; @@ -128,7 +129,7 @@ impl VTable for Variant { _metadata: &Self::Metadata, _buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { vortex_ensure!(matches!(dtype, DType::Variant(_)), "Expected Variant DType"); vortex_ensure!( children.len() == 1, @@ -137,7 +138,7 @@ impl VTable for Variant { ); // The child carries the nullability for the whole VariantArray. let child = children.get(0, dtype, len)?; - Ok(VariantArray::new(child)) + Ok(VariantArray::new(child).into_array()) } fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { diff --git a/vortex-array/src/vtable/dyn_.rs b/vortex-array/src/vtable/dyn_.rs index 6b7266719c4..11db3853ee0 100644 --- a/vortex-array/src/vtable/dyn_.rs +++ b/vortex-array/src/vtable/dyn_.rs @@ -92,24 +92,10 @@ impl DynVTable for V { let metadata = V::deserialize(metadata, dtype, len, buffers, session)?; let inner = V::build(dtype, len, &metadata, buffers, children)?; // Validate the inner array's properties before wrapping. - assert_eq!(V::len(&inner), len, "Array length mismatch after building"); - assert_eq!( - V::dtype(&inner), - dtype, - "Array dtype mismatch after building" - ); - // Wrap in Array for safe downcasting. - // SAFETY: We just validated that V::len(&inner) == len and V::dtype(&inner) == dtype. - let array = unsafe { - Array::new_unchecked( - self.clone(), - dtype.clone(), - len, - inner, - ArrayStats::default(), - ) - }; - Ok(array.into_array()) + assert_eq!(inner.len(), len, "Array length mismatch after building"); + assert_eq!(inner.dtype(), dtype, "Array dtype mismatch after building"); + + Ok(inner) } fn with_children(&self, array: &ArrayRef, children: Vec) -> VortexResult { diff --git a/vortex-array/src/vtable/mod.rs b/vortex-array/src/vtable/mod.rs index 9f9924a6f19..b8037c2c6f4 100644 --- a/vortex-array/src/vtable/mod.rs +++ b/vortex-array/src/vtable/mod.rs @@ -142,7 +142,7 @@ pub trait VTable: 'static + Clone + Sized + Send + Sync + Debug { metadata: &Self::Metadata, buffers: &[BufferHandle], children: &dyn ArrayChildren, - ) -> VortexResult; + ) -> VortexResult; /// Replaces the children in `array` with `children`. fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()>; diff --git a/vortex-python/src/arrays/py/vtable.rs b/vortex-python/src/arrays/py/vtable.rs index 99459d8c286..5457994461d 100644 --- a/vortex-python/src/arrays/py/vtable.rs +++ b/vortex-python/src/arrays/py/vtable.rs @@ -148,7 +148,7 @@ impl VTable for PythonVTable { _metadata: &Self::Metadata, _buffers: &[BufferHandle], _children: &dyn ArrayChildren, - ) -> VortexResult { + ) -> VortexResult { todo!() } From c9d2bbf8695f84de3e6f0810a3fcbbd5aabac9d0 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Mon, 23 Mar 2026 17:06:16 -0400 Subject: [PATCH 16/19] Remove patches from BitPackedArray removes both the patches field as well as all code for handling patches. this is safe to do now that we have updated the VTable build function to always read methods. note that we need to leave the metadata as-is. Signed-off-by: Andrew Duffy --- encodings/alp/src/alp_rd/mod.rs | 26 +- .../fastlanes/benches/bitpacking_take.rs | 18 - .../src/bitpacking/array/bitpack_compress.rs | 218 ++++++++- .../bitpacking/array/bitpack_decompress.rs | 441 ++++++++++-------- .../fastlanes/src/bitpacking/array/mod.rs | 113 +---- .../fastlanes/src/bitpacking/compute/cast.rs | 51 +- .../src/bitpacking/compute/filter.rs | 81 ++-- .../src/bitpacking/compute/is_constant.rs | 160 +++---- .../fastlanes/src/bitpacking/compute/mod.rs | 48 +- .../fastlanes/src/bitpacking/compute/slice.rs | 12 +- .../fastlanes/src/bitpacking/compute/take.rs | 102 ++-- .../fastlanes/src/bitpacking/vtable/mod.rs | 99 +--- .../src/bitpacking/vtable/operations.rs | 160 +++---- .../fastlanes/src/for/array/for_compress.rs | 14 +- .../fastlanes/src/for/array/for_decompress.rs | 20 +- .../src/compressor/integer/mod.rs | 13 +- vortex-cuda/benches/bitpacked_cuda.rs | 20 +- vortex-cuda/benches/dynamic_dispatch_cuda.rs | 412 ++++++++-------- vortex-cuda/benches/for_cuda.rs | 19 +- .../src/dynamic_dispatch/plan_builder.rs | 8 +- vortex-cuda/src/kernel/encodings/bitpacked.rs | 66 ++- vortex-cuda/src/kernel/encodings/for_.rs | 14 +- .../arrays/synthetic/encodings/bitpacked.rs | 77 ++- .../common_encoding_tree_throughput.rs | 65 ++- 24 files changed, 1215 insertions(+), 1042 deletions(-) diff --git a/encodings/alp/src/alp_rd/mod.rs b/encodings/alp/src/alp_rd/mod.rs index 7521ff15b7c..aaf126b9ae6 100644 --- a/encodings/alp/src/alp_rd/mod.rs +++ b/encodings/alp/src/alp_rd/mod.rs @@ -8,6 +8,7 @@ use vortex_array::ExecutionCtx; use vortex_array::IntoArray; use vortex_array::patches::Patches; use vortex_array::validity::Validity; +use vortex_fastlanes::bitpack_compress::BitPackedEncoder; use vortex_fastlanes::bitpack_compress::bitpack_encode_unchecked; mod array; @@ -230,20 +231,19 @@ impl RDEncoder { // Bit-pack down the encoded left-parts array that have been dictionary encoded. let primitive_left = PrimitiveArray::new(left_parts, array.validity().clone()); - // SAFETY: by construction, all values in left_parts can be packed to left_bit_width. - let packed_left = unsafe { - bitpack_encode_unchecked(primitive_left, left_bit_width as _) - .vortex_expect("bitpack_encode_unchecked should succeed for left parts") - .into_array() - }; - + let packed_left = BitPackedEncoder::new(&primitive_left) + .with_bit_width(left_bit_width as _) + .pack() + .vortex_expect("bitpack_encode_unchecked should succeed for left parts") + .into_array() + .vortex_expect("Packed::into_array"); let primitive_right = PrimitiveArray::new(right_parts, Validity::NonNullable); - // SAFETY: by construction, all values in right_parts are right_bit_width + leading zeros. - let packed_right = unsafe { - bitpack_encode_unchecked(primitive_right, self.right_bit_width as _) - .vortex_expect("bitpack_encode_unchecked should succeed for right parts") - .into_array() - }; + let packed_right = BitPackedEncoder::new(&primitive_right) + .with_bit_width(self.right_bit_width as _) + .pack() + .vortex_expect("bitpack_encode_unchecked should succeed for right parts") + .into_array() + .vortex_expect("Packed::into_array"); // Bit-pack the dict-encoded left-parts // Bit-pack the right-parts diff --git a/encodings/fastlanes/benches/bitpacking_take.rs b/encodings/fastlanes/benches/bitpacking_take.rs index 23e857777f7..0dd1812612f 100644 --- a/encodings/fastlanes/benches/bitpacking_take.rs +++ b/encodings/fastlanes/benches/bitpacking_take.rs @@ -161,12 +161,6 @@ fn patched_take_10_stratified(bencher: Bencher) { let uncompressed = PrimitiveArray::new(values, Validity::NonNullable); let packed = bitpack_to_best_bit_width(&uncompressed).unwrap(); - assert!(packed.patches().is_some()); - assert_eq!( - packed.patches().unwrap().num_patches(), - NUM_EXCEPTIONS as usize - ); - let indices = PrimitiveArray::from_iter((0..10).map(|i| i * 6_653)); bencher @@ -186,12 +180,6 @@ fn patched_take_10_contiguous(bencher: Bencher) { let uncompressed = PrimitiveArray::new(values, Validity::NonNullable); let packed = bitpack_to_best_bit_width(&uncompressed).unwrap(); - assert!(packed.patches().is_some()); - assert_eq!( - packed.patches().unwrap().num_patches(), - NUM_EXCEPTIONS as usize - ); - let indices = buffer![0..10].into_array(); bencher @@ -250,12 +238,6 @@ fn patched_take_10k_contiguous_patches(bencher: Bencher) { let uncompressed = PrimitiveArray::new(values, Validity::NonNullable); let packed = bitpack_to_best_bit_width(&uncompressed).unwrap(); - assert!(packed.patches().is_some()); - assert_eq!( - packed.patches().unwrap().num_patches(), - NUM_EXCEPTIONS as usize - ); - let indices = PrimitiveArray::from_iter((BIG_BASE2..BIG_BASE2 + NUM_EXCEPTIONS).cycle().take(10000)); diff --git a/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs b/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs index 6f29a72db0c..44279139a78 100644 --- a/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs +++ b/encodings/fastlanes/src/bitpacking/array/bitpack_compress.rs @@ -4,7 +4,11 @@ use fastlanes::BitPacking; use itertools::Itertools; use num_traits::PrimInt; +use vortex_array::ArrayRef; use vortex_array::IntoArray; +use vortex_array::LEGACY_SESSION; +use vortex_array::VortexSessionExecute; +use vortex_array::arrays::PatchedArray; use vortex_array::arrays::PrimitiveArray; use vortex_array::buffer::BufferHandle; use vortex_array::dtype::IntegerPType; @@ -21,16 +25,156 @@ use vortex_buffer::ByteBuffer; use vortex_error::VortexExpect; use vortex_error::VortexResult; use vortex_error::vortex_bail; +use vortex_error::vortex_panic; use vortex_mask::AllOr; use vortex_mask::Mask; use crate::BitPackedArray; use crate::bitpack_decompress; -pub fn bitpack_to_best_bit_width(array: &PrimitiveArray) -> VortexResult { - let bit_width_freq = bit_width_histogram(array)?; - let best_bit_width = find_best_bit_width(array.ptype(), &bit_width_freq)?; - bitpack_encode(array, best_bit_width, Some(&bit_width_freq)) +/// The result of bit-packing an array. +#[derive(Debug)] +pub enum Packed { + // TODO(aduffy): hold onto the stats? + Unpatched(BitPackedArray), + Patched(BitPackedArray, Patches), +} + +impl Packed { + pub fn has_patches(&self) -> bool { + matches!(self, Self::Patched(_, _)) + } + + /// Unwrap the `packed` structure as the `Packed` variant without patches. + /// + /// # Panics + /// + /// Will panic if there are patches. + pub fn unwrap_unpatched(self) -> BitPackedArray { + match self { + Self::Unpatched(unpacked) => unpacked, + Self::Patched(..) => vortex_panic!("cannot unwrap Patched values as Unpatched"), + } + } + + /// Unwrap the patches from the `Packed` structure. + /// + /// # Panics + /// + /// Will panic if there are no patches. + pub fn unwrap_patches(self) -> Patches { + match self { + Self::Unpatched(_) => vortex_panic!("cannot unwrap patches from Unpatched"), + Self::Patched(_, patches) => patches, + } + } + + /// Consume and retrieve only the packed result, discarding any patches. + pub fn into_packed(self) -> BitPackedArray { + match self { + Packed::Unpatched(packed) => packed, + Packed::Patched(packed, _) => packed, + } + } + + /// Get the full `ArrayRef` for the packed result. + /// + /// This will either point to a raw `BitPackedArray`, or a `PatchedArray` with a + /// `BitPackedArray` child. + /// + /// # Errors + /// + /// If there are patches, we need to perform an array execution to transpose the patches. This + /// will propagate any error from calling `execute` on the patches components. + pub fn into_array(self) -> VortexResult { + // We might need to execute the patches instead. + match self { + Packed::Unpatched(unpatched) => Ok(unpatched.into_array()), + Packed::Patched(packed, patches) => Ok(PatchedArray::from_array_and_patches( + packed.into_array(), + &patches, + &mut LEGACY_SESSION.create_execution_ctx(), + )? + .into_array()), + } + } + + /// Apply a function to the patches, returning a new set of patches. + pub fn map_patches(self, func: F) -> VortexResult + where + F: FnOnce(Patches) -> VortexResult, + { + match self { + Packed::Unpatched(packed) => Ok(Packed::Unpatched(packed)), + Packed::Patched(packed, patches) => { + let mapped = func(patches)?; + Ok(Packed::Patched(packed, mapped)) + } + } + } +} + +/// An encoder for bit-packing `PrimitiveArray`s using FastLanes. +pub struct BitPackedEncoder<'a> { + array: &'a PrimitiveArray, + bit_width: Option, + histogram: Option<&'a [usize]>, +} + +impl<'a> BitPackedEncoder<'a> { + /// Create a new encoder that will bit-pack the provided array. + pub fn new(array: &'a PrimitiveArray) -> Self { + Self { + array, + bit_width: None, + histogram: None, + } + } + + /// Configure the encoder with a pre-selected bit-width for the output. + /// + /// If this is not configured, `pack` will scan the values and determine the optimal bit-width + /// for compression. + pub fn with_bit_width(mut self, bit_width: u8) -> Self { + self.bit_width = Some(bit_width); + self + } + + /// Configure the encoder with a pre-computed histogram of values by bit-width. + /// + /// If not set, `pack` will scan the values and build the histogram. + pub fn with_histogram(mut self, histogram: &'a [usize]) -> Self { + self.histogram = Some(histogram); + self + } + + /// Consume the encoder and return the packed result. Any configured bit-width will be + /// respected. + /// + /// # Error + /// + /// Packing will return an error if [`bitpack_encode`] would return an error, namely if the + /// types or values of the input `PrimitiveArray` are out of range. + pub fn pack(mut self) -> VortexResult { + let bit_width_freq = bit_width_histogram(self.array)?; + let bw: u8 = match self.bit_width.take() { + Some(bw) => bw, + None => find_best_bit_width(self.array.ptype(), &bit_width_freq)?, + }; + + let (packed, patches) = bitpack_encode(self.array, bw, Some(&bit_width_freq))?; + match patches { + Some(patches) => Ok(Packed::Patched(packed, patches)), + None => Ok(Packed::Unpatched(packed)), + } + } +} + +/// Find the ideal bit width that maximally compresses the input array. +/// +/// Returns the bit-packed, possibly patched, array. +pub fn bitpack_to_best_bit_width(array: &PrimitiveArray) -> VortexResult { + BitPackedEncoder::new(array).pack()?.into_array() } #[allow(unused_comparisons, clippy::absurd_extreme_comparisons)] @@ -38,7 +182,7 @@ pub fn bitpack_encode( array: &PrimitiveArray, bit_width: u8, bit_width_freq: Option<&[usize]>, -) -> VortexResult { +) -> VortexResult<(BitPackedArray, Option)> { let bit_width_freq = match bit_width_freq { Some(freq) => freq, None => &bit_width_histogram(array)?, @@ -77,17 +221,16 @@ pub fn bitpack_encode( BufferHandle::new_host(packed), array.dtype().clone(), array.validity().clone(), - patches, bit_width, array.len(), 0, ) }; - bitpacked - .stats_set - .to_ref(bitpacked.as_ref()) - .inherit_from(array.statistics()); - Ok(bitpacked) + // bitpacked + // .stats_set + // .to_ref(bitpacked.as_ref()) + // .inherit_from(array.statistics()); + Ok((bitpacked, patches)) } /// Bitpack an array into the specified bit-width without checking statistics. @@ -111,7 +254,6 @@ pub unsafe fn bitpack_encode_unchecked( BufferHandle::new_host(packed), array.dtype().clone(), array.validity().clone(), - None, bit_width, array.len(), 0, @@ -386,7 +528,7 @@ pub mod test_harness { use vortex_buffer::BufferMut; use vortex_error::VortexResult; - use super::bitpack_encode; + use super::BitPackedEncoder; pub fn make_array( rng: &mut StdRng, @@ -411,7 +553,10 @@ pub mod test_harness { PrimitiveArray::new(values, validity) }; - bitpack_encode(&values, 12, None).map(|a| a.into_array()) + BitPackedEncoder::new(&values) + .with_bit_width(12) + .pack()? + .into_array() } } @@ -457,8 +602,12 @@ mod test { Validity::from_iter(valid_values), ); assert!(values.ptype().is_unsigned_int()); - let compressed = BitPackedArray::encode(&values.into_array(), 4).unwrap(); - assert!(compressed.patches().is_none()); + let packed = BitPackedEncoder::new(&values) + .with_bit_width(4) + .pack() + .unwrap(); + assert!(!packed.has_patches()); + let compressed = packed.into_packed(); assert_eq!( (0..(1 << 4)).collect::>(), compressed @@ -476,7 +625,10 @@ mod test { let array = PrimitiveArray::new(values, Validity::AllValid); assert!(array.ptype().is_signed_int()); - let err = BitPackedArray::encode(&array.into_array(), 1024u32.ilog2() as u8).unwrap_err(); + let err = BitPackedEncoder::new(&array) + .with_bit_width(1024u32.ilog2() as u8) + .pack() + .unwrap_err(); assert!(matches!(err, VortexError::InvalidArgument(_, _))); } @@ -520,9 +672,13 @@ mod test { .for_each(|&idx| values[idx] = patch_value); let array = PrimitiveArray::from_iter(values); - let bitpacked = bitpack_encode(&array, 4, None).unwrap(); + let packed = BitPackedEncoder::new(&array) + .with_bit_width(4) + .pack() + .unwrap(); + assert!(packed.has_patches()); - let patches = bitpacked.patches().unwrap(); + let patches = packed.unwrap_patches(); let chunk_offsets = patches.chunk_offsets().as_ref().unwrap().to_primitive(); // chunk 0 (0-1023): patches at 100, 200 -> starts at patch index 0 @@ -543,9 +699,13 @@ mod test { .for_each(|&idx| values[idx] = patch_value); let array = PrimitiveArray::from_iter(values); - let bitpacked = bitpack_encode(&array, 4, None).unwrap(); + let packed = BitPackedEncoder::new(&array) + .with_bit_width(4) + .pack() + .unwrap(); + assert!(packed.has_patches()); - let patches = bitpacked.patches().unwrap(); + let patches = packed.unwrap_patches(); let chunk_offsets = patches.chunk_offsets().as_ref().unwrap().to_primitive(); assert_arrays_eq!(chunk_offsets, PrimitiveArray::from_iter([0u64, 2, 2])); @@ -562,9 +722,13 @@ mod test { .for_each(|&idx| values[idx] = patch_value); let array = PrimitiveArray::from_iter(values); - let bitpacked = bitpack_encode(&array, 4, None).unwrap(); + let packed = BitPackedEncoder::new(&array) + .with_bit_width(4) + .pack() + .unwrap(); + assert!(packed.has_patches()); - let patches = bitpacked.patches().unwrap(); + let patches = packed.unwrap_patches(); let chunk_offsets = patches.chunk_offsets().as_ref().unwrap().to_primitive(); // chunk 0 (0-1023): patches at 100, 200 -> starts at patch index 0 @@ -586,9 +750,13 @@ mod test { .for_each(|&idx| values[idx] = patch_value); let array = PrimitiveArray::from_iter(values); - let bitpacked = bitpack_encode(&array, 4, None).unwrap(); + let packed = BitPackedEncoder::new(&array) + .with_bit_width(4) + .pack() + .unwrap(); + assert!(packed.has_patches()); - let patches = bitpacked.patches().unwrap(); + let patches = packed.unwrap_patches(); let chunk_offsets = patches.chunk_offsets().as_ref().unwrap().to_primitive(); // Single chunk starting at patch index 0. diff --git a/encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs b/encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs index e4099cdcf24..631735dd6b2 100644 --- a/encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs +++ b/encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs @@ -3,12 +3,12 @@ use fastlanes::BitPacking; use itertools::Itertools; -use num_traits::AsPrimitive; use vortex_array::ExecutionCtx; use vortex_array::arrays::PrimitiveArray; use vortex_array::builders::ArrayBuilder; use vortex_array::builders::PrimitiveBuilder; use vortex_array::builders::UninitRange; +use vortex_array::dtype::IntegerPType; use vortex_array::dtype::NativePType; use vortex_array::match_each_integer_ptype; use vortex_array::match_each_unsigned_integer_ptype; @@ -16,26 +16,21 @@ use vortex_array::patches::Patches; use vortex_array::scalar::Scalar; use vortex_error::VortexExpect; use vortex_error::VortexResult; +use vortex_error::vortex_panic; +use vortex_mask::Mask; use crate::BitPackedArray; use crate::unpack_iter::BitPacked; -/// Unpacks a bit-packed array into a primitive array. -pub fn unpack_array( - array: &BitPackedArray, - ctx: &mut ExecutionCtx, -) -> VortexResult { - match_each_integer_ptype!(array.ptype(), |P| { - unpack_primitive_array::

(array, ctx) - }) +pub fn unpack_array(array: &BitPackedArray) -> VortexResult { + match_each_integer_ptype!(array.ptype(), |P| { unpack_primitive_array::

(array) }) } pub fn unpack_primitive_array( array: &BitPackedArray, - ctx: &mut ExecutionCtx, ) -> VortexResult { let mut builder = PrimitiveBuilder::with_capacity(array.dtype().nullability(), array.len()); - unpack_into_primitive_builder::(array, &mut builder, ctx)?; + unpack_into_primitive_builder::(array, &mut builder)?; assert_eq!(builder.len(), array.len()); Ok(builder.finish_into_primitive()) } @@ -44,7 +39,6 @@ pub(crate) fn unpack_into_primitive_builder( array: &BitPackedArray, // TODO(ngates): do we want to use fastlanes alignment for this buffer? builder: &mut PrimitiveBuilder, - ctx: &mut ExecutionCtx, ) -> VortexResult<()> { // If the array is empty, then we don't need to add anything to the builder. if array.is_empty() { @@ -65,10 +59,6 @@ pub(crate) fn unpack_into_primitive_builder( let mut bit_packed_iter = array.unpacked_chunks(); bit_packed_iter.decode_into(uninit_slice); - if let Some(patches) = array.patches() { - apply_patches_to_uninit_range(&mut uninit_range, patches, ctx)?; - }; - // SAFETY: We have set a correct validity mask via `append_mask` with `array.len()` values and // initialized the same number of values needed via `decode_into`. unsafe { @@ -95,20 +85,43 @@ pub fn apply_patches_to_uninit_range_fn T>( let indices = patches.indices().clone().execute::(ctx)?; let values = patches.values().clone().execute::(ctx)?; - assert!(values.all_valid()?, "Patch values must be all valid"); + let validity = values.validity_mask()?; let values = values.as_slice::(); match_each_unsigned_integer_ptype!(indices.ptype(), |P| { - for (index, &value) in indices.as_slice::

().iter().zip_eq(values) { - dst.set_value( -

>::as_(*index) - patches.offset(), - f(value), - ); - } + insert_values_and_validity_at_indices_to_uninit_range( + dst, + indices.as_slice::

(), + values, + validity, + patches.offset(), + f, + ) }); Ok(()) } +fn insert_values_and_validity_at_indices_to_uninit_range< + T: NativePType, + IndexT: IntegerPType, + F: Fn(T) -> T, +>( + dst: &mut UninitRange, + indices: &[IndexT], + values: &[T], + values_validity: Mask, + indices_offset: usize, + f: F, +) { + let Mask::AllTrue(_) = values_validity else { + vortex_panic!("BitPackedArray somehow had nullable patch values"); + }; + + for (index, &value) in indices.iter().zip_eq(values) { + dst.set_value(index.as_() - indices_offset, f(value)); + } +} + pub fn unpack_single(array: &BitPackedArray, index: usize) -> Scalar { let bit_width = array.bit_width() as usize; let ptype = array.ptype(); @@ -170,14 +183,18 @@ mod tests { use vortex_session::VortexSession; use super::*; - use crate::bitpack_compress::bitpack_encode; + use crate::bitpack_compress::BitPackedEncoder; static SESSION: LazyLock = LazyLock::new(|| VortexSession::empty().with::()); fn compression_roundtrip(n: usize) { let values = PrimitiveArray::from_iter((0..n).map(|i| (i % 2047) as u16)); - let compressed = BitPackedArray::encode(&values.clone().into_array(), 11).unwrap(); + let compressed = BitPackedEncoder::new(&values) + .with_bit_width(11) + .pack() + .unwrap() + .unwrap_unpatched(); assert_arrays_eq!(compressed, values); values @@ -206,8 +223,13 @@ mod tests { #[test] fn test_all_zeros() -> VortexResult<()> { let zeros = buffer![0u16, 0, 0, 0].into_array().to_primitive(); - let bitpacked = bitpack_encode(&zeros, 0, None)?; - let actual = unpack_array(&bitpacked, &mut SESSION.create_execution_ctx())?; + let bitpacked = BitPackedEncoder::new(&zeros) + .with_bit_width(0) + .pack()? + .unwrap_unpatched(); + let actual = bitpacked + .into_array() + .execute::(&mut SESSION.create_execution_ctx())?; assert_arrays_eq!(actual, PrimitiveArray::from_iter([0u16, 0, 0, 0])); Ok(()) } @@ -215,29 +237,39 @@ mod tests { #[test] fn test_simple_patches() -> VortexResult<()> { let zeros = buffer![0u16, 1, 0, 1].into_array().to_primitive(); - let bitpacked = bitpack_encode(&zeros, 0, None).unwrap(); - let actual = unpack_array(&bitpacked, &mut SESSION.create_execution_ctx())?; + let bitpacked = BitPackedEncoder::new(&zeros) + .with_bit_width(0) + .pack()? + .into_array()?; + let actual = bitpacked.execute::(&mut SESSION.create_execution_ctx())?; assert_arrays_eq!(actual, PrimitiveArray::from_iter([0u16, 1, 0, 1])); Ok(()) } #[test] fn test_one_full_chunk() -> VortexResult<()> { - let zeros = BufferMut::from_iter(0u16..1024).into_array().to_primitive(); - let bitpacked = bitpack_encode(&zeros, 10, None).unwrap(); - let actual = unpack_array(&bitpacked, &mut SESSION.create_execution_ctx())?; + let values = BufferMut::from_iter(0u16..1024).into_array().to_primitive(); + let bitpacked = BitPackedEncoder::new(&values) + .with_bit_width(10) + .pack()? + .into_packed(); + let actual = bitpacked + .into_array() + .execute::(&mut SESSION.create_execution_ctx())?; assert_arrays_eq!(actual, PrimitiveArray::from_iter(0u16..1024)); Ok(()) } #[test] fn test_three_full_chunks_with_patches() -> VortexResult<()> { - let zeros = BufferMut::from_iter((5u16..1029).chain(5u16..1029).chain(5u16..1029)) + let values = BufferMut::from_iter((5u16..1029).chain(5u16..1029).chain(5u16..1029)) .into_array() .to_primitive(); - let bitpacked = bitpack_encode(&zeros, 10, None).unwrap(); - assert!(bitpacked.patches().is_some()); - let actual = unpack_array(&bitpacked, &mut SESSION.create_execution_ctx())?; + let packed = BitPackedEncoder::new(&values).with_bit_width(10).pack()?; + assert!(packed.has_patches()); + let actual = packed + .into_array()? + .execute::(&mut SESSION.create_execution_ctx())?; assert_arrays_eq!( actual, PrimitiveArray::from_iter((5u16..1029).chain(5u16..1029).chain(5u16..1029)) @@ -247,42 +279,44 @@ mod tests { #[test] fn test_one_full_chunk_and_one_short_chunk_no_patch() -> VortexResult<()> { - let zeros = BufferMut::from_iter(0u16..1025).into_array().to_primitive(); - let bitpacked = bitpack_encode(&zeros, 11, None).unwrap(); - assert!(bitpacked.patches().is_none()); - let actual = unpack_array(&bitpacked, &mut SESSION.create_execution_ctx())?; + let values = BufferMut::from_iter(0u16..1025).into_array().to_primitive(); + let packed = BitPackedEncoder::new(&values).with_bit_width(11).pack()?; + assert!(!packed.has_patches()); + let actual = packed + .into_array()? + .execute::(&mut SESSION.create_execution_ctx())?; assert_arrays_eq!(actual, PrimitiveArray::from_iter(0u16..1025)); Ok(()) } #[test] fn test_one_full_chunk_and_one_short_chunk_with_patches() -> VortexResult<()> { - let zeros = BufferMut::from_iter(512u16..1537) - .into_array() - .to_primitive(); - let bitpacked = bitpack_encode(&zeros, 10, None).unwrap(); + let values = PrimitiveArray::from_iter(512u16..1537); + let packed = BitPackedEncoder::new(&values).with_bit_width(10).pack()?; + let bitpacked = packed.into_array()?; assert_eq!(bitpacked.len(), 1025); - assert!(bitpacked.patches().is_some()); - let actual = unpack_array(&bitpacked, &mut SESSION.create_execution_ctx())?; + let actual = bitpacked + .into_array() + .execute::(&mut SESSION.create_execution_ctx())?; assert_arrays_eq!(actual, PrimitiveArray::from_iter(512u16..1537)); Ok(()) } #[test] fn test_offset_and_short_chunk_and_patches() -> VortexResult<()> { - let zeros = BufferMut::from_iter(512u16..1537) + let values = BufferMut::from_iter(512u16..1537) .into_array() .to_primitive(); - let bitpacked = bitpack_encode(&zeros, 10, None).unwrap(); + let packed = BitPackedEncoder::new(&values).with_bit_width(10).pack()?; + assert!(packed.has_patches()); + let bitpacked = packed.into_array()?; assert_eq!(bitpacked.len(), 1025); - assert!(bitpacked.patches().is_some()); - let slice_ref = bitpacked.into_array().slice(1023..1025).unwrap(); + let slice_ref = bitpacked.slice(1023..1025)?; let actual = { let mut ctx = SESSION.create_execution_ctx(); slice_ref .clone() - .execute::(&mut ctx) - .unwrap() + .execute::(&mut ctx)? .into_primitive() }; assert_arrays_eq!(actual, PrimitiveArray::from_iter([1535u16, 1536])); @@ -291,12 +325,13 @@ mod tests { #[test] fn test_offset_and_short_chunk_with_chunks_between_and_patches() -> VortexResult<()> { - let zeros = BufferMut::from_iter(512u16..2741) + let values = BufferMut::from_iter(512u16..2741) .into_array() .to_primitive(); - let bitpacked = bitpack_encode(&zeros, 10, None).unwrap(); + let packed = BitPackedEncoder::new(&values).with_bit_width(10).pack()?; + assert!(packed.has_patches()); + let bitpacked = packed.into_array()?; assert_eq!(bitpacked.len(), 2229); - assert!(bitpacked.patches().is_some()); let slice_ref = bitpacked.into_array().slice(1023..2049).unwrap(); let actual = { let mut ctx = SESSION.create_execution_ctx(); @@ -316,14 +351,13 @@ mod tests { #[test] fn test_unpack_into_empty_array() -> VortexResult<()> { let empty: PrimitiveArray = PrimitiveArray::from_iter(Vec::::new()); - let bitpacked = bitpack_encode(&empty, 0, None).unwrap(); + let bitpacked = BitPackedEncoder::new(&empty) + .with_bit_width(0) + .pack()? + .into_packed(); let mut builder = PrimitiveBuilder::::new(Nullability::NonNullable); - unpack_into_primitive_builder( - &bitpacked, - &mut builder, - &mut SESSION.create_execution_ctx(), - )?; + unpack_into_primitive_builder(&bitpacked, &mut builder)?; let result = builder.finish_into_primitive(); assert_eq!( @@ -343,73 +377,97 @@ mod tests { let array = PrimitiveArray::new(values, validity); // Bitpack the array. - let bitpacked = bitpack_encode(&array, 3, None).unwrap(); + let bitpacked = BitPackedEncoder::new(&array) + .with_bit_width(3) + .pack()? + .into_packed(); // Unpack into a new builder. let mut builder = PrimitiveBuilder::::with_capacity(Nullability::Nullable, 5); - unpack_into_primitive_builder( - &bitpacked, - &mut builder, - &mut SESSION.create_execution_ctx(), - )?; + unpack_into_primitive_builder(&bitpacked, &mut builder)?; let result = builder.finish_into_primitive(); // Verify the validity mask was correctly applied. assert_eq!(result.len(), 5); - assert!(!result.scalar_at(0).unwrap().is_null()); - assert!(result.scalar_at(1).unwrap().is_null()); - assert!(!result.scalar_at(2).unwrap().is_null()); - assert!(!result.scalar_at(3).unwrap().is_null()); - assert!(result.scalar_at(4).unwrap().is_null()); + assert!(!result.scalar_at(0)?.is_null()); + assert!(result.scalar_at(1)?.is_null()); + assert!(!result.scalar_at(2)?.is_null()); + assert!(!result.scalar_at(3)?.is_null()); + assert!(result.scalar_at(4)?.is_null()); Ok(()) } - /// Test that `unpack_into` correctly handles arrays with patches. + /// Test basic unpacking to primitive array for multiple types and sizes. #[test] - fn test_unpack_into_with_patches() -> VortexResult<()> { - // Create an array where most values fit in 4 bits but some need patches. - let values: Vec = (0..100) - .map(|i| if i % 20 == 0 { 1000 + i } else { i % 16 }) - .collect(); - let array = PrimitiveArray::from_iter(values.clone()); - - // Bitpack with a bit width that will require patches. - let bitpacked = bitpack_encode(&array, 4, None).unwrap(); - assert!( - bitpacked.patches().is_some(), - "Should have patches for values > 15" - ); - - // Unpack into a new builder. - let mut builder = PrimitiveBuilder::::with_capacity(Nullability::NonNullable, 100); - unpack_into_primitive_builder( - &bitpacked, - &mut builder, - &mut SESSION.create_execution_ctx(), - )?; - - let result = builder.finish_into_primitive(); - - // Verify all values were correctly unpacked including patches. - assert_arrays_eq!(result, PrimitiveArray::from_iter(values)); + fn test_execute_basic() -> VortexResult<()> { + // Test with u8 values. + let u8_values = PrimitiveArray::from_iter([5u8, 10, 15, 20, 25]); + let u8_bitpacked = BitPackedEncoder::new(&u8_values) + .with_bit_width(5) + .pack()? + .into_array()?; + let u8_result = + u8_bitpacked.execute::(&mut SESSION.create_execution_ctx())?; + assert_eq!(u8_result.len(), 5); + assert_arrays_eq!(u8_result, u8_values); + + // Test with u32 values - empty array. + let u32_empty: PrimitiveArray = PrimitiveArray::from_iter(Vec::::new()); + let u32_empty_bp = BitPackedEncoder::new(&u32_empty) + .with_bit_width(0) + .pack()? + .into_array()?; + let u32_empty_result = + u32_empty_bp.execute::(&mut SESSION.create_execution_ctx())?; + assert_eq!(u32_empty_result.len(), 0); + + // Test with u16 values - exactly one chunk (1024 elements). + let u16_values = PrimitiveArray::from_iter(0u16..1024); + let u16_bitpacked = BitPackedEncoder::new(&u16_values) + .with_bit_width(10) + .pack()? + .into_array()?; + let u16_result = + u16_bitpacked.execute::(&mut SESSION.create_execution_ctx())?; + assert_eq!(u16_result.len(), 1024); + + // Test with i32 values - partial chunk (1025 elements). + let i32_values = PrimitiveArray::from_iter((0i32..1025).map(|x| x % 512)); + let i32_bitpacked = BitPackedEncoder::new(&i32_values) + .with_bit_width(9) + .pack()? + .into_array()?; + let i32_result = + i32_bitpacked.execute::(&mut SESSION.create_execution_ctx())?; + assert_eq!(i32_result.len(), 1025); + assert_arrays_eq!(i32_result, i32_values); Ok(()) } /// Test unpacking with patches at various positions. #[test] - fn test_unpack_to_primitive_with_patches() -> VortexResult<()> { + fn test_execute_with_patches() -> VortexResult<()> { // Create an array where patches are needed at start, middle, and end. - let values = buffer![ - 2000u32, // Patch at start + let values: Vec = vec![ + 2000, // Patch at start 5, 10, 15, 20, 25, 30, 3000, // Patch in middle 35, 40, 45, 50, 55, 4000, // Patch at end ]; - let array = PrimitiveArray::new(values, Validity::NonNullable); + let array = PrimitiveArray::from_iter(values.clone()); // Bitpack with a small bit width to force patches. - let bitpacked = bitpack_encode(&array, 6, None).unwrap(); - assert!(bitpacked.patches().is_some(), "Should have patches"); + let packed = BitPackedEncoder::new(&array).with_bit_width(6).pack()?; + assert!(packed.has_patches(), "Should have patches"); + + // Execute to primitive array. + let result = packed + .into_array()? + .execute::(&mut SESSION.create_execution_ctx())?; + + // Verify length and values. + assert_eq!(result.len(), values.len()); + assert_arrays_eq!(result, PrimitiveArray::from_iter(values)); // Test with a larger array with multiple patches across chunks. let large_values: Vec = (0..3072) @@ -421,26 +479,33 @@ mod tests { } }) .collect(); - let large_array = PrimitiveArray::from_iter(large_values); - let large_bitpacked = bitpack_encode(&large_array, 8, None).unwrap(); - assert!(large_bitpacked.patches().is_some()); - - let large_result = unpack_array(&large_bitpacked, &mut SESSION.create_execution_ctx())?; + let large_array = PrimitiveArray::from_iter(large_values.clone()); + let large_packed = BitPackedEncoder::new(&large_array) + .with_bit_width(8) + .pack()?; + assert!(large_packed.has_patches()); + + let large_result = large_packed + .into_array()? + .execute::(&mut SESSION.create_execution_ctx())?; assert_eq!(large_result.len(), 3072); + assert_arrays_eq!(large_result, PrimitiveArray::from_iter(large_values)); Ok(()) } /// Test unpacking with nullability and validity masks. #[test] - fn test_unpack_to_primitive_nullability() { + fn test_execute_nullability() -> VortexResult<()> { // Test with null values at various positions. let values = Buffer::from_iter([100u32, 0, 200, 0, 300, 0, 400]); let validity = Validity::from_iter([true, false, true, false, true, false, true]); let array = PrimitiveArray::new(values, validity); - let bitpacked = bitpack_encode(&array, 9, None).unwrap(); - let result = - unpack_array(&bitpacked, &mut SESSION.create_execution_ctx()).vortex_expect("unpack"); + let bitpacked = BitPackedEncoder::new(&array) + .with_bit_width(9) + .pack()? + .into_array()?; + let result = bitpacked.execute::(&mut SESSION.create_execution_ctx())?; // Verify length. assert_eq!(result.len(), 7); @@ -454,11 +519,14 @@ mod tests { let patch_validity = Validity::from_iter([true, false, true, false, true, true, false]); let patch_array = PrimitiveArray::new(patch_values, patch_validity); - let patch_bitpacked = bitpack_encode(&patch_array, 5, None).unwrap(); - assert!(patch_bitpacked.patches().is_some()); + let patch_packed = BitPackedEncoder::new(&patch_array) + .with_bit_width(5) + .pack()?; + assert!(patch_packed.has_patches()); - let patch_result = unpack_array(&patch_bitpacked, &mut SESSION.create_execution_ctx()) - .vortex_expect("unpack"); + let patch_result = patch_packed + .into_array()? + .execute::(&mut SESSION.create_execution_ctx())?; assert_eq!(patch_result.len(), 7); // Test all nulls edge case. @@ -466,59 +534,37 @@ mod tests { Buffer::from_iter([0u32, 0, 0, 0]), Validity::from_iter([false, false, false, false]), ); - let all_nulls_bp = bitpack_encode(&all_nulls, 0, None).unwrap(); - let all_nulls_result = unpack_array(&all_nulls_bp, &mut SESSION.create_execution_ctx()) - .vortex_expect("unpack"); + let all_nulls_bp = BitPackedEncoder::new(&all_nulls) + .with_bit_width(0) + .pack()? + .into_array()?; + let all_nulls_result = + all_nulls_bp.execute::(&mut SESSION.create_execution_ctx())?; assert_eq!(all_nulls_result.len(), 4); + Ok(()) } - /// Test that the execute method produces consistent results with other unpacking methods. + /// Test that the execute method produces consistent results. #[test] fn test_execute_method_consistency() -> VortexResult<()> { - // Test that execute(), unpack_to_primitive(), and unpack_array() all produce consistent results. let test_consistency = |array: &PrimitiveArray, bit_width: u8| -> VortexResult<()> { - let bitpacked = bitpack_encode(array, bit_width, None).unwrap(); - - let unpacked_array = unpack_array(&bitpacked, &mut SESSION.create_execution_ctx())?; + let packed = BitPackedEncoder::new(array) + .with_bit_width(bit_width) + .pack()?; + // Using the execute() method. let executed = { let mut ctx = SESSION.create_execution_ctx(); - bitpacked - .into_array() - .execute::(&mut ctx) - .unwrap() + packed.into_array()?.execute::(&mut ctx).unwrap() }; - assert_eq!( - unpacked_array.len(), - array.len(), - "unpacked array length mismatch" - ); - - // The executed canonical should also have the correct length. + // The executed canonical should have the correct length. let executed_primitive = executed.into_primitive(); assert_eq!( executed_primitive.len(), array.len(), "executed primitive length mismatch" ); - - // Verify that the execute() method works correctly by comparing with unpack_array. - // We convert unpack_array result to canonical to compare. - let unpacked_executed = { - let mut ctx = SESSION.create_execution_ctx(); - unpacked_array - .into_array() - .execute::(&mut ctx) - .unwrap() - .into_primitive() - }; - assert_eq!( - executed_primitive.len(), - unpacked_executed.len(), - "execute() and unpack_array().execute() produced different lengths" - ); - // Both should produce identical arrays since they represent the same data. Ok(()) }; @@ -538,8 +584,8 @@ mod tests { // Test with sliced array (offset > 0). let values = PrimitiveArray::from_iter(0u32..2048); - let bitpacked = bitpack_encode(&values, 11, None).unwrap(); - let slice_ref = bitpacked.into_array().slice(500..1500).unwrap(); + let packed = BitPackedEncoder::new(&values).with_bit_width(11).pack()?; + let slice_ref = packed.into_array()?.slice(500..1500).unwrap(); let sliced = { let mut ctx = SESSION.create_execution_ctx(); slice_ref @@ -549,57 +595,41 @@ mod tests { .into_primitive() }; - // Test all three methods on the sliced array. - let primitive_result = sliced.clone(); - let unpacked_array = sliced; - let executed = { - let mut ctx = SESSION.create_execution_ctx(); - slice_ref.clone().execute::(&mut ctx).unwrap() - }; - - assert_eq!( - primitive_result.len(), - 1000, - "sliced primitive length should be 1000" - ); - assert_eq!( - unpacked_array.len(), - 1000, - "sliced unpacked array length should be 1000" - ); - - let executed_primitive = executed.into_primitive(); - assert_eq!( - executed_primitive.len(), - 1000, - "sliced executed primitive length should be 1000" - ); + assert_eq!(sliced.len(), 1000, "sliced primitive length should be 1000"); Ok(()) } /// Test edge cases for unpacking. #[test] - fn test_unpack_edge_cases() -> VortexResult<()> { + fn test_execute_edge_cases() -> VortexResult<()> { // Empty array. let empty: PrimitiveArray = PrimitiveArray::from_iter(Vec::::new()); - let empty_bp = bitpack_encode(&empty, 0, None).unwrap(); - let empty_result = unpack_array(&empty_bp, &mut SESSION.create_execution_ctx())?; + let empty_bp = BitPackedEncoder::new(&empty) + .with_bit_width(0) + .pack()? + .into_array()?; + let empty_result = + empty_bp.execute::(&mut SESSION.create_execution_ctx())?; assert_eq!(empty_result.len(), 0); // All zeros (bit_width = 0). let zeros = PrimitiveArray::from_iter([0u32; 100]); - let zeros_bp = bitpack_encode(&zeros, 0, None).unwrap(); - let zeros_result = unpack_array(&zeros_bp, &mut SESSION.create_execution_ctx())?; + let zeros_bp = BitPackedEncoder::new(&zeros) + .with_bit_width(0) + .pack()? + .into_array()?; + let zeros_result = + zeros_bp.execute::(&mut SESSION.create_execution_ctx())?; assert_eq!(zeros_result.len(), 100); - // Verify consistency with unpack_array. - let zeros_array = unpack_array(&zeros_bp, &mut SESSION.create_execution_ctx())?; - assert_eq!(zeros_result.len(), zeros_array.len()); - assert_arrays_eq!(zeros_result, zeros_array); + assert_arrays_eq!(zeros_result, zeros); // Maximum bit width for u16 (15 bits, since bitpacking requires bit_width < type bit width). let max_values = PrimitiveArray::from_iter([32767u16; 50]); // 2^15 - 1 - let max_bp = bitpack_encode(&max_values, 15, None).unwrap(); - let max_result = unpack_array(&max_bp, &mut SESSION.create_execution_ctx())?; + let max_bp = BitPackedEncoder::new(&max_values) + .with_bit_width(15) + .pack()? + .into_array()?; + let max_result = max_bp.execute::(&mut SESSION.create_execution_ctx())?; assert_eq!(max_result.len(), 50); // Exactly 3072 elements with patches across chunks. @@ -612,21 +642,26 @@ mod tests { } }) .collect(); - let boundary_array = PrimitiveArray::from_iter(boundary_values); - let boundary_bp = bitpack_encode(&boundary_array, 7, None).unwrap(); - assert!(boundary_bp.patches().is_some()); - - let boundary_result = unpack_array(&boundary_bp, &mut SESSION.create_execution_ctx())?; + let boundary_array = PrimitiveArray::from_iter(boundary_values.clone()); + let boundary_packed = BitPackedEncoder::new(&boundary_array) + .with_bit_width(7) + .pack()?; + assert!(boundary_packed.has_patches()); + + let boundary_result = boundary_packed + .into_array()? + .execute::(&mut SESSION.create_execution_ctx())?; assert_eq!(boundary_result.len(), 3072); - // Verify consistency. - let boundary_unpacked = unpack_array(&boundary_bp, &mut SESSION.create_execution_ctx())?; - assert_eq!(boundary_result.len(), boundary_unpacked.len()); - assert_arrays_eq!(boundary_result, boundary_unpacked); + assert_arrays_eq!(boundary_result, PrimitiveArray::from_iter(boundary_values)); // Single element. let single = PrimitiveArray::from_iter([42u8]); - let single_bp = bitpack_encode(&single, 6, None).unwrap(); - let single_result = unpack_array(&single_bp, &mut SESSION.create_execution_ctx())?; + let single_bp = BitPackedEncoder::new(&single) + .with_bit_width(6) + .pack()? + .into_array()?; + let single_result = + single_bp.execute::(&mut SESSION.create_execution_ctx())?; assert_eq!(single_result.len(), 1); Ok(()) } diff --git a/encodings/fastlanes/src/bitpacking/array/mod.rs b/encodings/fastlanes/src/bitpacking/array/mod.rs index dfe02730cb0..d3f04d2f4c0 100644 --- a/encodings/fastlanes/src/bitpacking/array/mod.rs +++ b/encodings/fastlanes/src/bitpacking/array/mod.rs @@ -2,24 +2,19 @@ // SPDX-FileCopyrightText: Copyright the Vortex contributors use fastlanes::BitPacking; -use vortex_array::ArrayRef; -use vortex_array::arrays::Primitive; use vortex_array::buffer::BufferHandle; use vortex_array::dtype::DType; use vortex_array::dtype::NativePType; use vortex_array::dtype::PType; -use vortex_array::patches::Patches; use vortex_array::stats::ArrayStats; use vortex_array::validity::Validity; use vortex_error::VortexResult; -use vortex_error::vortex_bail; use vortex_error::vortex_ensure; pub mod bitpack_compress; pub mod bitpack_decompress; pub mod unpack_iter; -use crate::bitpack_compress::bitpack_encode; use crate::unpack_iter::BitPacked; use crate::unpack_iter::BitUnpackedChunks; @@ -28,7 +23,6 @@ pub struct BitPackedArrayParts { pub bit_width: u8, pub len: usize, pub packed: BufferHandle, - pub patches: Option, pub validity: Validity, } @@ -41,7 +35,6 @@ pub struct BitPackedArray { pub(super) dtype: DType, pub(super) bit_width: u8, pub(super) packed: BufferHandle, - pub(super) patches: Option, pub(super) validity: Validity, pub(super) stats_set: ArrayStats, } @@ -71,7 +64,6 @@ impl BitPackedArray { packed: BufferHandle, dtype: DType, validity: Validity, - patches: Option, bit_width: u8, len: usize, offset: u16, @@ -82,7 +74,6 @@ impl BitPackedArray { dtype, bit_width, packed, - patches, validity, stats_set: Default::default(), } @@ -113,27 +104,18 @@ impl BitPackedArray { packed: BufferHandle, ptype: PType, validity: Validity, - patches: Option, bit_width: u8, length: usize, offset: u16, ) -> VortexResult { - Self::validate( - &packed, - ptype, - &validity, - patches.as_ref(), - bit_width, - length, - offset, - )?; + Self::validate(&packed, ptype, &validity, bit_width, length, offset)?; let dtype = DType::Primitive(ptype, validity.nullability()); // SAFETY: all components validated above unsafe { Ok(Self::new_unchecked( - packed, dtype, validity, patches, bit_width, length, offset, + packed, dtype, validity, bit_width, length, offset, )) } } @@ -142,7 +124,6 @@ impl BitPackedArray { packed: &BufferHandle, ptype: PType, validity: &Validity, - patches: Option<&Patches>, bit_width: u8, length: usize, offset: u16, @@ -163,11 +144,6 @@ impl BitPackedArray { "Offset must be less than the full block i.e., 1024, got {offset}" ); - // Validate patches - if let Some(patches) = patches { - Self::validate_patches(patches, ptype, length)?; - } - // Validate packed buffer let expected_packed_len = (length + offset as usize).div_ceil(1024) * (128 * bit_width as usize); @@ -181,24 +157,6 @@ impl BitPackedArray { Ok(()) } - fn validate_patches(patches: &Patches, ptype: PType, len: usize) -> VortexResult<()> { - // Ensure that array and patches have same ptype - vortex_ensure!( - patches.dtype().eq_ignore_nullability(ptype.into()), - "Patches DType {} does not match BitPackedArray dtype {}", - patches.dtype().as_nonnullable(), - ptype - ); - - vortex_ensure!( - patches.array_len() == len, - "BitPackedArray patches length {} != expected {len}", - patches.array_len(), - ); - - Ok(()) - } - pub fn ptype(&self) -> PType { self.dtype.as_ptype() } @@ -239,44 +197,11 @@ impl BitPackedArray { self.bit_width } - /// Access the patches array. - /// - /// If present, patches MUST be a `SparseArray` with equal-length to this array, and whose - /// indices indicate the locations of patches. The indices must have non-zero length. - #[inline] - pub fn patches(&self) -> Option<&Patches> { - self.patches.as_ref() - } - - pub fn replace_patches(&mut self, patches: Option) { - self.patches = patches; - } - #[inline] pub fn offset(&self) -> u16 { self.offset } - /// Bit-pack an array of primitive integers down to the target bit-width using the FastLanes - /// SIMD-accelerated packing kernels. - /// - /// # Errors - /// - /// If the provided array is not an integer type, an error will be returned. - /// - /// If the provided array contains negative values, an error will be returned. - /// - /// If the requested bit-width for packing is larger than the array's native width, an - /// error will be returned. - // FIXME(ngates): take a PrimitiveArray - pub fn encode(array: &ArrayRef, bit_width: u8) -> VortexResult { - if let Some(parray) = array.as_opt::() { - bitpack_encode(parray, bit_width, None) - } else { - vortex_bail!(InvalidArgument: "Bitpacking can only encode primitive arrays"); - } - } - /// Calculate the maximum value that **can** be contained by this array, given its bit-width. /// /// Note that this value need not actually be present in the array. @@ -291,7 +216,6 @@ impl BitPackedArray { bit_width: self.bit_width, len: self.len, packed: self.packed, - patches: self.patches, validity: self.validity, } } @@ -299,13 +223,11 @@ impl BitPackedArray { #[cfg(test)] mod test { - use vortex_array::IntoArray; use vortex_array::ToCanonical; use vortex_array::arrays::PrimitiveArray; use vortex_array::assert_arrays_eq; - use vortex_buffer::Buffer; - use crate::BitPackedArray; + use crate::bitpack_compress::BitPackedEncoder; #[test] fn test_encode() { @@ -319,7 +241,12 @@ mod test { Some(u64::MAX), ]; let uncompressed = PrimitiveArray::from_option_iter(values); - let packed = BitPackedArray::encode(&uncompressed.into_array(), 1).unwrap(); + let packed = BitPackedEncoder::new(&uncompressed) + .with_bit_width(1) + .pack() + .unwrap() + .into_array() + .unwrap(); let expected = PrimitiveArray::from_option_iter(values); assert_arrays_eq!(packed.to_primitive(), expected); } @@ -328,22 +255,28 @@ mod test { fn test_encode_too_wide() { let values = [Some(1u8), None, Some(1), None, Some(1), None]; let uncompressed = PrimitiveArray::from_option_iter(values); - let _packed = BitPackedArray::encode(&uncompressed.clone().into_array(), 8) + let _packed = BitPackedEncoder::new(&uncompressed) + .with_bit_width(8) + .pack() .expect_err("Cannot pack value into the same width"); - let _packed = BitPackedArray::encode(&uncompressed.into_array(), 9) + let _packed = BitPackedEncoder::new(&uncompressed) + .with_bit_width(9) + .pack() .expect_err("Cannot pack value into larger width"); } #[test] fn signed_with_patches() { - let values: Buffer = (0i32..=512).collect(); - let parray = values.clone().into_array(); + let parray = PrimitiveArray::from_iter(0i32..=512); - let packed_with_patches = BitPackedArray::encode(&parray, 9).unwrap(); - assert!(packed_with_patches.patches().is_some()); + let packed_with_patches = BitPackedEncoder::new(&parray) + .with_bit_width(9) + .pack() + .unwrap(); + assert!(packed_with_patches.has_patches()); assert_arrays_eq!( - packed_with_patches.to_primitive(), - PrimitiveArray::new(values, vortex_array::validity::Validity::NonNullable) + packed_with_patches.into_array().unwrap().to_primitive(), + parray ); } } diff --git a/encodings/fastlanes/src/bitpacking/compute/cast.rs b/encodings/fastlanes/src/bitpacking/compute/cast.rs index b6ba46626d3..30ad9c841b2 100644 --- a/encodings/fastlanes/src/bitpacking/compute/cast.rs +++ b/encodings/fastlanes/src/bitpacking/compute/cast.rs @@ -3,9 +3,7 @@ use vortex_array::ArrayRef; use vortex_array::IntoArray; -use vortex_array::builtins::ArrayBuiltins; use vortex_array::dtype::DType; -use vortex_array::patches::Patches; use vortex_array::scalar_fn::fns::cast::CastReduce; use vortex_array::vtable::ValidityHelper; use vortex_error::VortexResult; @@ -25,19 +23,6 @@ impl CastReduce for BitPacked { array.packed().clone(), dtype.as_ptype(), new_validity, - array - .patches() - .map(|patches| { - let new_values = patches.values().cast(dtype.clone())?; - Patches::new( - patches.array_len(), - patches.offset(), - patches.indices().clone(), - new_values, - patches.chunk_offsets().clone(), - ) - }) - .transpose()?, array.bit_width(), array.len(), array.offset(), @@ -61,14 +46,18 @@ mod tests { use vortex_array::dtype::DType; use vortex_array::dtype::Nullability; use vortex_array::dtype::PType; - use vortex_buffer::buffer; - use crate::BitPackedArray; + use crate::bitpack_compress::BitPackedEncoder; #[test] fn test_cast_bitpacked_u8_to_u32() { - let packed = - BitPackedArray::encode(&buffer![10u8, 20, 30, 40, 50, 60].into_array(), 6).unwrap(); + let parray = PrimitiveArray::from_iter([10u8, 20, 30, 40, 50, 60]); + + let packed = BitPackedEncoder::new(&parray) + .with_bit_width(6) + .pack() + .unwrap() + .unwrap_unpatched(); let casted = packed .into_array() @@ -88,7 +77,11 @@ mod tests { #[test] fn test_cast_bitpacked_nullable() { let values = PrimitiveArray::from_option_iter([Some(5u16), None, Some(10), Some(15), None]); - let packed = BitPackedArray::encode(&values.into_array(), 4).unwrap(); + let packed = BitPackedEncoder::new(&values) + .with_bit_width(4) + .pack() + .unwrap() + .unwrap_unpatched(); let casted = packed .into_array() @@ -101,11 +94,17 @@ mod tests { } #[rstest] - #[case(BitPackedArray::encode(&buffer![0u8, 10, 20, 30, 40, 50, 60, 63].into_array(), 6).unwrap())] - #[case(BitPackedArray::encode(&buffer![0u16, 100, 200, 300, 400, 500].into_array(), 9).unwrap())] - #[case(BitPackedArray::encode(&buffer![0u32, 1000, 2000, 3000, 4000].into_array(), 12).unwrap())] - #[case(BitPackedArray::encode(&PrimitiveArray::from_option_iter([Some(1u32), None, Some(7), Some(15), None]).into_array(), 4).unwrap())] - fn test_cast_bitpacked_conformance(#[case] array: BitPackedArray) { - test_cast_conformance(&array.into_array()); + #[case(PrimitiveArray::from_iter([0u8, 10, 20, 30, 40, 50, 60, 63]), 6)] + #[case(PrimitiveArray::from_iter([0u16, 100, 200, 300, 400, 500]), 9)] + #[case(PrimitiveArray::from_iter([0u32, 1000, 2000, 3000, 4000]), 12)] + #[case(PrimitiveArray::from_option_iter([Some(1u32), None, Some(7), Some(15), None]), 4)] + fn test_cast_bitpacked_conformance(#[case] parray: PrimitiveArray, #[case] bw: u8) { + let array = BitPackedEncoder::new(&parray) + .with_bit_width(bw) + .pack() + .unwrap() + .into_array() + .unwrap(); + test_cast_conformance(&array); } } diff --git a/encodings/fastlanes/src/bitpacking/compute/filter.rs b/encodings/fastlanes/src/bitpacking/compute/filter.rs index f394f76a26f..5e349ca2c96 100644 --- a/encodings/fastlanes/src/bitpacking/compute/filter.rs +++ b/encodings/fastlanes/src/bitpacking/compute/filter.rs @@ -46,7 +46,7 @@ impl FilterKernel for BitPacked { fn filter( array: &BitPackedArray, mask: &Mask, - ctx: &mut ExecutionCtx, + _ctx: &mut ExecutionCtx, ) -> VortexResult> { let values = match mask { Mask::AllTrue(_) | Mask::AllFalse(_) => { @@ -62,22 +62,12 @@ impl FilterKernel for BitPacked { } // Filter and patch using the correct unsigned type for FastLanes, then cast to signed if needed. - let mut primitive = match_each_unsigned_integer_ptype!(array.ptype().to_unsigned(), |U| { + let primitive = match_each_unsigned_integer_ptype!(array.ptype().to_unsigned(), |U| { let (buffer, validity) = filter_primitive_without_patches::(array, values)?; // reinterpret_cast for signed types. PrimitiveArray::new(buffer, validity).reinterpret_cast(array.ptype()) }); - let patches = array - .patches() - .map(|patches| patches.filter(&Mask::Values(values.clone()), ctx)) - .transpose()? - .flatten(); - - if let Some(patches) = patches { - primitive = primitive.patch(&patches, ctx)?; - } - Ok(Some(primitive.into_array())) } } @@ -169,16 +159,19 @@ mod test { use vortex_array::compute::conformance::filter::test_filter_conformance; use vortex_array::validity::Validity; use vortex_buffer::Buffer; - use vortex_buffer::buffer; use vortex_mask::Mask; - use crate::BitPackedArray; + use crate::bitpack_compress::BitPackedEncoder; #[test] fn take_indices() { // Create a u8 array modulo 63. let unpacked = PrimitiveArray::from_iter((0..4096).map(|i| (i % 63) as u8)); - let bitpacked = BitPackedArray::encode(&unpacked.into_array(), 6).unwrap(); + let bitpacked = BitPackedEncoder::new(&unpacked) + .with_bit_width(6) + .pack() + .unwrap() + .unwrap_unpatched(); let mask = Mask::from_indices(bitpacked.len(), vec![0, 125, 2047, 2049, 2151, 2790]); @@ -193,7 +186,11 @@ mod test { fn take_sliced_indices() { // Create a u8 array modulo 63. let unpacked = PrimitiveArray::from_iter((0..4096).map(|i| (i % 63) as u8)); - let bitpacked = BitPackedArray::encode(&unpacked.into_array(), 6).unwrap(); + let bitpacked = BitPackedEncoder::new(&unpacked) + .with_bit_width(6) + .pack() + .unwrap() + .unwrap_unpatched(); let sliced = bitpacked.slice(128..2050).unwrap(); let mask = Mask::from_indices(sliced.len(), vec![1919, 1921]); @@ -205,7 +202,11 @@ mod test { #[test] fn filter_bitpacked() { let unpacked = PrimitiveArray::from_iter((0..4096).map(|i| (i % 63) as u8)); - let bitpacked = BitPackedArray::encode(&unpacked.into_array(), 6).unwrap(); + let bitpacked = BitPackedEncoder::new(&unpacked) + .with_bit_width(6) + .pack() + .unwrap() + .unwrap_unpatched(); let filtered = bitpacked .filter(Mask::from_indices(4096, (0..1024).collect())) .unwrap(); @@ -219,7 +220,11 @@ mod test { fn filter_bitpacked_signed() { let values: Buffer = (0..500).collect(); let unpacked = PrimitiveArray::new(values.clone(), Validity::NonNullable); - let bitpacked = BitPackedArray::encode(&unpacked.into_array(), 9).unwrap(); + let bitpacked = BitPackedEncoder::new(&unpacked) + .with_bit_width(9) + .pack() + .unwrap() + .unwrap_unpatched(); let filtered = bitpacked .filter(Mask::from_indices(values.len(), (0..250).collect())) .unwrap() @@ -234,18 +239,30 @@ mod test { #[test] fn test_filter_bitpacked_conformance() { // Test with u8 values - let unpacked = buffer![1u8, 2, 3, 4, 5].into_array(); - let bitpacked = BitPackedArray::encode(&unpacked, 3).unwrap(); + let unpacked = PrimitiveArray::from_iter([1u8, 2, 3, 4, 5]); + let bitpacked = BitPackedEncoder::new(&unpacked) + .with_bit_width(3) + .pack() + .unwrap() + .unwrap_unpatched(); test_filter_conformance(&bitpacked.into_array()); // Test with u32 values - let unpacked = buffer![100u32, 200, 300, 400, 500].into_array(); - let bitpacked = BitPackedArray::encode(&unpacked, 9).unwrap(); + let unpacked = PrimitiveArray::from_iter([100u32, 200, 300, 400, 500]); + let bitpacked = BitPackedEncoder::new(&unpacked) + .with_bit_width(9) + .pack() + .unwrap() + .unwrap_unpatched(); test_filter_conformance(&bitpacked.into_array()); // Test with nullable values let unpacked = PrimitiveArray::from_option_iter([Some(1u16), None, Some(3), Some(4), None]); - let bitpacked = BitPackedArray::encode(&unpacked.into_array(), 3).unwrap(); + let bitpacked = BitPackedEncoder::new(&unpacked) + .with_bit_width(3) + .pack() + .unwrap() + .unwrap_unpatched(); test_filter_conformance(&bitpacked.into_array()); } @@ -260,14 +277,19 @@ mod test { // Values 0-127 fit in 7 bits, but 1000 and 2000 do not. let values: Vec = vec![0, 10, 1000, 20, 30, 2000, 40, 50, 60, 70]; let unpacked = PrimitiveArray::from_iter(values.clone()); - let bitpacked = BitPackedArray::encode(&unpacked.into_array(), 7).unwrap(); + let bitpacked = BitPackedEncoder::new(&unpacked) + .with_bit_width(7) + .pack() + .unwrap(); assert!( - bitpacked.patches().is_some(), + bitpacked.has_patches(), "Expected patches for values exceeding bit width" ); // Filter to include some patched and some non-patched values. let filtered = bitpacked + .into_array() + .unwrap() .filter(Mask::from_indices(values.len(), vec![0, 2, 5, 9])) .unwrap() .to_primitive(); @@ -292,15 +314,20 @@ mod test { }) .collect(); let unpacked = PrimitiveArray::from_iter(values.clone()); - let bitpacked = BitPackedArray::encode(&unpacked.into_array(), 7).unwrap(); + let bitpacked = BitPackedEncoder::new(&unpacked) + .with_bit_width(7) + .pack() + .unwrap(); assert!( - bitpacked.patches().is_some(), + bitpacked.has_patches(), "Expected patches for values exceeding bit width" ); // Use low selectivity (only select 2% of values) to avoid full decompression. let indices: Vec = (0..20).collect(); let filtered = bitpacked + .into_array() + .unwrap() .filter(Mask::from_indices(values.len(), indices)) .unwrap() .to_primitive(); diff --git a/encodings/fastlanes/src/bitpacking/compute/is_constant.rs b/encodings/fastlanes/src/bitpacking/compute/is_constant.rs index d3efa37adef..ce0a4ecd4ff 100644 --- a/encodings/fastlanes/src/bitpacking/compute/is_constant.rs +++ b/encodings/fastlanes/src/bitpacking/compute/is_constant.rs @@ -1,22 +1,15 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::ops::Range; - -use itertools::Itertools; use lending_iterator::LendingIterator; use vortex_array::ArrayRef; use vortex_array::ExecutionCtx; -use vortex_array::ToCanonical; use vortex_array::aggregate_fn::AggregateFnRef; use vortex_array::aggregate_fn::fns::is_constant::IsConstant; use vortex_array::aggregate_fn::fns::is_constant::primitive::IS_CONST_LANE_WIDTH; use vortex_array::aggregate_fn::fns::is_constant::primitive::compute_is_constant; use vortex_array::aggregate_fn::kernels::DynAggregateKernel; -use vortex_array::arrays::PrimitiveArray; -use vortex_array::dtype::IntegerPType; use vortex_array::match_each_integer_ptype; -use vortex_array::match_each_unsigned_integer_ptype; use vortex_array::scalar::Scalar; use vortex_error::VortexResult; @@ -55,46 +48,40 @@ fn bitpacked_is_constant( array: &BitPackedArray, ) -> VortexResult { let mut bit_unpack_iterator = array.unpacked_chunks::(); - let patches = array.patches().map(|p| { - let values = p.values().to_primitive(); - let indices = p.indices().to_primitive(); - let offset = p.offset(); - (indices, values, offset) - }); let mut header_constant_value = None; - let mut current_idx = 0; + // let mut current_idx = 0; if let Some(header) = bit_unpack_iterator.initial() { - if let Some((indices, patches, offset)) = &patches { - apply_patches( - header, - current_idx..header.len(), - indices, - patches.as_slice::(), - *offset, - ) - } + // if let Some((indices, patches, offset)) = &patches { + // apply_patches( + // header, + // current_idx..header.len(), + // indices, + // patches.as_slice::(), + // *offset, + // ) + // } if !compute_is_constant::<_, WIDTH>(header) { return Ok(false); } header_constant_value = Some(header[0]); - current_idx = header.len(); + // current_idx = header.len(); } let mut first_chunk_value = None; let mut chunks_iter = bit_unpack_iterator.full_chunks(); while let Some(chunk) = chunks_iter.next() { - if let Some((indices, patches, offset)) = &patches { - let chunk_len = chunk.len(); - apply_patches( - chunk, - current_idx..current_idx + chunk_len, - indices, - patches.as_slice::(), - *offset, - ) - } + // if let Some((indices, patches, offset)) = &patches { + // let chunk_len = chunk.len(); + // apply_patches( + // chunk, + // current_idx..current_idx + chunk_len, + // indices, + // patches.as_slice::(), + // *offset, + // ) + // } if !compute_is_constant::<_, WIDTH>(chunk) { return Ok(false); @@ -113,20 +100,20 @@ fn bitpacked_is_constant( first_chunk_value = Some(chunk[0]); } - current_idx += chunk.len(); + // current_idx += chunk.len(); } if let Some(trailer) = bit_unpack_iterator.trailer() { - if let Some((indices, patches, offset)) = &patches { - let chunk_len = trailer.len(); - apply_patches( - trailer, - current_idx..current_idx + chunk_len, - indices, - patches.as_slice::(), - *offset, - ) - } + // if let Some((indices, patches, offset)) = &patches { + // let chunk_len = trailer.len(); + // apply_patches( + // trailer, + // current_idx..current_idx + chunk_len, + // indices, + // patches.as_slice::(), + // *offset, + // ) + // } if !compute_is_constant::<_, WIDTH>(trailer) { return Ok(false); @@ -142,58 +129,61 @@ fn bitpacked_is_constant( Ok(true) } -fn apply_patches( - values: &mut [T], - values_range: Range, - patch_indices: &PrimitiveArray, - patch_values: &[T], - indices_offset: usize, -) { - match_each_unsigned_integer_ptype!(patch_indices.ptype(), |I| { - apply_patches_idx_typed( - values, - values_range, - patch_indices.as_slice::(), - patch_values, - indices_offset, - ) - }); -} - -fn apply_patches_idx_typed( - values: &mut [T], - values_range: Range, - patch_indices: &[I], - patch_values: &[T], - indices_offset: usize, -) { - for (i, &v) in patch_indices - .iter() - .map(|i| i.as_() - indices_offset) - .zip_eq(patch_values) - .skip_while(|(i, _)| i < &values_range.start) - .take_while(|(i, _)| i < &values_range.end) - { - values[i - values_range.start] = v - } -} +// fn apply_patches( +// values: &mut [T], +// values_range: Range, +// patch_indices: &PrimitiveArray, +// patch_values: &[T], +// indices_offset: usize, +// ) { +// match_each_unsigned_integer_ptype!(patch_indices.ptype(), |I| { +// apply_patches_idx_typed( +// values, +// values_range, +// patch_indices.as_slice::(), +// patch_values, +// indices_offset, +// ) +// }); +// } + +// fn apply_patches_idx_typed( +// values: &mut [T], +// values_range: Range, +// patch_indices: &[I], +// patch_values: &[T], +// indices_offset: usize, +// ) { +// for (i, &v) in patch_indices +// .iter() +// .map(|i| i.as_() - indices_offset) +// .zip_eq(patch_values) +// .skip_while(|(i, _)| i < &values_range.start) +// .take_while(|(i, _)| i < &values_range.end) +// { +// values[i - values_range.start] = v +// } +// } #[cfg(test)] mod tests { - use vortex_array::IntoArray; use vortex_array::LEGACY_SESSION; use vortex_array::VortexSessionExecute; use vortex_array::aggregate_fn::fns::is_constant::is_constant; - use vortex_buffer::buffer; + use vortex_array::arrays::PrimitiveArray; use vortex_error::VortexResult; - use crate::BitPackedArray; + use crate::bitpack_compress::BitPackedEncoder; #[test] fn is_constant_with_patches() -> VortexResult<()> { - let array = BitPackedArray::encode(&buffer![4; 1025].into_array(), 2)?; + let parray = PrimitiveArray::from_iter([4; 1025]); + let array = BitPackedEncoder::new(&parray) + .with_bit_width(2) + .pack()? + .into_array()?; let mut ctx = LEGACY_SESSION.create_execution_ctx(); - assert!(is_constant(&array.into_array(), &mut ctx)?); + assert!(is_constant(&array, &mut ctx)?); Ok(()) } } diff --git a/encodings/fastlanes/src/bitpacking/compute/mod.rs b/encodings/fastlanes/src/bitpacking/compute/mod.rs index f17054fc081..5923f80d45f 100644 --- a/encodings/fastlanes/src/bitpacking/compute/mod.rs +++ b/encodings/fastlanes/src/bitpacking/compute/mod.rs @@ -47,9 +47,17 @@ mod tests { use vortex_array::compute::conformance::consistency::test_array_consistency; use crate::BitPackedArray; - use crate::bitpack_compress::bitpack_encode; + use crate::bitpack_compress::BitPackedEncoder; use crate::bitpacking::compute::chunked_indices; + fn encode(array: &PrimitiveArray, bit_width: u8) -> BitPackedArray { + BitPackedEncoder::new(array) + .with_bit_width(bit_width) + .pack() + .unwrap() + .into_packed() + } + #[test] fn chunk_indices_repeated() { let mut called = false; @@ -63,35 +71,35 @@ mod tests { #[rstest] // Basic integer arrays that can be bitpacked - #[case::u8_small(bitpack_encode(&PrimitiveArray::from_iter([1u8, 2, 3, 4, 5]), 3, None).unwrap())] - #[case::u16_array(bitpack_encode(&PrimitiveArray::from_iter([10u16, 20, 30, 40, 50]), 6, None).unwrap())] - #[case::u32_array(bitpack_encode(&PrimitiveArray::from_iter([100u32, 200, 300, 400, 500]), 9, None).unwrap())] + #[case::u8_small(encode(&PrimitiveArray::from_iter([1u8, 2, 3, 4, 5]), 3))] + #[case::u16_array(encode(&PrimitiveArray::from_iter([10u16, 20, 30, 40, 50]), 6))] + #[case::u32_array(encode(&PrimitiveArray::from_iter([100u32, 200, 300, 400, 500]), 9))] // Arrays with nulls - #[case::nullable_u8(bitpack_encode(&PrimitiveArray::from_option_iter([Some(1u8), None, Some(3), Some(4), None]), 3, None).unwrap())] - #[case::nullable_u32(bitpack_encode(&PrimitiveArray::from_option_iter([Some(100u32), None, Some(300), Some(400), None]), 9, None).unwrap())] + #[case::nullable_u8(encode(&PrimitiveArray::from_option_iter([Some(1u8), None, Some(3), Some(4), None]), 3))] + #[case::nullable_u32(encode(&PrimitiveArray::from_option_iter([Some(100u32), None, Some(300), Some(400), None]), 9))] // Edge cases - #[case::single_element(bitpack_encode(&PrimitiveArray::from_iter([42u32]), 6, None).unwrap())] - #[case::all_zeros(bitpack_encode(&PrimitiveArray::from_iter([0u16; 100]), 1, None).unwrap())] + #[case::single_element(encode(&PrimitiveArray::from_iter([42u32]), 6))] + #[case::all_zeros(encode(&PrimitiveArray::from_iter([0u16; 100]), 1))] // Large arrays (multiple chunks - fastlanes uses 1024-element chunks) - #[case::large_u16(bitpack_encode(&PrimitiveArray::from_iter((0..2048).map(|i| (i % 256) as u16)), 8, None).unwrap())] - #[case::large_u32(bitpack_encode(&PrimitiveArray::from_iter((0..3000).map(|i| (i % 1024) as u32)), 10, None).unwrap())] - #[case::large_u8_many_chunks(bitpack_encode(&PrimitiveArray::from_iter((0..5120).map(|i| (i % 128) as u8)), 7, None).unwrap())] // 5 chunks - #[case::large_nullable(bitpack_encode(&PrimitiveArray::from_option_iter((0..2500).map(|i| if i % 10 == 0 { None } else { Some((i % 512) as u16) })), 9, None).unwrap())] + #[case::large_u16(encode(&PrimitiveArray::from_iter((0..2048).map(|i| (i % 256) as u16)), 8))] + #[case::large_u32(encode(&PrimitiveArray::from_iter((0..3000).map(|i| (i % 1024) as u32)), 10))] + #[case::large_u8_many_chunks(encode(&PrimitiveArray::from_iter((0..5120).map(|i| (i % 128) as u8)), 7))] // 5 chunks + #[case::large_nullable(encode(&PrimitiveArray::from_option_iter((0..2500).map(|i| if i % 10 == 0 { None } else { Some((i % 512) as u16) })), 9))] // Arrays with specific bit patterns - #[case::max_value_for_bits(bitpack_encode(&PrimitiveArray::from_iter([7u8, 7, 7, 7, 7]), 3, None).unwrap())] // max value for 3 bits - #[case::alternating_bits(bitpack_encode(&PrimitiveArray::from_iter([0u16, 255, 0, 255, 0, 255]), 8, None).unwrap())] + #[case::max_value_for_bits(encode(&PrimitiveArray::from_iter([7u8, 7, 7, 7, 7]), 3))] // max value for 3 bits + #[case::alternating_bits(encode(&PrimitiveArray::from_iter([0u16, 255, 0, 255, 0, 255]), 8))] fn test_bitpacked_consistency(#[case] array: BitPackedArray) { test_array_consistency(&array.into_array()); } #[rstest] - #[case::u8_basic(bitpack_encode(&PrimitiveArray::from_iter([1u8, 2, 3, 4, 5]), 3, None).unwrap())] - #[case::u16_basic(bitpack_encode(&PrimitiveArray::from_iter([10u16, 20, 30, 40, 50]), 6, None).unwrap())] - #[case::u32_basic(bitpack_encode(&PrimitiveArray::from_iter([100u32, 200, 300, 400, 500]), 9, None).unwrap())] - #[case::u64_basic(bitpack_encode(&PrimitiveArray::from_iter([1000u64, 2000, 3000, 4000, 5000]), 13, None).unwrap())] - #[case::i32_basic(bitpack_encode(&PrimitiveArray::from_iter([10i32, 20, 30, 40, 50]), 7, None).unwrap())] - #[case::large_u32(bitpack_encode(&PrimitiveArray::from_iter((0..100).map(|i| i as u32)), 7, None).unwrap())] + #[case::u8_basic(encode(&PrimitiveArray::from_iter([1u8, 2, 3, 4, 5]), 3))] + #[case::u16_basic(encode(&PrimitiveArray::from_iter([10u16, 20, 30, 40, 50]), 6))] + #[case::u32_basic(encode(&PrimitiveArray::from_iter([100u32, 200, 300, 400, 500]), 9))] + #[case::u64_basic(encode(&PrimitiveArray::from_iter([1000u64, 2000, 3000, 4000, 5000]), 13))] + #[case::i32_basic(encode(&PrimitiveArray::from_iter([10i32, 20, 30, 40, 50]), 7))] + #[case::large_u32(encode(&PrimitiveArray::from_iter((0..100).map(|i| i as u32)), 7))] fn test_bitpacked_binary_numeric(#[case] array: BitPackedArray) { test_binary_numeric_array(array.into_array()); } diff --git a/encodings/fastlanes/src/bitpacking/compute/slice.rs b/encodings/fastlanes/src/bitpacking/compute/slice.rs index a66b05d4736..9ef9d6930a6 100644 --- a/encodings/fastlanes/src/bitpacking/compute/slice.rs +++ b/encodings/fastlanes/src/bitpacking/compute/slice.rs @@ -35,11 +35,6 @@ impl SliceKernel for BitPacked { array.packed().slice(encoded_start..encoded_stop), array.dtype().clone(), array.validity()?.slice(range.clone())?, - array - .patches() - .map(|p| p.slice(range.clone())) - .transpose()? - .flatten(), array.bit_width(), range.len(), offset as u16, @@ -64,7 +59,7 @@ mod tests { use vortex_session::VortexSession; use crate::BitPacked; - use crate::bitpack_compress::bitpack_encode; + use crate::bitpack_compress::BitPackedEncoder; static SESSION: LazyLock = LazyLock::new(|| VortexSession::empty().with::()); @@ -72,7 +67,10 @@ mod tests { #[test] fn test_execute_parent_returns_bitpacked_slice() -> VortexResult<()> { let values = PrimitiveArray::from_iter(0u32..2048); - let bitpacked = bitpack_encode(&values, 11, None)?; + let bitpacked = BitPackedEncoder::new(&values) + .with_bit_width(11) + .pack()? + .into_packed(); let slice_array = SliceArray::new(bitpacked.clone().into_array(), 500..1500); diff --git a/encodings/fastlanes/src/bitpacking/compute/take.rs b/encodings/fastlanes/src/bitpacking/compute/take.rs index 4405645ace3..a1497ede094 100644 --- a/encodings/fastlanes/src/bitpacking/compute/take.rs +++ b/encodings/fastlanes/src/bitpacking/compute/take.rs @@ -55,7 +55,7 @@ impl TakeExecute for BitPacked { let indices = indices.clone().execute::(ctx)?; let taken = match_each_unsigned_integer_ptype!(ptype.to_unsigned(), |T| { match_each_integer_ptype!(indices.ptype(), |I| { - take_primitive::(array, &indices, taken_validity, ctx)? + take_primitive::(array, &indices, taken_validity)? }) }); Ok(Some(taken.reinterpret_cast(ptype).into_array())) @@ -66,7 +66,6 @@ fn take_primitive( array: &BitPackedArray, indices: &PrimitiveArray, taken_validity: Validity, - ctx: &mut ExecutionCtx, ) -> VortexResult { if indices.is_empty() { return Ok(PrimitiveArray::new(Buffer::::empty(), taken_validity)); @@ -134,12 +133,6 @@ fn take_primitive( if array.ptype().is_signed_int() { unpatched_taken = unpatched_taken.reinterpret_cast(array.ptype()); } - if let Some(patches) = array.patches() - && let Some(patches) = patches.take(&indices.clone().into_array(), ctx)? - { - let cast_patches = patches.cast_values(unpatched_taken.dtype())?; - return unpatched_taken.patch(&cast_patches, ctx); - } Ok(unpatched_taken) } @@ -153,17 +146,14 @@ mod test { use rstest::rstest; use vortex_array::DynArray; use vortex_array::IntoArray; - use vortex_array::LEGACY_SESSION; use vortex_array::ToCanonical; - use vortex_array::VortexSessionExecute; use vortex_array::arrays::PrimitiveArray; use vortex_array::assert_arrays_eq; use vortex_array::validity::Validity; use vortex_buffer::Buffer; use vortex_buffer::buffer; - use crate::BitPackedArray; - use crate::bitpacking::compute::take::take_primitive; + use crate::bitpack_compress::BitPackedEncoder; #[test] fn take_indices() { @@ -171,7 +161,11 @@ mod test { // Create a u8 array modulo 63. let unpacked = PrimitiveArray::from_iter((0..4096).map(|i| (i % 63) as u8)); - let bitpacked = BitPackedArray::encode(&unpacked.into_array(), 6).unwrap(); + let bitpacked = BitPackedEncoder::new(&unpacked) + .with_bit_width(6) + .pack() + .unwrap() + .into_packed(); let primitive_result = bitpacked.take(indices.to_array()).unwrap(); assert_arrays_eq!( @@ -182,8 +176,13 @@ mod test { #[test] fn take_with_patches() { - let unpacked = Buffer::from_iter(0u32..1024).into_array(); - let bitpacked = BitPackedArray::encode(&unpacked, 2).unwrap(); + let unpacked = PrimitiveArray::from_iter(0u32..1024); + let bitpacked = BitPackedEncoder::new(&unpacked) + .with_bit_width(2) + .pack() + .unwrap() + .into_array() + .unwrap(); let indices = buffer![0, 2, 4, 6].into_array(); @@ -197,7 +196,11 @@ mod test { // Create a u8 array modulo 63. let unpacked = PrimitiveArray::from_iter((0..4096).map(|i| (i % 63) as u8)); - let bitpacked = BitPackedArray::encode(&unpacked.into_array(), 6).unwrap(); + let bitpacked = BitPackedEncoder::new(&unpacked) + .with_bit_width(6) + .pack() + .unwrap() + .into_packed(); let sliced = bitpacked.slice(128..2050).unwrap(); let primitive_result = sliced.take(indices.to_array()).unwrap(); @@ -210,8 +213,12 @@ mod test { let num_patches: usize = 128; let values = (0..u16::MAX as u32 + num_patches as u32).collect::>(); let uncompressed = PrimitiveArray::new(values.clone(), Validity::NonNullable); - let packed = BitPackedArray::encode(&uncompressed.into_array(), 16).unwrap(); - assert!(packed.patches().is_some()); + let packed_result = BitPackedEncoder::new(&uncompressed) + .with_bit_width(16) + .pack() + .unwrap(); + assert!(packed_result.has_patches()); + let packed = packed_result.into_array().unwrap(); let rng = rng(); let range = Uniform::new(0, values.len()).unwrap(); @@ -239,23 +246,30 @@ mod test { #[test] #[cfg_attr(miri, ignore)] fn take_signed_with_patches() { - let start = - BitPackedArray::encode(&buffer![1i32, 2i32, 3i32, 4i32].into_array(), 1).unwrap(); - - let taken_primitive = take_primitive::( - &start, - &PrimitiveArray::from_iter([0u64, 1, 2, 3]), - Validity::NonNullable, - &mut LEGACY_SESSION.create_execution_ctx(), - ) - .unwrap(); + let values = PrimitiveArray::from_iter([1i32, 2i32, 3i32, 4i32]); + let start = BitPackedEncoder::new(&values) + .with_bit_width(1) + .pack() + .unwrap() + .into_array() + .unwrap(); + + let taken_primitive = start + .take(buffer![0u64, 1, 2, 3].into_array()) + .unwrap() + .to_primitive(); assert_arrays_eq!(taken_primitive, PrimitiveArray::from_iter([1i32, 2, 3, 4])); } #[test] fn take_nullable_with_nullables() { - let start = - BitPackedArray::encode(&buffer![1i32, 2i32, 3i32, 4i32].into_array(), 1).unwrap(); + let values = PrimitiveArray::from_iter([1i32, 2i32, 3i32, 4i32]); + let start = BitPackedEncoder::new(&values) + .with_bit_width(1) + .pack() + .unwrap() + .into_array() + .unwrap(); let taken_primitive = start .take( @@ -269,18 +283,24 @@ mod test { assert_eq!(taken_primitive.to_primitive().invalid_count().unwrap(), 1); } + fn encode_bitpacked(parray: &PrimitiveArray, bit_width: u8) -> vortex_array::ArrayRef { + BitPackedEncoder::new(parray) + .with_bit_width(bit_width) + .pack() + .unwrap() + .into_array() + .unwrap() + } + #[rstest] - #[case(BitPackedArray::encode(&PrimitiveArray::from_iter((0..100).map(|i| (i % 63) as u8)).into_array(), 6).unwrap())] - #[case(BitPackedArray::encode(&PrimitiveArray::from_iter((0..256).map(|i| i as u32)).into_array(), 8).unwrap())] - #[case(BitPackedArray::encode(&buffer![1i32, 2, 3, 4, 5, 6, 7, 8].into_array(), 3).unwrap())] - #[case(BitPackedArray::encode( - &PrimitiveArray::from_option_iter([Some(10u16), None, Some(20), Some(30), None]).into_array(), - 5 - ).unwrap())] - #[case(BitPackedArray::encode(&buffer![42u32].into_array(), 6).unwrap())] - #[case(BitPackedArray::encode(&PrimitiveArray::from_iter((0..1024).map(|i| i as u32)).into_array(), 8).unwrap())] - fn test_take_bitpacked_conformance(#[case] bitpacked: BitPackedArray) { + #[case::u8_mod63(PrimitiveArray::from_iter((0..100).map(|i| (i % 63) as u8)), 6)] + #[case::u32_256(PrimitiveArray::from_iter((0..256).map(|i| i as u32)), 8)] + #[case::i32_small(PrimitiveArray::from_iter([1i32, 2, 3, 4, 5, 6, 7, 8]), 3)] + #[case::u16_nullable(PrimitiveArray::from_option_iter([Some(10u16), None, Some(20), Some(30), None]), 5)] + #[case::u32_single(PrimitiveArray::from_iter([42u32]), 6)] + #[case::u32_1024(PrimitiveArray::from_iter((0..1024).map(|i| i as u32)), 8)] + fn test_take_bitpacked_conformance(#[case] parray: PrimitiveArray, #[case] bit_width: u8) { use vortex_array::compute::conformance::take::test_take_conformance; - test_take_conformance(&bitpacked.into_array()); + test_take_conformance(&encode_bitpacked(&parray, bit_width)); } } diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs index b748423c79f..6d4ad0b91d5 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs @@ -29,9 +29,6 @@ use vortex_array::vtable; use vortex_array::vtable::ArrayId; use vortex_array::vtable::VTable; use vortex_array::vtable::ValidityVTableFromValidityHelper; -use vortex_array::vtable::patches_child; -use vortex_array::vtable::patches_child_name; -use vortex_array::vtable::patches_nchildren; use vortex_array::vtable::validity_nchildren; use vortex_array::vtable::validity_to_child; use vortex_error::VortexExpect; @@ -103,7 +100,6 @@ impl VTable for BitPacked { array.dtype.hash(state); array.bit_width.hash(state); array.packed.array_hash(state, precision); - array.patches.array_hash(state, precision); array.validity.array_hash(state, precision); } @@ -113,7 +109,6 @@ impl VTable for BitPacked { && array.dtype == other.dtype && array.bit_width == other.bit_width && array.packed.array_eq(&other.packed, precision) - && array.patches.array_eq(&other.patches, precision) && array.validity.array_eq(&other.validity, precision) } @@ -136,19 +131,11 @@ impl VTable for BitPacked { } fn nchildren(array: &BitPackedArray) -> usize { - array.patches().map_or(0, patches_nchildren) + validity_nchildren(&array.validity) + validity_nchildren(&array.validity) } fn child(array: &BitPackedArray, idx: usize) -> ArrayRef { - let pc = array.patches().map_or(0, patches_nchildren); - if idx < pc { - patches_child( - array - .patches() - .vortex_expect("BitPackedArray child index out of bounds"), - idx, - ) - } else if idx < pc + validity_nchildren(&array.validity) { + if idx < validity_nchildren(&array.validity) { validity_to_child(&array.validity, array.len) .vortex_expect("BitPackedArray child index out of bounds") } else { @@ -156,13 +143,8 @@ impl VTable for BitPacked { } } - fn child_name(array: &BitPackedArray, idx: usize) -> String { - let pc = array.patches().map_or(0, patches_nchildren); - if idx < pc { - patches_child_name(idx).to_string() - } else { - "validity".to_string() - } + fn child_name(_array: &BitPackedArray, _index: usize) -> String { + "validity".to_string() } fn reduce_parent( @@ -173,62 +155,19 @@ impl VTable for BitPacked { RULES.evaluate(array, parent, child_idx) } - fn with_children(array: &mut Self::Array, children: Vec) -> VortexResult<()> { - // Children: patches (if present): indices, values, chunk_offsets; then validity (if present) - let patches_info = array - .patches() - .map(|p| (p.offset(), p.chunk_offsets().is_some())); - - let mut child_idx = 0; - let patches = if let Some((patch_offset, has_chunk_offsets)) = patches_info { - let patch_indices = children - .get(child_idx) - .ok_or_else(|| vortex_err!("Expected patch_indices child at index {}", child_idx))? - .clone(); - child_idx += 1; - - let patch_values = children - .get(child_idx) - .ok_or_else(|| vortex_err!("Expected patch_values child at index {}", child_idx))? - .clone(); - child_idx += 1; - - let patch_chunk_offsets = if has_chunk_offsets { - let offsets = children - .get(child_idx) - .ok_or_else(|| { - vortex_err!("Expected patch_chunk_offsets child at index {}", child_idx) - })? - .clone(); - child_idx += 1; - Some(offsets) - } else { - None - }; - - Some(Patches::new( - array.len(), - patch_offset, - patch_indices, - patch_values, - patch_chunk_offsets, - )?) + fn with_children(array: &mut Self::Array, mut children: Vec) -> VortexResult<()> { + let validity = if !children.is_empty() { + Validity::Array(children.remove(0)) } else { - None + Validity::from(array.dtype().nullability()) }; - let validity = if child_idx < children.len() { - Validity::Array(children[child_idx].clone()) + let expected_children = if matches!(validity, Validity::Array(_)) { + 1 } else { - Validity::from(array.dtype().nullability()) + 0 }; - let expected_children = child_idx - + if matches!(validity, Validity::Array(_)) { - 1 - } else { - 0 - }; vortex_ensure!( children.len() == expected_children, "Expected {} children, got {}", @@ -236,7 +175,6 @@ impl VTable for BitPacked { children.len() ); - array.patches = patches; array.validity = validity; Ok(()) @@ -246,10 +184,7 @@ impl VTable for BitPacked { Ok(ProstMetadata(BitPackedMetadata { bit_width: array.bit_width() as u32, offset: array.offset() as u32, - patches: array - .patches() - .map(|p| p.to_metadata(array.len(), array.dtype())) - .transpose()?, + patches: None, })) } @@ -313,7 +248,6 @@ impl VTable for BitPacked { packed, PType::try_from(dtype)?, validity, - None, u8::try_from(metadata.bit_width).map_err(|_| { vortex_err!( "BitPackedMetadata bit_width {} does not fit in u8", @@ -350,7 +284,7 @@ impl VTable for BitPacked { fn append_to_builder( array: &BitPackedArray, builder: &mut dyn ArrayBuilder, - ctx: &mut ExecutionCtx, + _ctx: &mut ExecutionCtx, ) -> VortexResult<()> { match_each_integer_ptype!(array.ptype(), |T| { unpack_into_primitive_builder::( @@ -359,15 +293,12 @@ impl VTable for BitPacked { .as_any_mut() .downcast_mut() .vortex_expect("bit packed array must canonicalize into a primitive array"), - ctx, ) }) } - fn execute(array: Arc, ctx: &mut ExecutionCtx) -> VortexResult { - Ok(ExecutionResult::done( - unpack_array(&array, ctx)?.into_array(), - )) + fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { + Ok(ExecutionResult::done(unpack_array(array)?.into_array())) } fn execute_parent( diff --git a/encodings/fastlanes/src/bitpacking/vtable/operations.rs b/encodings/fastlanes/src/bitpacking/vtable/operations.rs index da791e11f2a..c0fadafaf32 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/operations.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/operations.rs @@ -16,15 +16,7 @@ impl OperationsVTable for BitPacked { index: usize, _ctx: &mut ExecutionCtx, ) -> VortexResult { - Ok( - if let Some(patches) = array.patches() - && let Some(patch) = patches.get_patched(index)? - { - patch - } else { - bitpack_decompress::unpack_single(array, index) - }, - ) + Ok(bitpack_decompress::unpack_single(array, index)) } } @@ -40,22 +32,14 @@ mod test { use vortex_array::arrays::SliceArray; use vortex_array::assert_arrays_eq; use vortex_array::assert_nth_scalar; - use vortex_array::buffer::BufferHandle; - use vortex_array::dtype::DType; - use vortex_array::dtype::Nullability; - use vortex_array::dtype::PType; - use vortex_array::patches::Patches; - use vortex_array::scalar::Scalar; use vortex_array::session::ArraySession; - use vortex_array::validity::Validity; use vortex_array::vtable::VTable; - use vortex_buffer::Alignment; use vortex_buffer::Buffer; - use vortex_buffer::ByteBuffer; use vortex_buffer::buffer; use crate::BitPacked; use crate::BitPackedArray; + use crate::bitpack_compress::BitPackedEncoder; static SESSION: LazyLock = LazyLock::new(|| vortex_session::VortexSession::empty().with::()); @@ -72,11 +56,12 @@ mod test { #[test] pub fn slice_block() { - let arr = BitPackedArray::encode( - &PrimitiveArray::from_iter((0u32..2048).map(|v| v % 64)).into_array(), - 6, - ) - .unwrap(); + let values = PrimitiveArray::from_iter((0u32..2048).map(|v| v % 64)); + let arr = BitPackedEncoder::new(&values) + .with_bit_width(6) + .pack() + .unwrap() + .into_packed(); let sliced = slice_via_kernel(&arr, 1024..2048); assert_nth_scalar!(sliced, 0, 1024u32 % 64); assert_nth_scalar!(sliced, 1023, 2047u32 % 64); @@ -86,11 +71,12 @@ mod test { #[test] pub fn slice_within_block() { - let arr = BitPackedArray::encode( - &PrimitiveArray::from_iter((0u32..2048).map(|v| v % 64)).into_array(), - 6, - ) - .unwrap(); + let values = PrimitiveArray::from_iter((0u32..2048).map(|v| v % 64)); + let arr = BitPackedEncoder::new(&values) + .with_bit_width(6) + .pack() + .unwrap() + .into_packed(); let sliced = slice_via_kernel(&arr, 512..1434); assert_nth_scalar!(sliced, 0, 512u32 % 64); assert_nth_scalar!(sliced, 921, 1433u32 % 64); @@ -100,11 +86,13 @@ mod test { #[test] fn slice_within_block_u8s() { - let packed = BitPackedArray::encode( - &PrimitiveArray::from_iter((0..10_000).map(|i| (i % 63) as u8)).into_array(), - 7, - ) - .unwrap(); + let values = PrimitiveArray::from_iter((0..10_000).map(|i| (i % 63) as u8)); + let packed = BitPackedEncoder::new(&values) + .with_bit_width(7) + .pack() + .unwrap() + .into_array() + .unwrap(); let compressed = packed.slice(768..9999).unwrap(); assert_nth_scalar!(compressed, 0, (768 % 63) as u8); @@ -113,11 +101,13 @@ mod test { #[test] fn slice_block_boundary_u8s() { - let packed = BitPackedArray::encode( - &PrimitiveArray::from_iter((0..10_000).map(|i| (i % 63) as u8)).into_array(), - 7, - ) - .unwrap(); + let values = PrimitiveArray::from_iter((0..10_000).map(|i| (i % 63) as u8)); + let packed = BitPackedEncoder::new(&values) + .with_bit_width(7) + .pack() + .unwrap() + .into_array() + .unwrap(); let compressed = packed.slice(7168..9216).unwrap(); assert_nth_scalar!(compressed, 0, (7168 % 63) as u8); @@ -126,11 +116,12 @@ mod test { #[test] fn double_slice_within_block() { - let arr = BitPackedArray::encode( - &PrimitiveArray::from_iter((0u32..2048).map(|v| v % 64)).into_array(), - 6, - ) - .unwrap(); + let values = PrimitiveArray::from_iter((0u32..2048).map(|v| v % 64)); + let arr = BitPackedEncoder::new(&values) + .with_bit_width(6) + .pack() + .unwrap() + .into_packed(); let sliced = slice_via_kernel(&arr, 512..1434); assert_nth_scalar!(sliced, 0, 512u32 % 64); assert_nth_scalar!(sliced, 921, 1433u32 % 64); @@ -143,30 +134,16 @@ mod test { assert_eq!(doubly_sliced.len(), 784); } - #[test] - fn slice_empty_patches() { - // We create an array that has 1 element that does not fit in the 6-bit range. - let array = BitPackedArray::encode(&buffer![0u32..=64].into_array(), 6).unwrap(); - - assert!(array.patches().is_some()); - - let patch_indices = array.patches().unwrap().indices().clone(); - assert_eq!(patch_indices.len(), 1); - - // Slicing drops the empty patches array. - let sliced_bp = slice_via_kernel(&array, 0..64); - assert!(sliced_bp.patches().is_none()); - } - #[test] fn take_after_slice() { // Check that our take implementation respects the offsets applied after slicing. - - let array = BitPackedArray::encode( - &PrimitiveArray::from_iter((63u32..).take(3072)).into_array(), - 6, - ) - .unwrap(); + let values = PrimitiveArray::from_iter((63u32..).take(3072)); + let array = BitPackedEncoder::new(&values) + .with_bit_width(6) + .pack() + .unwrap() + .into_array() + .unwrap(); // Slice the array. // The resulting array will still have 3 1024-element chunks. @@ -183,52 +160,31 @@ mod test { assert_eq!(taken.len(), 3); } - #[test] - fn scalar_at_invalid_patches() { - let packed_array = unsafe { - BitPackedArray::new_unchecked( - BufferHandle::new_host(ByteBuffer::copy_from_aligned( - [0u8; 128], - Alignment::of::(), - )), - DType::Primitive(PType::U32, true.into()), - Validity::AllInvalid, - Some( - Patches::new( - 8, - 0, - buffer![1u32].into_array(), - PrimitiveArray::new(buffer![999u32], Validity::AllValid).into_array(), - None, - ) - .unwrap(), - ), - 1, - 8, - 0, - ) - .into_array() - }; - assert_eq!( - packed_array.scalar_at(1).unwrap(), - Scalar::null(DType::Primitive(PType::U32, Nullability::Nullable)) - ); - } - #[test] fn scalar_at() { let values = (0u32..257).collect::>(); - let uncompressed = values.clone().into_array(); - let packed = BitPackedArray::encode(&uncompressed, 8).unwrap(); - assert!(packed.patches().is_some()); + let parray = PrimitiveArray::from_iter(values.iter().copied()); + let packed = BitPackedEncoder::new(&parray) + .with_bit_width(8) + .pack() + .unwrap(); + assert!(packed.has_patches()); - let patches = packed.patches().unwrap().indices().clone(); + let patches = packed.unwrap_patches(); + let patch_indices = patches.indices().clone(); assert_eq!( - usize::try_from(&patches.scalar_at(0).unwrap()).unwrap(), + usize::try_from(&patch_indices.scalar_at(0).unwrap()).unwrap(), 256 ); + // Re-encode to get the array for comparison + let packed2 = BitPackedEncoder::new(&parray) + .with_bit_width(8) + .pack() + .unwrap(); + let array = packed2.into_array().unwrap(); + let expected = PrimitiveArray::from_iter(values.iter().copied()); - assert_arrays_eq!(packed, expected); + assert_arrays_eq!(array, expected); } } diff --git a/encodings/fastlanes/src/for/array/for_compress.rs b/encodings/fastlanes/src/for/array/for_compress.rs index 95277505360..0a519e55b81 100644 --- a/encodings/fastlanes/src/for/array/for_compress.rs +++ b/encodings/fastlanes/src/for/array/for_compress.rs @@ -67,7 +67,7 @@ mod test { use vortex_session::VortexSession; use super::*; - use crate::BitPackedArray; + use crate::bitpack_compress::BitPackedEncoder; use crate::r#for::array::for_decompress::decompress; use crate::r#for::array::for_decompress::fused_decompress; @@ -130,7 +130,11 @@ mod test { // Create a range offset by a million. let expect = PrimitiveArray::from_iter((0u32..1024).map(|x| x % 7 + 10)); let array = PrimitiveArray::from_iter((0u32..1024).map(|x| x % 7)); - let bp = BitPackedArray::encode(&array.into_array(), 3).unwrap(); + let bp = BitPackedEncoder::new(&array) + .with_bit_width(3) + .pack() + .unwrap() + .into_packed(); let compressed = FoRArray::try_new(bp.into_array(), 10u32.into()).unwrap(); assert_arrays_eq!(compressed, expect); } @@ -140,7 +144,11 @@ mod test { // Create a range offset by a million. let expect = PrimitiveArray::from_iter((0u32..1024).map(|x| x % 7 + 10)); let array = PrimitiveArray::from_iter((0u32..1024).map(|x| x % 7)); - let bp = BitPackedArray::encode(&array.into_array(), 2).unwrap(); + let bp = BitPackedEncoder::new(&array) + .with_bit_width(2) + .pack() + .unwrap() + .into_packed(); let compressed = FoRArray::try_new(bp.clone().into_array(), 10u32.into()).unwrap(); let decompressed = fused_decompress::(&compressed, &bp, &mut SESSION.create_execution_ctx())?; diff --git a/encodings/fastlanes/src/for/array/for_decompress.rs b/encodings/fastlanes/src/for/array/for_decompress.rs index bffca15840b..24e70f8b0a0 100644 --- a/encodings/fastlanes/src/for/array/for_decompress.rs +++ b/encodings/fastlanes/src/for/array/for_decompress.rs @@ -20,7 +20,6 @@ use vortex_error::VortexResult; use crate::BitPacked; use crate::BitPackedArray; use crate::FoRArray; -use crate::bitpack_decompress; use crate::unpack_iter::UnpackStrategy; use crate::unpack_iter::UnpackedChunks; @@ -82,7 +81,7 @@ pub(crate) fn fused_decompress< >( for_: &FoRArray, bp: &BitPackedArray, - ctx: &mut ExecutionCtx, + _ctx: &mut ExecutionCtx, ) -> VortexResult { let ref_ = for_ .reference_scalar() @@ -117,14 +116,15 @@ pub(crate) fn fused_decompress< // Decode all chunks (initial, full, and trailer) in one call. unpacked.decode_into(uninit_slice); - if let Some(patches) = bp.patches() { - bitpack_decompress::apply_patches_to_uninit_range_fn( - &mut uninit_range, - patches, - ctx, - |v| v.wrapping_add(&ref_), - )?; - }; + // TODO(aduffy): make sure we do Patched(FOR(BP)) instead of FOR(Patched(BP)) + // if let Some(patches) = bp.patches() { + // bitpack_decompress::apply_patches_to_uninit_range_fn( + // &mut uninit_range, + // patches, + // ctx, + // |v| v.wrapping_add(&ref_), + // )?; + // }; // SAFETY: We have set a correct validity mask via `append_mask` with `array.len()` values and // initialized the same number of values needed via `decode_into`. diff --git a/vortex-btrblocks/src/compressor/integer/mod.rs b/vortex-btrblocks/src/compressor/integer/mod.rs index 58ee4f62e76..cb44fe493e6 100644 --- a/vortex-btrblocks/src/compressor/integer/mod.rs +++ b/vortex-btrblocks/src/compressor/integer/mod.rs @@ -26,8 +26,8 @@ use vortex_error::VortexResult; use vortex_error::vortex_bail; use vortex_error::vortex_err; use vortex_fastlanes::FoRArray; +use vortex_fastlanes::bitpack_compress::BitPackedEncoder; use vortex_fastlanes::bitpack_compress::bit_width_histogram; -use vortex_fastlanes::bitpack_compress::bitpack_encode; use vortex_fastlanes::bitpack_compress::find_best_bit_width; use vortex_runend::RunEndArray; use vortex_runend::compress::runend_encode; @@ -521,12 +521,13 @@ impl Scheme for BitPackingScheme { if bw as usize == stats.source().ptype().bit_width() { return Ok(stats.source().clone().into_array()); } - let mut packed = bitpack_encode(stats.source(), bw, Some(&histogram))?; - let patches = packed.patches().map(compress_patches).transpose()?; - packed.replace_patches(patches); - - Ok(packed.into_array()) + BitPackedEncoder::new(stats.source()) + .with_bit_width(bw) + .with_histogram(&histogram) + .pack()? + .map_patches(|p| compress_patches(&p))? + .into_array() } } diff --git a/vortex-cuda/benches/bitpacked_cuda.rs b/vortex-cuda/benches/bitpacked_cuda.rs index 44c911f545c..2bce5edbc7d 100644 --- a/vortex-cuda/benches/bitpacked_cuda.rs +++ b/vortex-cuda/benches/bitpacked_cuda.rs @@ -24,6 +24,7 @@ use vortex::array::validity::Validity::NonNullable; use vortex::buffer::Buffer; use vortex::dtype::NativePType; use vortex::encodings::fastlanes::BitPackedArray; +use vortex::encodings::fastlanes::bitpack_compress::BitPackedEncoder; use vortex::encodings::fastlanes::unpack_iter::BitPacked; use vortex::error::VortexExpect; use vortex::session::VortexSession; @@ -56,8 +57,13 @@ where .collect(); let primitive_array = PrimitiveArray::new(Buffer::from(values), NonNullable); - BitPackedArray::encode(&primitive_array.into_array(), bit_width) - .vortex_expect("failed to create BitPacked array") + BitPackedEncoder::new(&primitive_array) + .with_bit_width(bit_width) + .pack() + .unwrap() + // TODO(aduffy): THIS WILL FAIL. I just need to get this to compile then come back + // and fix this. + .unwrap_unpatched() } /// Create a bit-packed array with the given bit width and patch frequency. @@ -95,9 +101,13 @@ where }) .collect(); - let primitive_array = PrimitiveArray::new(Buffer::from(values), NonNullable).into_array(); - BitPackedArray::encode(&primitive_array, bit_width) - .vortex_expect("failed to create BitPacked array with patches") + let primitive_array = PrimitiveArray::from_iter(values); + BitPackedEncoder::new(&primitive_array) + .with_bit_width(bit_width) + .pack() + .unwrap() + // TODO(aduffy): THIS WILL FAIL. Need to come back and fix this + .unwrap_unpatched() } /// Generic benchmark function for a specific type and bit width diff --git a/vortex-cuda/benches/dynamic_dispatch_cuda.rs b/vortex-cuda/benches/dynamic_dispatch_cuda.rs index bf6fada88ab..4841259debd 100644 --- a/vortex-cuda/benches/dynamic_dispatch_cuda.rs +++ b/vortex-cuda/benches/dynamic_dispatch_cuda.rs @@ -165,97 +165,97 @@ impl BenchRunner { } } -// --------------------------------------------------------------------------- -// Benchmark: FoR(BitPacked) -// --------------------------------------------------------------------------- -fn bench_for_bitpacked(c: &mut Criterion) { - let mut group = c.benchmark_group("for_bitpacked_6bw"); - group.sample_size(10); - - let bit_width: u8 = 6; - let reference = 100_000u32; - - for (len, len_str) in BENCH_ARGS { - group.throughput(Throughput::Bytes((len * size_of::()) as u64)); - - // FoR(BitPacked): residuals 0..max_val, reference adds 100_000 - let max_val = (1u64 << bit_width).saturating_sub(1); - let residuals: Vec = (0..*len) - .map(|i| (i as u64 % (max_val + 1)) as u32) - .collect(); - let prim = PrimitiveArray::new(Buffer::from(residuals), NonNullable); - let bp = BitPackedArray::encode(&prim.into_array(), bit_width).vortex_expect("bitpack"); - let for_arr = - FoRArray::try_new(bp.into_array(), Scalar::from(reference)).vortex_expect("for"); - let array = for_arr.into_array(); - - group.bench_with_input( - BenchmarkId::new("dynamic_dispatch_u32", len_str), - len, - |b, &n| { - let mut cuda_ctx = - CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx"); - - let bench_runner = BenchRunner::new(&array, n, &cuda_ctx); - - b.iter_custom(|iters| { - let mut total_time = Duration::ZERO; - for _ in 0..iters { - total_time += bench_runner.run(&mut cuda_ctx); - } - total_time - }); - }, - ); - } - - group.finish(); -} - -// --------------------------------------------------------------------------- -// Benchmark: Dict(codes=BitPacked, values=Primitive) -// --------------------------------------------------------------------------- -fn bench_dict_bp_codes(c: &mut Criterion) { - let mut group = c.benchmark_group("dict_256vals_bp8bw_codes"); - group.sample_size(10); - - let dict_size: usize = 256; - let dict_bit_width: u8 = 8; - let dict_values: Vec = (0..dict_size as u32).map(|i| i * 1000 + 42).collect(); - - for (len, len_str) in BENCH_ARGS { - group.throughput(Throughput::Bytes((len * size_of::()) as u64)); - - let codes: Vec = (0..*len).map(|i| (i % dict_size) as u32).collect(); - let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); - let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), dict_bit_width) - .vortex_expect("bitpack codes"); - let values_prim = PrimitiveArray::new(Buffer::from(dict_values.clone()), NonNullable); - let dict = DictArray::new(codes_bp.into_array(), values_prim.into_array()); - let array = dict.into_array(); - - group.bench_with_input( - BenchmarkId::new("dynamic_dispatch_u32", len_str), - len, - |b, &n| { - let mut cuda_ctx = - CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx"); - - let bench_runner = BenchRunner::new(&array, n, &cuda_ctx); - - b.iter_custom(|iters| { - let mut total_time = Duration::ZERO; - for _ in 0..iters { - total_time += bench_runner.run(&mut cuda_ctx); - } - total_time - }); - }, - ); - } - - group.finish(); -} +// // --------------------------------------------------------------------------- +// // Benchmark: FoR(BitPacked) +// // --------------------------------------------------------------------------- +// fn bench_for_bitpacked(c: &mut Criterion) { +// let mut group = c.benchmark_group("for_bitpacked_6bw"); +// group.sample_size(10); +// +// let bit_width: u8 = 6; +// let reference = 100_000u32; +// +// for (len, len_str) in BENCH_ARGS { +// group.throughput(Throughput::Bytes((len * size_of::()) as u64)); +// +// // FoR(BitPacked): residuals 0..max_val, reference adds 100_000 +// let max_val = (1u64 << bit_width).saturating_sub(1); +// let residuals: Vec = (0..*len) +// .map(|i| (i as u64 % (max_val + 1)) as u32) +// .collect(); +// let prim = PrimitiveArray::new(Buffer::from(residuals), NonNullable); +// let bp = BitPackedArray::encode(&prim.into_array(), bit_width).vortex_expect("bitpack"); +// let for_arr = +// FoRArray::try_new(bp.into_array(), Scalar::from(reference)).vortex_expect("for"); +// let array = for_arr.into_array(); +// +// group.bench_with_input( +// BenchmarkId::new("dynamic_dispatch_u32", len_str), +// len, +// |b, &n| { +// let mut cuda_ctx = +// CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx"); +// +// let bench_runner = BenchRunner::new(&array, n, &cuda_ctx); +// +// b.iter_custom(|iters| { +// let mut total_time = Duration::ZERO; +// for _ in 0..iters { +// total_time += bench_runner.run(&mut cuda_ctx); +// } +// total_time +// }); +// }, +// ); +// } +// +// group.finish(); +// } + +// // --------------------------------------------------------------------------- +// // Benchmark: Dict(codes=BitPacked, values=Primitive) +// // --------------------------------------------------------------------------- +// fn bench_dict_bp_codes(c: &mut Criterion) { +// let mut group = c.benchmark_group("dict_256vals_bp8bw_codes"); +// group.sample_size(10); +// +// let dict_size: usize = 256; +// let dict_bit_width: u8 = 8; +// let dict_values: Vec = (0..dict_size as u32).map(|i| i * 1000 + 42).collect(); +// +// for (len, len_str) in BENCH_ARGS { +// group.throughput(Throughput::Bytes((len * size_of::()) as u64)); +// +// let codes: Vec = (0..*len).map(|i| (i % dict_size) as u32).collect(); +// let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); +// let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), dict_bit_width) +// .vortex_expect("bitpack codes"); +// let values_prim = PrimitiveArray::new(Buffer::from(dict_values.clone()), NonNullable); +// let dict = DictArray::new(codes_bp.into_array(), values_prim.into_array()); +// let array = dict.into_array(); +// +// group.bench_with_input( +// BenchmarkId::new("dynamic_dispatch_u32", len_str), +// len, +// |b, &n| { +// let mut cuda_ctx = +// CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx"); +// +// let bench_runner = BenchRunner::new(&array, n, &cuda_ctx); +// +// b.iter_custom(|iters| { +// let mut total_time = Duration::ZERO; +// for _ in 0..iters { +// total_time += bench_runner.run(&mut cuda_ctx); +// } +// total_time +// }); +// }, +// ); +// } +// +// group.finish(); +// } // --------------------------------------------------------------------------- // Benchmark: RunEnd(ends=Prim, values=Prim) @@ -301,124 +301,124 @@ fn bench_runend(c: &mut Criterion) { group.finish(); } -// --------------------------------------------------------------------------- -// Benchmark: Dict(codes=BitPacked, values=FoR(BitPacked)) -// --------------------------------------------------------------------------- -fn bench_dict_bp_codes_bp_for_values(c: &mut Criterion) { - let mut group = c.benchmark_group("dict_64vals_bp6bw_codes_for_bp6bw_values"); - group.sample_size(10); - - let dict_size: usize = 64; - let dict_bit_width: u8 = 6; - let dict_reference = 1_000_000u32; - let codes_bit_width: u8 = 6; - - // Dict values: residuals 0..63 bitpacked, FoR adds 1_000_000 - let dict_residuals: Vec = (0..dict_size as u32).collect(); - let dict_prim = PrimitiveArray::new(Buffer::from(dict_residuals), NonNullable); - let dict_bp = BitPackedArray::encode(&dict_prim.into_array(), dict_bit_width) - .vortex_expect("bitpack dict"); - let dict_for = FoRArray::try_new(dict_bp.into_array(), Scalar::from(dict_reference)) - .vortex_expect("for dict"); - - for (len, len_str) in BENCH_ARGS { - group.throughput(Throughput::Bytes((len * size_of::()) as u64)); - - let codes: Vec = (0..*len).map(|i| (i % dict_size) as u32).collect(); - let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); - let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), codes_bit_width) - .vortex_expect("bitpack codes"); - - let dict = DictArray::new(codes_bp.into_array(), dict_for.clone().into_array()); - let array = dict.into_array(); - - group.bench_with_input( - BenchmarkId::new("dynamic_dispatch_u32", len_str), - len, - |b, &n| { - let mut cuda_ctx = - CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx"); - - let bench_runner = BenchRunner::new(&array, n, &cuda_ctx); - - b.iter_custom(|iters| { - let mut total_time = Duration::ZERO; - for _ in 0..iters { - total_time += bench_runner.run(&mut cuda_ctx); - } - total_time - }); - }, - ); - } - - group.finish(); -} - -// --------------------------------------------------------------------------- -// Benchmark: ALP(FoR(BitPacked)) for f32 -// --------------------------------------------------------------------------- -fn bench_alp_for_bitpacked(c: &mut Criterion) { - let mut group = c.benchmark_group("alp_for_bp_6bw_f32"); - group.sample_size(10); - - let exponents = Exponents { e: 2, f: 0 }; - let bit_width: u8 = 6; - - for (len, len_str) in BENCH_ARGS { - group.throughput(Throughput::Bytes((len * size_of::()) as u64)); - - // Generate f32 values that ALP-encode without patches. - let floats: Vec = (0..*len) - .map(|i| ::decode_single(10 + (i as i32 % 64), exponents)) - .collect(); - let float_prim = PrimitiveArray::new(Buffer::from(floats), NonNullable); - - // Encode: ALP → FoR → BitPacked - let alp = alp_encode(&float_prim, Some(exponents)).vortex_expect("alp_encode"); - assert!(alp.patches().is_none()); - let for_arr = FoRArray::encode(alp.encoded().to_primitive()).vortex_expect("for encode"); - let bp = - BitPackedArray::encode(for_arr.encoded(), bit_width).vortex_expect("bitpack encode"); - - let tree = ALPArray::new( - FoRArray::try_new(bp.into_array(), for_arr.reference_scalar().clone()) - .vortex_expect("for_new") - .into_array(), - exponents, - None, - ); - let array = tree.into_array(); - - group.bench_with_input( - BenchmarkId::new("dynamic_dispatch_f32", len_str), - len, - |b, &n| { - let mut cuda_ctx = - CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx"); - - let bench_runner = BenchRunner::new(&array, n, &cuda_ctx); - - b.iter_custom(|iters| { - let mut total_time = Duration::ZERO; - for _ in 0..iters { - total_time += bench_runner.run(&mut cuda_ctx); - } - total_time - }); - }, - ); - } - - group.finish(); -} +// // --------------------------------------------------------------------------- +// // Benchmark: Dict(codes=BitPacked, values=FoR(BitPacked)) +// // --------------------------------------------------------------------------- +// fn bench_dict_bp_codes_bp_for_values(c: &mut Criterion) { +// let mut group = c.benchmark_group("dict_64vals_bp6bw_codes_for_bp6bw_values"); +// group.sample_size(10); +// +// let dict_size: usize = 64; +// let dict_bit_width: u8 = 6; +// let dict_reference = 1_000_000u32; +// let codes_bit_width: u8 = 6; +// +// // Dict values: residuals 0..63 bitpacked, FoR adds 1_000_000 +// let dict_residuals: Vec = (0..dict_size as u32).collect(); +// let dict_prim = PrimitiveArray::new(Buffer::from(dict_residuals), NonNullable); +// let dict_bp = BitPackedArray::encode(&dict_prim.into_array(), dict_bit_width) +// .vortex_expect("bitpack dict"); +// let dict_for = FoRArray::try_new(dict_bp.into_array(), Scalar::from(dict_reference)) +// .vortex_expect("for dict"); +// +// for (len, len_str) in BENCH_ARGS { +// group.throughput(Throughput::Bytes((len * size_of::()) as u64)); +// +// let codes: Vec = (0..*len).map(|i| (i % dict_size) as u32).collect(); +// let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); +// let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), codes_bit_width) +// .vortex_expect("bitpack codes"); +// +// let dict = DictArray::new(codes_bp.into_array(), dict_for.clone().into_array()); +// let array = dict.into_array(); +// +// group.bench_with_input( +// BenchmarkId::new("dynamic_dispatch_u32", len_str), +// len, +// |b, &n| { +// let mut cuda_ctx = +// CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx"); +// +// let bench_runner = BenchRunner::new(&array, n, &cuda_ctx); +// +// b.iter_custom(|iters| { +// let mut total_time = Duration::ZERO; +// for _ in 0..iters { +// total_time += bench_runner.run(&mut cuda_ctx); +// } +// total_time +// }); +// }, +// ); +// } +// +// group.finish(); +// } + +// // --------------------------------------------------------------------------- +// // Benchmark: ALP(FoR(BitPacked)) for f32 +// // --------------------------------------------------------------------------- +// fn bench_alp_for_bitpacked(c: &mut Criterion) { +// let mut group = c.benchmark_group("alp_for_bp_6bw_f32"); +// group.sample_size(10); +// +// let exponents = Exponents { e: 2, f: 0 }; +// let bit_width: u8 = 6; +// +// for (len, len_str) in BENCH_ARGS { +// group.throughput(Throughput::Bytes((len * size_of::()) as u64)); +// +// // Generate f32 values that ALP-encode without patches. +// let floats: Vec = (0..*len) +// .map(|i| ::decode_single(10 + (i as i32 % 64), exponents)) +// .collect(); +// let float_prim = PrimitiveArray::new(Buffer::from(floats), NonNullable); +// +// // Encode: ALP → FoR → BitPacked +// let alp = alp_encode(&float_prim, Some(exponents)).vortex_expect("alp_encode"); +// assert!(alp.patches().is_none()); +// let for_arr = FoRArray::encode(alp.encoded().to_primitive()).vortex_expect("for encode"); +// let bp = +// BitPackedArray::encode(for_arr.encoded(), bit_width).vortex_expect("bitpack encode"); +// +// let tree = ALPArray::new( +// FoRArray::try_new(bp.into_array(), for_arr.reference_scalar().clone()) +// .vortex_expect("for_new") +// .into_array(), +// exponents, +// None, +// ); +// let array = tree.into_array(); +// +// group.bench_with_input( +// BenchmarkId::new("dynamic_dispatch_f32", len_str), +// len, +// |b, &n| { +// let mut cuda_ctx = +// CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx"); +// +// let bench_runner = BenchRunner::new(&array, n, &cuda_ctx); +// +// b.iter_custom(|iters| { +// let mut total_time = Duration::ZERO; +// for _ in 0..iters { +// total_time += bench_runner.run(&mut cuda_ctx); +// } +// total_time +// }); +// }, +// ); +// } +// +// group.finish(); +// } fn benchmark_dynamic_dispatch(c: &mut Criterion) { - bench_for_bitpacked(c); - bench_dict_bp_codes(c); + // bench_for_bitpacked(c); + // bench_dict_bp_codes(c); bench_runend(c); - bench_dict_bp_codes_bp_for_values(c); - bench_alp_for_bitpacked(c); + // bench_dict_bp_codes_bp_for_values(c); + // bench_alp_for_bitpacked(c); } criterion::criterion_group!(benches, benchmark_dynamic_dispatch); diff --git a/vortex-cuda/benches/for_cuda.rs b/vortex-cuda/benches/for_cuda.rs index 31f7b270e92..4182915bd83 100644 --- a/vortex-cuda/benches/for_cuda.rs +++ b/vortex-cuda/benches/for_cuda.rs @@ -21,12 +21,10 @@ use cudarc::driver::DeviceRepr; use futures::executor::block_on; use vortex::array::IntoArray; use vortex::array::arrays::PrimitiveArray; -use vortex::array::validity::Validity; -use vortex::buffer::Buffer; use vortex::dtype::NativePType; use vortex::dtype::PType; -use vortex::encodings::fastlanes::BitPackedArray; use vortex::encodings::fastlanes::FoRArray; +use vortex::encodings::fastlanes::bitpack_compress::BitPackedEncoder; use vortex::error::VortexExpect; use vortex::scalar::Scalar; use vortex::session::VortexSession; @@ -51,15 +49,18 @@ where .map(|i| >::from((i % 256) as u8)) .collect(); - let primitive_array = - PrimitiveArray::new(Buffer::from(data), Validity::NonNullable).into_array(); + let primitive_array = PrimitiveArray::from_iter(data); if bp && T::PTYPE != PType::U8 { - let child = BitPackedArray::encode(&primitive_array, 8).vortex_expect("failed to bitpack"); - FoRArray::try_new(child.into_array(), reference.into()) - .vortex_expect("failed to create FoR array") + let child = BitPackedEncoder::new(&primitive_array) + .with_bit_width(8) + .pack() + .unwrap() + .into_array() + .unwrap(); + FoRArray::try_new(child, reference.into()).vortex_expect("failed to create FoR array") } else { - FoRArray::try_new(primitive_array, reference.into()) + FoRArray::try_new(primitive_array.into_array(), reference.into()) .vortex_expect("failed to create FoR array") } } diff --git a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs index c31befb902b..89a179e307a 100644 --- a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs +++ b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs @@ -405,13 +405,13 @@ impl UnmaterializedPlan { offset, bit_width, packed, - patches, + // patches, .. } = bp.into_parts(); - if patches.is_some() { - vortex_bail!("Dynamic dispatch does not support BitPackedArray with patches"); - } + // if patches.is_some() { + // vortex_bail!("Dynamic dispatch does not support BitPackedArray with patches"); + // } let buf_index = self.source_buffers.len(); self.source_buffers.push(packed); diff --git a/vortex-cuda/src/kernel/encodings/bitpacked.rs b/vortex-cuda/src/kernel/encodings/bitpacked.rs index 2e98b4f0544..fd73f64eee6 100644 --- a/vortex-cuda/src/kernel/encodings/bitpacked.rs +++ b/vortex-cuda/src/kernel/encodings/bitpacked.rs @@ -29,7 +29,7 @@ use crate::CudaDeviceBuffer; use crate::executor::CudaExecute; use crate::executor::CudaExecutionCtx; use crate::kernel::patches::gpu::GPUPatches; -use crate::kernel::patches::types::transpose_patches; +use crate::kernel::patches::types::DevicePatches; /// CUDA decoder for bit-packed arrays. #[derive(Debug)] @@ -101,7 +101,6 @@ where bit_width, len, packed, - patches, validity, } = array.into_parts(); @@ -123,11 +122,13 @@ where let config = bitpacked_cuda_launch_config(output_width, len)?; // We hold this here to keep the device buffers alive. - let device_patches = if let Some(patches) = patches { - Some(transpose_patches(&patches, ctx).await?) - } else { - None - }; + // TODO(aduffy): add kernel for PatchedArray(BitPacked) so this gets fused. + let device_patches: Option = None; + // let device_patches = if let Some(patches) = patches { + // Some(transpose_patches(&patches, ctx).await?) + // } else { + // None + // }; let patches_arg = if let Some(p) = &device_patches { GPUPatches { @@ -178,8 +179,10 @@ mod tests { use vortex::array::validity::Validity::NonNullable; use vortex::array::vtable::VTable; use vortex::buffer::Buffer; + use vortex::encodings::fastlanes::bitpack_compress::BitPackedEncoder; use vortex::error::VortexExpect; use vortex::session::VortexSession; + use vortex_array::arrays::Patched; use super::*; use crate::CanonicalCudaExt; @@ -201,8 +204,11 @@ mod tests { let array = PrimitiveArray::new(iter.collect::>(), NonNullable); // Last two items should be patched - let bp_with_patches = BitPackedArray::encode(&array.into_array(), bw)?; - assert!(bp_with_patches.patches().is_some()); + let bp_with_patches = BitPackedEncoder::new(&array) + .with_bit_width(bw) + .pack()? + .into_array()?; + assert!(bp_with_patches.is::()); let cpu_result = bp_with_patches.to_canonical()?.into_array(); @@ -232,8 +238,11 @@ mod tests { ); // Last two items should be patched - let bp_with_patches = BitPackedArray::encode(&array.into_array(), 9)?; - assert!(bp_with_patches.patches().is_some()); + let bp_with_patches = BitPackedEncoder::new(&array) + .with_bit_width(9) + .pack()? + .into_array()?; + assert!(bp_with_patches.is::()); let cpu_result = bp_with_patches.to_canonical()?.into_array(); @@ -274,8 +283,10 @@ mod tests { NonNullable, ); - let bitpacked_array = BitPackedArray::encode(&primitive_array.into_array(), bit_width) - .vortex_expect("operation should succeed in test"); + let bitpacked_array = BitPackedEncoder::new(&primitive_array) + .with_bit_width(bit_width) + .pack()? + .into_array()?; let cpu_result = bitpacked_array.to_canonical()?; let gpu_result = block_on(async { @@ -323,8 +334,10 @@ mod tests { NonNullable, ); - let bitpacked_array = BitPackedArray::encode(&primitive_array.into_array(), bit_width) - .vortex_expect("operation should succeed in test"); + let bitpacked_array = BitPackedEncoder::new(&primitive_array) + .with_bit_width(bit_width) + .pack()? + .into_array()?; let cpu_result = bitpacked_array.to_canonical()?; let gpu_result = block_on(async { @@ -388,8 +401,10 @@ mod tests { NonNullable, ); - let bitpacked_array = BitPackedArray::encode(&primitive_array.into_array(), bit_width) - .vortex_expect("operation should succeed in test"); + let bitpacked_array = BitPackedEncoder::new(&primitive_array) + .with_bit_width(bit_width) + .pack()? + .into_array()?; let cpu_result = bitpacked_array.to_canonical()?; let gpu_result = block_on(async { @@ -485,8 +500,10 @@ mod tests { NonNullable, ); - let bitpacked_array = BitPackedArray::encode(&primitive_array.into_array(), bit_width) - .vortex_expect("operation should succeed in test"); + let bitpacked_array = BitPackedEncoder::new(&primitive_array) + .with_bit_width(bit_width) + .pack()? + .into_array()?; let cpu_result = bitpacked_array.to_canonical()?; let gpu_result = block_on(async { BitPackedExecutor @@ -512,14 +529,15 @@ mod tests { let max_val = (1u64 << bit_width).saturating_sub(1); let primitive_array = PrimitiveArray::new( - (0u64..4096) - .map(|i| i % (max_val + 1)) - .collect::>(), + (0u64..4096).map(|i| i % max_val).collect::>(), NonNullable, ); - let bitpacked_array = BitPackedArray::encode(&primitive_array.into_array(), bit_width) - .vortex_expect("operation should succeed in test"); + let bitpacked_array = BitPackedEncoder::new(&primitive_array) + .with_bit_width(bit_width) + .pack()? + .unwrap_unpatched(); + let slice_ref = bitpacked_array.clone().into_array().slice(67..3969)?; let mut exec_ctx = ExecutionCtx::new(VortexSession::empty().with::()); let sliced_array = diff --git a/vortex-cuda/src/kernel/encodings/for_.rs b/vortex-cuda/src/kernel/encodings/for_.rs index 29e00f4ec92..82b4ebfac87 100644 --- a/vortex-cuda/src/kernel/encodings/for_.rs +++ b/vortex-cuda/src/kernel/encodings/for_.rs @@ -129,6 +129,7 @@ mod tests { use vortex::dtype::NativePType; use vortex::encodings::fastlanes::BitPackedArray; use vortex::encodings::fastlanes::FoRArray; + use vortex::encodings::fastlanes::bitpack_compress::BitPackedEncoder; use vortex::error::VortexExpect; use vortex::scalar::Scalar; use vortex::session::VortexSession; @@ -175,12 +176,13 @@ mod tests { let mut cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty()) .vortex_expect("failed to create execution context"); - let values = (0i8..8i8) - .cycle() - .take(1024) - .collect::>() - .into_array(); - let packed = BitPackedArray::encode(&values, 3).unwrap().into_array(); + let values = PrimitiveArray::from_iter((0i8..8i8).cycle().take(1024)); + let packed = BitPackedEncoder::new(&values) + .with_bit_width(3) + .pack() + .unwrap() + .into_array() + .unwrap(); let for_array = FoRArray::try_new(packed, (-8i8).into()).unwrap(); let cpu_result = for_array.to_canonical().unwrap(); diff --git a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/bitpacked.rs b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/bitpacked.rs index facbab894f7..3392c52ccc7 100644 --- a/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/bitpacked.rs +++ b/vortex-test/compat-gen/src/fixtures/arrays/synthetic/encodings/bitpacked.rs @@ -9,7 +9,7 @@ use vortex::array::dtype::FieldNames; use vortex::array::validity::Validity; use vortex::array::vtable::ArrayId; use vortex::encodings::fastlanes::BitPacked; -use vortex::encodings::fastlanes::bitpack_compress::bitpack_encode; +use vortex::encodings::fastlanes::bitpack_compress::BitPackedEncoder; use vortex::error::VortexResult; use super::N; @@ -79,21 +79,66 @@ impl FlatLayoutFixture for BitPackedFixture { "u16_head_tail_nulls", ]), vec![ - bitpack_encode(&u32_8bit, 8, None)?.into_array(), - bitpack_encode(&u64_12bit, 12, None)?.into_array(), - bitpack_encode(&u16_4bit, 4, None)?.into_array(), - bitpack_encode(&u16_1bit, 1, None)?.into_array(), - bitpack_encode(&u32_nullable, 7, None)?.into_array(), - bitpack_encode(&u32_all_zero, 1, None)?.into_array(), - bitpack_encode(&u16_all_equal, 3, None)?.into_array(), - bitpack_encode(&u16_15bit, 15, None)?.into_array(), - bitpack_encode(&u32_31bit, 31, None)?.into_array(), - bitpack_encode(&u64_63bit, 63, None)?.into_array(), - bitpack_encode(&u8_3bit, 3, None)?.into_array(), - bitpack_encode(&u8_5bit, 5, None)?.into_array(), - bitpack_encode(&u16_9bit, 9, None)?.into_array(), - bitpack_encode(&u32_17bit, 17, None)?.into_array(), - bitpack_encode(&u16_head_tail_nulls, 5, None)?.into_array(), + BitPackedEncoder::new(&u32_8bit) + .with_bit_width(8) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u64_12bit) + .with_bit_width(2) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u16_4bit) + .with_bit_width(4) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u16_1bit) + .with_bit_width(1) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u32_nullable) + .with_bit_width(7) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u32_all_zero) + .with_bit_width(1) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u16_all_equal) + .with_bit_width(3) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u16_15bit) + .with_bit_width(5) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u32_31bit) + .with_bit_width(1) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u64_63bit) + .with_bit_width(3) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u8_3bit) + .with_bit_width(3) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u8_5bit) + .with_bit_width(5) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u16_9bit) + .with_bit_width(9) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u32_17bit) + .with_bit_width(7) + .pack()? + .into_array()?, + BitPackedEncoder::new(&u16_head_tail_nulls) + .with_bit_width(5) + .pack()? + .into_array()?, ], N, Validity::NonNullable, diff --git a/vortex/benches/common_encoding_tree_throughput.rs b/vortex/benches/common_encoding_tree_throughput.rs index 4d88546d2df..04fe708067a 100644 --- a/vortex/benches/common_encoding_tree_throughput.rs +++ b/vortex/benches/common_encoding_tree_throughput.rs @@ -35,7 +35,6 @@ use vortex::encodings::fsst::fsst_compress; use vortex::encodings::fsst::fsst_train_compressor; use vortex::encodings::runend::RunEndArray; use vortex::extension::datetime::TimeUnit; -use vortex_fastlanes::BitPackedArray; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; @@ -61,6 +60,7 @@ fn with_byte_counter<'a, 'b>(bencher: Bencher<'a, 'b>, bytes: u64) -> Bencher<'a mod setup { use rand::rngs::StdRng; + use vortex_fastlanes::bitpack_compress::BitPackedEncoder; use super::*; @@ -88,7 +88,12 @@ mod setup { let (uint_array, ..) = setup_primitive_arrays(); let compressed = FoRArray::encode(uint_array).unwrap(); let inner = compressed.encoded(); - let bp = BitPackedArray::encode(inner, 8).unwrap(); + let bp = BitPackedEncoder::new(&inner.to_primitive()) + .with_bit_width(8) + .pack() + .unwrap() + .into_array() + .unwrap(); FoRArray::try_new(bp.into_array(), compressed.reference_scalar().clone()) .unwrap() .into_array() @@ -102,7 +107,12 @@ mod setup { // Manually construct ALP <- FoR <- BitPacked tree let for_array = FoRArray::encode(alp_compressed.encoded().to_primitive()).unwrap(); let inner = for_array.encoded(); - let bp = BitPackedArray::encode(inner, 8).unwrap(); + let bp = BitPackedEncoder::new(&inner.to_primitive()) + .with_bit_width(8) + .pack() + .unwrap() + .into_array() + .unwrap(); let for_with_bp = FoRArray::try_new(bp.into_array(), for_array.reference_scalar().clone()).unwrap(); @@ -137,9 +147,12 @@ mod setup { let codes_prim = PrimitiveArray::from_iter(codes); // Compress codes with BitPacked (6 bits should be enough for ~50 unique values) - let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), 6) + let codes_bp = BitPackedEncoder::new(&codes_prim) + .with_bit_width(6) + .pack() .unwrap() - .into_array(); + .into_array() + .unwrap(); // Create values array let values_array = VarBinViewArray::from_iter_str(unique_strings).into_array(); @@ -174,7 +187,12 @@ mod setup { let ends_prim = runend.ends().to_primitive(); let ends_for = FoRArray::encode(ends_prim).unwrap(); let ends_inner = ends_for.encoded(); - let ends_bp = BitPackedArray::encode(ends_inner, 8).unwrap(); + let ends_bp = BitPackedEncoder::new(&ends_inner.to_primitive()) + .with_bit_width(8) + .pack() + .unwrap() + .into_array() + .unwrap(); let compressed_ends = FoRArray::try_new(ends_bp.into_array(), ends_for.reference_scalar().clone()) .unwrap() @@ -182,9 +200,12 @@ mod setup { // Compress the values with BitPacked let values_prim = runend.values().to_primitive(); - let compressed_values = BitPackedArray::encode(&values_prim.into_array(), 8) + let compressed_values = BitPackedEncoder::new(&values_prim) + .with_bit_width(8) + .pack() .unwrap() - .into_array(); + .into_array() + .unwrap(); RunEndArray::try_new(compressed_ends, compressed_values) .unwrap() @@ -246,7 +267,12 @@ mod setup { // Compress the VarBin offsets with BitPacked let codes = fsst.codes(); let offsets_prim = codes.offsets().to_primitive(); - let offsets_bp = BitPackedArray::encode(&offsets_prim.into_array(), 20).unwrap(); + let offsets_bp = BitPackedEncoder::new(&offsets_prim) + .with_bit_width(20) + .pack() + .unwrap() + .into_array() + .unwrap(); // Rebuild VarBin with compressed offsets let compressed_codes = VarBinArray::try_new( @@ -299,7 +325,12 @@ mod setup { let days_prim = parts.days.to_primitive(); let days_for = FoRArray::encode(days_prim).unwrap(); let days_inner = days_for.encoded(); - let days_bp = BitPackedArray::encode(days_inner, 16).unwrap(); + let days_bp = BitPackedEncoder::new(&days_inner.to_primitive()) + .with_bit_width(16) + .pack() + .unwrap() + .into_array() + .unwrap(); let compressed_days = FoRArray::try_new(days_bp.into_array(), days_for.reference_scalar().clone()) .unwrap() @@ -309,7 +340,12 @@ mod setup { let seconds_prim = parts.seconds.to_primitive(); let seconds_for = FoRArray::encode(seconds_prim).unwrap(); let seconds_inner = seconds_for.encoded(); - let seconds_bp = BitPackedArray::encode(seconds_inner, 17).unwrap(); + let seconds_bp = BitPackedEncoder::new(&seconds_inner.to_primitive()) + .with_bit_width(17) + .pack() + .unwrap() + .into_array() + .unwrap(); let compressed_seconds = FoRArray::try_new( seconds_bp.into_array(), seconds_for.reference_scalar().clone(), @@ -321,7 +357,12 @@ mod setup { let subseconds_prim = parts.subseconds.to_primitive(); let subseconds_for = FoRArray::encode(subseconds_prim).unwrap(); let subseconds_inner = subseconds_for.encoded(); - let subseconds_bp = BitPackedArray::encode(subseconds_inner, 20).unwrap(); + let subseconds_bp = BitPackedEncoder::new(&subseconds_inner.to_primitive()) + .with_bit_width(20) + .pack() + .unwrap() + .into_array() + .unwrap(); let compressed_subseconds = FoRArray::try_new( subseconds_bp.into_array(), subseconds_for.reference_scalar().clone(), From c25da2d1106f99cd8ee8c217c8a1e93df2b4ba6c Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 24 Mar 2026 14:58:39 -0400 Subject: [PATCH 17/19] CUDA patched execution Signed-off-by: Andrew Duffy --- vortex-cuda/src/kernel/mod.rs | 1 + vortex-cuda/src/kernel/patched/mod.rs | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 vortex-cuda/src/kernel/patched/mod.rs diff --git a/vortex-cuda/src/kernel/mod.rs b/vortex-cuda/src/kernel/mod.rs index 93ffd768df5..92280102e89 100644 --- a/vortex-cuda/src/kernel/mod.rs +++ b/vortex-cuda/src/kernel/mod.rs @@ -24,6 +24,7 @@ use vortex::utils::aliases::dash_map::DashMap; mod arrays; mod encodings; mod filter; +mod patched; mod patches; mod slice; diff --git a/vortex-cuda/src/kernel/patched/mod.rs b/vortex-cuda/src/kernel/patched/mod.rs new file mode 100644 index 00000000000..0d735177e5d --- /dev/null +++ b/vortex-cuda/src/kernel/patched/mod.rs @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors From e32503921c70f450b754dd1ecf8f45a3ead01900 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 24 Mar 2026 17:58:00 -0400 Subject: [PATCH 18/19] fixup append_to_builder Signed-off-by: Andrew Duffy --- vortex-array/src/arrays/patched/vtable/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/vortex-array/src/arrays/patched/vtable/mod.rs b/vortex-array/src/arrays/patched/vtable/mod.rs index 093083a1748..869cd2bbec4 100644 --- a/vortex-array/src/arrays/patched/vtable/mod.rs +++ b/vortex-array/src/arrays/patched/vtable/mod.rs @@ -216,8 +216,6 @@ impl VTable for Patched { let output = typed_builder.values_mut(); let trailer = output.len() - len; - let trailer = values.len() - len; - apply_patches_primitive::( &mut output[trailer..], offset, From e6588118efb9b24b4e7c9f06385c9986fd8fdc59 Mon Sep 17 00:00:00 2001 From: Andrew Duffy Date: Tue, 24 Mar 2026 18:31:24 -0400 Subject: [PATCH 19/19] save Signed-off-by: Andrew Duffy --- .../fastlanes/src/bitpacking/vtable/mod.rs | 6 +- .../src/delta/array/delta_compress.rs | 18 +- .../arrays/lazy_patched/vtable/operations.rs | 7 +- vortex-array/src/arrays/patched/array.rs | 13 + vortex-array/src/vtable/dyn_.rs | 1 - vortex-cuda/benches/dynamic_dispatch_cuda.rs | 9 - vortex-cuda/src/dynamic_dispatch/mod.rs | 1926 +++++++++-------- .../src/dynamic_dispatch/plan_builder.rs | 5 +- vortex-cuda/src/hybrid_dispatch/mod.rs | 171 +- vortex-cuda/src/kernel/encodings/for_.rs | 1 - vortex/benches/single_encoding_throughput.rs | 12 +- 11 files changed, 1093 insertions(+), 1076 deletions(-) diff --git a/encodings/fastlanes/src/bitpacking/vtable/mod.rs b/encodings/fastlanes/src/bitpacking/vtable/mod.rs index 6d4ad0b91d5..f91904bc3a4 100644 --- a/encodings/fastlanes/src/bitpacking/vtable/mod.rs +++ b/encodings/fastlanes/src/bitpacking/vtable/mod.rs @@ -297,8 +297,10 @@ impl VTable for BitPacked { }) } - fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult { - Ok(ExecutionResult::done(unpack_array(array)?.into_array())) + fn execute(array: Arc, _ctx: &mut ExecutionCtx) -> VortexResult { + Ok(ExecutionResult::done( + unpack_array(array.as_ref())?.into_array(), + )) } fn execute_parent( diff --git a/encodings/fastlanes/src/delta/array/delta_compress.rs b/encodings/fastlanes/src/delta/array/delta_compress.rs index 94862ff26e6..b76178c2897 100644 --- a/encodings/fastlanes/src/delta/array/delta_compress.rs +++ b/encodings/fastlanes/src/delta/array/delta_compress.rs @@ -106,7 +106,7 @@ mod tests { use vortex_session::VortexSession; use crate::DeltaArray; - use crate::bitpack_compress::bitpack_encode; + use crate::bitpack_compress::BitPackedEncoder; use crate::delta::array::delta_decompress::delta_decompress; use crate::delta_compress; @@ -137,14 +137,14 @@ mod tests { (0u8..200).map(|i| (!(50..100).contains(&i)).then_some(i)), ); let (bases, deltas) = delta_compress(&array, &mut SESSION.create_execution_ctx()).unwrap(); - let bitpacked_deltas = bitpack_encode(&deltas, 1, None).unwrap(); - let packed_delta = DeltaArray::try_new( - bases.into_array(), - bitpacked_deltas.into_array(), - 0, - array.len(), - ) - .unwrap(); + let bitpacked_deltas = BitPackedEncoder::new(&deltas) + .with_bit_width(1) + .pack() + .unwrap() + .into_array() + .unwrap(); + let packed_delta = + DeltaArray::try_new(bases.into_array(), bitpacked_deltas, 0, array.len()).unwrap(); assert_arrays_eq!(packed_delta.to_primitive(), array); } } diff --git a/vortex-array/src/arrays/lazy_patched/vtable/operations.rs b/vortex-array/src/arrays/lazy_patched/vtable/operations.rs index d782960af2b..dc7e4fc6085 100644 --- a/vortex-array/src/arrays/lazy_patched/vtable/operations.rs +++ b/vortex-array/src/arrays/lazy_patched/vtable/operations.rs @@ -4,13 +4,18 @@ use vortex_error::VortexResult; use crate::DynArray; +use crate::ExecutionCtx; use crate::arrays::lazy_patched::LazyPatched; use crate::arrays::lazy_patched::LazyPatchedArray; use crate::scalar::Scalar; use crate::vtable::OperationsVTable; impl OperationsVTable for LazyPatched { - fn scalar_at(array: &LazyPatchedArray, index: usize) -> VortexResult { + fn scalar_at( + array: &LazyPatchedArray, + index: usize, + _ctx: &mut ExecutionCtx, + ) -> VortexResult { Ok(if let Some(scalar) = array.patches.get_patched(index)? { scalar } else { diff --git a/vortex-array/src/arrays/patched/array.rs b/vortex-array/src/arrays/patched/array.rs index 5a2aa9fa03e..69a31b01510 100644 --- a/vortex-array/src/arrays/patched/array.rs +++ b/vortex-array/src/arrays/patched/array.rs @@ -26,6 +26,19 @@ use crate::patches::Patches; use crate::stats::ArrayStats; use crate::validity::Validity; +/// Shredded components of the [`PatchedArray`]. +/// +/// This is created when you consume the arrary using [`PatchedArray::into_parts`]. +pub struct PatchedArrayParts { + pub inner: ArrayRef, + pub n_chunks: usize, + pub n_lanes: usize, + pub offset: usize, + pub lane_offsets: BufferHandle, + pub indices: BufferHandle, + pub values: ArrayRef, +} + /// An array that partially "patches" another array with new values. #[derive(Debug, Clone)] pub struct PatchedArray { diff --git a/vortex-array/src/vtable/dyn_.rs b/vortex-array/src/vtable/dyn_.rs index 11db3853ee0..02430d5288d 100644 --- a/vortex-array/src/vtable/dyn_.rs +++ b/vortex-array/src/vtable/dyn_.rs @@ -19,7 +19,6 @@ use crate::buffer::BufferHandle; use crate::dtype::DType; use crate::executor::ExecutionCtx; use crate::serde::ArrayChildren; -use crate::stats::ArrayStats; use crate::vtable::Array; use crate::vtable::VTable; diff --git a/vortex-cuda/benches/dynamic_dispatch_cuda.rs b/vortex-cuda/benches/dynamic_dispatch_cuda.rs index 4841259debd..78914203a3d 100644 --- a/vortex-cuda/benches/dynamic_dispatch_cuda.rs +++ b/vortex-cuda/benches/dynamic_dispatch_cuda.rs @@ -17,19 +17,10 @@ use cudarc::driver::LaunchConfig; use cudarc::driver::PushKernelArg; use cudarc::driver::sys::CUevent_flags; use vortex::array::IntoArray; -use vortex::array::ToCanonical; -use vortex::array::arrays::DictArray; use vortex::array::arrays::PrimitiveArray; -use vortex::array::scalar::Scalar; use vortex::array::validity::Validity::NonNullable; use vortex::buffer::Buffer; use vortex::dtype::PType; -use vortex::encodings::alp::ALPArray; -use vortex::encodings::alp::ALPFloat; -use vortex::encodings::alp::Exponents; -use vortex::encodings::alp::alp_encode; -use vortex::encodings::fastlanes::BitPackedArray; -use vortex::encodings::fastlanes::FoRArray; use vortex::encodings::runend::RunEndArray; use vortex::error::VortexExpect; use vortex::error::VortexResult; diff --git a/vortex-cuda/src/dynamic_dispatch/mod.rs b/vortex-cuda/src/dynamic_dispatch/mod.rs index 3bdd3f25315..af2d8d9fde2 100644 --- a/vortex-cuda/src/dynamic_dispatch/mod.rs +++ b/vortex-cuda/src/dynamic_dispatch/mod.rs @@ -273,963 +273,969 @@ impl MaterializedPlan { } } -#[cfg(test)] -mod tests { - use std::sync::Arc; - - use cudarc::driver::DevicePtr; - use cudarc::driver::LaunchConfig; - use cudarc::driver::PushKernelArg; - use rstest::rstest; - use vortex::array::IntoArray; - use vortex::array::ToCanonical; - use vortex::array::arrays::DictArray; - use vortex::array::arrays::PrimitiveArray; - use vortex::array::scalar::Scalar; - use vortex::array::validity::Validity::NonNullable; - use vortex::buffer::Buffer; - use vortex::dtype::PType; - use vortex::encodings::alp::ALPArray; - use vortex::encodings::alp::ALPFloat; - use vortex::encodings::alp::Exponents; - use vortex::encodings::alp::alp_encode; - use vortex::encodings::fastlanes::BitPackedArray; - use vortex::encodings::fastlanes::FoRArray; - use vortex::encodings::runend::RunEndArray; - use vortex::encodings::zigzag::ZigZagArray; - use vortex::error::VortexExpect; - use vortex::error::VortexResult; - use vortex::session::VortexSession; - - use super::CudaDispatchPlan; - use super::SMEM_TILE_SIZE; - use super::ScalarOp; - use super::SourceOp; - use super::Stage; - use super::UnmaterializedPlan; - use crate::CudaBufferExt; - use crate::CudaDeviceBuffer; - use crate::CudaExecutionCtx; - use crate::session::CudaSession; - - fn make_bitpacked_array_u32(bit_width: u8, len: usize) -> BitPackedArray { - let max_val = (1u64 << bit_width).saturating_sub(1); - let values: Vec = (0..len) - .map(|i| ((i as u64) % (max_val + 1)) as u32) - .collect(); - let primitive = PrimitiveArray::new(Buffer::from(values), NonNullable); - BitPackedArray::encode(&primitive.into_array(), bit_width) - .vortex_expect("failed to create BitPacked array") - } - - #[crate::test] - fn test_max_scalar_ops() -> VortexResult<()> { - let bit_width: u8 = 6; - let len = 2050; - let references: [u32; 4] = [1, 2, 4, 8]; - let total_reference: u32 = references.iter().sum(); - - let max_val = (1u64 << bit_width).saturating_sub(1); - let expected: Vec = (0..len) - .map(|i| ((i as u64) % (max_val + 1)) as u32 + total_reference) - .collect(); - - let bitpacked = make_bitpacked_array_u32(bit_width, len); - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let packed = bitpacked.packed().clone(); - let device_input = futures::executor::block_on(cuda_ctx.ensure_on_device(packed))?; - let input_ptr = device_input.cuda_device_ptr()?; - - let scalar_ops: Vec = references - .iter() - .map(|&r| ScalarOp::frame_of_ref(r as u64)) - .collect(); - - let plan = CudaDispatchPlan::new([Stage::new( - input_ptr, - 0, - len as u32, - SourceOp::bitunpack(bit_width, 0), - &scalar_ops, - )]); - assert_eq!(plan.stages[0].num_scalar_ops, 4); - - let smem_bytes = (SMEM_TILE_SIZE) * size_of::() as u32; - let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan, smem_bytes)?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[crate::test] - fn test_plan_structure() { - // Stage 0: input dict values (BP→FoR) into smem[0..256) - // Stage 1: output codes (BP→FoR→DICT) into smem[256..2304), gather from smem[0] - let plan = CudaDispatchPlan::new([ - Stage::new( - 0xAAAA, - 0, - 256, - SourceOp::bitunpack(4, 0), - &[ScalarOp::frame_of_ref(10)], - ), - Stage::new( - 0xBBBB, - 256, - 1024, - SourceOp::bitunpack(6, 0), - &[ScalarOp::frame_of_ref(42), ScalarOp::dict(0)], - ), - ]); - - assert_eq!(plan.num_stages, 2); - - // Input stage - assert_eq!(plan.stages[0].smem_offset, 0); - assert_eq!(plan.stages[0].len, 256); - assert_eq!(plan.stages[0].input_ptr, 0xAAAA); - - // Output stage - assert_eq!(plan.stages[1].smem_offset, 256); - assert_eq!(plan.stages[1].len, SMEM_TILE_SIZE); - assert_eq!(plan.stages[1].input_ptr, 0xBBBB); - assert_eq!(plan.stages[1].num_scalar_ops, 2); - assert_eq!( - unsafe { plan.stages[1].scalar_ops[1].params.dict.values_smem_offset }, - 0 - ); - } - - /// Copy a raw u32 slice to device memory and return (device_ptr, handle). - fn copy_raw_to_device( - cuda_ctx: &CudaExecutionCtx, - data: &[u32], - ) -> VortexResult<(u64, Arc>)> { - let device_buf = Arc::new(cuda_ctx.stream().clone_htod(data).expect("htod")); - let (ptr, _) = device_buf.device_ptr(cuda_ctx.stream()); - Ok((ptr, device_buf)) - } - - #[crate::test] - fn test_load_for_zigzag_alp() -> VortexResult<()> { - // Max scalar ops depth with LOAD source: LOAD → FoR → ZigZag → ALP - // (Exercises all four scalar op types without DICT) - let len = 2048; - let reference = 5u32; - let alp_f = 10.0f32; - let alp_e = 0.1f32; - - let data: Vec = (0..len).map(|i| (i as u32) % 64).collect(); - let expected: Vec = data - .iter() - .map(|&v| { - let after_for = v + reference; - let after_zz = (after_for >> 1) ^ (0u32.wrapping_sub(after_for & 1)); - let float_val = (after_zz as i32) as f32 * alp_f * alp_e; - float_val.to_bits() - }) - .collect(); - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let (input_ptr, _di) = copy_raw_to_device(&cuda_ctx, &data)?; - - let plan = CudaDispatchPlan::new([Stage::new( - input_ptr, - 0, - len as u32, - SourceOp::load(), - &[ - ScalarOp::frame_of_ref(reference as u64), - ScalarOp::zigzag(), - ScalarOp::alp(alp_f, alp_e), - ], - )]); - - let smem_bytes = (100 + SMEM_TILE_SIZE) * size_of::() as u32; - let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan, smem_bytes)?; - assert_eq!(actual, expected); - - Ok(()) - } - - /// Runs a dynamic dispatch plan on the GPU. - fn run_dynamic_dispatch_plan( - cuda_ctx: &CudaExecutionCtx, - output_len: usize, - plan: &CudaDispatchPlan, - shared_mem_bytes: u32, - ) -> VortexResult> { - let output_slice = cuda_ctx - .device_alloc::(output_len) - .vortex_expect("alloc output"); - let output_buf = CudaDeviceBuffer::new(output_slice); - let output_view = output_buf.as_view::(); - let (output_ptr, record_output) = output_view.device_ptr(cuda_ctx.stream()); - - let device_plan = Arc::new( - cuda_ctx - .stream() - .clone_htod(std::slice::from_ref(plan)) - .expect("copy plan to device"), - ); - let (plan_ptr, record_plan) = device_plan.device_ptr(cuda_ctx.stream()); - let array_len_u64 = output_len as u64; - - cuda_ctx.stream().synchronize().expect("sync"); - - let cuda_function = cuda_ctx - .load_function("dynamic_dispatch", &[PType::U32]) - .vortex_expect("load kernel"); - let mut launch_builder = cuda_ctx.launch_builder(&cuda_function); - launch_builder.arg(&output_ptr); - launch_builder.arg(&array_len_u64); - launch_builder.arg(&plan_ptr); - - let num_blocks = u32::try_from(output_len.div_ceil(2048))?; - let config = LaunchConfig { - grid_dim: (num_blocks, 1, 1), - block_dim: (64, 1, 1), - shared_mem_bytes, - }; - unsafe { - launch_builder.launch(config).expect("kernel launch"); - } - drop((record_output, record_plan)); - - Ok(cuda_ctx - .stream() - .clone_dtoh(&output_buf.as_view::()) - .expect("copy back")) - } - - fn run_dispatch_plan_f32( - cuda_ctx: &CudaExecutionCtx, - output_len: usize, - plan: &CudaDispatchPlan, - shared_mem_bytes: u32, - ) -> VortexResult> { - let actual = run_dynamic_dispatch_plan(cuda_ctx, output_len, plan, shared_mem_bytes)?; - // SAFETY: f32 and u32 have identical size and alignment. - Ok(unsafe { std::mem::transmute::, Vec>(actual) }) - } - - #[crate::test] - fn test_bitpacked() -> VortexResult<()> { - let bit_width: u8 = 10; - let len = 3000; - let max_val = (1u64 << bit_width).saturating_sub(1); - let expected: Vec = (0..len) - .map(|i| ((i as u64) % (max_val + 1)) as u32) - .collect(); - - let bp = make_bitpacked_array_u32(bit_width, len); - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&bp.into_array())?.materialize(&cuda_ctx)?; - - let actual = - run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[crate::test] - fn test_for_bitpacked() -> VortexResult<()> { - let bit_width: u8 = 6; - let len = 3000; - let reference = 42u32; - let max_val = (1u64 << bit_width).saturating_sub(1); - - let raw: Vec = (0..len) - .map(|i| ((i as u64) % (max_val + 1)) as u32) - .collect(); - let expected: Vec = raw.iter().map(|&v| v + reference).collect(); - - let bp = make_bitpacked_array_u32(bit_width, len); - let for_arr = FoRArray::try_new(bp.into_array(), Scalar::from(reference))?; - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&for_arr.into_array())?.materialize(&cuda_ctx)?; - - let actual = - run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[crate::test] - fn test_runend() -> VortexResult<()> { - let ends: Vec = vec![1000, 2000, 3000]; - let values: Vec = vec![10, 20, 30]; - let len = 3000; - - let mut expected = Vec::with_capacity(len); - for i in 0..len { - let run = ends.iter().position(|&e| (i as u32) < e).unwrap(); - expected.push(values[run]); - } - - let ends_arr = PrimitiveArray::new(Buffer::from(ends), NonNullable).into_array(); - let values_arr = PrimitiveArray::new(Buffer::from(values), NonNullable).into_array(); - let re = RunEndArray::new(ends_arr, values_arr); - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&re.into_array())?.materialize(&cuda_ctx)?; - - let actual = - run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[crate::test] - fn test_dict_for_bp_values_bp_codes() -> VortexResult<()> { - // Dict where both codes and values are BitPacked+FoR. - let dict_reference = 1_000_000u32; - let dict_residuals: Vec = (0..64).collect(); - let dict_expected: Vec = dict_residuals.iter().map(|&r| r + dict_reference).collect(); - let dict_size = dict_residuals.len(); - - let len = 3000; - let codes: Vec = (0..len).map(|i| (i % dict_size) as u32).collect(); - let expected: Vec = codes.iter().map(|&c| dict_expected[c as usize]).collect(); - - // BitPack+FoR the dict values - let dict_prim = PrimitiveArray::new(Buffer::from(dict_residuals), NonNullable); - let dict_bp = BitPackedArray::encode(&dict_prim.into_array(), 6)?; - let dict_for = FoRArray::try_new(dict_bp.into_array(), Scalar::from(dict_reference))?; - - // BitPack the codes - let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); - let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), 6)?; - - let dict = DictArray::try_new(codes_bp.into_array(), dict_for.into_array())?; - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&dict.into_array())?.materialize(&cuda_ctx)?; - - let actual = - run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[crate::test] - fn test_alp_for_bitpacked() -> VortexResult<()> { - // ALP(FoR(BitPacked)): encode each layer, then reassemble the tree - // bottom-up because encode() methods produce flat outputs. - let len = 3000; - let exponents = Exponents { e: 2, f: 0 }; - let floats: Vec = (0..len) - .map(|i| ::decode_single(10 + (i as i32 % 64), exponents)) - .collect(); - let float_prim = PrimitiveArray::new(Buffer::from(floats.clone()), NonNullable); - - let alp = alp_encode(&float_prim, Some(exponents))?; - assert!(alp.patches().is_none()); - let for_arr = FoRArray::encode(alp.encoded().to_primitive())?; - let bp = BitPackedArray::encode(for_arr.encoded(), 6)?; - - let tree = ALPArray::new( - FoRArray::try_new(bp.into_array(), for_arr.reference_scalar().clone())?.into_array(), - exponents, - None, - ); - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&tree.into_array())?.materialize(&cuda_ctx)?; - - let actual = - run_dispatch_plan_f32(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; - assert_eq!(actual, floats); - - Ok(()) - } - - #[crate::test] - fn test_zigzag_bitpacked() -> VortexResult<()> { - // ZigZag(BitPacked): unpack then zigzag-decode. - let bit_width: u8 = 4; - let len = 3000; - let max_val = (1u64 << bit_width).saturating_sub(1); - - let raw: Vec = (0..len) - .map(|i| ((i as u64) % (max_val + 1)) as u32) - .collect(); - let expected: Vec = raw - .iter() - .map(|&v| (v >> 1) ^ (0u32.wrapping_sub(v & 1))) - .collect(); - - let prim = PrimitiveArray::new(Buffer::from(raw), NonNullable); - let bp = BitPackedArray::encode(&prim.into_array(), bit_width)?; - let zz = ZigZagArray::try_new(bp.into_array())?; - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&zz.into_array())?.materialize(&cuda_ctx)?; - - let actual = - run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[crate::test] - fn test_for_runend() -> VortexResult<()> { - // FoR(RunEnd): expand runs then add constant. - let ends: Vec = vec![500, 1000, 1500, 2000, 2500, 3000]; - let values: Vec = vec![1, 2, 3, 4, 5, 6]; - let len = 3000; - let reference = 1000u32; - - let mut expected = Vec::with_capacity(len); - for i in 0..len { - let run = ends.iter().position(|&e| (i as u32) < e).unwrap(); - expected.push(values[run] + reference); - } - - let ends_arr = PrimitiveArray::new(Buffer::from(ends), NonNullable).into_array(); - let values_arr = PrimitiveArray::new(Buffer::from(values), NonNullable).into_array(); - let re = RunEndArray::new(ends_arr, values_arr); - let for_arr = FoRArray::try_new(re.into_array(), Scalar::from(reference))?; - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&for_arr.into_array())?.materialize(&cuda_ctx)?; - - let actual = - run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[crate::test] - fn test_for_dict() -> VortexResult<()> { - // FoR(Dict(codes=Primitive, values=Primitive)): gather then add constant. - let dict_values: Vec = vec![100, 200, 300, 400]; - let dict_size = dict_values.len(); - let reference = 5000u32; - let len = 3000; - - let codes: Vec = (0..len).map(|i| (i % dict_size) as u32).collect(); - let expected: Vec = codes - .iter() - .map(|&c| dict_values[c as usize] + reference) - .collect(); - - let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); - let values_prim = PrimitiveArray::new(Buffer::from(dict_values), NonNullable); - let dict = DictArray::try_new(codes_prim.into_array(), values_prim.into_array())?; - let for_arr = FoRArray::try_new(dict.into_array(), Scalar::from(reference))?; - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&for_arr.into_array())?.materialize(&cuda_ctx)?; - - let actual = - run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[crate::test] - fn test_dict_for_bp_codes() -> VortexResult<()> { - // Dict(codes=FoR(BitPacked), values=primitive) - let dict_values: Vec = (0..8).map(|i| i * 1000 + 7).collect(); - let dict_size = dict_values.len(); - let len = 3000; - let codes: Vec = (0..len).map(|i| (i % dict_size) as u32).collect(); - let expected: Vec = codes.iter().map(|&c| dict_values[c as usize]).collect(); - - // BitPack codes, then wrap in FoR (reference=0 so values unchanged) - let bit_width: u8 = 3; - let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); - let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), bit_width)?; - let codes_for = FoRArray::try_new(codes_bp.into_array(), Scalar::from(0u32))?; - - let values_prim = PrimitiveArray::new(Buffer::from(dict_values), NonNullable); - let dict = DictArray::try_new(codes_for.into_array(), values_prim.into_array())?; - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&dict.into_array())?.materialize(&cuda_ctx)?; - - let actual = - run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[crate::test] - fn test_dict_primitive_values_bp_codes() -> VortexResult<()> { - let dict_values: Vec = vec![100, 200, 300, 400]; - let dict_size = dict_values.len(); - let len = 3000; - let codes: Vec = (0..len).map(|i| (i % dict_size) as u32).collect(); - let expected: Vec = codes.iter().map(|&c| dict_values[c as usize]).collect(); - - let bit_width: u8 = 2; - let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); - let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), bit_width)?; - let values_prim = PrimitiveArray::new(Buffer::from(dict_values), NonNullable); - - let dict = DictArray::try_new(codes_bp.into_array(), values_prim.into_array())?; - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&dict.into_array())?.materialize(&cuda_ctx)?; - - let actual = - run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[crate::test] - fn test_dict_mismatched_ptypes_rejected() -> VortexResult<()> { - let dict_values: Vec = vec![100, 200, 300, 400]; - let len = 3000; - let codes: Vec = (0..len).map(|i| (i % dict_values.len()) as u8).collect(); - - let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); - let values_prim = PrimitiveArray::new(Buffer::from(dict_values), NonNullable); - let dict = DictArray::try_new(codes_prim.into_array(), values_prim.into_array())?; - - // UnmaterializedPlan::new should fail because u8 codes != u32 values in byte width. - assert!(UnmaterializedPlan::new(&dict.into_array()).is_err()); - - Ok(()) - } - - #[crate::test] - fn test_runend_mismatched_ptypes_rejected() -> VortexResult<()> { - let ends: Vec = vec![1000, 2000, 3000]; - let values: Vec = vec![10, 20, 30]; - - let ends_arr = PrimitiveArray::new(Buffer::from(ends), NonNullable).into_array(); - let values_arr = PrimitiveArray::new(Buffer::from(values), NonNullable).into_array(); - let re = RunEndArray::new(ends_arr, values_arr); - - // UnmaterializedPlan::new should fail because u64 ends != i32 values in byte width. - assert!(UnmaterializedPlan::new(&re.into_array()).is_err()); - - Ok(()) - } - - #[rstest] - #[case(0, 1024)] - #[case(0, 3000)] - #[case(0, 4096)] - #[case(500, 600)] - #[case(500, 1024)] - #[case(500, 2048)] - #[case(500, 4500)] - #[case(777, 3333)] - #[case(1024, 2048)] - #[case(1024, 4096)] - #[case(1500, 3500)] - #[case(2048, 4096)] - #[case(2500, 4500)] - #[case(3333, 4444)] - #[crate::test] - fn test_sliced_primitive( - #[case] slice_start: usize, - #[case] slice_end: usize, - ) -> VortexResult<()> { - let len = 5000; - let data: Vec = (0..len).map(|i| (i * 7) % 1000).collect(); - - let prim = PrimitiveArray::new(Buffer::from(data.clone()), NonNullable); - - let sliced = prim.into_array().slice(slice_start..slice_end)?; - - let expected: Vec = data[slice_start..slice_end].to_vec(); - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; - - let actual = run_dynamic_dispatch_plan( - &cuda_ctx, - expected.len(), - &plan.dispatch_plan, - plan.shared_mem_bytes, - )?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[rstest] - #[case(0, 1024)] - #[case(0, 3000)] - #[case(0, 4096)] - #[case(500, 600)] - #[case(500, 1024)] - #[case(500, 2048)] - #[case(500, 4500)] - #[case(777, 3333)] - #[case(1024, 2048)] - #[case(1024, 4096)] - #[case(1500, 3500)] - #[case(2048, 4096)] - #[case(2500, 4500)] - #[case(3333, 4444)] - #[crate::test] - fn test_sliced_zigzag_bitpacked( - #[case] slice_start: usize, - #[case] slice_end: usize, - ) -> VortexResult<()> { - let bit_width = 10u8; - let max_val = (1u32 << bit_width) - 1; - let len = 5000; - - let raw: Vec = (0..len).map(|i| (i as u32) % max_val).collect(); - let all_decoded: Vec = raw - .iter() - .map(|&v| (v >> 1) ^ (0u32.wrapping_sub(v & 1))) - .collect(); - - let prim = PrimitiveArray::new(Buffer::from(raw), NonNullable); - let bp = BitPackedArray::encode(&prim.into_array(), bit_width)?; - let zz = ZigZagArray::try_new(bp.into_array())?; - - let sliced = zz.into_array().slice(slice_start..slice_end)?; - let expected: Vec = all_decoded[slice_start..slice_end].to_vec(); - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; - - let actual = run_dynamic_dispatch_plan( - &cuda_ctx, - expected.len(), - &plan.dispatch_plan, - plan.shared_mem_bytes, - )?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[rstest] - #[case(0, 1024)] - #[case(0, 3000)] - #[case(0, 4096)] - #[case(500, 600)] - #[case(500, 1024)] - #[case(500, 2048)] - #[case(500, 4500)] - #[case(777, 3333)] - #[case(1024, 2048)] - #[case(1024, 4096)] - #[case(1500, 3500)] - #[case(2048, 4096)] - #[case(2500, 4500)] - #[case(3333, 4444)] - #[crate::test] - fn test_sliced_dict_with_primitive_codes( - #[case] slice_start: usize, - #[case] slice_end: usize, - ) -> VortexResult<()> { - let dict_values: Vec = vec![100, 200, 300, 400, 500]; - let dict_size = dict_values.len(); - let len = 5000; - let codes: Vec = (0..len).map(|i| (i % dict_size) as u32).collect(); - - let codes_prim = PrimitiveArray::new(Buffer::from(codes.clone()), NonNullable); - let values_prim = PrimitiveArray::new(Buffer::from(dict_values.clone()), NonNullable); - let dict = DictArray::try_new(codes_prim.into_array(), values_prim.into_array())?; - - let sliced = dict.into_array().slice(slice_start..slice_end)?; - - let expected: Vec = codes[slice_start..slice_end] - .iter() - .map(|&c| dict_values[c as usize]) - .collect(); - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; - - let actual = run_dynamic_dispatch_plan( - &cuda_ctx, - expected.len(), - &plan.dispatch_plan, - plan.shared_mem_bytes, - )?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[rstest] - #[case(0, 1024)] - #[case(0, 3000)] - #[case(0, 4096)] - #[case(500, 600)] - #[case(500, 1024)] - #[case(500, 2048)] - #[case(500, 4500)] - #[case(777, 3333)] - #[case(1024, 2048)] - #[case(1024, 4096)] - #[case(1500, 3500)] - #[case(2048, 4096)] - #[case(2500, 4500)] - #[case(3333, 4444)] - #[crate::test] - fn test_sliced_bitpacked( - #[case] slice_start: usize, - #[case] slice_end: usize, - ) -> VortexResult<()> { - let bit_width = 10u8; - let max_val = (1u32 << bit_width) - 1; - let len = 5000; - - let data: Vec = (0..len).map(|i| (i as u32) % max_val).collect(); - let prim = PrimitiveArray::new(Buffer::from(data.clone()), NonNullable); - let bp = BitPackedArray::encode(&prim.into_array(), bit_width)?; - - let sliced = bp.into_array().slice(slice_start..slice_end)?; - let expected: Vec = data[slice_start..slice_end].to_vec(); - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; - - let actual = run_dynamic_dispatch_plan( - &cuda_ctx, - expected.len(), - &plan.dispatch_plan, - plan.shared_mem_bytes, - )?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[rstest] - #[case(0, 1024)] - #[case(0, 3000)] - #[case(0, 4096)] - #[case(500, 600)] - #[case(500, 1024)] - #[case(500, 2048)] - #[case(500, 4500)] - #[case(777, 3333)] - #[case(1024, 2048)] - #[case(1024, 4096)] - #[case(1500, 3500)] - #[case(2048, 4096)] - #[case(2500, 4500)] - #[case(3333, 4444)] - #[crate::test] - fn test_sliced_for_bitpacked( - #[case] slice_start: usize, - #[case] slice_end: usize, - ) -> VortexResult<()> { - let reference = 100u32; - let bit_width = 10u8; - let max_val = (1u32 << bit_width) - 1; - let len = 5000; - - let encoded_data: Vec = (0..len).map(|i| (i as u32) % max_val).collect(); - let prim = PrimitiveArray::new(Buffer::from(encoded_data.clone()), NonNullable); - let bp = BitPackedArray::encode(&prim.into_array(), bit_width)?; - let for_arr = FoRArray::try_new(bp.into_array(), Scalar::from(reference))?; - - let all_decoded: Vec = encoded_data.iter().map(|&v| v + reference).collect(); - - let sliced = for_arr.into_array().slice(slice_start..slice_end)?; - let expected: Vec = all_decoded[slice_start..slice_end].to_vec(); - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; - - let actual = run_dynamic_dispatch_plan( - &cuda_ctx, - expected.len(), - &plan.dispatch_plan, - plan.shared_mem_bytes, - )?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[rstest] - #[case(0, 1024)] - #[case(0, 3000)] - #[case(0, 4096)] - #[case(400, 600)] - #[case(500, 1024)] - #[case(500, 2048)] - #[case(500, 4500)] - #[case(777, 3333)] - #[case(1024, 2048)] - #[case(1024, 4096)] - #[case(1500, 3500)] - #[case(2048, 4096)] - #[case(2500, 4500)] - #[case(3333, 4444)] - #[crate::test] - fn test_sliced_runend( - #[case] slice_start: usize, - #[case] slice_end: usize, - ) -> VortexResult<()> { - let ends: Vec = vec![500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000]; - let values: Vec = vec![10, 20, 30, 40, 50, 60, 70, 80, 90, 100]; - let len = 5000; - - let all_decoded: Vec = (0..len) - .map(|i| { - let run = ends.iter().position(|&e| (i as u32) < e).unwrap(); - values[run] - }) - .collect(); - - let ends_arr = PrimitiveArray::new(Buffer::from(ends), NonNullable).into_array(); - let values_arr = PrimitiveArray::new(Buffer::from(values), NonNullable).into_array(); - let re = RunEndArray::new(ends_arr, values_arr); - - let sliced = re.into_array().slice(slice_start..slice_end)?; - let expected: Vec = all_decoded[slice_start..slice_end].to_vec(); - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; - - let actual = run_dynamic_dispatch_plan( - &cuda_ctx, - expected.len(), - &plan.dispatch_plan, - plan.shared_mem_bytes, - )?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[rstest] - #[case(0, 1024)] - #[case(0, 3000)] - #[case(0, 4096)] - #[case(500, 600)] - #[case(500, 1024)] - #[case(500, 2048)] - #[case(500, 4500)] - #[case(777, 3333)] - #[case(1024, 2048)] - #[case(1024, 4096)] - #[case(1500, 3500)] - #[case(2048, 4096)] - #[case(2500, 4500)] - #[case(3333, 4444)] - #[crate::test] - fn test_sliced_dict_for_bp_values_bp_codes( - #[case] slice_start: usize, - #[case] slice_end: usize, - ) -> VortexResult<()> { - let dict_reference = 1_000_000u32; - let dict_residuals: Vec = (0..64).collect(); - let dict_expected: Vec = dict_residuals.iter().map(|&r| r + dict_reference).collect(); - let dict_size = dict_residuals.len(); - - let len = 5000; - let codes: Vec = (0..len).map(|i| (i % dict_size) as u32).collect(); - let all_decoded: Vec = codes.iter().map(|&c| dict_expected[c as usize]).collect(); - - // BitPack+FoR the dict values - let dict_prim = PrimitiveArray::new(Buffer::from(dict_residuals), NonNullable); - let dict_bp = BitPackedArray::encode(&dict_prim.into_array(), 6)?; - let dict_for = FoRArray::try_new(dict_bp.into_array(), Scalar::from(dict_reference))?; - - // BitPack the codes - let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); - let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), 6)?; - - let dict = DictArray::try_new(codes_bp.into_array(), dict_for.into_array())?; - - let sliced = dict.into_array().slice(slice_start..slice_end)?; - let expected: Vec = all_decoded[slice_start..slice_end].to_vec(); - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; - - let actual = run_dynamic_dispatch_plan( - &cuda_ctx, - expected.len(), - &plan.dispatch_plan, - plan.shared_mem_bytes, - )?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[rstest] - #[case(0u32, 1u32, 100)] - #[case(5u32, 3u32, 2048)] - #[case(0u32, 1u32, 4096)] - #[case(100u32, 7u32, 5000)] - #[crate::test] - fn test_sequence_unsigned( - #[case] base: u32, - #[case] multiplier: u32, - #[case] len: usize, - ) -> VortexResult<()> { - use vortex::dtype::Nullability; - use vortex::encodings::sequence::SequenceArray; - - let expected: Vec = (0..len).map(|i| base + (i as u32) * multiplier).collect(); - - let seq = SequenceArray::try_new_typed(base, multiplier, Nullability::NonNullable, len)?; - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&seq.into_array())?.materialize(&cuda_ctx)?; - - let actual = run_dynamic_dispatch_plan( - &cuda_ctx, - expected.len(), - &plan.dispatch_plan, - plan.shared_mem_bytes, - )?; - assert_eq!(actual, expected); - - Ok(()) - } - - #[rstest] - #[case(0i32, 1i32, 100)] - #[case(-10i32, 3i32, 2048)] - #[case(100i32, -1i32, 100)] - #[case(-500i32, -7i32, 50)] - #[case(0i32, 1i32, 5000)] - #[crate::test] - fn test_sequence_signed( - #[case] base: i32, - #[case] multiplier: i32, - #[case] len: usize, - ) -> VortexResult<()> { - use vortex::dtype::Nullability; - use vortex::encodings::sequence::SequenceArray; - - let expected: Vec = (0..len).map(|i| base + (i as i32) * multiplier).collect(); - - let seq = SequenceArray::try_new_typed(base, multiplier, Nullability::NonNullable, len)?; - - let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; - let plan = UnmaterializedPlan::new(&seq.into_array())?.materialize(&cuda_ctx)?; - - let actual_u32 = run_dynamic_dispatch_plan( - &cuda_ctx, - expected.len(), - &plan.dispatch_plan, - plan.shared_mem_bytes, - )?; - let actual: Vec = actual_u32.into_iter().map(|v| v as i32).collect(); - assert_eq!(actual, expected); - - Ok(()) - } -} +// #[cfg(test)] +// mod tests { +// use std::sync::Arc; +// +// use cudarc::driver::DevicePtr; +// use cudarc::driver::LaunchConfig; +// use cudarc::driver::PushKernelArg; +// use rstest::rstest; +// use vortex::array::IntoArray; +// use vortex::array::ToCanonical; +// use vortex::array::arrays::DictArray; +// use vortex::array::arrays::PrimitiveArray; +// use vortex::array::scalar::Scalar; +// use vortex::array::validity::Validity::NonNullable; +// use vortex::buffer::Buffer; +// use vortex::dtype::PType; +// use vortex::encodings::alp::ALPArray; +// use vortex::encodings::alp::ALPFloat; +// use vortex::encodings::alp::Exponents; +// use vortex::encodings::alp::alp_encode; +// use vortex::encodings::fastlanes::BitPackedArray; +// use vortex::encodings::fastlanes::FoRArray; +// use vortex::encodings::fastlanes::bitpack_compress::BitPackedEncoder; +// use vortex::encodings::runend::RunEndArray; +// use vortex::encodings::zigzag::ZigZagArray; +// use vortex::error::VortexExpect; +// use vortex::error::VortexResult; +// use vortex::session::VortexSession; +// use vortex_array::ArrayRef; +// +// use super::DynamicDispatchPlan; +// use super::SMEM_TILE_SIZE; +// use super::ScalarOp; +// use super::SourceOp; +// use super::Stage; +// use super::UnmaterializedPlan; +// use crate::CudaBufferExt; +// use crate::CudaDeviceBuffer; +// use crate::CudaExecutionCtx; +// use crate::session::CudaSession; +// +// fn make_bitpacked_array_u32(bit_width: u8, len: usize) -> BitPackedArray { +// let max_val = (1u64 << bit_width).saturating_sub(1); +// let values: Vec = (0..len) +// .map(|i| ((i as u64) % (max_val + 1)) as u32) +// .collect(); +// let primitive = PrimitiveArray::new(Buffer::from(values), NonNullable); +// BitPackedEncoder::new(&primitive) +// .with_bit_width(bit_width) +// .pack() +// .unwrap() +// .into_array() +// .unwrap() +// } +// +// #[crate::test] +// fn test_max_scalar_ops() -> VortexResult<()> { +// let bit_width: u8 = 6; +// let len = 2050; +// let references: [u32; 4] = [1, 2, 4, 8]; +// let total_reference: u32 = references.iter().sum(); +// +// let max_val = (1u64 << bit_width).saturating_sub(1); +// let expected: Vec = (0..len) +// .map(|i| ((i as u64) % (max_val + 1)) as u32 + total_reference) +// .collect(); +// +// let bitpacked = make_bitpacked_array_u32(bit_width, len); +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let packed = bitpacked.packed().clone(); +// let device_input = futures::executor::block_on(cuda_ctx.ensure_on_device(packed))?; +// let input_ptr = device_input.cuda_device_ptr()?; +// +// let scalar_ops: Vec = references +// .iter() +// .map(|&r| ScalarOp::frame_of_ref(r as u64)) +// .collect(); +// +// let plan = CudaDispatchPlan::new([Stage::new( +// input_ptr, +// 0, +// len as u32, +// SourceOp::bitunpack(bit_width, 0), +// &scalar_ops, +// )]); +// assert_eq!(plan.stages[0].num_scalar_ops, 4); +// +// let smem_bytes = (SMEM_TILE_SIZE) * size_of::() as u32; +// let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan, smem_bytes)?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[crate::test] +// fn test_plan_structure() { +// // Stage 0: input dict values (BP→FoR) into smem[0..256) +// // Stage 1: output codes (BP→FoR→DICT) into smem[256..2304), gather from smem[0] +// let plan = CudaDispatchPlan::new([ +// Stage::new( +// 0xAAAA, +// 0, +// 256, +// SourceOp::bitunpack(4, 0), +// &[ScalarOp::frame_of_ref(10)], +// ), +// Stage::new( +// 0xBBBB, +// 256, +// 1024, +// SourceOp::bitunpack(6, 0), +// &[ScalarOp::frame_of_ref(42), ScalarOp::dict(0)], +// ), +// ]); +// +// assert_eq!(plan.num_stages, 2); +// +// // Input stage +// assert_eq!(plan.stages[0].smem_offset, 0); +// assert_eq!(plan.stages[0].len, 256); +// assert_eq!(plan.stages[0].input_ptr, 0xAAAA); +// +// // Output stage +// assert_eq!(plan.stages[1].smem_offset, 256); +// assert_eq!(plan.stages[1].len, SMEM_TILE_SIZE); +// assert_eq!(plan.stages[1].input_ptr, 0xBBBB); +// assert_eq!(plan.stages[1].num_scalar_ops, 2); +// assert_eq!( +// unsafe { plan.stages[1].scalar_ops[1].params.dict.values_smem_offset }, +// 0 +// ); +// } +// +// /// Copy a raw u32 slice to device memory and return (device_ptr, handle). +// fn copy_raw_to_device( +// cuda_ctx: &CudaExecutionCtx, +// data: &[u32], +// ) -> VortexResult<(u64, Arc>)> { +// let device_buf = Arc::new(cuda_ctx.stream().clone_htod(data).expect("htod")); +// let (ptr, _) = device_buf.device_ptr(cuda_ctx.stream()); +// Ok((ptr, device_buf)) +// } +// +// #[crate::test] +// fn test_load_for_zigzag_alp() -> VortexResult<()> { +// // Max scalar ops depth with LOAD source: LOAD → FoR → ZigZag → ALP +// // (Exercises all four scalar op types without DICT) +// let len = 2048; +// let reference = 5u32; +// let alp_f = 10.0f32; +// let alp_e = 0.1f32; +// +// let data: Vec = (0..len).map(|i| (i as u32) % 64).collect(); +// let expected: Vec = data +// .iter() +// .map(|&v| { +// let after_for = v + reference; +// let after_zz = (after_for >> 1) ^ (0u32.wrapping_sub(after_for & 1)); +// let float_val = (after_zz as i32) as f32 * alp_f * alp_e; +// float_val.to_bits() +// }) +// .collect(); +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let (input_ptr, _di) = copy_raw_to_device(&cuda_ctx, &data)?; +// +// let plan = CudaDispatchPlan::new([Stage::new( +// input_ptr, +// 0, +// len as u32, +// SourceOp::load(), +// &[ +// ScalarOp::frame_of_ref(reference as u64), +// ScalarOp::zigzag(), +// ScalarOp::alp(alp_f, alp_e), +// ], +// )]); +// +// let smem_bytes = (100 + SMEM_TILE_SIZE) * size_of::() as u32; +// let actual = run_dynamic_dispatch_plan(&cuda_ctx, len, &plan, smem_bytes)?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// /// Runs a dynamic dispatch plan on the GPU. +// fn run_dynamic_dispatch_plan( +// cuda_ctx: &CudaExecutionCtx, +// output_len: usize, +// plan: &CudaDispatchPlan, +// shared_mem_bytes: u32, +// ) -> VortexResult> { +// let output_slice = cuda_ctx +// .device_alloc::(output_len) +// .vortex_expect("alloc output"); +// let output_buf = CudaDeviceBuffer::new(output_slice); +// let output_view = output_buf.as_view::(); +// let (output_ptr, record_output) = output_view.device_ptr(cuda_ctx.stream()); +// +// let device_plan = Arc::new( +// cuda_ctx +// .stream() +// .clone_htod(std::slice::from_ref(plan)) +// .expect("copy plan to device"), +// ); +// let (plan_ptr, record_plan) = device_plan.device_ptr(cuda_ctx.stream()); +// let array_len_u64 = output_len as u64; +// +// cuda_ctx.stream().synchronize().expect("sync"); +// +// let cuda_function = cuda_ctx +// .load_function("dynamic_dispatch", &[PType::U32]) +// .vortex_expect("load kernel"); +// let mut launch_builder = cuda_ctx.launch_builder(&cuda_function); +// launch_builder.arg(&output_ptr); +// launch_builder.arg(&array_len_u64); +// launch_builder.arg(&plan_ptr); +// +// let num_blocks = u32::try_from(output_len.div_ceil(2048))?; +// let config = LaunchConfig { +// grid_dim: (num_blocks, 1, 1), +// block_dim: (64, 1, 1), +// shared_mem_bytes, +// }; +// unsafe { +// launch_builder.launch(config).expect("kernel launch"); +// } +// drop((record_output, record_plan)); +// +// Ok(cuda_ctx +// .stream() +// .clone_dtoh(&output_buf.as_view::()) +// .expect("copy back")) +// } +// +// fn run_dispatch_plan_f32( +// cuda_ctx: &CudaExecutionCtx, +// output_len: usize, +// plan: &CudaDispatchPlan, +// shared_mem_bytes: u32, +// ) -> VortexResult> { +// let actual = run_dynamic_dispatch_plan(cuda_ctx, output_len, plan, shared_mem_bytes)?; +// // SAFETY: f32 and u32 have identical size and alignment. +// Ok(unsafe { std::mem::transmute::, Vec>(actual) }) +// } +// +// #[crate::test] +// fn test_bitpacked() -> VortexResult<()> { +// let bit_width: u8 = 10; +// let len = 3000; +// let max_val = (1u64 << bit_width).saturating_sub(1); +// let expected: Vec = (0..len) +// .map(|i| ((i as u64) % (max_val + 1)) as u32) +// .collect(); +// +// let bp = make_bitpacked_array_u32(bit_width, len); +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&bp.into_array())?.materialize(&cuda_ctx)?; +// +// let actual = +// run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[crate::test] +// fn test_for_bitpacked() -> VortexResult<()> { +// let bit_width: u8 = 6; +// let len = 3000; +// let reference = 42u32; +// let max_val = (1u64 << bit_width).saturating_sub(1); +// +// let raw: Vec = (0..len) +// .map(|i| ((i as u64) % (max_val + 1)) as u32) +// .collect(); +// let expected: Vec = raw.iter().map(|&v| v + reference).collect(); +// +// let bp = make_bitpacked_array_u32(bit_width, len); +// let for_arr = FoRArray::try_new(bp, Scalar::from(reference))?; +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&for_arr.into_array())?.materialize(&cuda_ctx)?; +// +// let actual = +// run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[crate::test] +// fn test_runend() -> VortexResult<()> { +// let ends: Vec = vec![1000, 2000, 3000]; +// let values: Vec = vec![10, 20, 30]; +// let len = 3000; +// +// let mut expected = Vec::with_capacity(len); +// for i in 0..len { +// let run = ends.iter().position(|&e| (i as u32) < e).unwrap(); +// expected.push(values[run]); +// } +// +// let ends_arr = PrimitiveArray::new(Buffer::from(ends), NonNullable).into_array(); +// let values_arr = PrimitiveArray::new(Buffer::from(values), NonNullable).into_array(); +// let re = RunEndArray::new(ends_arr, values_arr); +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&re.into_array())?.materialize(&cuda_ctx)?; +// +// let actual = +// run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[crate::test] +// fn test_dict_for_bp_values_bp_codes() -> VortexResult<()> { +// // Dict where both codes and values are BitPacked+FoR. +// let dict_reference = 1_000_000u32; +// let dict_residuals: Vec = (0..64).collect(); +// let dict_expected: Vec = dict_residuals.iter().map(|&r| r + dict_reference).collect(); +// let dict_size = dict_residuals.len(); +// +// let len = 3000; +// let codes: Vec = (0..len).map(|i| (i % dict_size) as u32).collect(); +// let expected: Vec = codes.iter().map(|&c| dict_expected[c as usize]).collect(); +// +// // BitPack+FoR the dict values +// let dict_prim = PrimitiveArray::new(Buffer::from(dict_residuals), NonNullable); +// let dict_bp = BitPackedArray::encode(&dict_prim.into_array(), 6)?; +// let dict_for = FoRArray::try_new(dict_bp.into_array(), Scalar::from(dict_reference))?; +// +// // BitPack the codes +// let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); +// let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), 6)?; +// +// let dict = DictArray::try_new(codes_bp.into_array(), dict_for.into_array())?; +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&dict.into_array())?.materialize(&cuda_ctx)?; +// +// let actual = +// run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[crate::test] +// fn test_alp_for_bitpacked() -> VortexResult<()> { +// // ALP(FoR(BitPacked)): encode each layer, then reassemble the tree +// // bottom-up because encode() methods produce flat outputs. +// let len = 3000; +// let exponents = Exponents { e: 2, f: 0 }; +// let floats: Vec = (0..len) +// .map(|i| ::decode_single(10 + (i as i32 % 64), exponents)) +// .collect(); +// let float_prim = PrimitiveArray::new(Buffer::from(floats.clone()), NonNullable); +// +// let alp = alp_encode(&float_prim, Some(exponents))?; +// assert!(alp.patches().is_none()); +// let for_arr = FoRArray::encode(alp.encoded().to_primitive())?; +// let bp = BitPackedArray::encode(for_arr.encoded(), 6)?; +// +// let tree = ALPArray::new( +// FoRArray::try_new(bp.into_array(), for_arr.reference_scalar().clone())?.into_array(), +// exponents, +// None, +// ); +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&tree.into_array())?.materialize(&cuda_ctx)?; +// +// let actual = +// run_dispatch_plan_f32(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; +// assert_eq!(actual, floats); +// +// Ok(()) +// } +// +// #[crate::test] +// fn test_zigzag_bitpacked() -> VortexResult<()> { +// // ZigZag(BitPacked): unpack then zigzag-decode. +// let bit_width: u8 = 4; +// let len = 3000; +// let max_val = (1u64 << bit_width).saturating_sub(1); +// +// let raw: Vec = (0..len) +// .map(|i| ((i as u64) % (max_val + 1)) as u32) +// .collect(); +// let expected: Vec = raw +// .iter() +// .map(|&v| (v >> 1) ^ (0u32.wrapping_sub(v & 1))) +// .collect(); +// +// let prim = PrimitiveArray::new(Buffer::from(raw), NonNullable); +// let bp = BitPackedArray::encode(&prim.into_array(), bit_width)?; +// let zz = ZigZagArray::try_new(bp.into_array())?; +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&zz.into_array())?.materialize(&cuda_ctx)?; +// +// let actual = +// run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[crate::test] +// fn test_for_runend() -> VortexResult<()> { +// // FoR(RunEnd): expand runs then add constant. +// let ends: Vec = vec![500, 1000, 1500, 2000, 2500, 3000]; +// let values: Vec = vec![1, 2, 3, 4, 5, 6]; +// let len = 3000; +// let reference = 1000u32; +// +// let mut expected = Vec::with_capacity(len); +// for i in 0..len { +// let run = ends.iter().position(|&e| (i as u32) < e).unwrap(); +// expected.push(values[run] + reference); +// } +// +// let ends_arr = PrimitiveArray::new(Buffer::from(ends), NonNullable).into_array(); +// let values_arr = PrimitiveArray::new(Buffer::from(values), NonNullable).into_array(); +// let re = RunEndArray::new(ends_arr, values_arr); +// let for_arr = FoRArray::try_new(re.into_array(), Scalar::from(reference))?; +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&for_arr.into_array())?.materialize(&cuda_ctx)?; +// +// let actual = +// run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[crate::test] +// fn test_for_dict() -> VortexResult<()> { +// // FoR(Dict(codes=Primitive, values=Primitive)): gather then add constant. +// let dict_values: Vec = vec![100, 200, 300, 400]; +// let dict_size = dict_values.len(); +// let reference = 5000u32; +// let len = 3000; +// +// let codes: Vec = (0..len).map(|i| (i % dict_size) as u32).collect(); +// let expected: Vec = codes +// .iter() +// .map(|&c| dict_values[c as usize] + reference) +// .collect(); +// +// let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); +// let values_prim = PrimitiveArray::new(Buffer::from(dict_values), NonNullable); +// let dict = DictArray::try_new(codes_prim.into_array(), values_prim.into_array())?; +// let for_arr = FoRArray::try_new(dict.into_array(), Scalar::from(reference))?; +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&for_arr.into_array())?.materialize(&cuda_ctx)?; +// +// let actual = +// run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[crate::test] +// fn test_dict_for_bp_codes() -> VortexResult<()> { +// // Dict(codes=FoR(BitPacked), values=primitive) +// let dict_values: Vec = (0..8).map(|i| i * 1000 + 7).collect(); +// let dict_size = dict_values.len(); +// let len = 3000; +// let codes: Vec = (0..len).map(|i| (i % dict_size) as u32).collect(); +// let expected: Vec = codes.iter().map(|&c| dict_values[c as usize]).collect(); +// +// // BitPack codes, then wrap in FoR (reference=0 so values unchanged) +// let bit_width: u8 = 3; +// let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); +// let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), bit_width)?; +// let codes_for = FoRArray::try_new(codes_bp.into_array(), Scalar::from(0u32))?; +// +// let values_prim = PrimitiveArray::new(Buffer::from(dict_values), NonNullable); +// let dict = DictArray::try_new(codes_for.into_array(), values_prim.into_array())?; +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&dict.into_array())?.materialize(&cuda_ctx)?; +// +// let actual = +// run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[crate::test] +// fn test_dict_primitive_values_bp_codes() -> VortexResult<()> { +// let dict_values: Vec = vec![100, 200, 300, 400]; +// let dict_size = dict_values.len(); +// let len = 3000; +// let codes: Vec = (0..len).map(|i| (i % dict_size) as u32).collect(); +// let expected: Vec = codes.iter().map(|&c| dict_values[c as usize]).collect(); +// +// let bit_width: u8 = 2; +// let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); +// let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), bit_width)?; +// let values_prim = PrimitiveArray::new(Buffer::from(dict_values), NonNullable); +// +// let dict = DictArray::try_new(codes_bp.into_array(), values_prim.into_array())?; +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&dict.into_array())?.materialize(&cuda_ctx)?; +// +// let actual = +// run_dynamic_dispatch_plan(&cuda_ctx, len, &plan.dispatch_plan, plan.shared_mem_bytes)?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[crate::test] +// fn test_dict_mismatched_ptypes_rejected() -> VortexResult<()> { +// let dict_values: Vec = vec![100, 200, 300, 400]; +// let len = 3000; +// let codes: Vec = (0..len).map(|i| (i % dict_values.len()) as u8).collect(); +// +// let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); +// let values_prim = PrimitiveArray::new(Buffer::from(dict_values), NonNullable); +// let dict = DictArray::try_new(codes_prim.into_array(), values_prim.into_array())?; +// +// // UnmaterializedPlan::new should fail because u8 codes != u32 values in byte width. +// assert!(UnmaterializedPlan::new(&dict.into_array()).is_err()); +// +// Ok(()) +// } +// +// #[crate::test] +// fn test_runend_mismatched_ptypes_rejected() -> VortexResult<()> { +// let ends: Vec = vec![1000, 2000, 3000]; +// let values: Vec = vec![10, 20, 30]; +// +// let ends_arr = PrimitiveArray::new(Buffer::from(ends), NonNullable).into_array(); +// let values_arr = PrimitiveArray::new(Buffer::from(values), NonNullable).into_array(); +// let re = RunEndArray::new(ends_arr, values_arr); +// +// // UnmaterializedPlan::new should fail because u64 ends != i32 values in byte width. +// assert!(UnmaterializedPlan::new(&re.into_array()).is_err()); +// +// Ok(()) +// } +// +// #[rstest] +// #[case(0, 1024)] +// #[case(0, 3000)] +// #[case(0, 4096)] +// #[case(500, 600)] +// #[case(500, 1024)] +// #[case(500, 2048)] +// #[case(500, 4500)] +// #[case(777, 3333)] +// #[case(1024, 2048)] +// #[case(1024, 4096)] +// #[case(1500, 3500)] +// #[case(2048, 4096)] +// #[case(2500, 4500)] +// #[case(3333, 4444)] +// #[crate::test] +// fn test_sliced_primitive( +// #[case] slice_start: usize, +// #[case] slice_end: usize, +// ) -> VortexResult<()> { +// let len = 5000; +// let data: Vec = (0..len).map(|i| (i * 7) % 1000).collect(); +// +// let prim = PrimitiveArray::new(Buffer::from(data.clone()), NonNullable); +// +// let sliced = prim.into_array().slice(slice_start..slice_end)?; +// +// let expected: Vec = data[slice_start..slice_end].to_vec(); +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; +// +// let actual = run_dynamic_dispatch_plan( +// &cuda_ctx, +// expected.len(), +// &plan.dispatch_plan, +// plan.shared_mem_bytes, +// )?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[rstest] +// #[case(0, 1024)] +// #[case(0, 3000)] +// #[case(0, 4096)] +// #[case(500, 600)] +// #[case(500, 1024)] +// #[case(500, 2048)] +// #[case(500, 4500)] +// #[case(777, 3333)] +// #[case(1024, 2048)] +// #[case(1024, 4096)] +// #[case(1500, 3500)] +// #[case(2048, 4096)] +// #[case(2500, 4500)] +// #[case(3333, 4444)] +// #[crate::test] +// fn test_sliced_zigzag_bitpacked( +// #[case] slice_start: usize, +// #[case] slice_end: usize, +// ) -> VortexResult<()> { +// let bit_width = 10u8; +// let max_val = (1u32 << bit_width) - 1; +// let len = 5000; +// +// let raw: Vec = (0..len).map(|i| (i as u32) % max_val).collect(); +// let all_decoded: Vec = raw +// .iter() +// .map(|&v| (v >> 1) ^ (0u32.wrapping_sub(v & 1))) +// .collect(); +// +// let prim = PrimitiveArray::new(Buffer::from(raw), NonNullable); +// let bp = BitPackedArray::encode(&prim.into_array(), bit_width)?; +// let zz = ZigZagArray::try_new(bp.into_array())?; +// +// let sliced = zz.into_array().slice(slice_start..slice_end)?; +// let expected: Vec = all_decoded[slice_start..slice_end].to_vec(); +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; +// +// let actual = run_dynamic_dispatch_plan( +// &cuda_ctx, +// expected.len(), +// &plan.dispatch_plan, +// plan.shared_mem_bytes, +// )?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[rstest] +// #[case(0, 1024)] +// #[case(0, 3000)] +// #[case(0, 4096)] +// #[case(500, 600)] +// #[case(500, 1024)] +// #[case(500, 2048)] +// #[case(500, 4500)] +// #[case(777, 3333)] +// #[case(1024, 2048)] +// #[case(1024, 4096)] +// #[case(1500, 3500)] +// #[case(2048, 4096)] +// #[case(2500, 4500)] +// #[case(3333, 4444)] +// #[crate::test] +// fn test_sliced_dict_with_primitive_codes( +// #[case] slice_start: usize, +// #[case] slice_end: usize, +// ) -> VortexResult<()> { +// let dict_values: Vec = vec![100, 200, 300, 400, 500]; +// let dict_size = dict_values.len(); +// let len = 5000; +// let codes: Vec = (0..len).map(|i| (i % dict_size) as u32).collect(); +// +// let codes_prim = PrimitiveArray::new(Buffer::from(codes.clone()), NonNullable); +// let values_prim = PrimitiveArray::new(Buffer::from(dict_values.clone()), NonNullable); +// let dict = DictArray::try_new(codes_prim.into_array(), values_prim.into_array())?; +// +// let sliced = dict.into_array().slice(slice_start..slice_end)?; +// +// let expected: Vec = codes[slice_start..slice_end] +// .iter() +// .map(|&c| dict_values[c as usize]) +// .collect(); +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; +// +// let actual = run_dynamic_dispatch_plan( +// &cuda_ctx, +// expected.len(), +// &plan.dispatch_plan, +// plan.shared_mem_bytes, +// )?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[rstest] +// #[case(0, 1024)] +// #[case(0, 3000)] +// #[case(0, 4096)] +// #[case(500, 600)] +// #[case(500, 1024)] +// #[case(500, 2048)] +// #[case(500, 4500)] +// #[case(777, 3333)] +// #[case(1024, 2048)] +// #[case(1024, 4096)] +// #[case(1500, 3500)] +// #[case(2048, 4096)] +// #[case(2500, 4500)] +// #[case(3333, 4444)] +// #[crate::test] +// fn test_sliced_bitpacked( +// #[case] slice_start: usize, +// #[case] slice_end: usize, +// ) -> VortexResult<()> { +// let bit_width = 10u8; +// let max_val = (1u32 << bit_width) - 1; +// let len = 5000; +// +// let data: Vec = (0..len).map(|i| (i as u32) % max_val).collect(); +// let prim = PrimitiveArray::new(Buffer::from(data.clone()), NonNullable); +// let bp = BitPackedArray::encode(&prim.into_array(), bit_width)?; +// +// let sliced = bp.into_array().slice(slice_start..slice_end)?; +// let expected: Vec = data[slice_start..slice_end].to_vec(); +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; +// +// let actual = run_dynamic_dispatch_plan( +// &cuda_ctx, +// expected.len(), +// &plan.dispatch_plan, +// plan.shared_mem_bytes, +// )?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[rstest] +// #[case(0, 1024)] +// #[case(0, 3000)] +// #[case(0, 4096)] +// #[case(500, 600)] +// #[case(500, 1024)] +// #[case(500, 2048)] +// #[case(500, 4500)] +// #[case(777, 3333)] +// #[case(1024, 2048)] +// #[case(1024, 4096)] +// #[case(1500, 3500)] +// #[case(2048, 4096)] +// #[case(2500, 4500)] +// #[case(3333, 4444)] +// #[crate::test] +// fn test_sliced_for_bitpacked( +// #[case] slice_start: usize, +// #[case] slice_end: usize, +// ) -> VortexResult<()> { +// let reference = 100u32; +// let bit_width = 10u8; +// let max_val = (1u32 << bit_width) - 1; +// let len = 5000; +// +// let encoded_data: Vec = (0..len).map(|i| (i as u32) % max_val).collect(); +// let prim = PrimitiveArray::new(Buffer::from(encoded_data.clone()), NonNullable); +// let bp = BitPackedArray::encode(&prim.into_array(), bit_width)?; +// let for_arr = FoRArray::try_new(bp.into_array(), Scalar::from(reference))?; +// +// let all_decoded: Vec = encoded_data.iter().map(|&v| v + reference).collect(); +// +// let sliced = for_arr.into_array().slice(slice_start..slice_end)?; +// let expected: Vec = all_decoded[slice_start..slice_end].to_vec(); +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; +// +// let actual = run_dynamic_dispatch_plan( +// &cuda_ctx, +// expected.len(), +// &plan.dispatch_plan, +// plan.shared_mem_bytes, +// )?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[rstest] +// #[case(0, 1024)] +// #[case(0, 3000)] +// #[case(0, 4096)] +// #[case(400, 600)] +// #[case(500, 1024)] +// #[case(500, 2048)] +// #[case(500, 4500)] +// #[case(777, 3333)] +// #[case(1024, 2048)] +// #[case(1024, 4096)] +// #[case(1500, 3500)] +// #[case(2048, 4096)] +// #[case(2500, 4500)] +// #[case(3333, 4444)] +// #[crate::test] +// fn test_sliced_runend( +// #[case] slice_start: usize, +// #[case] slice_end: usize, +// ) -> VortexResult<()> { +// let ends: Vec = vec![500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000]; +// let values: Vec = vec![10, 20, 30, 40, 50, 60, 70, 80, 90, 100]; +// let len = 5000; +// +// let all_decoded: Vec = (0..len) +// .map(|i| { +// let run = ends.iter().position(|&e| (i as u32) < e).unwrap(); +// values[run] +// }) +// .collect(); +// +// let ends_arr = PrimitiveArray::new(Buffer::from(ends), NonNullable).into_array(); +// let values_arr = PrimitiveArray::new(Buffer::from(values), NonNullable).into_array(); +// let re = RunEndArray::new(ends_arr, values_arr); +// +// let sliced = re.into_array().slice(slice_start..slice_end)?; +// let expected: Vec = all_decoded[slice_start..slice_end].to_vec(); +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; +// +// let actual = run_dynamic_dispatch_plan( +// &cuda_ctx, +// expected.len(), +// &plan.dispatch_plan, +// plan.shared_mem_bytes, +// )?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[rstest] +// #[case(0, 1024)] +// #[case(0, 3000)] +// #[case(0, 4096)] +// #[case(500, 600)] +// #[case(500, 1024)] +// #[case(500, 2048)] +// #[case(500, 4500)] +// #[case(777, 3333)] +// #[case(1024, 2048)] +// #[case(1024, 4096)] +// #[case(1500, 3500)] +// #[case(2048, 4096)] +// #[case(2500, 4500)] +// #[case(3333, 4444)] +// #[crate::test] +// fn test_sliced_dict_for_bp_values_bp_codes( +// #[case] slice_start: usize, +// #[case] slice_end: usize, +// ) -> VortexResult<()> { +// let dict_reference = 1_000_000u32; +// let dict_residuals: Vec = (0..64).collect(); +// let dict_expected: Vec = dict_residuals.iter().map(|&r| r + dict_reference).collect(); +// let dict_size = dict_residuals.len(); +// +// let len = 5000; +// let codes: Vec = (0..len).map(|i| (i % dict_size) as u32).collect(); +// let all_decoded: Vec = codes.iter().map(|&c| dict_expected[c as usize]).collect(); +// +// // BitPack+FoR the dict values +// let dict_prim = PrimitiveArray::new(Buffer::from(dict_residuals), NonNullable); +// let dict_bp = BitPackedArray::encode(&dict_prim.into_array(), 6)?; +// let dict_for = FoRArray::try_new(dict_bp.into_array(), Scalar::from(dict_reference))?; +// +// // BitPack the codes +// let codes_prim = PrimitiveArray::new(Buffer::from(codes), NonNullable); +// let codes_bp = BitPackedArray::encode(&codes_prim.into_array(), 6)?; +// +// let dict = DictArray::try_new(codes_bp.into_array(), dict_for.into_array())?; +// +// let sliced = dict.into_array().slice(slice_start..slice_end)?; +// let expected: Vec = all_decoded[slice_start..slice_end].to_vec(); +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&sliced)?.materialize(&cuda_ctx)?; +// +// let actual = run_dynamic_dispatch_plan( +// &cuda_ctx, +// expected.len(), +// &plan.dispatch_plan, +// plan.shared_mem_bytes, +// )?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[rstest] +// #[case(0u32, 1u32, 100)] +// #[case(5u32, 3u32, 2048)] +// #[case(0u32, 1u32, 4096)] +// #[case(100u32, 7u32, 5000)] +// #[crate::test] +// fn test_sequence_unsigned( +// #[case] base: u32, +// #[case] multiplier: u32, +// #[case] len: usize, +// ) -> VortexResult<()> { +// use vortex::dtype::Nullability; +// use vortex::encodings::sequence::SequenceArray; +// +// let expected: Vec = (0..len).map(|i| base + (i as u32) * multiplier).collect(); +// +// let seq = SequenceArray::try_new_typed(base, multiplier, Nullability::NonNullable, len)?; +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&seq.into_array())?.materialize(&cuda_ctx)?; +// +// let actual = run_dynamic_dispatch_plan( +// &cuda_ctx, +// expected.len(), +// &plan.dispatch_plan, +// plan.shared_mem_bytes, +// )?; +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// +// #[rstest] +// #[case(0i32, 1i32, 100)] +// #[case(-10i32, 3i32, 2048)] +// #[case(100i32, -1i32, 100)] +// #[case(-500i32, -7i32, 50)] +// #[case(0i32, 1i32, 5000)] +// #[crate::test] +// fn test_sequence_signed( +// #[case] base: i32, +// #[case] multiplier: i32, +// #[case] len: usize, +// ) -> VortexResult<()> { +// use vortex::dtype::Nullability; +// use vortex::encodings::sequence::SequenceArray; +// +// let expected: Vec = (0..len).map(|i| base + (i as i32) * multiplier).collect(); +// +// let seq = SequenceArray::try_new_typed(base, multiplier, Nullability::NonNullable, len)?; +// +// let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?; +// let plan = UnmaterializedPlan::new(&seq.into_array())?.materialize(&cuda_ctx)?; +// +// let actual_u32 = run_dynamic_dispatch_plan( +// &cuda_ctx, +// expected.len(), +// &plan.dispatch_plan, +// plan.shared_mem_bytes, +// )?; +// let actual: Vec = actual_u32.into_iter().map(|v| v as i32).collect(); +// assert_eq!(actual, expected); +// +// Ok(()) +// } +// } diff --git a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs index 89a179e307a..5ea80980cef 100644 --- a/vortex-cuda/src/dynamic_dispatch/plan_builder.rs +++ b/vortex-cuda/src/dynamic_dispatch/plan_builder.rs @@ -105,10 +105,7 @@ fn is_dyn_dispatch_compatible(array: &ArrayRef) -> bool { return false; } if id == BitPacked::ID { - if let Ok(a) = array.clone().try_into::() { - return a.patches().is_none(); - } - return false; + return true; } if id == Dict::ID { if let Ok(a) = array.clone().try_into::() { diff --git a/vortex-cuda/src/hybrid_dispatch/mod.rs b/vortex-cuda/src/hybrid_dispatch/mod.rs index 9f841649469..df53a6453cd 100644 --- a/vortex-cuda/src/hybrid_dispatch/mod.rs +++ b/vortex-cuda/src/hybrid_dispatch/mod.rs @@ -150,8 +150,8 @@ mod tests { use vortex::array::assert_arrays_eq; use vortex::array::validity::Validity::NonNullable; use vortex::buffer::Buffer; - use vortex::encodings::fastlanes::BitPackedArray; use vortex::encodings::fastlanes::FoRArray; + use vortex::encodings::fastlanes::bitpack_compress::BitPackedEncoder; use vortex::error::VortexExpect; use vortex::error::VortexResult; use vortex::mask::Mask; @@ -167,12 +167,11 @@ mod tests { let mut ctx = CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx"); let values: Vec = (0..2048).map(|i| (i % 128) as u32).collect(); - let bp = BitPackedArray::encode( - &PrimitiveArray::new(Buffer::from(values), NonNullable).into_array(), - 7, - ) - .vortex_expect("bp"); - let arr = FoRArray::try_new(bp.into_array(), 1000u32.into()).vortex_expect("for"); + let bp = BitPackedEncoder::new(&PrimitiveArray::from_iter(values)) + .with_bit_width(7) + .pack()? + .into_array()?; + let arr = FoRArray::try_new(bp, 1000u32.into()).vortex_expect("for"); let cpu = arr.to_canonical()?.into_array(); let gpu = arr @@ -196,13 +195,12 @@ mod tests { let mut ctx = CudaSession::create_execution_ctx(&VortexSession::empty()).vortex_expect("ctx"); let encoded: Vec = (0i32..2048).map(|i| i % 500).collect(); - let bp = BitPackedArray::encode( - &PrimitiveArray::new(Buffer::from(encoded), NonNullable).into_array(), - 9, - ) - .vortex_expect("bp"); + let bp = BitPackedEncoder::new(&PrimitiveArray::from_iter(encoded)) + .with_bit_width(9) + .pack()? + .into_array()?; let alp = ALPArray::try_new( - FoRArray::try_new(bp.into_array(), 0i32.into()) + FoRArray::try_new(bp, 0i32.into()) .vortex_expect("for") .into_array(), Exponents { e: 0, f: 2 }, @@ -259,72 +257,73 @@ mod tests { Ok(()) } - /// Dict(values=ZstdBuffers(FoR(BP)), codes=FoR(BP)) — ZstdBuffers is - /// executed separately, then Dict+FoR+BP fuses with its output as a LOAD. - /// 3 launches: nvcomp + fused FoR+BP + fused LOAD+FoR+BP+DICT. - #[cfg(feature = "unstable_encodings")] - #[crate::test] - async fn test_partial_fusion() -> VortexResult<()> { - use vortex::array::arrays::DictArray; - use vortex::array::session::ArraySessionExt; - use vortex::encodings::fastlanes; - use vortex::encodings::zstd::ZstdBuffers; - use vortex::encodings::zstd::ZstdBuffersArray; - - let mut session = VortexSession::empty(); - fastlanes::initialize(&mut session); - session.arrays().register(ZstdBuffers); - let mut ctx = CudaSession::create_execution_ctx(&session).vortex_expect("ctx"); - - let num_values: u32 = 64; - let len: u32 = 2048; - - // values = ZstdBuffers(FoR(BitPacked)) - let vals = PrimitiveArray::new( - Buffer::from((0..num_values).collect::>()), - NonNullable, - ) - .into_array(); - let vals = FoRArray::try_new( - BitPackedArray::encode(&vals, 6) - .vortex_expect("bp") - .into_array(), - 0u32.into(), - ) - .vortex_expect("for"); - let vals = ZstdBuffersArray::compress(&vals.into_array(), 3).vortex_expect("zstd"); - - // codes = FoR(BitPacked) - let codes = PrimitiveArray::new( - Buffer::from((0..len).map(|i| i % num_values).collect::>()), - NonNullable, - ) - .into_array(); - let codes = FoRArray::try_new( - BitPackedArray::encode(&codes, 6) - .vortex_expect("bp") - .into_array(), - 0u32.into(), - ) - .vortex_expect("for"); - - let dict = DictArray::try_new(codes.into_array(), vals.into_array()).vortex_expect("dict"); - - let cpu = PrimitiveArray::new( - Buffer::from((0..len).map(|i| i % num_values).collect::>()), - NonNullable, - ) - .into_array(); - let gpu = dict - .into_array() - .execute_cuda(&mut ctx) - .await? - .into_host() - .await? - .into_array(); - assert_arrays_eq!(cpu, gpu); - Ok(()) - } + // TODO(aduffy): bring this back + // /// Dict(values=ZstdBuffers(FoR(BP)), codes=FoR(BP)) — ZstdBuffers is + // /// executed separately, then Dict+FoR+BP fuses with its output as a LOAD. + // /// 3 launches: nvcomp + fused FoR+BP + fused LOAD+FoR+BP+DICT. + // #[cfg(feature = "unstable_encodings")] + // #[crate::test] + // async fn test_partial_fusion() -> VortexResult<()> { + // use vortex::array::arrays::DictArray; + // use vortex::array::session::ArraySessionExt; + // use vortex::encodings::fastlanes; + // use vortex::encodings::zstd::ZstdBuffers; + // use vortex::encodings::zstd::ZstdBuffersArray; + // + // let mut session = VortexSession::empty(); + // fastlanes::initialize(&mut session); + // session.arrays().register(ZstdBuffers); + // let mut ctx = CudaSession::create_execution_ctx(&session).vortex_expect("ctx"); + // + // let num_values: u32 = 64; + // let len: u32 = 2048; + // + // // values = ZstdBuffers(FoR(BitPacked)) + // let vals = PrimitiveArray::new( + // Buffer::from((0..num_values).collect::>()), + // NonNullable, + // ) + // .into_array(); + // let vals = FoRArray::try_new( + // BitPackedArray::encode(&vals, 6) + // .vortex_expect("bp") + // .into_array(), + // 0u32.into(), + // ) + // .vortex_expect("for"); + // let vals = ZstdBuffersArray::compress(&vals.into_array(), 3).vortex_expect("zstd"); + // + // // codes = FoR(BitPacked) + // let codes = PrimitiveArray::new( + // Buffer::from((0..len).map(|i| i % num_values).collect::>()), + // NonNullable, + // ) + // .into_array(); + // let codes = FoRArray::try_new( + // BitPackedArray::encode(&codes, 6) + // .vortex_expect("bp") + // .into_array(), + // 0u32.into(), + // ) + // .vortex_expect("for"); + // + // let dict = DictArray::try_new(codes.into_array(), vals.into_array()).vortex_expect("dict"); + // + // let cpu = PrimitiveArray::new( + // Buffer::from((0..len).map(|i| i % num_values).collect::>()), + // NonNullable, + // ) + // .into_array(); + // let gpu = dict + // .into_array() + // .execute_cuda(&mut ctx) + // .await? + // .into_host() + // .await? + // .into_array(); + // assert_arrays_eq!(cpu, gpu); + // Ok(()) + // } /// Filter(FoR(BP), mask) — FoR+BP fuses via dyn dispatch, then CUB filters the result. #[crate::test] @@ -334,12 +333,14 @@ mod tests { let len = 2048u32; let data: Vec = (0..len).map(|i| i % 128).collect(); - let bp = BitPackedArray::encode( - &PrimitiveArray::new(Buffer::from(data.clone()), NonNullable).into_array(), - 7, - ) - .vortex_expect("bp"); - let for_arr = FoRArray::try_new(bp.into_array(), 100u32.into()).vortex_expect("for"); + let bp = BitPackedEncoder::new(&PrimitiveArray::new( + Buffer::from(data.clone()), + NonNullable, + )) + .with_bit_width(7) + .pack()? + .into_array()?; + let for_arr = FoRArray::try_new(bp, 100u32.into()).vortex_expect("for"); // Keep every other element. let mask = Mask::from_iter((0..len as usize).map(|i| i % 2 == 0)); diff --git a/vortex-cuda/src/kernel/encodings/for_.rs b/vortex-cuda/src/kernel/encodings/for_.rs index 82b4ebfac87..2520cf725af 100644 --- a/vortex-cuda/src/kernel/encodings/for_.rs +++ b/vortex-cuda/src/kernel/encodings/for_.rs @@ -127,7 +127,6 @@ mod tests { use vortex::array::validity::Validity::NonNullable; use vortex::buffer::Buffer; use vortex::dtype::NativePType; - use vortex::encodings::fastlanes::BitPackedArray; use vortex::encodings::fastlanes::FoRArray; use vortex::encodings::fastlanes::bitpack_compress::BitPackedEncoder; use vortex::error::VortexExpect; diff --git a/vortex/benches/single_encoding_throughput.rs b/vortex/benches/single_encoding_throughput.rs index 4776afa4a52..405b4996351 100644 --- a/vortex/benches/single_encoding_throughput.rs +++ b/vortex/benches/single_encoding_throughput.rs @@ -37,6 +37,7 @@ use vortex::encodings::zstd::ZstdArray; use vortex_array::VortexSessionExecute; use vortex_array::dtype::Nullability; use vortex_array::session::ArraySession; +use vortex_fastlanes::bitpack_compress::BitPackedEncoder; use vortex_sequence::SequenceArray; use vortex_session::VortexSession; @@ -114,15 +115,18 @@ fn bench_bitpacked_compress_u32(bencher: Bencher) { #[divan::bench(name = "bitpacked_decompress_u32")] fn bench_bitpacked_decompress_u32(bencher: Bencher) { - use vortex::encodings::fastlanes::bitpack_compress::bitpack_encode; - let (uint_array, ..) = setup_primitive_arrays(); let bit_width = 8; - let compressed = bitpack_encode(&uint_array, bit_width, None).unwrap(); + let compressed = BitPackedEncoder::new(&uint_array) + .with_bit_width(bit_width) + .pack() + .unwrap() + .into_array() + .unwrap(); with_byte_counter(bencher, NUM_VALUES * 4) .with_inputs(|| &compressed) - .bench_refs(|a| a.to_canonical()); + .bench_refs(|a| a.to_canonical().unwrap()); } #[divan::bench(name = "runend_compress_u32")]