Skip to content

Commit c2fc4fd

Browse files
committed
Add a LazyPatchedArray
This lets us deserialize BPArray with Patches without eagerly transposing. Signed-off-by: Andrew Duffy <andrew@a10y.dev> Update VTable::build to return ArrayRef. This lets us return something other than the original array encoding at read time. Currently we'll want this so that BitPacked::build returns a LazyPatched, but this is applicable to pretty much any back-compat-preserving encoding rewrite. Signed-off-by: Andrew Duffy <andrew@a10y.dev> Remove patches from BitPackedArray. Removes both the patches field and all code for handling patches. This is safe to do now that we have updated the VTable build function to always read methods. Note that we need to leave the metadata as-is. Signed-off-by: Andrew Duffy <andrew@a10y.dev> more Signed-off-by: Andrew Duffy <andrew@a10y.dev>
1 parent 5d0d627 commit c2fc4fd

92 files changed

Lines changed: 2500 additions & 1597 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

encodings/alp/public-api.lock

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ pub fn vortex_alp::ALP::buffer(_array: vortex_array::array::view::ArrayView<'_,
4242

4343
pub fn vortex_alp::ALP::buffer_name(_array: vortex_array::array::view::ArrayView<'_, Self>, _idx: usize) -> core::option::Option<alloc::string::String>
4444

45-
pub fn vortex_alp::ALP::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult<vortex_alp::ALPData>
45+
pub fn vortex_alp::ALP::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
4646

4747
pub fn vortex_alp::ALP::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Metadata>
4848

@@ -212,7 +212,7 @@ pub fn vortex_alp::ALPRD::buffer(_array: vortex_array::array::view::ArrayView<'_
212212

213213
pub fn vortex_alp::ALPRD::buffer_name(_array: vortex_array::array::view::ArrayView<'_, Self>, _idx: usize) -> core::option::Option<alloc::string::String>
214214

215-
pub fn vortex_alp::ALPRD::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult<vortex_alp::ALPRDData>
215+
pub fn vortex_alp::ALPRD::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
216216

217217
pub fn vortex_alp::ALPRD::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Metadata>
218218

encodings/alp/src/alp/array.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ impl VTable for ALP {
131131
metadata: &Self::Metadata,
132132
_buffers: &[BufferHandle],
133133
children: &dyn ArrayChildren,
134-
) -> VortexResult<ALPData> {
134+
) -> VortexResult<ArrayRef> {
135135
let encoded_ptype = match &dtype {
136136
DType::Primitive(PType::F32, n) => DType::Primitive(PType::I32, *n),
137137
DType::Primitive(PType::F64, n) => DType::Primitive(PType::I64, *n),
@@ -153,14 +153,15 @@ impl VTable for ALP {
153153
})
154154
.transpose()?;
155155

156-
ALPData::try_new(
156+
Ok(ALPData::try_new(
157157
encoded,
158158
Exponents {
159159
e: u8::try_from(metadata.exp_e)?,
160160
f: u8::try_from(metadata.exp_f)?,
161161
},
162162
patches,
163-
)
163+
)?
164+
.into_array())
164165
}
165166

166167
fn slots(array: ArrayView<'_, Self>) -> &[Option<ArrayRef>] {

encodings/alp/src/alp_rd/array.rs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ impl VTable for ALPRD {
164164
metadata: &Self::Metadata,
165165
_buffers: &[BufferHandle],
166166
children: &dyn ArrayChildren,
167-
) -> VortexResult<ALPRDData> {
167+
) -> VortexResult<ArrayRef> {
168168
if children.len() < 2 {
169169
vortex_bail!(
170170
"Expected at least 2 children for ALPRD encoding, found {}",
@@ -212,7 +212,7 @@ impl VTable for ALPRD {
212212
})
213213
.transpose()?;
214214

215-
ALPRDData::try_new(
215+
Ok(ALPRDData::try_new(
216216
dtype.clone(),
217217
left_parts,
218218
left_parts_dictionary,
@@ -224,7 +224,8 @@ impl VTable for ALPRD {
224224
)
225225
})?,
226226
left_parts_patches,
227-
)
227+
)?
228+
.into_array())
228229
}
229230

230231
fn slots(array: ArrayView<'_, Self>) -> &[Option<ArrayRef>] {

encodings/alp/src/alp_rd/mod.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use vortex_array::ExecutionCtx;
88
use vortex_array::IntoArray;
99
use vortex_array::patches::Patches;
1010
use vortex_array::validity::Validity;
11+
use vortex_fastlanes::bitpack_compress::BitPackedEncoder;
1112
use vortex_fastlanes::bitpack_compress::bitpack_encode_unchecked;
1213

1314
mod array;
@@ -228,20 +229,19 @@ impl RDEncoder {
228229

229230
// Bit-pack down the encoded left-parts array that have been dictionary encoded.
230231
let primitive_left = PrimitiveArray::new(left_parts, array.validity());
231-
// SAFETY: by construction, all values in left_parts can be packed to left_bit_width.
232-
let packed_left = unsafe {
233-
bitpack_encode_unchecked(primitive_left, left_bit_width as _)
234-
.vortex_expect("bitpack_encode_unchecked should succeed for left parts")
235-
.into_array()
236-
};
237-
232+
let packed_left = BitPackedEncoder::new(&primitive_left)
233+
.with_bit_width(left_bit_width as _)
234+
.pack()
235+
.vortex_expect("bitpack_encode_unchecked should succeed for left parts")
236+
.into_array()
237+
.vortex_expect("Packed::into_array");
238238
let primitive_right = PrimitiveArray::new(right_parts, Validity::NonNullable);
239-
// SAFETY: by construction, all values in right_parts are right_bit_width + leading zeros.
240-
let packed_right = unsafe {
241-
bitpack_encode_unchecked(primitive_right, self.right_bit_width as _)
242-
.vortex_expect("bitpack_encode_unchecked should succeed for right parts")
243-
.into_array()
244-
};
239+
let packed_right = BitPackedEncoder::new(&primitive_right)
240+
.with_bit_width(self.right_bit_width as _)
241+
.pack()
242+
.vortex_expect("bitpack_encode_unchecked should succeed for right parts")
243+
.into_array()
244+
.vortex_expect("Packed::into_array");
245245

246246
// Bit-pack the dict-encoded left-parts
247247
// Bit-pack the right-parts

encodings/bytebool/public-api.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ pub fn vortex_bytebool::ByteBool::buffer(array: vortex_array::array::view::Array
3434

3535
pub fn vortex_bytebool::ByteBool::buffer_name(_array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> core::option::Option<alloc::string::String>
3636

37-
pub fn vortex_bytebool::ByteBool::build(dtype: &vortex_array::dtype::DType, len: usize, _metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult<vortex_bytebool::ByteBoolData>
37+
pub fn vortex_bytebool::ByteBool::build(dtype: &vortex_array::dtype::DType, len: usize, _metadata: &Self::Metadata, buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
3838

3939
pub fn vortex_bytebool::ByteBool::deserialize(_bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Metadata>
4040

encodings/bytebool/src/array.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ impl VTable for ByteBool {
120120
_metadata: &Self::Metadata,
121121
buffers: &[BufferHandle],
122122
children: &dyn ArrayChildren,
123-
) -> VortexResult<ByteBoolData> {
123+
) -> VortexResult<ArrayRef> {
124124
let validity = if children.is_empty() {
125125
Validity::from(dtype.nullability())
126126
} else if children.len() == 1 {
@@ -135,7 +135,7 @@ impl VTable for ByteBool {
135135
}
136136
let buffer = buffers[0].clone();
137137

138-
Ok(ByteBoolData::new(buffer, validity))
138+
Ok(ByteBoolData::new(buffer, validity).into_array())
139139
}
140140

141141
fn slots(array: ArrayView<'_, Self>) -> &[Option<ArrayRef>] {

encodings/datetime-parts/public-api.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ pub fn vortex_datetime_parts::DateTimeParts::buffer(_array: vortex_array::array:
3636

3737
pub fn vortex_datetime_parts::DateTimeParts::buffer_name(_array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> core::option::Option<alloc::string::String>
3838

39-
pub fn vortex_datetime_parts::DateTimeParts::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult<vortex_datetime_parts::DateTimePartsData>
39+
pub fn vortex_datetime_parts::DateTimeParts::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
4040

4141
pub fn vortex_datetime_parts::DateTimeParts::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Metadata>
4242

encodings/datetime-parts/src/array.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ impl VTable for DateTimeParts {
160160
metadata: &Self::Metadata,
161161
_buffers: &[BufferHandle],
162162
children: &dyn ArrayChildren,
163-
) -> VortexResult<DateTimePartsData> {
163+
) -> VortexResult<ArrayRef> {
164164
if children.len() != 3 {
165165
vortex_bail!(
166166
"Expected 3 children for datetime-parts encoding, found {}",
@@ -184,7 +184,7 @@ impl VTable for DateTimeParts {
184184
len,
185185
)?;
186186

187-
DateTimePartsData::try_new(dtype.clone(), days, seconds, subseconds)
187+
Ok(DateTimePartsData::try_new(dtype.clone(), days, seconds, subseconds)?.into_array())
188188
}
189189

190190
fn slots(array: ArrayView<'_, Self>) -> &[Option<ArrayRef>] {

encodings/decimal-byte-parts/public-api.lock

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ pub fn vortex_decimal_byte_parts::DecimalByteParts::buffer(_array: vortex_array:
3434

3535
pub fn vortex_decimal_byte_parts::DecimalByteParts::buffer_name(_array: vortex_array::array::view::ArrayView<'_, Self>, idx: usize) -> core::option::Option<alloc::string::String>
3636

37-
pub fn vortex_decimal_byte_parts::DecimalByteParts::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult<vortex_decimal_byte_parts::DecimalBytePartsData>
37+
pub fn vortex_decimal_byte_parts::DecimalByteParts::build(dtype: &vortex_array::dtype::DType, len: usize, metadata: &Self::Metadata, _buffers: &[vortex_array::buffer::BufferHandle], children: &dyn vortex_array::serde::ArrayChildren) -> vortex_error::VortexResult<vortex_array::array::erased::ArrayRef>
3838

3939
pub fn vortex_decimal_byte_parts::DecimalByteParts::deserialize(bytes: &[u8], _dtype: &vortex_array::dtype::DType, _len: usize, _buffers: &[vortex_array::buffer::BufferHandle], _session: &vortex_session::VortexSession) -> vortex_error::VortexResult<Self::Metadata>
4040

encodings/decimal-byte-parts/src/decimal_byte_parts/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ impl VTable for DecimalByteParts {
137137
metadata: &Self::Metadata,
138138
_buffers: &[BufferHandle],
139139
children: &dyn ArrayChildren,
140-
) -> VortexResult<DecimalBytePartsData> {
140+
) -> VortexResult<ArrayRef> {
141141
let Some(decimal_dtype) = dtype.as_decimal_opt() else {
142142
vortex_bail!("decoding decimal but given non decimal dtype {}", dtype)
143143
};
@@ -151,7 +151,7 @@ impl VTable for DecimalByteParts {
151151
"lower_part_count > 0 not currently supported"
152152
);
153153

154-
DecimalBytePartsData::try_new(msp, *decimal_dtype)
154+
Ok(DecimalBytePartsData::try_new(msp, *decimal_dtype)?.into_array())
155155
}
156156

157157
fn slots(array: ArrayView<'_, Self>) -> &[Option<ArrayRef>] {

0 commit comments

Comments
 (0)