Skip to content

Commit 822bd4a

Browse files
robert3005 and lwwmanning
authored and committed
Add compressor for constant non-nullable and all-valid bool arrays (#7221)
Add a compressor for constant bool arrays. This should make part of #7210 less surprising.
---------
Signed-off-by: Robert Kruszewski <github@robertk.io>
Signed-off-by: Will Manning <will@willmanning.io>
1 parent 69a61f1 commit 822bd4a

13 files changed

Lines changed: 406 additions & 1 deletion

File tree

vortex-btrblocks/public-api.lock

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ pub mod vortex_btrblocks
22

33
pub use vortex_btrblocks::ArrayAndStats
44

5+
pub use vortex_btrblocks::BoolStats
6+
57
pub use vortex_btrblocks::CascadingCompressor
68

79
pub use vortex_btrblocks::CompressorContext
@@ -28,6 +30,12 @@ pub use vortex_btrblocks::integer_dictionary_encode
2830

2931
pub mod vortex_btrblocks::schemes
3032

33+
pub mod vortex_btrblocks::schemes::bool
34+
35+
pub use vortex_btrblocks::schemes::bool::BoolConstantScheme
36+
37+
pub use vortex_btrblocks::schemes::bool::BoolStats
38+
3139
pub mod vortex_btrblocks::schemes::decimal
3240

3341
pub struct vortex_btrblocks::schemes::decimal::DecimalScheme

vortex-btrblocks/src/builder.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use crate::CascadingCompressor;
1010
use crate::Scheme;
1111
use crate::SchemeExt;
1212
use crate::SchemeId;
13+
use crate::schemes::bool;
1314
use crate::schemes::decimal;
1415
use crate::schemes::float;
1516
use crate::schemes::integer;
@@ -22,6 +23,10 @@ use crate::schemes::temporal;
2223
/// This list is order-sensitive: the builder preserves this order when constructing
2324
/// the final scheme list, so that tie-breaking is deterministic.
2425
pub const ALL_SCHEMES: &[&dyn Scheme] = &[
26+
////////////////////////////////////////////////////////////////////////////////////////////////
27+
// Bool schemes.
28+
////////////////////////////////////////////////////////////////////////////////////////////////
29+
&bool::BoolConstantScheme,
2530
////////////////////////////////////////////////////////////////////////////////////////////////
2631
// Integer schemes.
2732
////////////////////////////////////////////////////////////////////////////////////////////////

vortex-btrblocks/src/canonical_compressor.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,14 @@ mod tests {
6262
use rstest::rstest;
6363
use vortex_array::DynArray;
6464
use vortex_array::IntoArray;
65+
use vortex_array::arrays::BoolArray;
66+
use vortex_array::arrays::Constant;
6567
use vortex_array::arrays::List;
6668
use vortex_array::arrays::ListView;
6769
use vortex_array::arrays::ListViewArray;
6870
use vortex_array::assert_arrays_eq;
6971
use vortex_array::validity::Validity;
72+
use vortex_buffer::BitBuffer;
7073
use vortex_buffer::buffer;
7174
use vortex_error::VortexResult;
7275

@@ -107,4 +110,61 @@ mod tests {
107110
assert_arrays_eq!(result, input);
108111
Ok(())
109112
}
113+
114+
#[test]
115+
fn test_constant_all_true() -> VortexResult<()> {
116+
let array = BoolArray::new(BitBuffer::from(vec![true; 100]), Validity::NonNullable);
117+
let btr = BtrBlocksCompressor::default();
118+
let compressed = btr.compress(&array.clone().into_array())?;
119+
assert!(compressed.is::<Constant>());
120+
assert_arrays_eq!(compressed, array);
121+
Ok(())
122+
}
123+
124+
#[test]
125+
fn test_constant_all_false() -> VortexResult<()> {
126+
let array = BoolArray::new(BitBuffer::from(vec![false; 100]), Validity::NonNullable);
127+
let btr = BtrBlocksCompressor::default();
128+
let compressed = btr.compress(&array.clone().into_array())?;
129+
assert!(compressed.is::<Constant>());
130+
assert_arrays_eq!(compressed, array);
131+
Ok(())
132+
}
133+
134+
#[test]
135+
fn test_nullable_all_valid_compressed() -> VortexResult<()> {
136+
let array = BoolArray::new(
137+
BitBuffer::from(vec![true; 100]),
138+
Validity::from(BitBuffer::from(vec![true; 100])),
139+
);
140+
let btr = BtrBlocksCompressor::default();
141+
let compressed = btr.compress(&array.clone().into_array())?;
142+
assert!(compressed.is::<Constant>());
143+
assert_arrays_eq!(compressed, array);
144+
Ok(())
145+
}
146+
147+
#[test]
148+
fn test_nullable_with_nulls_not_compressed() -> VortexResult<()> {
149+
let validity = Validity::from(BitBuffer::from_iter((0..100).map(|i| i % 3 != 0)));
150+
let array = BoolArray::new(BitBuffer::from(vec![true; 100]), validity);
151+
let btr = BtrBlocksCompressor::default();
152+
let compressed = btr.compress(&array.clone().into_array())?;
153+
assert!(!compressed.is::<Constant>());
154+
assert_arrays_eq!(compressed, array);
155+
Ok(())
156+
}
157+
158+
#[test]
159+
fn test_mixed_not_constant() -> VortexResult<()> {
160+
let array = BoolArray::new(
161+
BitBuffer::from(vec![true, false, true, false, true]),
162+
Validity::NonNullable,
163+
);
164+
let btr = BtrBlocksCompressor::default();
165+
let compressed = btr.compress(&array.clone().into_array())?;
166+
assert!(!compressed.is::<Constant>());
167+
assert_arrays_eq!(compressed, array);
168+
Ok(())
169+
}
110170
}

vortex-btrblocks/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ pub use vortex_compressor::scheme::SchemeExt;
7676
pub use vortex_compressor::scheme::SchemeId;
7777
pub use vortex_compressor::scheme::estimate_compression_ratio_with_sampling;
7878
pub use vortex_compressor::stats::ArrayAndStats;
79+
pub use vortex_compressor::stats::BoolStats;
7980
pub use vortex_compressor::stats::FloatStats;
8081
pub use vortex_compressor::stats::GenerateStatsOptions;
8182
pub use vortex_compressor::stats::IntegerStats;
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Bool compression schemes.
5+
6+
pub use vortex_compressor::builtins::BoolConstantScheme;
7+
pub use vortex_compressor::stats::BoolStats;

vortex-btrblocks/src/schemes/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
//! Compression scheme implementations.
55
6+
pub mod bool;
67
pub mod float;
78
pub mod integer;
89
pub mod string;

vortex-compressor/public-api.lock

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,46 @@ pub mod vortex_compressor
22

33
pub mod vortex_compressor::builtins
44

5+
pub struct vortex_compressor::builtins::BoolConstantScheme
6+
7+
impl core::clone::Clone for vortex_compressor::builtins::BoolConstantScheme
8+
9+
pub fn vortex_compressor::builtins::BoolConstantScheme::clone(&self) -> vortex_compressor::builtins::BoolConstantScheme
10+
11+
impl core::cmp::Eq for vortex_compressor::builtins::BoolConstantScheme
12+
13+
impl core::cmp::PartialEq for vortex_compressor::builtins::BoolConstantScheme
14+
15+
pub fn vortex_compressor::builtins::BoolConstantScheme::eq(&self, other: &vortex_compressor::builtins::BoolConstantScheme) -> bool
16+
17+
impl core::fmt::Debug for vortex_compressor::builtins::BoolConstantScheme
18+
19+
pub fn vortex_compressor::builtins::BoolConstantScheme::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
20+
21+
impl core::marker::Copy for vortex_compressor::builtins::BoolConstantScheme
22+
23+
impl core::marker::StructuralPartialEq for vortex_compressor::builtins::BoolConstantScheme
24+
25+
impl vortex_compressor::scheme::Scheme for vortex_compressor::builtins::BoolConstantScheme
26+
27+
pub fn vortex_compressor::builtins::BoolConstantScheme::ancestor_exclusions(&self) -> alloc::vec::Vec<vortex_compressor::scheme::AncestorExclusion>
28+
29+
pub fn vortex_compressor::builtins::BoolConstantScheme::compress(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult<vortex_array::array::ArrayRef>
30+
31+
pub fn vortex_compressor::builtins::BoolConstantScheme::descendant_exclusions(&self) -> alloc::vec::Vec<vortex_compressor::scheme::DescendantExclusion>
32+
33+
pub fn vortex_compressor::builtins::BoolConstantScheme::detects_constant(&self) -> bool
34+
35+
pub fn vortex_compressor::builtins::BoolConstantScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult<f64>
36+
37+
pub fn vortex_compressor::builtins::BoolConstantScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool
38+
39+
pub fn vortex_compressor::builtins::BoolConstantScheme::num_children(&self) -> usize
40+
41+
pub fn vortex_compressor::builtins::BoolConstantScheme::scheme_name(&self) -> &'static str
42+
43+
pub fn vortex_compressor::builtins::BoolConstantScheme::stats_options(&self) -> vortex_compressor::stats::GenerateStatsOptions
44+
545
pub struct vortex_compressor::builtins::FloatConstantScheme
646

747
impl core::clone::Clone for vortex_compressor::builtins::FloatConstantScheme
@@ -246,6 +286,8 @@ pub fn vortex_compressor::builtins::float_dictionary_encode(stats: &vortex_compr
246286

247287
pub fn vortex_compressor::builtins::integer_dictionary_encode(stats: &vortex_compressor::stats::IntegerStats) -> vortex_array::arrays::dict::array::DictArray
248288

289+
pub fn vortex_compressor::builtins::is_bool(canonical: &vortex_array::canonical::Canonical) -> bool
290+
249291
pub fn vortex_compressor::builtins::is_float_primitive(canonical: &vortex_array::canonical::Canonical) -> bool
250292

251293
pub fn vortex_compressor::builtins::is_integer_primitive(canonical: &vortex_array::canonical::Canonical) -> bool
@@ -386,6 +428,26 @@ pub fn vortex_compressor::scheme::Scheme::scheme_name(&self) -> &'static str
386428

387429
pub fn vortex_compressor::scheme::Scheme::stats_options(&self) -> vortex_compressor::stats::GenerateStatsOptions
388430

431+
impl vortex_compressor::scheme::Scheme for vortex_compressor::builtins::BoolConstantScheme
432+
433+
pub fn vortex_compressor::builtins::BoolConstantScheme::ancestor_exclusions(&self) -> alloc::vec::Vec<vortex_compressor::scheme::AncestorExclusion>
434+
435+
pub fn vortex_compressor::builtins::BoolConstantScheme::compress(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, _ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult<vortex_array::array::ArrayRef>
436+
437+
pub fn vortex_compressor::builtins::BoolConstantScheme::descendant_exclusions(&self) -> alloc::vec::Vec<vortex_compressor::scheme::DescendantExclusion>
438+
439+
pub fn vortex_compressor::builtins::BoolConstantScheme::detects_constant(&self) -> bool
440+
441+
pub fn vortex_compressor::builtins::BoolConstantScheme::expected_compression_ratio(&self, _compressor: &vortex_compressor::CascadingCompressor, data: &mut vortex_compressor::stats::ArrayAndStats, ctx: vortex_compressor::ctx::CompressorContext) -> vortex_error::VortexResult<f64>
442+
443+
pub fn vortex_compressor::builtins::BoolConstantScheme::matches(&self, canonical: &vortex_array::canonical::Canonical) -> bool
444+
445+
pub fn vortex_compressor::builtins::BoolConstantScheme::num_children(&self) -> usize
446+
447+
pub fn vortex_compressor::builtins::BoolConstantScheme::scheme_name(&self) -> &'static str
448+
449+
pub fn vortex_compressor::builtins::BoolConstantScheme::stats_options(&self) -> vortex_compressor::stats::GenerateStatsOptions
450+
389451
impl vortex_compressor::scheme::Scheme for vortex_compressor::builtins::FloatConstantScheme
390452

391453
pub fn vortex_compressor::builtins::FloatConstantScheme::ancestor_exclusions(&self) -> alloc::vec::Vec<vortex_compressor::scheme::AncestorExclusion>
@@ -624,6 +686,8 @@ impl vortex_compressor::stats::ArrayAndStats
624686

625687
pub fn vortex_compressor::stats::ArrayAndStats::array(&self) -> &vortex_array::array::ArrayRef
626688

689+
pub fn vortex_compressor::stats::ArrayAndStats::bool_stats(&mut self) -> &vortex_compressor::stats::BoolStats
690+
627691
pub fn vortex_compressor::stats::ArrayAndStats::float_stats(&mut self) -> &vortex_compressor::stats::FloatStats
628692

629693
pub fn vortex_compressor::stats::ArrayAndStats::get_or_insert_with<T: 'static>(&mut self, f: impl core::ops::function::FnOnce() -> T) -> &T
@@ -636,6 +700,30 @@ pub fn vortex_compressor::stats::ArrayAndStats::new(array: vortex_array::array::
636700

637701
pub fn vortex_compressor::stats::ArrayAndStats::string_stats(&mut self) -> &vortex_compressor::stats::StringStats
638702

703+
pub struct vortex_compressor::stats::BoolStats
704+
705+
impl vortex_compressor::stats::BoolStats
706+
707+
pub fn vortex_compressor::stats::BoolStats::generate(input: &vortex_array::arrays::bool::array::BoolArray) -> vortex_error::VortexResult<Self>
708+
709+
pub fn vortex_compressor::stats::BoolStats::is_constant(&self) -> bool
710+
711+
pub fn vortex_compressor::stats::BoolStats::null_count(&self) -> u32
712+
713+
pub fn vortex_compressor::stats::BoolStats::source(&self) -> &vortex_array::arrays::bool::array::BoolArray
714+
715+
pub fn vortex_compressor::stats::BoolStats::true_count(&self) -> u32
716+
717+
pub fn vortex_compressor::stats::BoolStats::value_count(&self) -> u32
718+
719+
impl core::clone::Clone for vortex_compressor::stats::BoolStats
720+
721+
pub fn vortex_compressor::stats::BoolStats::clone(&self) -> vortex_compressor::stats::BoolStats
722+
723+
impl core::fmt::Debug for vortex_compressor::stats::BoolStats
724+
725+
pub fn vortex_compressor::stats::BoolStats::fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result
726+
639727
pub struct vortex_compressor::stats::FloatDistinctInfo<T>
640728

641729
impl<T> vortex_compressor::stats::FloatDistinctInfo<T>

vortex-compressor/src/builtins/constant.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use vortex_array::scalar::Scalar;
1414
use vortex_array::vtable::ValidityHelper;
1515
use vortex_error::VortexResult;
1616

17+
use super::is_bool;
1718
use super::is_float_primitive;
1819
use super::is_integer_primitive;
1920
use super::is_utf8_string;
@@ -22,6 +23,58 @@ use crate::ctx::CompressorContext;
2223
use crate::scheme::Scheme;
2324
use crate::stats::ArrayAndStats;
2425

26+
/// Constant encoding for bool arrays that contain no nulls and whose values are all the same.
27+
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
28+
pub struct BoolConstantScheme;
29+
30+
impl Scheme for BoolConstantScheme {
31+
fn scheme_name(&self) -> &'static str {
32+
"vortex.bool.constant"
33+
}
34+
35+
fn matches(&self, canonical: &Canonical) -> bool {
36+
is_bool(canonical)
37+
}
38+
39+
fn detects_constant(&self) -> bool {
40+
true
41+
}
42+
43+
fn expected_compression_ratio(
44+
&self,
45+
_compressor: &CascadingCompressor,
46+
data: &mut ArrayAndStats,
47+
ctx: CompressorContext,
48+
) -> VortexResult<f64> {
49+
if ctx.is_sample() {
50+
return Ok(0.0);
51+
}
52+
53+
let stats = data.bool_stats();
54+
55+
// Only compress non-nullable or all-valid nullable arrays.
56+
if stats.source().dtype().is_nullable() && stats.null_count() > 0 {
57+
return Ok(0.0);
58+
}
59+
60+
if !stats.is_constant() {
61+
return Ok(0.0);
62+
}
63+
64+
Ok(stats.value_count() as f64)
65+
}
66+
67+
fn compress(
68+
&self,
69+
_compressor: &CascadingCompressor,
70+
data: &mut ArrayAndStats,
71+
_ctx: CompressorContext,
72+
) -> VortexResult<ArrayRef> {
73+
let stats = data.bool_stats();
74+
Ok(ConstantArray::new(stats.source().scalar_at(0)?, stats.source().len()).into_array())
75+
}
76+
}
77+
2578
/// Constant encoding for integer arrays with a single distinct value.
2679
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
2780
pub struct IntConstantScheme;

vortex-compressor/src/builtins/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//! [`DictArray`]: vortex_array::arrays::DictArray
1111
//! [`MaskedArray`]: vortex_array::arrays::MaskedArray
1212
13+
pub use constant::BoolConstantScheme;
1314
pub use constant::FloatConstantScheme;
1415
pub use constant::IntConstantScheme;
1516
pub use constant::StringConstantScheme;
@@ -26,6 +27,11 @@ use vortex_array::Canonical;
2627
use vortex_array::dtype::DType;
2728
use vortex_array::dtype::Nullability;
2829

30+
/// Returns `true` if the canonical array is a bool type.
31+
pub fn is_bool(canonical: &Canonical) -> bool {
32+
matches!(canonical, Canonical::Bool(_))
33+
}
34+
2935
/// Returns `true` if the canonical array is a primitive with an integer ptype.
3036
pub fn is_integer_primitive(canonical: &Canonical) -> bool {
3137
matches!(canonical, Canonical::Primitive(p) if p.ptype().is_int())

vortex-compressor/src/compressor.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,9 @@ impl CascadingCompressor {
172172
) -> VortexResult<ArrayRef> {
173173
match array {
174174
Canonical::Null(null_array) => Ok(null_array.into_array()),
175-
Canonical::Bool(bool_array) => Ok(bool_array.into_array()),
175+
Canonical::Bool(bool_array) => {
176+
self.choose_and_compress(Canonical::Bool(bool_array), ctx)
177+
}
176178
Canonical::Primitive(primitive) => {
177179
self.choose_and_compress(Canonical::Primitive(primitive), ctx)
178180
}

0 commit comments

Comments
 (0)