@@ -11,6 +11,7 @@ use vortex_array::Canonical;
1111use vortex_array:: IntoArray ;
1212use vortex_array:: ToCanonical ;
1313use vortex_array:: dtype:: PType ;
14+ use vortex_compressor:: estimate:: CompressionEstimate ;
1415use vortex_compressor:: scheme:: ChildSelection ;
1516use vortex_compressor:: scheme:: DescendantExclusion ;
1617use vortex_error:: VortexResult ;
@@ -25,7 +26,6 @@ use crate::CompressorContext;
2526use crate :: Scheme ;
2627use crate :: SchemeExt ;
2728use crate :: compress_patches;
28- use crate :: estimate_compression_ratio_with_sampling;
2929
3030/// ALP (Adaptive Lossless floating-Point) encoding.
3131#[ derive( Debug , Copy , Clone , PartialEq , Eq ) ]
@@ -70,22 +70,21 @@ impl Scheme for ALPScheme {
7070
7171 fn expected_compression_ratio (
7272 & self ,
73- compressor : & CascadingCompressor ,
7473 data : & mut ArrayAndStats ,
7574 ctx : CompressorContext ,
76- ) -> VortexResult < f64 > {
75+ ) -> CompressionEstimate {
7776 // ALP encodes floats as integers. Without integer compression afterward, the encoded ints
7877 // are the same size.
7978 if ctx. finished_cascading ( ) {
80- return Ok ( 0.0 ) ;
79+ return CompressionEstimate :: Skip ;
8180 }
8281
8382 // We don't support ALP for f16.
84- if data. float_stats ( ) . source ( ) . ptype ( ) == PType :: F16 {
85- return Ok ( 0.0 ) ;
83+ if data. array_as_primitive ( ) . ptype ( ) == PType :: F16 {
84+ return CompressionEstimate :: Skip ;
8685 }
8786
88- estimate_compression_ratio_with_sampling ( self , compressor , data . array ( ) , ctx )
87+ CompressionEstimate :: Sample
8988 }
9089
9190 fn compress (
@@ -94,9 +93,7 @@ impl Scheme for ALPScheme {
9493 data : & mut ArrayAndStats ,
9594 ctx : CompressorContext ,
9695 ) -> VortexResult < ArrayRef > {
97- let stats = data. float_stats ( ) ;
98-
99- let alp_encoded = alp_encode ( & stats. source ( ) . to_primitive ( ) , None ) ?;
96+ let alp_encoded = alp_encode ( data. array_as_primitive ( ) , None ) ?;
10097
10198 // Compress the ALP ints.
10299 let compressed_alp_ints =
@@ -121,15 +118,15 @@ impl Scheme for ALPRDScheme {
121118
122119 fn expected_compression_ratio (
123120 & self ,
124- compressor : & CascadingCompressor ,
125121 data : & mut ArrayAndStats ,
126- ctx : CompressorContext ,
127- ) -> VortexResult < f64 > {
128- if data. float_stats ( ) . source ( ) . ptype ( ) == PType :: F16 {
129- return Ok ( 0.0 ) ;
122+ _ctx : CompressorContext ,
123+ ) -> CompressionEstimate {
124+ // We don't support ALPRD for f16.
125+ if data. array_as_primitive ( ) . ptype ( ) == PType :: F16 {
126+ return CompressionEstimate :: Skip ;
130127 }
131128
132- estimate_compression_ratio_with_sampling ( self , compressor , data . array ( ) , ctx )
129+ CompressionEstimate :: Sample
133130 }
134131
135132 fn compress (
@@ -138,15 +135,15 @@ impl Scheme for ALPRDScheme {
138135 data : & mut ArrayAndStats ,
139136 _ctx : CompressorContext ,
140137 ) -> VortexResult < ArrayRef > {
141- let stats = data. float_stats ( ) ;
138+ let primitive_array = data. array_as_primitive ( ) ;
142139
143- let encoder = match stats . source ( ) . ptype ( ) {
144- PType :: F32 => RDEncoder :: new ( stats . source ( ) . as_slice :: < f32 > ( ) ) ,
145- PType :: F64 => RDEncoder :: new ( stats . source ( ) . as_slice :: < f64 > ( ) ) ,
140+ let encoder = match primitive_array . ptype ( ) {
141+ PType :: F32 => RDEncoder :: new ( primitive_array . as_slice :: < f32 > ( ) ) ,
142+ PType :: F64 => RDEncoder :: new ( primitive_array . as_slice :: < f64 > ( ) ) ,
146143 ptype => vortex_panic ! ( "cannot ALPRD compress ptype {ptype}" ) ,
147144 } ;
148145
149- let mut alp_rd = encoder. encode ( stats . source ( ) ) ;
146+ let mut alp_rd = encoder. encode ( primitive_array ) ;
150147
151148 let patches = alp_rd
152149 . left_parts_patches ( )
@@ -182,24 +179,19 @@ impl Scheme for NullDominatedSparseScheme {
182179
183180 fn expected_compression_ratio (
184181 & self ,
185- _compressor : & CascadingCompressor ,
186182 data : & mut ArrayAndStats ,
187183 _ctx : CompressorContext ,
188- ) -> VortexResult < f64 > {
184+ ) -> CompressionEstimate {
185+ let len = data. array_len ( ) as f64 ;
189186 let stats = data. float_stats ( ) ;
190187
191- if stats. value_count ( ) == 0 {
192- // All nulls should use ConstantScheme instead of this.
193- return Ok ( 0.0 ) ;
194- }
195-
196188 // If the majority (90%) of values is null, this will compress well.
197- if stats. null_count ( ) as f64 / stats . source ( ) . len ( ) as f64 > 0.9 {
198- return Ok ( stats . source ( ) . len ( ) as f64 / stats. value_count ( ) as f64 ) ;
189+ if stats. null_count ( ) as f64 / len > 0.9 {
190+ return CompressionEstimate :: Ratio ( len / stats. value_count ( ) as f64 ) ;
199191 }
200192
201193 // Otherwise we don't go this route.
202- Ok ( 0.0 )
194+ CompressionEstimate :: Skip
203195 }
204196
205197 fn compress (
@@ -208,10 +200,8 @@ impl Scheme for NullDominatedSparseScheme {
208200 data : & mut ArrayAndStats ,
209201 ctx : CompressorContext ,
210202 ) -> VortexResult < ArrayRef > {
211- let stats = data. float_stats ( ) ;
212-
213203 // We pass None as we only run this pathway for NULL-dominated float arrays.
214- let sparse_encoded = SparseArray :: encode ( & stats . source ( ) . clone ( ) . into_array ( ) , None ) ?;
204+ let sparse_encoded = SparseArray :: encode ( data . array ( ) , None ) ?;
215205
216206 if let Some ( sparse) = sparse_encoded. as_opt :: < Sparse > ( ) {
217207 let indices = sparse. patches ( ) . indices ( ) . to_primitive ( ) . narrow ( ) ?;
@@ -241,15 +231,22 @@ impl Scheme for PcoScheme {
241231 is_float_primitive ( canonical)
242232 }
243233
234+ fn expected_compression_ratio (
235+ & self ,
236+ _data : & mut ArrayAndStats ,
237+ _ctx : CompressorContext ,
238+ ) -> CompressionEstimate {
239+ CompressionEstimate :: Sample
240+ }
241+
244242 fn compress (
245243 & self ,
246244 _compressor : & CascadingCompressor ,
247245 data : & mut ArrayAndStats ,
248246 _ctx : CompressorContext ,
249247 ) -> VortexResult < ArrayRef > {
250- let stats = data. float_stats ( ) ;
251248 Ok ( vortex_pco:: PcoArray :: from_primitive (
252- stats . source ( ) ,
249+ data . array_as_primitive ( ) ,
253250 pco:: DEFAULT_COMPRESSION_LEVEL ,
254251 8192 ,
255252 ) ?
0 commit comments