1515use std:: collections:: { HashMap , HashSet } ;
1616use std:: path:: Path ;
1717use std:: str:: FromStr ;
18- use std:: sync:: { Arc , LazyLock } ;
18+ use std:: sync:: Arc ;
1919
2020use anyhow:: Context ;
2121use futures:: future;
@@ -27,8 +27,8 @@ use quickwit_common::uri::Uri;
2727use quickwit_metastore:: SplitMetadata ;
2828use quickwit_proto:: metastore:: MetastoreServiceClient ;
2929use quickwit_proto:: search:: {
30- LeafListFieldsRequest , ListFields , ListFieldsEntryResponse , ListFieldsRequest ,
31- ListFieldsResponse , SplitIdAndFooterOffsets , deserialize_split_fields,
30+ LeafListFieldsRequest , ListFieldSortOrder , ListFields , ListFieldsEntryResponse ,
31+ ListFieldsRequest , ListFieldsResponse , SplitIdAndFooterOffsets , deserialize_split_fields,
3232} ;
3333use quickwit_proto:: types:: { IndexId , IndexUid } ;
3434use quickwit_storage:: Storage ;
@@ -41,16 +41,6 @@ use crate::{
4141 search_thread_pool,
4242} ;
4343
44- /// QW_FIELD_LIST_SIZE_LIMIT defines a hard limit on the number of fields that
45- /// can be returned (error otherwise).
46- ///
47- /// Having many fields can happen when a user is creating fields dynamically in
48- /// a JSON type with random field names. This leads to huge memory consumption
49- /// when building the response. This is a workaround until a way is found to
50- /// prune the long tail of rare fields.
51- static FIELD_LIST_SIZE_LIMIT : LazyLock < usize > =
52- LazyLock :: new ( || quickwit_common:: get_from_env ( "QW_FIELD_LIST_SIZE_LIMIT" , 100_000 , false ) ) ;
53-
5444const DYNAMIC_FIELD_PREFIX : & str = "_dynamic." ;
5545
5646/// Get the list of fields in the given split.
@@ -231,9 +221,8 @@ struct ListFieldMerger<I: Iterator<Item = ListFieldsEntryResponse>> {
231221
232222impl < I : Iterator < Item = ListFieldsEntryResponse > > ListFieldMerger < I > {
233223 fn new ( iterators : impl Iterator < Item = I > ) -> Self {
234- //TODO: sort
235224 let cmp_fn: fn ( & ListFieldsEntryResponse , & ListFieldsEntryResponse ) -> bool =
236- |a, b| ( & a . field_name , a . field_type ) <= ( & b . field_name , b . field_type ) ;
225+ |a, b| field_order ( a , b ) == std :: cmp :: Ordering :: Less ;
237226
238227 let merged = iterators. kmerge_by ( cmp_fn) ;
239228 Self {
@@ -396,6 +385,34 @@ pub struct IndexMetasForLeafSearch {
396385 pub index_uri : Uri ,
397386}
398387
388+ /// Builds a comparison function for sorting list fields entries.
389+ ///
390+ /// When multiple sort orders are provided, they are applied in sequence (primary, secondary, etc.).
391+ /// A final tiebreaker on `(field_name, field_type)` is always appended to ensure deterministic
392+ /// ordering.
393+ fn sort_list_fields ( fields : & mut [ ListFieldsEntryResponse ] , sort_orders : & [ i32 ] ) {
394+ fields. sort_unstable_by ( |left, right| {
395+ let mut ordering = std:: cmp:: Ordering :: Equal ;
396+ for sort_order in sort_orders {
397+ ordering = ordering. then_with ( || {
398+ match ListFieldSortOrder :: try_from ( * sort_order) {
399+ Ok ( ListFieldSortOrder :: Alphabetical ) => left. field_name . cmp ( & right. field_name ) ,
400+ Ok ( ListFieldSortOrder :: Occurrence ) => {
401+ // Descending: more occurrences first
402+ right. index_ids . len ( ) . cmp ( & left. index_ids . len ( ) )
403+ }
404+ Ok ( ListFieldSortOrder :: Type ) => left. field_type . cmp ( & right. field_type ) ,
405+ Err ( _) => std:: cmp:: Ordering :: Equal ,
406+ }
407+ } ) ;
408+ }
409+ // Tiebreaker: deterministic ordering by (field_name, field_type)
410+ ordering
411+ . then_with ( || left. field_name . cmp ( & right. field_name ) )
412+ . then_with ( || left. field_type . cmp ( & right. field_type ) )
413+ } ) ;
414+ }
415+
399416/// Performs a distributed list fields request.
400417/// 1. Sends leaf requests over gRPC to multiple leaf nodes.
401418/// 2. Merges the search results.
@@ -460,19 +477,15 @@ pub async fn root_list_fields(
460477 let fields_iter = leaf_list_fields_protos
461478 . into_iter ( )
462479 . map ( |leaf_list_fields_proto| leaf_list_fields_proto. fields . into_iter ( ) ) ;
463- let mut fields_iter = ListFieldMerger :: new ( fields_iter) ;
464- let skipped = fields_iter
465- . by_ref ( )
466- . take ( list_fields_req. start_offset as usize )
467- . count ( ) ;
468-
469- let fields = fields_iter
470- . by_ref ( )
471- . take ( list_fields_req. max_fields as usize )
472- . collect :: < Vec < _ > > ( ) ;
480+ let mut all_fields: Vec < ListFieldsEntryResponse > =
481+ ListFieldMerger :: new ( fields_iter) . collect ( ) ;
482+
483+ sort_list_fields ( & mut all_fields, & list_fields_req. sort_order ) ;
473484
474- let remaining = fields_iter. count ( ) ;
475- let num_fields = ( skipped + fields. len ( ) + remaining) as u64 ;
485+ let num_fields = all_fields. len ( ) as u64 ;
486+ let start = list_fields_req. start_offset . min ( num_fields) as usize ;
487+ let end = ( start as u64 + list_fields_req. max_fields ) . min ( num_fields) as usize ;
488+ let fields = all_fields[ start..end] . to_vec ( ) ;
476489
477490 ( fields, num_fields)
478491 } )
0 commit comments