Skip to content

Commit 79c11e2

Browse files
committed
fix: minor things
1 parent 19323cb commit 79c11e2

4 files changed

Lines changed: 109 additions & 40 deletions

File tree

quickwit/quickwit-proto/protos/quickwit/search.proto

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,9 @@ message ListFieldsRequest {
125125
optional int64 start_timestamp = 3;
126126
optional int64 end_timestamp = 4;
127127

128-
uint64 max_hits = 5;
128+
uint64 max_fields = 5;
129129
uint64 start_offset = 6;
130-
repeated SortField sort_fields = 7;
130+
repeated ListFieldSortOrder sort_order = 7;
131131

132132
// Control if the request will fail if split_ids contains a split that does not exist.
133133
// optional bool fail_on_missing_index = 6;
@@ -170,6 +170,12 @@ message ListFieldsEntryResponse {
170170
repeated string non_aggregatable_index_ids = 7;
171171
}
172172

173+
enum ListFieldSortOrder {
174+
ALPHABETICAL = 0;
175+
OCCURRENCE = 1;
176+
TYPE = 2;
177+
}
178+
173179
enum ListFieldType {
174180
STR = 0;
175181
U64 = 1;

quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs

Lines changed: 34 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

quickwit/quickwit-search/src/list_fields.rs

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
use std::collections::{HashMap, HashSet};
1616
use std::path::Path;
1717
use std::str::FromStr;
18-
use std::sync::{Arc, LazyLock};
18+
use std::sync::Arc;
1919

2020
use anyhow::Context;
2121
use futures::future;
@@ -27,8 +27,8 @@ use quickwit_common::uri::Uri;
2727
use quickwit_metastore::SplitMetadata;
2828
use quickwit_proto::metastore::MetastoreServiceClient;
2929
use quickwit_proto::search::{
30-
LeafListFieldsRequest, ListFields, ListFieldsEntryResponse, ListFieldsRequest,
31-
ListFieldsResponse, SplitIdAndFooterOffsets, deserialize_split_fields,
30+
LeafListFieldsRequest, ListFieldSortOrder, ListFields, ListFieldsEntryResponse,
31+
ListFieldsRequest, ListFieldsResponse, SplitIdAndFooterOffsets, deserialize_split_fields,
3232
};
3333
use quickwit_proto::types::{IndexId, IndexUid};
3434
use quickwit_storage::Storage;
@@ -41,16 +41,6 @@ use crate::{
4141
search_thread_pool,
4242
};
4343

44-
/// QW_FIELD_LIST_SIZE_LIMIT defines a hard limit on the number of fields that
45-
/// can be returned (error otherwise).
46-
///
47-
/// Having many fields can happen when a user is creating fields dynamically in
48-
/// a JSON type with random field names. This leads to huge memory consumption
49-
/// when building the response. This is a workaround until a way is found to
50-
/// prune the long tail of rare fields.
51-
static FIELD_LIST_SIZE_LIMIT: LazyLock<usize> =
52-
LazyLock::new(|| quickwit_common::get_from_env("QW_FIELD_LIST_SIZE_LIMIT", 100_000, false));
53-
5444
const DYNAMIC_FIELD_PREFIX: &str = "_dynamic.";
5545

5646
/// Get the list of fields in the given split.
@@ -231,9 +221,8 @@ struct ListFieldMerger<I: Iterator<Item = ListFieldsEntryResponse>> {
231221

232222
impl<I: Iterator<Item = ListFieldsEntryResponse>> ListFieldMerger<I> {
233223
fn new(iterators: impl Iterator<Item = I>) -> Self {
234-
//TODO: sort
235224
let cmp_fn: fn(&ListFieldsEntryResponse, &ListFieldsEntryResponse) -> bool =
236-
|a, b| (&a.field_name, a.field_type) <= (&b.field_name, b.field_type);
225+
|a, b| field_order(a, b) == std::cmp::Ordering::Less;
237226

238227
let merged = iterators.kmerge_by(cmp_fn);
239228
Self {
@@ -396,6 +385,34 @@ pub struct IndexMetasForLeafSearch {
396385
pub index_uri: Uri,
397386
}
398387

388+
/// Builds a comparison function for sorting list fields entries.
389+
///
390+
/// When multiple sort orders are provided, they are applied in sequence (primary, secondary, etc.).
391+
/// A final tiebreaker on `(field_name, field_type)` is always appended to ensure deterministic
392+
/// ordering.
393+
fn sort_list_fields(fields: &mut [ListFieldsEntryResponse], sort_orders: &[i32]) {
394+
fields.sort_unstable_by(|left, right| {
395+
let mut ordering = std::cmp::Ordering::Equal;
396+
for sort_order in sort_orders {
397+
ordering = ordering.then_with(|| {
398+
match ListFieldSortOrder::try_from(*sort_order) {
399+
Ok(ListFieldSortOrder::Alphabetical) => left.field_name.cmp(&right.field_name),
400+
Ok(ListFieldSortOrder::Occurrence) => {
401+
// Descending: more occurrences first
402+
right.index_ids.len().cmp(&left.index_ids.len())
403+
}
404+
Ok(ListFieldSortOrder::Type) => left.field_type.cmp(&right.field_type),
405+
Err(_) => std::cmp::Ordering::Equal,
406+
}
407+
});
408+
}
409+
// Tiebreaker: deterministic ordering by (field_name, field_type)
410+
ordering
411+
.then_with(|| left.field_name.cmp(&right.field_name))
412+
.then_with(|| left.field_type.cmp(&right.field_type))
413+
});
414+
}
415+
399416
/// Performs a distributed list fields request.
400417
/// 1. Sends leaf requests over gRPC to multiple leaf nodes.
401418
/// 2. Merges the search results.
@@ -460,19 +477,15 @@ pub async fn root_list_fields(
460477
let fields_iter = leaf_list_fields_protos
461478
.into_iter()
462479
.map(|leaf_list_fields_proto| leaf_list_fields_proto.fields.into_iter());
463-
let mut fields_iter = ListFieldMerger::new(fields_iter);
464-
let skipped = fields_iter
465-
.by_ref()
466-
.take(list_fields_req.start_offset as usize)
467-
.count();
468-
469-
let fields = fields_iter
470-
.by_ref()
471-
.take(list_fields_req.max_fields as usize)
472-
.collect::<Vec<_>>();
480+
let mut all_fields: Vec<ListFieldsEntryResponse> =
481+
ListFieldMerger::new(fields_iter).collect();
482+
483+
sort_list_fields(&mut all_fields, &list_fields_req.sort_order);
473484

474-
let remaining = fields_iter.count();
475-
let num_fields = (skipped + fields.len() + remaining) as u64;
485+
let num_fields = all_fields.len() as u64;
486+
let start = list_fields_req.start_offset.min(num_fields) as usize;
487+
let end = (start as u64 + list_fields_req.max_fields).min(num_fields) as usize;
488+
let fields = all_fields[start..end].to_vec();
476489

477490
(fields, num_fields)
478491
})

quickwit/quickwit-serve/src/search_api/rest_handler.rs

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ use std::sync::Arc;
1717

1818
use percent_encoding::percent_decode_str;
1919
use quickwit_config::validate_index_id_pattern;
20-
use quickwit_proto::search::{CountHits, ListFieldsRequest, SortField, SortOrder};
20+
use quickwit_proto::search::{
21+
CountHits, ListFieldSortOrder, ListFieldsRequest, SortField, SortOrder,
22+
};
2123
use quickwit_query::query_ast::query_ast_from_user_text;
2224
use quickwit_search::{SearchError, SearchPlanResponseRest, SearchResponseRest, SearchService};
2325
use serde::{Deserialize, Deserializer, Serialize, Serializer};
@@ -44,6 +46,7 @@ use crate::{BodyFormat, with_arg};
4446
SearchResponseRest,
4547
SearchPlanResponseRest,
4648
ListFieldsQueryString,
49+
ListFieldSortOrderParam,
4750
SortBy,
4851
SortField,
4952
SortOrder,
@@ -145,6 +148,26 @@ fn default_max_hits() -> u64 {
145148
20
146149
}
147150

151+
/// Sort order for the list_fields endpoint.
152+
#[derive(Debug, Clone, Copy, Default, Eq, PartialEq, Serialize, Deserialize, utoipa::ToSchema)]
153+
#[serde(rename_all = "snake_case")]
154+
pub enum ListFieldSortOrderParam {
155+
#[default]
156+
Alphabetical,
157+
Occurrence,
158+
Type,
159+
}
160+
161+
impl From<ListFieldSortOrderParam> for ListFieldSortOrder {
162+
fn from(param: ListFieldSortOrderParam) -> Self {
163+
match param {
164+
ListFieldSortOrderParam::Alphabetical => ListFieldSortOrder::Alphabetical,
165+
ListFieldSortOrderParam::Occurrence => ListFieldSortOrder::Occurrence,
166+
ListFieldSortOrderParam::Type => ListFieldSortOrder::Type,
167+
}
168+
}
169+
}
170+
148171
/// This struct represents the QueryString passed to
149172
/// the rest API.
150173
#[derive(
@@ -275,13 +298,9 @@ pub struct ListFieldsQueryString {
275298
/// The output format.
276299
#[serde(default)]
277300
pub format: BodyFormat,
278-
/// Specifies how documents are sorted.
279-
#[serde(alias = "sort_by_field")]
280-
#[serde(deserialize_with = "sort_by_mini_dsl")]
301+
/// Specifies how fields are sorted. Possible values: "alphabetical", "occurrence", "type".
281302
#[serde(default)]
282-
#[serde(skip_serializing_if = "SortBy::is_empty")]
283-
#[param(value_type = String)]
284-
pub sort_by: SortBy,
303+
pub sort_order: ListFieldSortOrderParam,
285304
}
286305

287306
pub fn search_request_from_api_request(
@@ -422,7 +441,7 @@ async fn list_fields(
422441
},
423442
index_id_patterns,
424443
max_fields: request.max_fields,
425-
sort_fields: request.sort_by.sort_fields,
444+
sort_order: vec![ListFieldSortOrder::from(request.sort_order) as i32],
426445
start_offset: request.start_offset,
427446
};
428447
let result = search_service.root_list_fields(req).await;

0 commit comments

Comments
 (0)