Skip to content

Commit 1908ebe

Browse files
committed
feat: V2 named-argument syntax with NamedArgRewriter
Add NamedArgRewriter SqlShuttle that normalizes V2/PPL relevance syntax into MAP-based form before Calcite validation. Transforms positional and key=value arguments into MAP[paramName, value] pairs matching PPL's internal representation for uniform pushdown rules. Refactor UnifiedFunctionSpec to instance-based design with fluent builder and Category record for grouping. Use SqlUserDefinedFunction for consistency with PPL path. Add error tests and QueryErrorAssert to test base. Signed-off-by: Chen Dai <daichen@amazon.com>
1 parent 3ccc97a commit 1908ebe

8 files changed

Lines changed: 548 additions & 169 deletions

File tree

api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.apache.calcite.schema.SchemaPlus;
2626
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
2727
import org.apache.calcite.sql.parser.SqlParser;
28+
import org.apache.calcite.sql.util.SqlOperatorTables;
2829
import org.apache.calcite.tools.FrameworkConfig;
2930
import org.apache.calcite.tools.Frameworks;
3031
import org.apache.calcite.tools.Programs;
@@ -241,12 +242,13 @@ public List<?> getSettings() {
241242
private FrameworkConfig buildFrameworkConfig() {
242243
SchemaPlus rootSchema = CalciteSchema.createRootSchema(true, cacheMetadata).plus();
243244
catalogs.forEach(rootSchema::add);
244-
UnifiedFunctionSpec.registerAll(rootSchema);
245245

246246
SchemaPlus defaultSchema = findSchemaByPath(rootSchema, defaultNamespace);
247247
return Frameworks.newConfigBuilder()
248248
.parserConfig(buildParserConfig())
249-
.operatorTable(SqlStdOperatorTable.instance())
249+
.operatorTable(
250+
SqlOperatorTables.chain(
251+
SqlStdOperatorTable.instance(), UnifiedFunctionSpec.RELEVANCE.operatorTable()))
250252
.defaultSchema(defaultSchema)
251253
.traitDefs((List<RelTraitDef>) null)
252254
.programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE))

api/src/main/java/org/opensearch/sql/api/UnifiedQueryPlanner.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.apache.calcite.sql.SqlNode;
1818
import org.apache.calcite.tools.Frameworks;
1919
import org.apache.calcite.tools.Planner;
20+
import org.opensearch.sql.api.parser.NamedArgRewriter;
2021
import org.opensearch.sql.api.parser.UnifiedQueryParser;
2122
import org.opensearch.sql.ast.tree.UnresolvedPlan;
2223
import org.opensearch.sql.calcite.CalciteRelNodeVisitor;
@@ -81,7 +82,8 @@ private static class CalciteNativeStrategy implements PlanningStrategy {
8182
public RelNode plan(String query) throws Exception {
8283
try (Planner planner = Frameworks.getPlanner(context.getPlanContext().config)) {
8384
SqlNode parsed = planner.parse(query);
84-
SqlNode validated = planner.validate(parsed);
85+
SqlNode rewritten = parsed.accept(NamedArgRewriter.INSTANCE);
86+
SqlNode validated = planner.validate(rewritten);
8587
RelRoot relRoot = planner.rel(validated);
8688
return relRoot.project();
8789
}
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.parser;
7+
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
import org.apache.calcite.sql.SqlBasicCall;
11+
import org.apache.calcite.sql.SqlCall;
12+
import org.apache.calcite.sql.SqlKind;
13+
import org.apache.calcite.sql.SqlLiteral;
14+
import org.apache.calcite.sql.SqlNode;
15+
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
16+
import org.apache.calcite.sql.parser.SqlParserPos;
17+
import org.apache.calcite.sql.util.SqlShuttle;
18+
import org.checkerframework.checker.nullness.qual.Nullable;
19+
import org.opensearch.sql.api.spec.UnifiedFunctionSpec;
20+
21+
/**
22+
* Pre-validation rewriter that normalizes V2/PPL relevance function syntax into MAP-based form
23+
* matching PPL's internal representation. This ensures both SQL and PPL paths produce identical
24+
* query plans for pushdown rules.
25+
*
26+
* <p>Transforms: {@code match(name, 'John', operator='AND', boost=2.0)} into: {@code
27+
* match(MAP['field', name], MAP['query', 'John'], MAP['operator', 'AND'], MAP['boost', 2.0])}
28+
*
29+
* <p>Param names are resolved via {@link UnifiedFunctionSpec#getParamNames} from the operator's
30+
* {@link org.apache.calcite.sql.type.SqlOperandMetadata}. This runs before Calcite validation, when
31+
* operators are still unresolved.
32+
*/
33+
public final class NamedArgRewriter extends SqlShuttle {
34+
35+
public static final NamedArgRewriter INSTANCE = new NamedArgRewriter();
36+
37+
private NamedArgRewriter() {}
38+
39+
@Override
40+
public @Nullable SqlNode visit(SqlCall call) {
41+
SqlCall visited = (SqlCall) super.visit(call);
42+
if (visited == null) {
43+
return null;
44+
}
45+
UnifiedFunctionSpec spec = UnifiedFunctionSpec.of(visited.getOperator().getName());
46+
if (spec == null) {
47+
return visited;
48+
}
49+
return rewriteToMaps(visited, spec.getParamNames());
50+
}
51+
52+
/**
53+
* Wraps positional args in MAP[paramName, value] and flattens {@code key=value} comparisons into
54+
* MAP[key, value]. Produces the same MAP-based form as PPL's internal representation.
55+
*/
56+
private static SqlNode rewriteToMaps(SqlCall call, List<String> paramNames) {
57+
List<SqlNode> operands = call.getOperandList();
58+
List<SqlNode> result = new ArrayList<>();
59+
60+
for (int i = 0; i < operands.size(); i++) {
61+
SqlNode operand = operands.get(i);
62+
if (operand != null && operand.getKind() == SqlKind.EQUALS) {
63+
// Optional param: operator='AND' → MAP['operator', 'AND']
64+
SqlCall eq = (SqlCall) operand;
65+
result.add(toMap(eq.operand(0).toString(), eq.operand(1)));
66+
} else if (i < paramNames.size()) {
67+
// Required positional param: name → MAP['field', name]
68+
result.add(toMap(paramNames.get(i), operand));
69+
} else {
70+
result.add(operand);
71+
}
72+
}
73+
return new SqlBasicCall(
74+
call.getOperator(), result.toArray(SqlNode[]::new), call.getParserPosition());
75+
}
76+
77+
private static SqlNode toMap(String key, SqlNode value) {
78+
return new SqlBasicCall(
79+
SqlStdOperatorTable.MAP_VALUE_CONSTRUCTOR,
80+
new SqlNode[] {SqlLiteral.createCharString(key, SqlParserPos.ZERO), value},
81+
SqlParserPos.ZERO);
82+
}
83+
}

api/src/main/java/org/opensearch/sql/api/spec/UnifiedFunctionSpec.java

Lines changed: 103 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -5,129 +5,144 @@
55

66
package org.opensearch.sql.api.spec;
77

8+
import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN;
9+
810
import java.util.List;
911
import java.util.Map;
10-
import java.util.Set;
11-
import java.util.stream.IntStream;
12+
import java.util.Objects;
13+
import java.util.stream.Collectors;
14+
import java.util.stream.Stream;
15+
import lombok.AccessLevel;
16+
import lombok.Getter;
17+
import lombok.RequiredArgsConstructor;
1218
import org.apache.calcite.rel.type.RelDataType;
1319
import org.apache.calcite.rel.type.RelDataTypeFactory;
14-
import org.apache.calcite.schema.FunctionParameter;
15-
import org.apache.calcite.schema.ScalarFunction;
16-
import org.apache.calcite.schema.SchemaPlus;
17-
import org.apache.calcite.sql.type.SqlTypeName;
18-
import org.checkerframework.checker.nullness.qual.Nullable;
20+
import org.apache.calcite.sql.SqlCallBinding;
21+
import org.apache.calcite.sql.SqlIdentifier;
22+
import org.apache.calcite.sql.SqlKind;
23+
import org.apache.calcite.sql.SqlOperandCountRange;
24+
import org.apache.calcite.sql.SqlOperator;
25+
import org.apache.calcite.sql.SqlOperatorTable;
26+
import org.apache.calcite.sql.parser.SqlParserPos;
27+
import org.apache.calcite.sql.type.InferTypes;
28+
import org.apache.calcite.sql.type.SqlOperandCountRanges;
29+
import org.apache.calcite.sql.type.SqlOperandMetadata;
30+
import org.apache.calcite.sql.type.SqlReturnTypeInference;
31+
import org.apache.calcite.sql.util.SqlOperatorTables;
32+
import org.apache.calcite.sql.validate.SqlUserDefinedFunction;
1933

2034
/**
21-
* Central registry of language-specified function signatures (Unified Language Specification
22-
* layer). Each entry maps a function name to a canonical {@link ScalarFunction} with named required
23-
* parameters of type {@link SqlTypeName#ANY}.
24-
*
25-
* <p>This class defines <em>what functions exist</em> and their signatures. Function
26-
* <em>implementations</em> live in the Unified Execution Runtime (UER) layer — see {@link
27-
* org.opensearch.sql.api.function.UnifiedFunction} and {@link
28-
* org.opensearch.sql.api.function.UnifiedFunctionRepository}. For data-source-specific functions
29-
* (e.g., relevance search), execution is handled by adapter pushdown rules rather than UER.
30-
*
31-
* <p>Named parameters enable SQL named-argument syntax ({@code match(field => col, query =>
32-
* 'text')}) via Calcite's {@code ARGUMENT_ASSIGNMENT} operator. With fixed required parameters (no
33-
* optional params), <a href="https://issues.apache.org/jira/browse/CALCITE-5366">CALCITE-5366</a>
34-
* is avoided entirely.
35-
*
36-
* <p>Functions are registered globally on the root schema via {@link #registerAll(SchemaPlus)},
37-
* following the same pattern as Flink's {@code FlinkSqlOperatorTable} — engine-level primitives
38-
* available regardless of catalog. Pushdown rules enforce data-source capability at optimization
39-
* time.
40-
*
41-
* @see org.opensearch.sql.api.function.UnifiedFunction
42-
* @see org.opensearch.sql.api.function.UnifiedFunctionRepository
35+
* Unified function specification. Each spec carries its name, param names, and Calcite {@link
36+
* SqlOperator}, built via a fluent builder.
4337
*/
44-
// TODO: UnifiedFunctionRepository should resolve implementations for functions defined here,
45-
// rather than independently discovering from PPLBuiltinOperators. The spec is the source of
46-
// truth for what functions exist; UER provides how they execute. Decide whether to late-bind
47-
// UER implementations (ImplementableFunction) to spec-defined signatures for engine-independent
48-
// functions (e.g., upper, lower). Currently only data-source-specific functions (pushdown-only)
49-
// are registered here.
38+
@Getter
39+
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
5040
public final class UnifiedFunctionSpec {
5141

52-
private UnifiedFunctionSpec() {}
53-
54-
/** Single-field relevance function params: (field, query). */
55-
private static final List<String> SINGLE_FIELD_PARAMS = List.of("field", "query");
42+
/** Function name as registered in the operator table (e.g., "match", "multi_match"). */
43+
private final String funcName;
5644

57-
/** Multi-field relevance function params: (fields, query). */
58-
private static final List<String> MULTI_FIELD_PARAMS = List.of("fields", "query");
45+
/**
46+
* Required param names used by {@link org.opensearch.sql.api.parser.NamedArgRewriter} to
47+
* normalize V2 syntax into MAP form.
48+
*/
49+
private final List<String> paramNames;
5950

60-
private static final Map<String, ScalarFunction> REGISTRY =
61-
Map.of(
62-
"match", scalarFunction(SINGLE_FIELD_PARAMS),
63-
"match_phrase", scalarFunction(SINGLE_FIELD_PARAMS),
64-
"match_bool_prefix", scalarFunction(SINGLE_FIELD_PARAMS),
65-
"match_phrase_prefix", scalarFunction(SINGLE_FIELD_PARAMS),
66-
"multi_match", scalarFunction(MULTI_FIELD_PARAMS),
67-
"simple_query_string", scalarFunction(MULTI_FIELD_PARAMS),
68-
"query_string", scalarFunction(MULTI_FIELD_PARAMS));
51+
/** Calcite operator for chaining into the framework config's operator table. */
52+
private final SqlOperator operator;
6953

70-
/** Registers all language-specified functions on the given schema (typically root). */
71-
public static void registerAll(SchemaPlus schema) {
72-
REGISTRY.forEach(schema::add);
54+
/** A group of function specs that can be chained into Calcite's operator table. */
55+
public record Category(List<UnifiedFunctionSpec> specs) {
56+
public SqlOperatorTable operatorTable() {
57+
return SqlOperatorTables.of(specs.stream().map(UnifiedFunctionSpec::getOperator).toList());
58+
}
7359
}
7460

75-
/** Returns the canonical ScalarFunction for a language-specified function, or null. */
76-
public static @Nullable ScalarFunction get(String name) {
77-
return REGISTRY.get(name);
61+
/** Full-text search functions. */
62+
public static final Category RELEVANCE =
63+
new Category(
64+
List.of(
65+
function("match").vararg("field", "query").returnType(BOOLEAN).build(),
66+
function("match_phrase").vararg("field", "query").returnType(BOOLEAN).build(),
67+
function("match_bool_prefix").vararg("field", "query").returnType(BOOLEAN).build(),
68+
function("match_phrase_prefix").vararg("field", "query").returnType(BOOLEAN).build(),
69+
function("multi_match").vararg("fields", "query").returnType(BOOLEAN).build(),
70+
function("simple_query_string").vararg("fields", "query").returnType(BOOLEAN).build(),
71+
function("query_string").vararg("fields", "query").returnType(BOOLEAN).build()));
72+
73+
/** All registered function specs, keyed by function name. */
74+
private static final Map<String, UnifiedFunctionSpec> ALL_SPECS =
75+
Stream.of(RELEVANCE)
76+
.flatMap(c -> c.specs().stream())
77+
.collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s));
78+
79+
/** Looks up a function spec by name across all categories. */
80+
public static UnifiedFunctionSpec of(String name) {
81+
return ALL_SPECS.get(name.toLowerCase());
7882
}
7983

80-
/** Returns true if the name is a language-specified function. */
81-
public static boolean isLanguageFunction(String name) {
82-
return REGISTRY.containsKey(name);
84+
public static Builder function(String name) {
85+
return new Builder(name);
8386
}
8487

85-
/** All registered language function names. */
86-
public static Set<String> names() {
87-
return REGISTRY.keySet();
88-
}
88+
/** Fluent builder for function specs. */
89+
@RequiredArgsConstructor(access = AccessLevel.PRIVATE)
90+
public static class Builder {
91+
private final String funcName;
92+
private List<String> paramNames = List.of();
93+
private SqlReturnTypeInference returnType;
8994

90-
private static ScalarFunction scalarFunction(List<String> paramNames) {
91-
List<FunctionParameter> params =
92-
IntStream.range(0, paramNames.size())
93-
.mapToObj(i -> (FunctionParameter) new AnyParam(i, paramNames.get(i)))
94-
.toList();
95-
return new BooleanScalarFunction(params);
96-
}
95+
public Builder vararg(String... names) {
96+
this.paramNames = List.of(names);
97+
return this;
98+
}
9799

98-
/** A ScalarFunction that returns BOOLEAN with the given parameters. */
99-
private record BooleanScalarFunction(List<FunctionParameter> params) implements ScalarFunction {
100-
@Override
101-
public List<FunctionParameter> getParameters() {
102-
return params;
100+
public Builder returnType(SqlReturnTypeInference type) {
101+
this.returnType = type;
102+
return this;
103103
}
104104

105-
@Override
106-
public RelDataType getReturnType(RelDataTypeFactory typeFactory) {
107-
return typeFactory.createSqlType(SqlTypeName.BOOLEAN);
105+
public UnifiedFunctionSpec build() {
106+
Objects.requireNonNull(returnType, "returnType is required");
107+
return new UnifiedFunctionSpec(
108+
funcName,
109+
paramNames,
110+
new SqlUserDefinedFunction(
111+
new SqlIdentifier(funcName, SqlParserPos.ZERO),
112+
SqlKind.OTHER_FUNCTION,
113+
returnType,
114+
InferTypes.ANY_NULLABLE,
115+
new VariadicOperandMetadata(paramNames),
116+
List::of)); // Pushdown-only: no local implementation
108117
}
109118
}
110119

111-
/** A required function parameter of type ANY. */
112-
private record AnyParam(int ordinal, String name) implements FunctionParameter {
120+
/** Accepts required params + optional trailing params. Carries param names for rewriting. */
121+
private record VariadicOperandMetadata(List<String> paramNames) implements SqlOperandMetadata {
122+
123+
@Override
124+
public List<String> paramNames() {
125+
return paramNames;
126+
}
127+
113128
@Override
114-
public int getOrdinal() {
115-
return ordinal;
129+
public List<RelDataType> paramTypes(RelDataTypeFactory tf) {
130+
return List.of();
116131
}
117132

118133
@Override
119-
public String getName() {
120-
return name;
134+
public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) {
135+
return true; // Bypass: CALCITE-5366 breaks variadic type checking
121136
}
122137

123138
@Override
124-
public boolean isOptional() {
125-
return false;
139+
public SqlOperandCountRange getOperandCountRange() {
140+
return SqlOperandCountRanges.from(paramNames.size());
126141
}
127142

128143
@Override
129-
public RelDataType getType(RelDataTypeFactory typeFactory) {
130-
return typeFactory.createSqlType(SqlTypeName.ANY);
144+
public String getAllowedSignatures(SqlOperator op, String opName) {
145+
return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])";
131146
}
132147
}
133148
}

0 commit comments

Comments
 (0)