|
5 | 5 |
|
6 | 6 | package org.opensearch.sql.api.spec; |
7 | 7 |
|
| 8 | +import static org.apache.calcite.sql.type.ReturnTypes.BOOLEAN; |
| 9 | + |
8 | 10 | import java.util.List; |
9 | 11 | import java.util.Map; |
10 | | -import java.util.Set; |
11 | | -import java.util.stream.IntStream; |
| 12 | +import java.util.Objects; |
| 13 | +import java.util.stream.Collectors; |
| 14 | +import java.util.stream.Stream; |
| 15 | +import lombok.AccessLevel; |
| 16 | +import lombok.Getter; |
| 17 | +import lombok.RequiredArgsConstructor; |
12 | 18 | import org.apache.calcite.rel.type.RelDataType; |
13 | 19 | import org.apache.calcite.rel.type.RelDataTypeFactory; |
14 | | -import org.apache.calcite.schema.FunctionParameter; |
15 | | -import org.apache.calcite.schema.ScalarFunction; |
16 | | -import org.apache.calcite.schema.SchemaPlus; |
17 | | -import org.apache.calcite.sql.type.SqlTypeName; |
18 | | -import org.checkerframework.checker.nullness.qual.Nullable; |
| 20 | +import org.apache.calcite.sql.SqlCallBinding; |
| 21 | +import org.apache.calcite.sql.SqlIdentifier; |
| 22 | +import org.apache.calcite.sql.SqlKind; |
| 23 | +import org.apache.calcite.sql.SqlOperandCountRange; |
| 24 | +import org.apache.calcite.sql.SqlOperator; |
| 25 | +import org.apache.calcite.sql.SqlOperatorTable; |
| 26 | +import org.apache.calcite.sql.parser.SqlParserPos; |
| 27 | +import org.apache.calcite.sql.type.InferTypes; |
| 28 | +import org.apache.calcite.sql.type.SqlOperandCountRanges; |
| 29 | +import org.apache.calcite.sql.type.SqlOperandMetadata; |
| 30 | +import org.apache.calcite.sql.type.SqlReturnTypeInference; |
| 31 | +import org.apache.calcite.sql.util.SqlOperatorTables; |
| 32 | +import org.apache.calcite.sql.validate.SqlUserDefinedFunction; |
19 | 33 |
|
20 | 34 | /** |
21 | | - * Central registry of language-specified function signatures (Unified Language Specification |
22 | | - * layer). Each entry maps a function name to a canonical {@link ScalarFunction} with named required |
23 | | - * parameters of type {@link SqlTypeName#ANY}. |
24 | | - * |
25 | | - * <p>This class defines <em>what functions exist</em> and their signatures. Function |
26 | | - * <em>implementations</em> live in the Unified Execution Runtime (UER) layer — see {@link |
27 | | - * org.opensearch.sql.api.function.UnifiedFunction} and {@link |
28 | | - * org.opensearch.sql.api.function.UnifiedFunctionRepository}. For data-source-specific functions |
29 | | - * (e.g., relevance search), execution is handled by adapter pushdown rules rather than UER. |
30 | | - * |
31 | | - * <p>Named parameters enable SQL named-argument syntax ({@code match(field => col, query => |
32 | | - * 'text')}) via Calcite's {@code ARGUMENT_ASSIGNMENT} operator. With fixed required parameters (no |
33 | | - * optional params), <a href="https://issues.apache.org/jira/browse/CALCITE-5366">CALCITE-5366</a> |
34 | | - * is avoided entirely. |
35 | | - * |
36 | | - * <p>Functions are registered globally on the root schema via {@link #registerAll(SchemaPlus)}, |
37 | | - * following the same pattern as Flink's {@code FlinkSqlOperatorTable} — engine-level primitives |
38 | | - * available regardless of catalog. Pushdown rules enforce data-source capability at optimization |
39 | | - * time. |
40 | | - * |
41 | | - * @see org.opensearch.sql.api.function.UnifiedFunction |
42 | | - * @see org.opensearch.sql.api.function.UnifiedFunctionRepository |
| 35 | + * Unified function specification. Each spec carries its name, param names, and Calcite {@link |
| 36 | + * SqlOperator}, built via a fluent builder. |
43 | 37 | */ |
44 | | -// TODO: UnifiedFunctionRepository should resolve implementations for functions defined here, |
45 | | -// rather than independently discovering from PPLBuiltinOperators. The spec is the source of |
46 | | -// truth for what functions exist; UER provides how they execute. Decide whether to late-bind |
47 | | -// UER implementations (ImplementableFunction) to spec-defined signatures for engine-independent |
48 | | -// functions (e.g., upper, lower). Currently only data-source-specific functions (pushdown-only) |
49 | | -// are registered here. |
| 38 | +@Getter |
| 39 | +@RequiredArgsConstructor(access = AccessLevel.PRIVATE) |
50 | 40 | public final class UnifiedFunctionSpec { |
51 | 41 |
|
52 | | - private UnifiedFunctionSpec() {} |
53 | | - |
54 | | - /** Single-field relevance function params: (field, query). */ |
55 | | - private static final List<String> SINGLE_FIELD_PARAMS = List.of("field", "query"); |
| 42 | + /** Function name as registered in the operator table (e.g., "match", "multi_match"). */ |
| 43 | + private final String funcName; |
56 | 44 |
|
57 | | - /** Multi-field relevance function params: (fields, query). */ |
58 | | - private static final List<String> MULTI_FIELD_PARAMS = List.of("fields", "query"); |
| 45 | + /** |
| 46 | + * Required param names used by {@link org.opensearch.sql.api.parser.NamedArgRewriter} to |
| 47 | + * normalize V2 syntax into MAP form. |
| 48 | + */ |
| 49 | + private final List<String> paramNames; |
59 | 50 |
|
60 | | - private static final Map<String, ScalarFunction> REGISTRY = |
61 | | - Map.of( |
62 | | - "match", scalarFunction(SINGLE_FIELD_PARAMS), |
63 | | - "match_phrase", scalarFunction(SINGLE_FIELD_PARAMS), |
64 | | - "match_bool_prefix", scalarFunction(SINGLE_FIELD_PARAMS), |
65 | | - "match_phrase_prefix", scalarFunction(SINGLE_FIELD_PARAMS), |
66 | | - "multi_match", scalarFunction(MULTI_FIELD_PARAMS), |
67 | | - "simple_query_string", scalarFunction(MULTI_FIELD_PARAMS), |
68 | | - "query_string", scalarFunction(MULTI_FIELD_PARAMS)); |
| 51 | + /** Calcite operator for chaining into the framework config's operator table. */ |
| 52 | + private final SqlOperator operator; |
69 | 53 |
|
70 | | - /** Registers all language-specified functions on the given schema (typically root). */ |
71 | | - public static void registerAll(SchemaPlus schema) { |
72 | | - REGISTRY.forEach(schema::add); |
| 54 | + /** A group of function specs that can be chained into Calcite's operator table. */ |
| 55 | + public record Category(List<UnifiedFunctionSpec> specs) { |
| 56 | + public SqlOperatorTable operatorTable() { |
| 57 | + return SqlOperatorTables.of(specs.stream().map(UnifiedFunctionSpec::getOperator).toList()); |
| 58 | + } |
73 | 59 | } |
74 | 60 |
|
75 | | - /** Returns the canonical ScalarFunction for a language-specified function, or null. */ |
76 | | - public static @Nullable ScalarFunction get(String name) { |
77 | | - return REGISTRY.get(name); |
| 61 | + /** Full-text search functions. */ |
| 62 | + public static final Category RELEVANCE = |
| 63 | + new Category( |
| 64 | + List.of( |
| 65 | + function("match").vararg("field", "query").returnType(BOOLEAN).build(), |
| 66 | + function("match_phrase").vararg("field", "query").returnType(BOOLEAN).build(), |
| 67 | + function("match_bool_prefix").vararg("field", "query").returnType(BOOLEAN).build(), |
| 68 | + function("match_phrase_prefix").vararg("field", "query").returnType(BOOLEAN).build(), |
| 69 | + function("multi_match").vararg("fields", "query").returnType(BOOLEAN).build(), |
| 70 | + function("simple_query_string").vararg("fields", "query").returnType(BOOLEAN).build(), |
| 71 | + function("query_string").vararg("fields", "query").returnType(BOOLEAN).build())); |
| 72 | + |
| 73 | + /** All registered function specs, keyed by function name. */ |
| 74 | + private static final Map<String, UnifiedFunctionSpec> ALL_SPECS = |
| 75 | + Stream.of(RELEVANCE) |
| 76 | + .flatMap(c -> c.specs().stream()) |
| 77 | + .collect(Collectors.toMap(UnifiedFunctionSpec::getFuncName, s -> s)); |
| 78 | + |
| 79 | + /** Looks up a function spec by name across all categories. */ |
| 80 | + public static UnifiedFunctionSpec of(String name) { |
| 81 | + return ALL_SPECS.get(name.toLowerCase()); |
78 | 82 | } |
79 | 83 |
|
80 | | - /** Returns true if the name is a language-specified function. */ |
81 | | - public static boolean isLanguageFunction(String name) { |
82 | | - return REGISTRY.containsKey(name); |
| 84 | + public static Builder function(String name) { |
| 85 | + return new Builder(name); |
83 | 86 | } |
84 | 87 |
|
85 | | - /** All registered language function names. */ |
86 | | - public static Set<String> names() { |
87 | | - return REGISTRY.keySet(); |
88 | | - } |
| 88 | + /** Fluent builder for function specs. */ |
| 89 | + @RequiredArgsConstructor(access = AccessLevel.PRIVATE) |
| 90 | + public static class Builder { |
| 91 | + private final String funcName; |
| 92 | + private List<String> paramNames = List.of(); |
| 93 | + private SqlReturnTypeInference returnType; |
89 | 94 |
|
90 | | - private static ScalarFunction scalarFunction(List<String> paramNames) { |
91 | | - List<FunctionParameter> params = |
92 | | - IntStream.range(0, paramNames.size()) |
93 | | - .mapToObj(i -> (FunctionParameter) new AnyParam(i, paramNames.get(i))) |
94 | | - .toList(); |
95 | | - return new BooleanScalarFunction(params); |
96 | | - } |
| 95 | + public Builder vararg(String... names) { |
| 96 | + this.paramNames = List.of(names); |
| 97 | + return this; |
| 98 | + } |
97 | 99 |
|
98 | | - /** A ScalarFunction that returns BOOLEAN with the given parameters. */ |
99 | | - private record BooleanScalarFunction(List<FunctionParameter> params) implements ScalarFunction { |
100 | | - @Override |
101 | | - public List<FunctionParameter> getParameters() { |
102 | | - return params; |
| 100 | + public Builder returnType(SqlReturnTypeInference type) { |
| 101 | + this.returnType = type; |
| 102 | + return this; |
103 | 103 | } |
104 | 104 |
|
105 | | - @Override |
106 | | - public RelDataType getReturnType(RelDataTypeFactory typeFactory) { |
107 | | - return typeFactory.createSqlType(SqlTypeName.BOOLEAN); |
| 105 | + public UnifiedFunctionSpec build() { |
| 106 | + Objects.requireNonNull(returnType, "returnType is required"); |
| 107 | + return new UnifiedFunctionSpec( |
| 108 | + funcName, |
| 109 | + paramNames, |
| 110 | + new SqlUserDefinedFunction( |
| 111 | + new SqlIdentifier(funcName, SqlParserPos.ZERO), |
| 112 | + SqlKind.OTHER_FUNCTION, |
| 113 | + returnType, |
| 114 | + InferTypes.ANY_NULLABLE, |
| 115 | + new VariadicOperandMetadata(paramNames), |
| 116 | + List::of)); // Pushdown-only: no local implementation |
108 | 117 | } |
109 | 118 | } |
110 | 119 |
|
111 | | - /** A required function parameter of type ANY. */ |
112 | | - private record AnyParam(int ordinal, String name) implements FunctionParameter { |
| 120 | + /** Accepts required params + optional trailing params. Carries param names for rewriting. */ |
| 121 | + private record VariadicOperandMetadata(List<String> paramNames) implements SqlOperandMetadata { |
| 122 | + |
| 123 | + @Override |
| 124 | + public List<String> paramNames() { |
| 125 | + return paramNames; |
| 126 | + } |
| 127 | + |
113 | 128 | @Override |
114 | | - public int getOrdinal() { |
115 | | - return ordinal; |
| 129 | + public List<RelDataType> paramTypes(RelDataTypeFactory tf) { |
| 130 | + return List.of(); |
116 | 131 | } |
117 | 132 |
|
118 | 133 | @Override |
119 | | - public String getName() { |
120 | | - return name; |
| 134 | + public boolean checkOperandTypes(SqlCallBinding binding, boolean throwOnFailure) { |
| 135 | + return true; // Bypass: CALCITE-5366 breaks variadic type checking |
121 | 136 | } |
122 | 137 |
|
123 | 138 | @Override |
124 | | - public boolean isOptional() { |
125 | | - return false; |
| 139 | + public SqlOperandCountRange getOperandCountRange() { |
| 140 | + return SqlOperandCountRanges.from(paramNames.size()); |
126 | 141 | } |
127 | 142 |
|
128 | 143 | @Override |
129 | | - public RelDataType getType(RelDataTypeFactory typeFactory) { |
130 | | - return typeFactory.createSqlType(SqlTypeName.ANY); |
| 144 | + public String getAllowedSignatures(SqlOperator op, String opName) { |
| 145 | + return opName + "(" + String.join(", ", paramNames) + "[, option=value ...])"; |
131 | 146 | } |
132 | 147 | } |
133 | 148 | } |
0 commit comments