Skip to content

Commit 5f80dfc

Browse files
committed
feat: Register relevance search functions in unified query SQL path
Add lightweight SQL operators and a convertlet table that together enable relevance search functions (match, match_phrase, match_bool_prefix, match_phrase_prefix, multi_match, simple_query_string, query_string) in the unified query SQL planning path. The SQL path uses MAP literal syntax for parameters, for example: SELECT * FROM t WHERE "match"(MAP['field', name], MAP['query', 'text']) (match is a reserved SQL keyword, so it has to be double-quoted). Implementation: (1) SqlExtensionFunctions: lightweight operators for SQL parsing and validation, chained with SqlStdOperatorTable via FrameworkConfig; (2) RelevanceSearchConvertletTable: swaps the lightweight operators with PPL operators (PPLBuiltinOperators) during SqlToRelConverter, bypassing the PPL type checker entirely; (3) UnifiedQueryContext: wires the operator table and the convertlet table. The convertlet approach avoids touching the core/ module. The PPL path continues to resolve functions via PPLFuncImpTable independently. Natural syntax with named parameters (field => name, query => 'text') is blocked by CALCITE-6245: checkRollUp incorrectly resolves parameter name identifiers as column references for scalar functions in WHERE. Signed-off-by: Chen Dai <daichen@amazon.com>
1 parent 8a7524c commit 5f80dfc

5 files changed

Lines changed: 375 additions & 0 deletions

File tree

api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,14 @@
2323
import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
2424
import org.apache.calcite.schema.Schema;
2525
import org.apache.calcite.schema.SchemaPlus;
26+
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
2627
import org.apache.calcite.sql.parser.SqlParser;
28+
import org.apache.calcite.sql.util.SqlOperatorTables;
2729
import org.apache.calcite.tools.FrameworkConfig;
2830
import org.apache.calcite.tools.Frameworks;
2931
import org.apache.calcite.tools.Programs;
32+
import org.opensearch.sql.api.function.RelevanceSearchConvertletTable;
33+
import org.opensearch.sql.api.function.SqlExtensionFunctions;
3034
import org.opensearch.sql.api.parser.CalciteSqlQueryParser;
3135
import org.opensearch.sql.api.parser.PPLQueryParser;
3236
import org.opensearch.sql.api.parser.UnifiedQueryParser;
@@ -243,6 +247,10 @@ private FrameworkConfig buildFrameworkConfig() {
243247
SchemaPlus defaultSchema = findSchemaByPath(rootSchema, defaultNamespace);
244248
return Frameworks.newConfigBuilder()
245249
.parserConfig(buildParserConfig())
250+
.convertletTable(new RelevanceSearchConvertletTable())
251+
.operatorTable(
252+
SqlOperatorTables.chain(
253+
SqlStdOperatorTable.instance(), SqlExtensionFunctions.OPERATOR_TABLE))
246254
.defaultSchema(defaultSchema)
247255
.traitDefs((List<RelTraitDef>) null)
248256
.programs(Programs.calc(DefaultRelMetadataProvider.INSTANCE))
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.function;
7+
8+
import java.util.ArrayList;
9+
import java.util.List;
10+
import org.apache.calcite.rex.RexBuilder;
11+
import org.apache.calcite.rex.RexNode;
12+
import org.apache.calcite.sql.SqlCall;
13+
import org.apache.calcite.sql.SqlOperator;
14+
import org.apache.calcite.sql.type.SqlTypeName;
15+
import org.apache.calcite.sql2rel.ReflectiveConvertletTable;
16+
import org.apache.calcite.sql2rel.SqlRexConvertlet;
17+
import org.apache.calcite.sql2rel.StandardConvertletTable;
18+
import org.opensearch.sql.expression.function.PPLBuiltinOperators;
19+
20+
/**
21+
* Convertlet table that intercepts relevance search function calls during SQL-to-RelNode conversion
22+
* and rewrites them as {@link org.apache.calcite.rex.RexCall} nodes using the PPL operators from
23+
* {@link PPLBuiltinOperators}.
24+
*
25+
* <p>This allows the SQL path to use lightweight {@link SqlExtensionFunctions} operators for
26+
* parsing and validation, while producing RexCall nodes with PPL operators that {@link
27+
* org.opensearch.sql.opensearch.request.PredicateAnalyzer} recognizes for pushdown — without
28+
* touching the PPL type checker in core/.
29+
*/
30+
public class RelevanceSearchConvertletTable extends ReflectiveConvertletTable {
31+
32+
private final StandardConvertletTable standard = StandardConvertletTable.INSTANCE;
33+
34+
public RelevanceSearchConvertletTable() {
35+
registerOp(SqlExtensionFunctions.MATCH, swapOperator(PPLBuiltinOperators.MATCH));
36+
registerOp(SqlExtensionFunctions.MATCH_PHRASE, swapOperator(PPLBuiltinOperators.MATCH_PHRASE));
37+
registerOp(
38+
SqlExtensionFunctions.MATCH_BOOL_PREFIX,
39+
swapOperator(PPLBuiltinOperators.MATCH_BOOL_PREFIX));
40+
registerOp(
41+
SqlExtensionFunctions.MATCH_PHRASE_PREFIX,
42+
swapOperator(PPLBuiltinOperators.MATCH_PHRASE_PREFIX));
43+
registerOp(SqlExtensionFunctions.MULTI_MATCH, swapOperator(PPLBuiltinOperators.MULTI_MATCH));
44+
registerOp(
45+
SqlExtensionFunctions.SIMPLE_QUERY_STRING,
46+
swapOperator(PPLBuiltinOperators.SIMPLE_QUERY_STRING));
47+
registerOp(SqlExtensionFunctions.QUERY_STRING, swapOperator(PPLBuiltinOperators.QUERY_STRING));
48+
}
49+
50+
@Override
51+
public SqlRexConvertlet get(SqlCall call) {
52+
SqlRexConvertlet convertlet = super.get(call);
53+
return convertlet != null ? convertlet : standard.get(call);
54+
}
55+
56+
/**
57+
* Creates a convertlet that converts operands using standard conversion, then wraps them in a
58+
* {@link org.apache.calcite.rex.RexCall} with the given PPL operator. This swaps the lightweight
59+
* SQL operator (used for validation) with the PPL operator (used for pushdown), bypassing the PPL
60+
* type checker entirely since {@link RexBuilder#makeCall} with an explicit return type skips
61+
* operand type checking.
62+
*/
63+
private static SqlRexConvertlet swapOperator(SqlOperator pplOperator) {
64+
return (cx, call) -> {
65+
RexBuilder rexBuilder = cx.getRexBuilder();
66+
List<RexNode> operands = new ArrayList<>();
67+
for (int i = 0; i < call.operandCount(); i++) {
68+
operands.add(cx.convertExpression(call.operand(i)));
69+
}
70+
return rexBuilder.makeCall(
71+
rexBuilder.getTypeFactory().createSqlType(SqlTypeName.BOOLEAN), pplOperator, operands);
72+
};
73+
}
74+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api.function;
7+
8+
import org.apache.calcite.sql.SqlFunction;
9+
import org.apache.calcite.sql.SqlFunctionCategory;
10+
import org.apache.calcite.sql.SqlKind;
11+
import org.apache.calcite.sql.SqlOperatorTable;
12+
import org.apache.calcite.sql.type.OperandTypes;
13+
import org.apache.calcite.sql.type.ReturnTypes;
14+
import org.apache.calcite.sql.type.SqlOperandCountRanges;
15+
import org.apache.calcite.sql.util.SqlOperatorTables;
16+
17+
/**
18+
* Extension operator table for non-standard functions in the unified SQL planning path. Chained
19+
* with {@link org.apache.calcite.sql.fun.SqlStdOperatorTable} via {@link SqlOperatorTables#chain}
20+
* in {@link org.apache.calcite.tools.FrameworkConfig} — the standard Calcite mechanism for
21+
* extending SQL with custom functions.
22+
*
23+
* <p>These are lightweight logical operators for SQL parsing and validation only. The {@link
24+
* RelevanceSearchConvertletTable} swaps them with the PPL operators from {@link
25+
* org.opensearch.sql.expression.function.PPLBuiltinOperators} during SQL-to-RelNode conversion,
26+
* bypassing the PPL type checker entirely.
27+
*/
28+
public class SqlExtensionFunctions {
29+
30+
private SqlExtensionFunctions() {}
31+
32+
// -- Single-field relevance functions: func(MAP['field', col], MAP['query', 'text'], ...) --
33+
34+
public static final SqlFunction MATCH = relevanceFunction("match", 2);
35+
public static final SqlFunction MATCH_PHRASE = relevanceFunction("match_phrase", 2);
36+
public static final SqlFunction MATCH_BOOL_PREFIX = relevanceFunction("match_bool_prefix", 2);
37+
public static final SqlFunction MATCH_PHRASE_PREFIX = relevanceFunction("match_phrase_prefix", 2);
38+
39+
// -- Multi-field relevance functions: func(MAP['query', 'text'], ...) --
40+
41+
public static final SqlFunction MULTI_MATCH = relevanceFunction("multi_match", 1);
42+
public static final SqlFunction SIMPLE_QUERY_STRING = relevanceFunction("simple_query_string", 1);
43+
public static final SqlFunction QUERY_STRING = relevanceFunction("query_string", 1);
44+
45+
/** All extension functions available to the unified SQL planner. */
46+
public static final SqlOperatorTable OPERATOR_TABLE =
47+
SqlOperatorTables.of(
48+
MATCH,
49+
MATCH_PHRASE,
50+
MATCH_BOOL_PREFIX,
51+
MATCH_PHRASE_PREFIX,
52+
MULTI_MATCH,
53+
SIMPLE_QUERY_STRING,
54+
QUERY_STRING);
55+
56+
/**
57+
* Creates a relevance search function that accepts MAP-encoded arguments. Validates only the
58+
* minimum operand count — actual type checking is deferred to pushdown.
59+
*/
60+
private static SqlFunction relevanceFunction(String name, int minArgs) {
61+
return new SqlFunction(
62+
name,
63+
SqlKind.OTHER_FUNCTION,
64+
ReturnTypes.BOOLEAN,
65+
null,
66+
OperandTypes.variadic(SqlOperandCountRanges.from(minArgs)),
67+
SqlFunctionCategory.USER_DEFINED_FUNCTION);
68+
}
69+
}
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api;
7+
8+
import org.junit.Test;
9+
import org.opensearch.sql.executor.QueryType;
10+
11+
/**
 * Tests for relevance search functions in SQL planning path.
 *
 * <p>Each test hands a SQL query to {@code givenQuery} and compares the outcome with an expected
 * logical plan string through {@code assertPlan}. Function arguments are written as MAP literals.
 * The backslash that ends each query text block is the Java text-block line-continuation escape,
 * so the query string carries no trailing newline.
 */
12+
public class UnifiedRelevanceSearchSqlTest extends UnifiedQueryTestBase {
13+
14+
// Returns QueryType.SQL; presumably the base class uses this to pick the SQL parser — confirm.
@Override
15+
protected QueryType queryType() {
16+
return QueryType.SQL;
17+
}
18+
19+
// match with a field MAP and a query MAP; the function name is double-quoted in the query.
@Test
20+
public void testMatch() {
21+
// 'match' is a reserved SQL keyword (MATCH_RECOGNIZE), so it must be quoted
22+
givenQuery(
23+
"""
24+
SELECT * FROM catalog.employees
25+
WHERE "match"(MAP['field', name], MAP['query', 'John'])\
26+
""")
27+
.assertPlan(
28+
"""
29+
LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3])
30+
LogicalFilter(condition=[match(MAP('field', $1), MAP('query', 'John'))])
31+
LogicalTableScan(table=[[catalog, employees]])
32+
""");
33+
}
34+
35+
// match_phrase with a field MAP and a multi-word query MAP; the name is used unquoted.
@Test
36+
public void testMatchPhrase() {
37+
givenQuery(
38+
"""
39+
SELECT * FROM catalog.employees
40+
WHERE match_phrase(MAP['field', name], MAP['query', 'John Doe'])\
41+
""")
42+
.assertPlan(
43+
"""
44+
LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3])
45+
LogicalFilter(condition=[match_phrase(MAP('field', $1), MAP('query', 'John Doe'))])
46+
LogicalTableScan(table=[[catalog, employees]])
47+
""");
48+
}
49+
50+
// match_bool_prefix with a field MAP and a query MAP.
@Test
51+
public void testMatchBoolPrefix() {
52+
givenQuery(
53+
"""
54+
SELECT * FROM catalog.employees
55+
WHERE match_bool_prefix(MAP['field', name], MAP['query', 'John'])\
56+
""")
57+
.assertPlan(
58+
"""
59+
LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3])
60+
LogicalFilter(condition=[match_bool_prefix(MAP('field', $1), MAP('query', 'John'))])
61+
LogicalTableScan(table=[[catalog, employees]])
62+
""");
63+
}
64+
65+
// match_phrase_prefix with a field MAP and a query MAP.
@Test
66+
public void testMatchPhrasePrefix() {
67+
givenQuery(
68+
"""
69+
SELECT * FROM catalog.employees
70+
WHERE match_phrase_prefix(MAP['field', name], MAP['query', 'John'])\
71+
""")
72+
.assertPlan(
73+
"""
74+
LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3])
75+
LogicalFilter(condition=[match_phrase_prefix(MAP('field', $1), MAP('query', 'John'))])
76+
LogicalTableScan(table=[[catalog, employees]])
77+
""");
78+
}
79+
80+
// A third MAP argument ('boost') after field and query is expected to show up in the plan as an
// additional operand of the same call.
@Test
81+
public void testMatchWithOptionalParams() {
82+
givenQuery(
83+
"""
84+
SELECT * FROM catalog.employees
85+
WHERE "match"(MAP['field', name], MAP['query', 'John'], MAP['boost', '1.5'])\
86+
""")
87+
.assertPlan(
88+
"""
89+
LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3])
90+
LogicalFilter(condition=[match(MAP('field', $1), MAP('query', 'John'), MAP('boost', '1.5'))])
91+
LogicalTableScan(table=[[catalog, employees]])
92+
""");
93+
}
94+
95+
// multi_match with a nested MAP of field name to weight, followed by the query MAP.
// NOTE(review): the expected plan shows 'name' cast to CHAR(10), presumably so that both keys of
// the inner MAP share one type with 'department' — confirm.
@Test
96+
public void testMultiMatch() {
97+
givenQuery(
98+
"""
99+
SELECT * FROM catalog.employees
100+
WHERE multi_match(\
101+
MAP['fields', MAP['name', 1.0, 'department', 2.0]], MAP['query', 'John'])\
102+
""")
103+
.assertPlan(
104+
"""
105+
LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3])
106+
LogicalFilter(condition=[multi_match(MAP('fields', MAP(CAST('name'):CHAR(10) NOT NULL, 1.0:DECIMAL(2, 1), 'department', 2.0:DECIMAL(2, 1))), MAP('query', 'John'))])
107+
LogicalTableScan(table=[[catalog, employees]])
108+
""");
109+
}
110+
111+
// simple_query_string with a single-entry fields MAP and the query MAP.
@Test
112+
public void testSimpleQueryString() {
113+
givenQuery(
114+
"""
115+
SELECT * FROM catalog.employees
116+
WHERE simple_query_string(\
117+
MAP['fields', MAP['name', 1.0]], MAP['query', 'John'])\
118+
""")
119+
.assertPlan(
120+
"""
121+
LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3])
122+
LogicalFilter(condition=[simple_query_string(MAP('fields', MAP('name', 1.0:DECIMAL(2, 1))), MAP('query', 'John'))])
123+
LogicalTableScan(table=[[catalog, employees]])
124+
""");
125+
}
126+
127+
// query_string with a single-entry fields MAP and the query MAP.
@Test
128+
public void testQueryString() {
129+
givenQuery(
130+
"""
131+
SELECT * FROM catalog.employees
132+
WHERE query_string(\
133+
MAP['fields', MAP['name', 1.0]], MAP['query', 'John'])\
134+
""")
135+
.assertPlan(
136+
"""
137+
LogicalProject(id=[$0], name=[$1], age=[$2], department=[$3])
138+
LogicalFilter(condition=[query_string(MAP('fields', MAP('name', 1.0:DECIMAL(2, 1))), MAP('query', 'John'))])
139+
LogicalTableScan(table=[[catalog, employees]])
140+
""");
141+
}
142+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.api;
7+
8+
import org.junit.Test;
9+
10+
/**
 * Tests for relevance search functions in PPL planning path.
 *
 * <p>Each test hands a PPL query to {@code givenQuery} and compares the outcome with an expected
 * logical plan string through {@code assertPlan}. The queries pass the field(s) and the query text
 * positionally; the expected plans show them as {@code MAP('field', ...)} / {@code MAP('fields',
 * ...)} and {@code MAP('query', ...)} operands.
 *
 * <p>NOTE(review): this class does not override {@code queryType()}, so the base class default
 * presumably selects PPL — confirm.
 */
11+
public class UnifiedRelevanceSearchTest extends UnifiedQueryTestBase {
12+
13+
// match(field, text): the plan is expected to reference the column as $1 and type the text VARCHAR.
@Test
14+
public void testMatch() {
15+
givenQuery("source=catalog.employees | where match(name, 'John')")
16+
.assertPlan(
17+
"""
18+
LogicalFilter(condition=[match(MAP('field', $1), MAP('query', 'John':VARCHAR))])
19+
LogicalTableScan(table=[[catalog, employees]])
20+
""");
21+
}
22+
23+
// match_phrase(field, text) with a multi-word query.
@Test
24+
public void testMatchPhrase() {
25+
givenQuery("source=catalog.employees | where match_phrase(name, 'John Doe')")
26+
.assertPlan(
27+
"""
28+
LogicalFilter(condition=[match_phrase(MAP('field', $1), MAP('query', 'John Doe':VARCHAR))])
29+
LogicalTableScan(table=[[catalog, employees]])
30+
""");
31+
}
32+
33+
// match_bool_prefix(field, text).
@Test
34+
public void testMatchBoolPrefix() {
35+
givenQuery("source=catalog.employees | where match_bool_prefix(name, 'John')")
36+
.assertPlan(
37+
"""
38+
LogicalFilter(condition=[match_bool_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))])
39+
LogicalTableScan(table=[[catalog, employees]])
40+
""");
41+
}
42+
43+
// match_phrase_prefix(field, text).
@Test
44+
public void testMatchPhrasePrefix() {
45+
givenQuery("source=catalog.employees | where match_phrase_prefix(name, 'John')")
46+
.assertPlan(
47+
"""
48+
LogicalFilter(condition=[match_phrase_prefix(MAP('field', $1), MAP('query', 'John':VARCHAR))])
49+
LogicalTableScan(table=[[catalog, employees]])
50+
""");
51+
}
52+
53+
// multi_match over a list of two field names. The expected plan pairs each name with
// 1.0E0:DOUBLE — presumably a default weight, since the query gives none; confirm.
@Test
54+
public void testMultiMatch() {
55+
givenQuery("source=catalog.employees | where multi_match(['name', 'department'], 'John')")
56+
.assertPlan(
57+
"""
58+
LogicalFilter(condition=[multi_match(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE, 'department':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))])
59+
LogicalTableScan(table=[[catalog, employees]])
60+
""");
61+
}
62+
63+
// simple_query_string over a single-element field list.
@Test
64+
public void testSimpleQueryString() {
65+
givenQuery("source=catalog.employees | where simple_query_string(['name'], 'John')")
66+
.assertPlan(
67+
"""
68+
LogicalFilter(condition=[simple_query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))])
69+
LogicalTableScan(table=[[catalog, employees]])
70+
""");
71+
}
72+
73+
// query_string over a single-element field list.
@Test
74+
public void testQueryString() {
75+
givenQuery("source=catalog.employees | where query_string(['name'], 'John')")
76+
.assertPlan(
77+
"""
78+
LogicalFilter(condition=[query_string(MAP('fields', MAP('name':VARCHAR, 1.0E0:DOUBLE)), MAP('query', 'John':VARCHAR))])
79+
LogicalTableScan(table=[[catalog, employees]])
80+
""");
81+
}
82+
}

0 commit comments

Comments
 (0)