Skip to content

Commit a768d30

Browse files
New function LISTAGG (#8689)
* Adding an implementation of the new LISTAGG function * Add README * Code formatting errors have been fixed. Adjustments have been made to the README. Fixed a bug with the <listagg overflow clause> behavior, now it is silently ignored. The dsqlMatch function has been redesigned. Redesigned behavior with DISTINCT. Multiple elements are now allowed in the ORDER BY. I also added influences on the sorting direction. * dyemanov's comments have been addressed.
1 parent 84b5149 commit a768d30

File tree

9 files changed

+463
-20
lines changed

9 files changed

+463
-20
lines changed

doc/sql.extensions/README.listagg

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
SQL Language Extension: LISTAGG
2+
3+
Function:
4+
The current implementation has an aggregate function LIST which concatenates multiple row
5+
fields into a blob. The SQL standard has a similar function called LISTAGG. The major
6+
difference is that LISTAGG also supports ordered concatenation.
7+
8+
Authors:
9+
Chudaykin Alex <chudaykinalex@gmail.com>
10+
11+
Format:
12+
<listagg set function> ::=
13+
LISTAGG <left paren> [ <set quantifier> ] <character value expression> <comma> <listagg separator> [ <listagg overflow clause> ] <right paren> [ <within group specification> ]
14+
15+
<listagg separator> ::=
16+
<character string literal>
17+
18+
<listagg overflow clause> ::=
19+
ON OVERFLOW <overflow behavior>
20+
21+
<overflow behavior> ::=
22+
ERROR | TRUNCATE [ <listagg truncation filler> ] <listagg count indication>
23+
24+
<listagg truncation filler> ::=
25+
<character string literal>
26+
27+
<listagg count indication> ::=
28+
WITH COUNT | WITHOUT COUNT
29+
30+
<within group specification> ::=
31+
WITHIN GROUP <left paren> ORDER BY <sort specification list> <right paren>
32+
33+
Syntax Rules:
34+
The legacy LIST syntax is preserved for backward compatibility; LISTAGG is added to cover the
35+
standard features.
36+
37+
There is a <listagg overflow clause> rule in the standard, which defines what happens (an error or
38+
truncation) when the output value overflows. Since the LIST function always returns a BLOB, it was decided
39+
that this rule would be meaningless. So the OVERFLOW clause is syntactically supported but
40+
silently ignored if specified.
41+
42+
Examples:
43+
CREATE TABLE TEST_T
44+
(COL1 INT, COL2 VARCHAR(2), COL3 VARCHAR(2), COL4 VARCHAR(2), COL5 BOOLEAN, COL6 VARCHAR(2)
45+
CHARACTER SET WIN1251);
46+
COMMIT;
47+
INSERT INTO TEST_T values(1, 'A', 'A', 'J', false, 'П');
48+
INSERT INTO TEST_T values(2, 'B', 'B', 'I', false, 'Д');
49+
INSERT INTO TEST_T values(3, 'C', 'A', 'L', true, 'Ж');
50+
INSERT INTO TEST_T values(4, 'D', 'B', 'K', true, 'Й');
51+
COMMIT;
52+
53+
SELECT LISTAGG (ALL COL4, ':') FROM TEST_T;
54+
=======
55+
J:I:L:K
56+
57+
SELECT LISTAGG (DISTINCT COL4, ':') FROM TEST_T;
58+
========
59+
I:J:K:L
60+
61+
SELECT LISTAGG (DISTINCT COL3, ':') FROM TEST_T;
62+
====
63+
A:B
64+
65+
SELECT LISTAGG (DISTINCT COL3, ':') WITHIN GROUP (ORDER BY COL3 ASCENDING) FROM TEST_T;
66+
====
67+
A:B
68+
69+
SELECT LISTAGG (DISTINCT COL3, ':') WITHIN GROUP (ORDER BY COL3 DESCENDING) FROM TEST_T;
70+
====
71+
B:A
72+
73+
SELECT LISTAGG (DISTINCT COL3, ':') WITHIN GROUP (ORDER BY COL3 DESCENDING, COL4, COL5) FROM TEST_T;
74+
====
75+
B:A
76+
77+
SELECT LISTAGG (DISTINCT COL3, ':') WITHIN GROUP (ORDER BY COL4, COL3 DESCENDING, COL5) FROM TEST_T;
78+
====
79+
A:B
80+
81+
SELECT LISTAGG (DISTINCT COL3, ':') WITHIN GROUP (ORDER BY COL2) FROM TEST_T;
82+
====
83+
A:B
84+
85+
SELECT LISTAGG (DISTINCT COL3, ':') WITHIN GROUP (ORDER BY COL2 DESCENDING) FROM TEST_T;
86+
====
87+
A:B
88+
89+
SELECT LISTAGG (COL2, ':') WITHIN GROUP (ORDER BY COL2 DESCENDING) FROM TEST_T;
90+
=======
91+
D:C:B:A
92+
93+
SELECT LISTAGG (COL4, ':') WITHIN GROUP (ORDER BY COL3 DESC) FROM TEST_T;
94+
=======
95+
I:K:J:L
96+
97+
SELECT LISTAGG (COL3, ':') WITHIN GROUP (ORDER BY COL5 ASCENDING) FROM TEST_T;
98+
=======
99+
A:B:A:B
100+
101+
SELECT LISTAGG (COL4, ':') WITHIN GROUP (ORDER BY COL3 ASC) FROM TEST_T;
102+
=======
103+
J:L:I:K
104+
105+
SELECT LISTAGG (ALL COL2) WITHIN GROUP (ORDER BY COL4) FROM TEST_T;
106+
=======
107+
B,A,D,C
108+
109+
SELECT LISTAGG (COL2, ':') WITHIN GROUP (ORDER BY COL3 DESC, COL4 ASC) FROM TEST_T;
110+
=======
111+
B:D:A:C
112+
113+
SELECT LISTAGG (COL2, ':') WITHIN GROUP (ORDER BY COL3 DESC, COL4 DESC) FROM TEST_T;
114+
=======
115+
D:B:C:A
116+
117+
SELECT LISTAGG (COL2, ':') WITHIN GROUP (ORDER BY COL3 ASC, COL4 DESC) FROM TEST_T;
118+
=======
119+
C:A:D:B
120+
121+
SELECT LISTAGG (ALL COL6, ':') FROM TEST_T;
122+
=======
123+
П:Д:Ж:Й
124+
125+
SELECT LISTAGG (ALL COL6, ':') WITHIN GROUP (ORDER BY COL2 DESC) FROM TEST_T;
126+
=======
127+
Й:Ж:Д:П
128+
129+
SELECT LISTAGG (ALL COL2, ':') WITHIN GROUP (ORDER BY COL6) FROM TEST_T;
130+
=======
131+
B:C:D:A
132+
133+
SELECT LISTAGG (COL4, ':' ON OVERFLOW TRUNCATE '...' WITHOUT COUNT) WITHIN GROUP (ORDER BY COL3 ASC) FROM TEST_T;
134+
=======
135+
J:L:I:K
136+
137+
SELECT LISTAGG (COL4, ':' ON OVERFLOW TRUNCATE '...' WITH COUNT) WITHIN GROUP (ORDER BY COL3 DESC) FROM TEST_T;
138+
======
139+
I:K:J:L
140+
141+
SELECT LISTAGG (DISTINCT COL3, ':' ON OVERFLOW ERROR) WITHIN GROUP (ORDER BY COL3) FROM TEST_T;
142+
===
143+
A:B
144+
145+
INSERT INTO TEST_T values(5, 'E', NULL, NULL, NULL, NULL);
146+
INSERT INTO TEST_T values(6, 'F', 'C', 'N', true, 'К');
147+
148+
SELECT LISTAGG (ALL COL2, ':') WITHIN GROUP (ORDER BY COL3) FROM TEST_T;
149+
===========
150+
E:A:C:B:D:F
151+
152+
SELECT LISTAGG (ALL COL2, ':') WITHIN GROUP (ORDER BY COL3 NULLS LAST) FROM TEST_T;
153+
===========
154+
A:C:B:D:F:E
155+
156+
SELECT LISTAGG (ALL COL2, ':') WITHIN GROUP (ORDER BY COL6 NULLS FIRST) FROM TEST_T;
157+
===========
158+
E:B:C:D:F:A
159+

src/common/ParserTokens.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ PARSER_TOKEN(TOK_ENCRYPT, "ENCRYPT", true)
205205
PARSER_TOKEN(TOK_END, "END", false)
206206
PARSER_TOKEN(TOK_ENGINE, "ENGINE", true)
207207
PARSER_TOKEN(TOK_ENTRY_POINT, "ENTRY_POINT", true)
208+
PARSER_TOKEN(TOK_ERROR, "ERROR", true)
208209
PARSER_TOKEN(TOK_ESCAPE, "ESCAPE", false)
209210
PARSER_TOKEN(TOK_EXCEPTION, "EXCEPTION", true)
210211
PARSER_TOKEN(TOK_EXCESS, "EXCESS", true)
@@ -292,6 +293,7 @@ PARSER_TOKEN(TOK_LIKE, "LIKE", false)
292293
PARSER_TOKEN(TOK_LIMBO, "LIMBO", true)
293294
PARSER_TOKEN(TOK_LINGER, "LINGER", true)
294295
PARSER_TOKEN(TOK_LIST, "LIST", true)
296+
PARSER_TOKEN(TOK_LISTAGG, "LISTAGG", false)
295297
PARSER_TOKEN(TOK_LN, "LN", true)
296298
PARSER_TOKEN(TOK_LATERAL, "LATERAL", false)
297299
PARSER_TOKEN(TOK_LOCAL, "LOCAL", false)
@@ -522,6 +524,7 @@ PARSER_TOKEN(TOK_TRIGGER, "TRIGGER", false)
522524
PARSER_TOKEN(TOK_TRIM, "TRIM", false)
523525
PARSER_TOKEN(TOK_TRUE, "TRUE", false)
524526
PARSER_TOKEN(TOK_TRUNC, "TRUNC", true)
527+
PARSER_TOKEN(TOK_TRUNCATE, "TRUNCATE", false)
525528
PARSER_TOKEN(TOK_TRUSTED, "TRUSTED", true)
526529
PARSER_TOKEN(TOK_TWO_PHASE, "TWO_PHASE", true)
527530
PARSER_TOKEN(TOK_TYPE, "TYPE", true)
@@ -558,6 +561,7 @@ PARSER_TOKEN(TOK_WHERE, "WHERE", false)
558561
PARSER_TOKEN(TOK_WHILE, "WHILE", false)
559562
PARSER_TOKEN(TOK_WINDOW, "WINDOW", false)
560563
PARSER_TOKEN(TOK_WITH, "WITH", false)
564+
PARSER_TOKEN(TOK_WITHIN, "WITHIN", false)
561565
PARSER_TOKEN(TOK_WITHOUT, "WITHOUT", false)
562566
PARSER_TOKEN(TOK_WORK, "WORK", true)
563567
PARSER_TOKEN(TOK_WRITE, "WRITE", true)

src/dsql/AggNodes.cpp

Lines changed: 83 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ string AggNode::internalPrint(NodePrinter& printer) const
124124
NODE_PRINT(printer, dialect1);
125125
NODE_PRINT(printer, arg);
126126
NODE_PRINT(printer, asb);
127+
NODE_PRINT(printer, sort);
127128
NODE_PRINT(printer, indexed);
128129

129130
return aggInfo.name;
@@ -352,6 +353,8 @@ AggNode* AggNode::pass2(thread_db* tdbb, CompilerScratch* csb)
352353
dsc desc;
353354
getDesc(tdbb, csb, &desc);
354355
impureOffset = csb->allocImpure<impure_value_ex>();
356+
if (sort)
357+
doPass2(tdbb, csb, sort.getAddress());
355358

356359
return this;
357360
}
@@ -361,7 +364,7 @@ void AggNode::aggInit(thread_db* tdbb, Request* request) const
361364
impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset);
362365
impure->vlux_count = 0;
363366

364-
if (distinct)
367+
if (distinct || sort)
365368
{
366369
// Initialize a sort to reject duplicate values.
367370

@@ -373,8 +376,8 @@ void AggNode::aggInit(thread_db* tdbb, Request* request) const
373376

374377
asbImpure->iasb_sort = FB_NEW_POOL(request->req_sorts.getPool()) Sort(
375378
tdbb->getDatabase(), &request->req_sorts, asb->length,
376-
asb->keyItems.getCount(), 1, asb->keyItems.begin(),
377-
RecordSource::rejectDuplicate, 0);
379+
asb->keyItems.getCount(), (distinct ? 1 : asb->keyItems.getCount()),
380+
asb->keyItems.begin(), (distinct ? RecordSource::rejectDuplicate : nullptr), 0);
378381
}
379382
}
380383

@@ -427,6 +430,46 @@ bool AggNode::aggPass(thread_db* tdbb, Request* request) const
427430
ULONG* const pDummy = reinterpret_cast<ULONG*>(data + asb->length - sizeof(ULONG));
428431
*pDummy = asbImpure->iasb_dummy++;
429432

433+
return true;
434+
}
435+
else if (sort)
436+
{
437+
fb_assert(asb);
438+
// "Put" the value to sort.
439+
impure_agg_sort* asbImpure = request->getImpure<impure_agg_sort>(asb->impure);
440+
UCHAR* data;
441+
asbImpure->iasb_sort->put(tdbb, reinterpret_cast<ULONG**>(&data));
442+
443+
MOVE_CLEAR(data, asb->length);
444+
445+
auto descOrder = asb->descOrder.begin();
446+
auto keyItem = asb->keyItems.begin();
447+
448+
for (auto& nodeOrder : sort->expressions)
449+
{
450+
dsc toDesc = *(descOrder++);
451+
toDesc.dsc_address = data + (IPTR) toDesc.dsc_address;
452+
if (const auto fromDsc = EVL_expr(tdbb, request, nodeOrder))
453+
{
454+
if (IS_INTL_DATA(fromDsc))
455+
{
456+
INTL_string_to_key(tdbb, INTL_TEXT_TO_INDEX(fromDsc->getTextType()),
457+
fromDsc, &toDesc, INTL_KEY_UNIQUE);
458+
}
459+
else
460+
MOV_move(tdbb, fromDsc, &toDesc);
461+
}
462+
else
463+
*(data + keyItem->getSkdOffset()) = TRUE;
464+
465+
// The first key for NULLS FIRST/LAST, the second key for the sorter
466+
keyItem += 2;
467+
}
468+
469+
dsc toDesc = asb->desc;
470+
toDesc.dsc_address = data + (IPTR) toDesc.dsc_address;
471+
MOV_move(tdbb, desc, &toDesc);
472+
430473
return true;
431474
}
432475
}
@@ -455,7 +498,7 @@ dsc* AggNode::execute(thread_db* tdbb, Request* request) const
455498
impure->vlu_blob = NULL;
456499
}
457500

458-
if (distinct)
501+
if (distinct || sort)
459502
{
460503
impure_agg_sort* asbImpure = request->getImpure<impure_agg_sort>(asb->impure);
461504
dsc desc = asb->desc;
@@ -478,7 +521,10 @@ dsc* AggNode::execute(thread_db* tdbb, Request* request) const
478521
break;
479522
}
480523

481-
desc.dsc_address = data + (asb->intl ? asb->keyItems[1].getSkdOffset() : 0);
524+
if (distinct)
525+
desc.dsc_address = data + (asb->intl ? asb->keyItems[1].getSkdOffset() : 0);
526+
else
527+
desc.dsc_address = data + (IPTR) asb->desc.dsc_address;
482528

483529
aggPass(tdbb, request, &desc);
484530
}
@@ -877,28 +923,52 @@ AggNode* AvgAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
877923
static AggNode::Register<ListAggNode> listAggInfo("LIST", blr_agg_list, blr_agg_list_distinct);
878924

879925
ListAggNode::ListAggNode(MemoryPool& pool, bool aDistinct, ValueExprNode* aArg,
880-
ValueExprNode* aDelimiter)
926+
ValueExprNode* aDelimiter, ValueListNode* aOrderClause)
881927
: AggNode(pool, listAggInfo, aDistinct, false, aArg),
882-
delimiter(aDelimiter)
928+
delimiter(aDelimiter),
929+
dsqlOrderClause(aOrderClause)
883930
{
884931
}
885932

886933
DmlNode* ListAggNode::parse(thread_db* tdbb, MemoryPool& pool, CompilerScratch* csb, const UCHAR blrOp)
887934
{
888-
ListAggNode* node = FB_NEW_POOL(pool) ListAggNode(pool,
889-
(blrOp == blr_agg_list_distinct));
935+
ListAggNode* node = FB_NEW_POOL(pool) ListAggNode(pool, (blrOp == blr_agg_list_distinct));
890936
node->arg = PAR_parse_value(tdbb, csb);
891937
node->delimiter = PAR_parse_value(tdbb, csb);
938+
if (csb->csb_blr_reader.peekByte() == blr_sort)
939+
node->sort = PAR_sort(tdbb, csb, blr_sort, true);
940+
892941
return node;
893942
}
894943

944+
bool ListAggNode::dsqlMatch(DsqlCompilerScratch* dsqlScratch, const ExprNode* other, bool ignoreMapCast) const
945+
{
946+
if (!AggNode::dsqlMatch(dsqlScratch, other, ignoreMapCast))
947+
return false;
948+
949+
const ListAggNode* o = nodeAs<ListAggNode>(other);
950+
fb_assert(o);
951+
952+
if (dsqlOrderClause || o->dsqlOrderClause)
953+
return PASS1_node_match(dsqlScratch, dsqlOrderClause, o->dsqlOrderClause, ignoreMapCast);
954+
955+
return true;
956+
}
957+
895958
void ListAggNode::make(DsqlCompilerScratch* dsqlScratch, dsc* desc)
896959
{
897960
DsqlDescMaker::fromNode(dsqlScratch, desc, arg);
898961
desc->makeBlob(desc->getBlobSubType(), desc->getTextType());
899962
desc->setNullable(true);
900963
}
901964

965+
void ListAggNode::genBlr(DsqlCompilerScratch* dsqlScratch)
966+
{
967+
AggNode::genBlr(dsqlScratch);
968+
if (dsqlOrderClause)
969+
GEN_sort(dsqlScratch, blr_sort, dsqlOrderClause);
970+
}
971+
902972
bool ListAggNode::setParameterType(DsqlCompilerScratch* dsqlScratch,
903973
std::function<void (dsc*)> makeDesc, bool forceVarChar)
904974
{
@@ -920,6 +990,7 @@ ValueExprNode* ListAggNode::copy(thread_db* tdbb, NodeCopier& copier) const
920990
node->nodScale = nodScale;
921991
node->arg = copier.copy(tdbb, arg);
922992
node->delimiter = copier.copy(tdbb, delimiter);
993+
node->sort = sort->copy(tdbb, copier);
923994
return node;
924995
}
925996

@@ -985,7 +1056,7 @@ dsc* ListAggNode::aggExecute(thread_db* tdbb, Request* request) const
9851056
{
9861057
impure_value_ex* impure = request->getImpure<impure_value_ex>(impureOffset);
9871058

988-
if (distinct)
1059+
if (distinct || sort)
9891060
{
9901061
if (impure->vlu_blob)
9911062
{
@@ -1005,7 +1076,8 @@ AggNode* ListAggNode::dsqlCopy(DsqlCompilerScratch* dsqlScratch) /*const*/
10051076
thread_db* tdbb = JRD_get_thread_data();
10061077

10071078
AggNode* node = FB_NEW_POOL(dsqlScratch->getPool()) ListAggNode(dsqlScratch->getPool(), distinct,
1008-
doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, delimiter));
1079+
doDsqlPass(dsqlScratch, arg), doDsqlPass(dsqlScratch, delimiter),
1080+
doDsqlPass(dsqlScratch, dsqlOrderClause));
10091081

10101082
dsc argDesc;
10111083
node->arg->make(dsqlScratch, &argDesc);

0 commit comments

Comments
 (0)