TDengine/source/libs/executor/src/countwindowoperator.c

392 lines
15 KiB
C
Raw Normal View History

2024-01-26 07:56:26 +00:00
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "executorInt.h"
#include "filter.h"
#include "function.h"
#include "functionMgt.h"
#include "operator.h"
#include "querytask.h"
#include "tcommon.h"
#include "tcompare.h"
#include "tdatablock.h"
#include "ttime.h"
2024-02-06 09:19:12 +00:00
typedef struct SCountWindowResult {
2024-07-22 04:51:25 +00:00
int32_t winRows;
SResultRow row;
2024-02-06 09:19:12 +00:00
} SCountWindowResult;
typedef struct SCountWindowSupp {
2024-07-22 04:51:25 +00:00
SArray* pWinStates;
int32_t stateIndex;
int32_t curStateIndex;
2024-02-06 09:19:12 +00:00
} SCountWindowSupp;
2024-01-26 07:56:26 +00:00
typedef struct SCountWindowOperatorInfo {
SOptrBasicInfo binfo;
SAggSupporter aggSup;
SExprSupp scalarSup;
int32_t tsSlotId; // primary timestamp column slot id
STimeWindowAggSupp twAggSup;
uint64_t groupId; // current group id, used to identify the data block from different groups
SResultRow* pRow;
2024-02-06 09:19:12 +00:00
int32_t windowCount;
int32_t windowSliding;
SCountWindowSupp countSup;
SSDataBlock* pPreDataBlock;
int32_t preStateIndex;
2024-01-26 07:56:26 +00:00
} SCountWindowOperatorInfo;
void destroyCountWindowOperatorInfo(void* param) {
SCountWindowOperatorInfo* pInfo = (SCountWindowOperatorInfo*)param;
if (pInfo == NULL) {
return;
}
cleanupBasicInfo(&pInfo->binfo);
colDataDestroy(&pInfo->twAggSup.timeWindowData);
cleanupAggSup(&pInfo->aggSup);
cleanupExprSupp(&pInfo->scalarSup);
2024-02-06 09:19:12 +00:00
taosArrayDestroy(pInfo->countSup.pWinStates);
2024-01-26 07:56:26 +00:00
taosMemoryFreeClear(param);
}
2024-08-27 09:04:44 +00:00
// Forward declaration: the "get next block" callback that createCountwindowOperatorInfo() registers.
static int32_t countWindowAggregateNext(SOperatorInfo* pOperator, SSDataBlock** ppRes);
2024-07-22 04:51:25 +00:00
// Mark a window buffer as empty so that the next window can reuse it.
static void clearWinStateBuff(SCountWindowResult* pBuff) {
  pBuff->winRows = 0;
}
2024-02-06 09:19:12 +00:00
/*
 * Hand out the window buffer at pCountSup->stateIndex and move the cursor to the following
 * buffer, wrapping around at the end of the array.
 *
 * curStateIndex is updated to the requested index even when the lookup fails.
 *
 * Returns the buffer, or NULL with terrno set to TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR when the
 * buffer cannot be fetched or the array is empty.
 */
static SCountWindowResult* getCountWinStateInfo(SCountWindowSupp* pCountSup) {
  const int32_t       slot = pCountSup->stateIndex;
  SCountWindowResult* pState = taosArrayGet(pCountSup->pWinStates, slot);

  pCountSup->curStateIndex = slot;
  if (pState == NULL) {
    terrno = TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR;
    qError("%s failed at line %d since %s", __func__, __LINE__, tstrerror(terrno));
    return NULL;
  }

  int32_t numOfStates = taosArrayGetSize(pCountSup->pWinStates);
  if (numOfStates == 0) {
    terrno = TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR;
    qError("%s failed at line %d since %s", __func__, __LINE__, tstrerror(terrno));
    return NULL;
  }

  // advance the round-robin cursor for the next caller
  pCountSup->stateIndex = (slot + 1) % numOfStates;
  return pState;
}
2024-07-23 02:50:16 +00:00
/*
 * Select the next window buffer and bind the function contexts of pExprSup to its result row.
 *
 * pExprSup:  expression support whose contexts are pointed at the selected result row.
 * pCountSup: window buffers; its round-robin cursor is advanced by this call.
 * pResult:   out, receives the result row of the selected buffer.
 * ppResBuff: out, receives the selected buffer; written only when the call succeeds.
 *
 * Returns TSDB_CODE_SUCCESS, or the error reported by the buffer lookup / setResultRowInitCtx().
 */
static int32_t setCountWindowOutputBuff(SExprSupp* pExprSup, SCountWindowSupp* pCountSup, SResultRow** pResult,
                                        SCountWindowResult** ppResBuff) {
  int32_t code = TSDB_CODE_SUCCESS;
  int32_t lino = 0;

  SCountWindowResult* pBuff = getCountWinStateInfo(pCountSup);
  QUERY_CHECK_NULL(pBuff, code, lino, _end, terrno);

  (*pResult) = &pBuff->row;
  code = setResultRowInitCtx(*pResult, pExprSup->pCtx, pExprSup->numOfExprs, pExprSup->rowEntryInfoOffset);
  // check right here so that the log below reports this line instead of "line 0"
  QUERY_CHECK_CODE(code, lino, _end);

  (*ppResBuff) = pBuff;

_end:
  if (code != TSDB_CODE_SUCCESS) {
    qError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
  }
  return code;
}
/*
 * Work out how many rows of the current block go into a window and account for them.
 *
 * start:        index of the first unconsumed row in the block.
 * blockRows:    total number of rows in the block.
 * countWinRows: number of rows of a complete window.
 * pCurrentRows: in/out, rows already in the window; increased by the returned amount.
 *
 * Returns the smaller of "rows still missing in the window" and "rows left in the block".
 */
static int32_t updateCountWindowInfo(int32_t start, int32_t blockRows, int32_t countWinRows, int32_t* pCurrentRows) {
  int32_t missingRows = countWinRows - (*pCurrentRows);
  int32_t remainingRows = blockRows - start;
  int32_t taken = TMIN(missingRows, remainingRows);

  *pCurrentRows += taken;
  return taken;
}
2024-07-23 02:50:16 +00:00
/*
 * Feed one input block into the count windows of the operator.
 *
 * Rows are consumed window buffer by window buffer, starting with the buffer that follows the one
 * completed most recently (preStateIndex). Each time a buffer reaches windowCount rows, its result
 * row is copied to the output block and the buffer is cleared. Finally the output block is filtered.
 *
 * Errors do not return: they are recorded in pTaskInfo->code and raised with T_LONG_JMP.
 */
void doCountWindowAggImpl(SOperatorInfo* pOperator, SSDataBlock* pBlock) {
  int32_t                   code = TSDB_CODE_SUCCESS;
  int32_t                   lino = 0;
  SExecTaskInfo*            pTaskInfo = pOperator->pTaskInfo;
  SExprSupp*                pExprSup = &pOperator->exprSupp;
  SCountWindowOperatorInfo* pInfo = pOperator->info;
  SSDataBlock*              pRes = pInfo->binfo.pRes;
  SColumnInfoData*          pColInfoData = taosArrayGet(pBlock->pDataBlock, pInfo->tsSlotId);
  QUERY_CHECK_NULL(pColInfoData, code, lino, _end, terrno);

  TSKEY*  tsCols = (TSKEY*)pColInfoData->pData;
  int32_t numOfBuff = taosArrayGetSize(pInfo->countSup.pWinStates);
  if (numOfBuff == 0) {
    // without buffers the modulo below would divide by zero; report through the common handler
    code = TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR;
    lino = __LINE__;
    goto _end;
  }
  pInfo->countSup.stateIndex = (pInfo->preStateIndex + 1) % numOfBuff;

  // reserve room for the windows this block may complete
  int32_t newSize = pRes->info.rows + pBlock->info.rows / pInfo->windowSliding + 1;
  if (newSize > pRes->info.capacity) {
    code = blockDataEnsureCapacity(pRes, newSize);
    QUERY_CHECK_CODE(code, lino, _end);
  }

  for (int32_t i = 0; i < pBlock->info.rows;) {
    SCountWindowResult* pBuffInfo = NULL;
    code = setCountWindowOutputBuff(pExprSup, &pInfo->countSup, &pInfo->pRow, &pBuffInfo);
    QUERY_CHECK_CODE(code, lino, _end);

    int32_t prevRows = pBuffInfo->winRows;
    int32_t num = updateCountWindowInfo(i, pBlock->info.rows, pInfo->windowCount, &pBuffInfo->winRows);
    int32_t step = num;
    if (prevRows == 0) {
      // first rows of this window: its start key is the timestamp of the first consumed row
      pInfo->pRow->win.skey = tsCols[i];
    }
    pInfo->pRow->win.ekey = tsCols[num + i - 1];

    updateTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pInfo->pRow->win, 0);
    applyAggFunctionOnPartialTuples(pTaskInfo, pExprSup->pCtx, &pInfo->twAggSup.timeWindowData, i, num,
                                    pBlock->info.rows, pExprSup->numOfExprs);

    if (pInfo->windowCount != pInfo->windowSliding) {
      // The input position does not simply move past the consumed rows in this case; rows may be
      // offered to the following buffer again.
      if (prevRows <= pInfo->windowSliding) {
        if (pBuffInfo->winRows > pInfo->windowSliding) {
          // move to the row that lies windowSliding rows after the start of this window
          step = pInfo->windowSliding - prevRows;
        } else {
          // NOTE(review): the window holds at most windowSliding rows here, which looks like it can
          // only happen when the block is used up, so this step ends the loop - confirm.
          step = pInfo->windowSliding;
        }
      } else {
        // stay on the same row; the next iteration feeds it to the next buffer
        step = 0;
      }
    }

    if (pBuffInfo->winRows == pInfo->windowCount) {
      // the window is complete: emit its result row and release the buffer
      doUpdateNumOfRows(pExprSup->pCtx, pInfo->pRow, pExprSup->numOfExprs, pExprSup->rowEntryInfoOffset);
      copyResultrowToDataBlock(pExprSup->pExprInfo, pExprSup->numOfExprs, pInfo->pRow, pExprSup->pCtx, pRes,
                               pExprSup->rowEntryInfoOffset, pTaskInfo);
      pRes->info.rows += pInfo->pRow->numOfRows;
      clearWinStateBuff(pBuffInfo);
      pInfo->preStateIndex = pInfo->countSup.curStateIndex;
      clearResultRowInitFlag(pExprSup->pCtx, pExprSup->numOfExprs);
    }
    i += step;
  }

  code = doFilter(pRes, pOperator->exprSupp.pFilterInfo, NULL);
  QUERY_CHECK_CODE(code, lino, _end);

_end:
  if (code != TSDB_CODE_SUCCESS) {
    qError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
    pTaskInfo->code = code;
    T_LONG_JMP(pTaskInfo->env, code);
  }
}
2024-07-22 04:51:25 +00:00
/*
 * Flush every window buffer that still holds rows into pBlock, clear the buffers and filter pBlock.
 *
 * The buffers are visited once each, starting with the one that follows preStateIndex.
 *
 * pExprSup:      expression support used to finalize and copy the result rows.
 * pCountSup:     window buffers to flush.
 * pTaskInfo:     task that receives the error code when the flush fails.
 * pFilterInfo:   filter applied to pBlock after the flush.
 * preStateIndex: buffer index of the window that was completed most recently.
 * pBlock:        output block; flushed rows are appended to it.
 *
 * Errors do not return: they are recorded in pTaskInfo->code and raised with T_LONG_JMP.
 */
static void buildCountResult(SExprSupp* pExprSup, SCountWindowSupp* pCountSup, SExecTaskInfo* pTaskInfo,
                             SFilterInfo* pFilterInfo, int32_t preStateIndex, SSDataBlock* pBlock) {
  SResultRow* pResultRow = NULL;
  int32_t     code = TSDB_CODE_SUCCESS;
  int32_t     lino = 0;
  int32_t     numOfBuff = taosArrayGetSize(pCountSup->pWinStates);
  if (numOfBuff == 0) {
    // same guard as in doCountWindowAggImpl(): the modulo below must not divide by zero
    code = TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR;
    lino = __LINE__;
    goto _end;
  }

  // every buffer contributes at most one result row
  int32_t newSize = pBlock->info.rows + numOfBuff;
  if (newSize > pBlock->info.capacity) {
    code = blockDataEnsureCapacity(pBlock, newSize);
    QUERY_CHECK_CODE(code, lino, _end);
  }

  pCountSup->stateIndex = (preStateIndex + 1) % numOfBuff;
  for (int32_t i = 0; i < numOfBuff; i++) {
    SCountWindowResult* pBuff = NULL;
    code = setCountWindowOutputBuff(pExprSup, pCountSup, &pResultRow, &pBuff);
    QUERY_CHECK_CODE(code, lino, _end);

    if (pBuff->winRows == 0) {
      // nothing pending in this buffer
      continue;
    }

    doUpdateNumOfRows(pExprSup->pCtx, pResultRow, pExprSup->numOfExprs, pExprSup->rowEntryInfoOffset);
    copyResultrowToDataBlock(pExprSup->pExprInfo, pExprSup->numOfExprs, pResultRow, pExprSup->pCtx, pBlock,
                             pExprSup->rowEntryInfoOffset, pTaskInfo);
    pBlock->info.rows += pResultRow->numOfRows;
    clearWinStateBuff(pBuff);
    clearResultRowInitFlag(pExprSup->pCtx, pExprSup->numOfExprs);
  }

  code = doFilter(pBlock, pFilterInfo, NULL);
  QUERY_CHECK_CODE(code, lino, _end);

_end:
  if (code != TSDB_CODE_SUCCESS) {
    qError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
    // keep the task state in line with the other long-jump exits of this file
    pTaskInfo->code = code;
    T_LONG_JMP(pTaskInfo->env, code);
  }
}
2024-07-24 08:17:28 +00:00
/*
 * "Get next block" callback of the count-window operator.
 *
 * Pulls blocks from the downstream operator and aggregates them into count windows. A result
 * block is handed out when
 *   - the group id of the input changes and flushing the previous group produced rows,
 *   - the result block has reached resultInfo.threshold rows, or
 *   - the input is exhausted (the remaining windows are flushed first).
 *
 * pOperator: the count-window operator.
 * ppRes:     out, the result block, or NULL when the input is exhausted and nothing is left.
 *
 * Returns TSDB_CODE_SUCCESS. Failures are recorded in pTaskInfo->code and raised with T_LONG_JMP.
 */
static int32_t countWindowAggregateNext(SOperatorInfo* pOperator, SSDataBlock** ppRes) {
  int32_t                   code = TSDB_CODE_SUCCESS;
  int32_t                   lino = 0;
  SCountWindowOperatorInfo* pInfo = pOperator->info;
  SExecTaskInfo*            pTaskInfo = pOperator->pTaskInfo;
  SExprSupp*                pExprSup = &pOperator->exprSupp;
  int32_t                   order = pInfo->binfo.inputTsOrder;
  SSDataBlock*              pRes = pInfo->binfo.pRes;

  blockDataCleanup(pRes);

  while (1) {
    SSDataBlock* pBlock = NULL;
    if (pInfo->pPreDataBlock == NULL) {
      pBlock = getNextBlockFromDownstream(pOperator, 0);
    } else {
      // resume with the block that was held back when the previous call returned
      pBlock = pInfo->pPreDataBlock;
      pInfo->pPreDataBlock = NULL;
    }

    if (pBlock == NULL) {
      break;
    }

    pRes->info.scanFlag = pBlock->info.scanFlag;
    code = setInputDataBlock(pExprSup, pBlock, order, MAIN_SCAN, true);
    QUERY_CHECK_CODE(code, lino, _end);

    code = blockDataUpdateTsWindow(pBlock, pInfo->tsSlotId);
    QUERY_CHECK_CODE(code, lino, _end);

    // there is an scalar expression that needs to be calculated right before apply the group aggregation.
    if (pInfo->scalarSup.pExprInfo != NULL) {
      code = projectApplyFunctions(pInfo->scalarSup.pExprInfo, pBlock, pBlock, pInfo->scalarSup.pCtx,
                                   pInfo->scalarSup.numOfExprs, NULL);
      QUERY_CHECK_CODE(code, lino, _end);
    }

    if (pInfo->groupId == 0) {
      pInfo->groupId = pBlock->info.id.groupId;
    } else if (pInfo->groupId != pBlock->info.id.groupId) {
      // a new group starts: flush the unfinished windows of the previous group first
      pRes->info.id.groupId = pInfo->groupId;
      buildCountResult(pExprSup, &pInfo->countSup, pTaskInfo, pOperator->exprSupp.pFilterInfo, pInfo->preStateIndex,
                       pRes);
      pInfo->groupId = pBlock->info.id.groupId;
      if (pRes->info.rows > 0) {
        // Hold the block back only when we really return. Stashing it unconditionally would make
        // the loop pick it up again after it has been aggregated below, counting its rows twice.
        pInfo->pPreDataBlock = pBlock;
        (*ppRes) = pRes;
        return code;
      }
    }

    doCountWindowAggImpl(pOperator, pBlock);
    if (pRes->info.rows >= pOperator->resultInfo.threshold) {
      pRes->info.id.groupId = pInfo->groupId;
      (*ppRes) = pRes;
      return code;
    }
  }

  // input exhausted: emit whatever is left in the window buffers
  pRes->info.id.groupId = pInfo->groupId;
  buildCountResult(pExprSup, &pInfo->countSup, pTaskInfo, pOperator->exprSupp.pFilterInfo, pInfo->preStateIndex, pRes);

_end:
  if (code != TSDB_CODE_SUCCESS) {
    qError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
    pTaskInfo->code = code;
    T_LONG_JMP(pTaskInfo->env, code);
  }
  (*ppRes) = pRes->info.rows == 0 ? NULL : pRes;
  return code;
}
2024-07-24 09:08:08 +00:00
/*
 * Create the count-window operator described by physiNode on top of downstream.
 *
 * downstream: input operator; attached to the new operator on success.
 * physiNode:  physical plan node, used as a SCountWinodwPhysiNode.
 * pTaskInfo:  owning task; pTaskInfo->code receives the error code on failure.
 * pOptrInfo:  out, the new operator. Written only on success.
 *
 * Returns TSDB_CODE_SUCCESS or an error code. On failure everything created so far is destroyed
 * via destroyCountWindowOperatorInfo() and destroyOperatorAndDownstreams().
 */
int32_t createCountwindowOperatorInfo(SOperatorInfo* downstream, SPhysiNode* physiNode,
                                      SExecTaskInfo* pTaskInfo, SOperatorInfo** pOptrInfo) {
  QRY_OPTR_CHECK(pOptrInfo);

  int32_t                   code = TSDB_CODE_SUCCESS;
  int32_t                   lino = 0;
  SCountWindowOperatorInfo* pInfo = taosMemoryCalloc(1, sizeof(SCountWindowOperatorInfo));
  SOperatorInfo*            pOperator = taosMemoryCalloc(1, sizeof(SOperatorInfo));
  if (pInfo == NULL || pOperator == NULL) {
    code = TSDB_CODE_OUT_OF_MEMORY;
    lino = __LINE__;
    goto _error;
  }

  pOperator->exprSupp.hasWindowOrGroup = true;

  SCountWinodwPhysiNode* pCountWindowNode = (SCountWinodwPhysiNode*)physiNode;

  pInfo->tsSlotId = ((SColumnNode*)pCountWindowNode->window.pTspk)->slotId;

  if (pCountWindowNode->window.pExprs != NULL) {
    int32_t    numOfScalarExpr = 0;
    SExprInfo* pScalarExprInfo = NULL;
    code = createExprInfo(pCountWindowNode->window.pExprs, NULL, &pScalarExprInfo, &numOfScalarExpr);
    QUERY_CHECK_CODE(code, lino, _error);

    code = initExprSupp(&pInfo->scalarSup, pScalarExprInfo, numOfScalarExpr, &pTaskInfo->storageAPI.functionStore);
    QUERY_CHECK_CODE(code, lino, _error);
  }

  size_t     keyBufSize = 0;
  int32_t    num = 0;
  SExprInfo* pExprInfo = NULL;
  code = createExprInfo(pCountWindowNode->window.pFuncs, NULL, &pExprInfo, &num);
  QUERY_CHECK_CODE(code, lino, _error);

  initResultSizeInfo(&pOperator->resultInfo, 4096);

  code = initAggSup(&pOperator->exprSupp, &pInfo->aggSup, pExprInfo, num, keyBufSize, pTaskInfo->id.str,
                    pTaskInfo->streamInfo.pState, &pTaskInfo->storageAPI.functionStore);
  QUERY_CHECK_CODE(code, lino, _error);

  SSDataBlock* pResBlock = createDataBlockFromDescNode(pCountWindowNode->window.node.pOutputDataBlockDesc);
  QUERY_CHECK_NULL(pResBlock, code, lino, _error, terrno);
  initBasicInfo(&pInfo->binfo, pResBlock);

  code = blockDataEnsureCapacity(pResBlock, pOperator->resultInfo.capacity);
  QUERY_CHECK_CODE(code, lino, _error);

  initResultRowInfo(&pInfo->binfo.resultRowInfo);
  pInfo->binfo.inputTsOrder = physiNode->inputTsOrder;
  pInfo->binfo.outputTsOrder = physiNode->outputTsOrder;
  pInfo->windowCount = pCountWindowNode->windowCount;
  pInfo->windowSliding = pCountWindowNode->windowSliding;
  if (pInfo->windowCount <= 0 || pInfo->windowSliding <= 0) {
    // both values are used as divisors / loop steps later on; refuse values that cannot work
    code = TSDB_CODE_QRY_EXECUTOR_INTERNAL_ERROR;
    lino = __LINE__;
    goto _error;
  }

  // Size of one window buffer: everything that precedes SCountWindowResult.row (winRows plus any
  // padding the compiler inserts in front of row) followed by a result row of the runtime size.
  // A plain sizeof(int32_t) would not account for that padding.
  int32_t itemSize = (int32_t)(sizeof(SCountWindowResult) - sizeof(SResultRow)) + pInfo->aggSup.resultRowSize;
  int32_t numOfItem = 1;
  if (pInfo->windowCount != pInfo->windowSliding) {
    // several windows are being filled at the same time in this case
    numOfItem = pInfo->windowCount / pInfo->windowSliding + 1;
  }

  pInfo->countSup.pWinStates = taosArrayInit_s(itemSize, numOfItem);
  if (pInfo->countSup.pWinStates == NULL) {
    // code must not stay TSDB_CODE_SUCCESS here, the caller would see a success without operator
    code = TSDB_CODE_OUT_OF_MEMORY;
    lino = __LINE__;
    goto _error;
  }

  pInfo->countSup.stateIndex = 0;
  pInfo->pPreDataBlock = NULL;
  pInfo->preStateIndex = 0;

  code = filterInitFromNode((SNode*)pCountWindowNode->window.node.pConditions, &pOperator->exprSupp.pFilterInfo, 0);
  QUERY_CHECK_CODE(code, lino, _error);

  code = initExecTimeWindowInfo(&pInfo->twAggSup.timeWindowData, &pTaskInfo->window);
  QUERY_CHECK_CODE(code, lino, _error);

  setOperatorInfo(pOperator, "CountWindowOperator", QUERY_NODE_PHYSICAL_PLAN_MERGE_COUNT, true, OP_NOT_OPENED, pInfo,
                  pTaskInfo);
  pOperator->fpSet = createOperatorFpSet(optrDummyOpenFn, countWindowAggregateNext, NULL, destroyCountWindowOperatorInfo,
                                         optrDefaultBufFn, NULL, optrDefaultGetNextExtFn, NULL);

  code = appendDownstream(pOperator, &downstream, 1);
  QUERY_CHECK_CODE(code, lino, _error);

  *pOptrInfo = pOperator;
  return TSDB_CODE_SUCCESS;

_error:
  qError("%s failed at line %d since %s", __func__, lino, tstrerror(code));
  if (pInfo != NULL) {
    destroyCountWindowOperatorInfo(pInfo);
  }
  if (pOperator != NULL) {
    // pInfo may already be attached to the operator by setOperatorInfo(); detach it so that the
    // operator teardown below cannot hand the freed object to the registered destroy callback.
    pOperator->info = NULL;
  }
  destroyOperatorAndDownstreams(pOperator, &downstream, 1);
  pTaskInfo->code = code;
  return code;
}