TDengine/source/libs/executor/inc/hashjoin.h

191 lines
5 KiB
C
Raw Normal View History

2023-06-15 11:34:15 +00:00
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_HASHJOIN_H
#define TDENGINE_HASHJOIN_H
#ifdef __cplusplus
extern "C" {
#endif
2024-11-19 08:23:46 +00:00
#include "executorInt.h"
#include "operator.h"
2023-06-21 11:33:27 +00:00
// Default page size constant for the hash join: 10485760 == 10 * 1024 * 1024.
// NOTE(review): presumably a byte count for one row-buffer page — confirm at the use site.
#define HASH_JOIN_DEFAULT_PAGE_SIZE 10485760
2024-04-10 11:27:36 +00:00
// Block sizing constants for the hash join.
// NOTE(review): units are not visible in this header; the notes below are
// arithmetic facts plus name-based guesses — confirm at the use sites.
// 4096: presumably the default row capacity of a result block.
#define HJOIN_DEFAULT_BLK_ROWS_NUM 4096
// 10485760 == 10 * 1024 * 1024; presumably an upper bound on block size in bytes.
#define HJOIN_BLK_SIZE_LIMIT 10485760
// 2 * 1048576 == 2097152; the expression is parenthesized so it expands safely.
#define HJOIN_ROW_BITMAP_SIZE (2 * 1048576)
// 0.9 is a double constant; presumably scales a block capacity into
// SHJoinOperatorInfo.blkThreshold — confirm.
#define HJOIN_BLK_THRESHOLD_RATIO 0.9
2023-06-21 11:33:27 +00:00
2024-04-12 07:44:23 +00:00
// Pointer to a join implementation: takes the operator and returns an int32_t.
// hInnerJoinDo() and hLeftJoinDo(), declared in this header, have this shape,
// and SHJoinOperatorInfo stores one such pointer in its joinFp member.
typedef int32_t (*hJoinImplFp)(SOperatorInfo*);
feat: support customized taos/taosd (#29736) * feat: support TDAcoreOS * chore: cmake options for TD_ACORE * chore: disable lemon for TD_ACORE * chore: add lzma2 and msvcregex * chore: cmake for lzma2 * chore: adapt for TD_ACORE * chore: adapt strcasecmp for TD_ACORE * chore: adapt for geos/threadName * chore: build adapt for TD_ACORE * chore: build adapt for TD_ACORE * chore: build adapt for TD_ACORE * chore: build adapt for TD_ACORE * chore: build adapt for TD_ACORE termio * chore: refact transComm.h for TD_ACORE * chore: refact transportInt.h for TD_ACORE * chore: refact trans.c for TD_ACORE * chore: refact trpc.h for TD_ACORE * chore: refact transCli.c/transComm.c/transSvr.c for TD_ACORE * chore: refact uv.h for TD_ACORE * chore: refact geosWrapper.h for TD_ACORE * chore: refact token/builtins/udf for TD_ACORE * chore: refact rocks for TD_ACORE * chore: refact tsdbCache.c for TD_ACORE, use LRU cache for last/last_row, not use rocksdb * chore: refact FAIL to _ERR to solve conflicts for TD_ACORE * chore: restore lemon.c/lempar.c * chore: support build lemon for TD_ACORE * chore: refact trpc and siginfo_t for TD_ACORE * chore: refact timezone for TD_ACORE * chore: refact lz4 for TD_ACORE * chore: refact TD_ACORE to make compile pass * chore: code optimization for TD_ASTRA * feat: support run taos with taosd integrated * feat: support invoke taos shell * feat: support invoke taos shell * feat: support invoke taos shell * chore: code optimization * chore: fix undefined reference problem os TD_ASTRA * chore: resolve compile problem for TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * 
chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix undefined reference problem os TD_ASTRA * chore: fix getpid * chore: fix typo * chore: set stack size and ajust min pack size for TD_ASTRA * chore: fix pthread create parameters * chore: chmod adapt for TD_ASTRA * chore: fix trans compile problem * chore: adapt chmod for TD_ASTRA * chore: byte alignment for TD_ASTRA * chore: more code for adaption of TD_ASTRA * chore: more code for adaption of TD_ASTRA * chore: more code for adaption of TD_ASTRA * chore: byte alignment for TD_ASTRA * chore: conditional compile option * chore: adapt for TD_ASTRA * chore: adjust taosPId and msvcregex for TD_ASTRA * chore: log dir separator for wal build name * chore: fix type of pointer parameter * chore: fix compile problem of tsdbGetS3Size * enh: get last ver from wal log for TD_ASTRA * enh: refact wal meta ver * enh: refact wal meta ver * fix: typo of taosUcs4Compare * enh: process return value of CI * chore: more code for TD_ASTRA adaption * chore: return value of taosCloseFile in walMeta.c * chore: fix compile problem * chore: fix compile problem of TD_ASTRA * fix: update macro for tq and stream task * chore: code optimization for TD_ASTRA * chore: restore create log and init cfg interface * chore: restore strncasecmp and strcasecmp * fix: adjust the field position of SDataBlockInfo * fix: pragma pack min size * fix: pragma pack min size * chore: more code for TD_ASTRA adaption * fix: type of parameters * chore: adapt strncasecmp and strcasecmp for TD_ASTRA * chore: restore interface of init log * 
enh: pack push optimization * fix: taos init cfg * add astra support * fix: fetch the value of suid * chore: switch of build with udf * add temp code * chore: more code for TD_ASTRA adaption * chore: add macro ERRNO to replace errno * chore: bytes align for TD_ASTRA * fix: remove obsolete codes * enh: support USE_UDF macro * fix compile error * fix: resolve redefinition problem * fix: compile problem of log.cpp * fix: compile problem of osTimezone * fix: resolve compile problem of udf * fix: pragma definition on windows * fix: ucs4 and stpncpy for TD_ASTRA * fix: memory align problem for TD_ASTRA * enh: solve memory leak for TD_ASTRA_RPC * fix: compile problem of taosSetInt64Aligned * fix: restore mndSubscribe.c * fix: scalar for udf * chore: code adaption for TD_ASTRA * chore: code optimization for TD_ASTRA * fix: typo of add definition * fix: typo of macro in tudf.h * chore: remove void to make CI pass * enh: move macro from cmake.platform to cmake.options * enh: byte align for hash node and error code * chore: restore the size for lru cache * enh: restore some code about pack push * chore: restore the pack push in tmsg.h * fix: add macro of pack pop for windows --------- Co-authored-by: yihaoDeng <luomoxyz@126.com>
2025-03-14 05:32:13 +00:00
/*
 * Per-row link record: a `next` pointer followed by a (pageId, offset) pair.
 *
 * The struct is declared under #pragma pack(push, 1), so there is no padding
 * between members: sizeof(SBufRowInfo) is sizeof(void*) + 2 + 4 and `offset`
 * is not naturally aligned. Member order and types are therefore part of the
 * layout and must not be changed.
 *
 * NOTE(review): presumably `next` chains rows of one group, `pageId` selects a
 * row-buffer page and `offset` is a position inside that page — confirm
 * against the implementation.
 */
#pragma pack(push, 1)
typedef struct SBufRowInfo {
  void*    next;
  uint16_t pageId;
  int32_t  offset;
} SBufRowInfo;
#pragma pack(pop)
2024-04-12 07:44:23 +00:00
/*
 * Phase selector for the probe side; SHJoinCtx keeps the current value in its
 * probePhase member. Enumerators are numbered 1..3, so 0 is not a valid phase.
 */
typedef enum EHJoinPhase {
  E_JOIN_PHASE_PRE  = 1,
  E_JOIN_PHASE_CUR  = 2,
  E_JOIN_PHASE_POST = 3
} EHJoinPhase;
2023-06-20 11:34:16 +00:00
typedef struct SHJoinCtx {
bool rowRemains;
2024-04-12 07:44:23 +00:00
bool midRemains;
2024-04-10 11:27:36 +00:00
int64_t limit;
2023-06-20 11:34:16 +00:00
SBufRowInfo* pBuildRow;
SSDataBlock* pProbeData;
2024-04-12 07:44:23 +00:00
EHJoinPhase probePhase;
int32_t probePreIdx;
int32_t probeStartIdx;
int32_t probeEndIdx;
int32_t probePostIdx;
bool readMatch;
2023-06-20 11:34:16 +00:00
} SHJoinCtx;
2023-06-15 11:34:15 +00:00
2023-06-21 11:33:27 +00:00
typedef struct SHJoinColInfo {
2023-12-12 11:31:12 +00:00
int32_t srcSlot;
int32_t dstSlot;
bool keyCol;
bool vardata;
int32_t* offset;
int32_t bytes;
char* data;
char* bitMap;
SColumnInfoData* colData;
2023-06-21 11:33:27 +00:00
} SHJoinColInfo;
2023-06-15 11:34:15 +00:00
2023-06-19 11:40:15 +00:00
/*
 * One buffer page: a data pointer with its size and an offset into it.
 * NOTE(review): presumably `offset` is the next free position inside `data`
 * and `pageSize` the page capacity in bytes — confirm against the implementation.
 */
typedef struct SBufPageInfo {
  int32_t pageSize;
  int32_t offset;
  char*   data;
} SBufPageInfo;
2023-06-20 11:34:16 +00:00
typedef struct SGroupData {
2023-06-19 11:40:15 +00:00
SBufRowInfo* rows;
2023-06-20 11:34:16 +00:00
} SGroupData;
2023-06-19 11:40:15 +00:00
2024-04-12 07:44:23 +00:00
/*
 * Source-to-destination slot mapping for a single column. SHJoinTableCtx
 * points at one of these through its primCol member.
 * NOTE(review): presumably `vardata` marks a variable-length column and
 * `bytes` is the column width — confirm against the implementation.
 */
typedef struct SHJoinColMap {
  int32_t srcSlot;
  int32_t dstSlot;
  bool    vardata;
  int32_t bytes;
} SHJoinColMap;
/*
 * Parameters of the primary-key expression. For now the only supported
 * expression is timetruncate.
 * NOTE(review): presumably truncateUnit / timezoneUnit are the arguments of
 * that expression and targetSlotId is the slot receiving its result — confirm.
 */
typedef struct SHJoinPrimExprCtx {
  int64_t truncateUnit;
  int64_t timezoneUnit;
  int32_t targetSlotId;
} SHJoinPrimExprCtx;
typedef struct SHJoinTableCtx {
2023-07-05 03:07:08 +00:00
int32_t downStreamIdx;
2023-06-19 11:40:15 +00:00
SOperatorInfo* downStream;
int32_t blkId;
SQueryStat inputStat;
2024-04-12 07:44:23 +00:00
bool hasTimeRange;
SHJoinColMap* primCol;
SNode* primExpr;
SHJoinPrimExprCtx primCtx;
2023-06-19 11:40:15 +00:00
int32_t keyNum;
2023-06-21 11:33:27 +00:00
SHJoinColInfo* keyCols;
2023-06-19 11:40:15 +00:00
char* keyBuf;
2023-06-21 11:33:27 +00:00
char* keyData;
2023-06-19 11:40:15 +00:00
int32_t valNum;
2023-06-21 11:33:27 +00:00
SHJoinColInfo* valCols;
char* valData;
int32_t valBitMapSize;
2023-06-19 11:40:15 +00:00
int32_t valBufSize;
2023-06-21 11:33:27 +00:00
SArray* valVarCols;
bool valColExist;
2024-04-12 07:44:23 +00:00
} SHJoinTableCtx;
2023-06-15 11:34:15 +00:00
2023-07-25 01:03:52 +00:00
/*
 * Six 64-bit counters describing one hash-join execution; embedded by value
 * in SHJoinOperatorInfo as `execInfo`.
 * NOTE(review): by their names these count build/probe blocks and rows, the
 * produced rows and the expected rows; where they are updated is not visible
 * in this header — confirm.
 */
typedef struct SHJoinExecInfo {
  int64_t buildBlkNum;
  int64_t buildBlkRows;
  int64_t probeBlkNum;
  int64_t probeBlkRows;
  int64_t resRows;
  int64_t expectRows;
} SHJoinExecInfo;
2023-06-15 11:34:15 +00:00
typedef struct SHJoinOperatorInfo {
2024-04-12 07:44:23 +00:00
EJoinType joinType;
EJoinSubType subType;
SHJoinTableCtx tbs[2];
SHJoinTableCtx* pBuild;
SHJoinTableCtx* pProbe;
2024-04-10 11:27:36 +00:00
SFilterInfo* pPreFilter;
SFilterInfo* pFinFilter;
SSDataBlock* finBlk;
SSDataBlock* midBlk;
2024-04-12 07:44:23 +00:00
STimeWindow tblTimeRange;
2023-06-21 11:33:27 +00:00
int32_t pResColNum;
int8_t* pResColMap;
SArray* pRowBufs;
SSHashObj* pKeyHash;
2023-07-06 11:33:31 +00:00
bool keyHashBuilt;
2023-06-21 11:33:27 +00:00
SHJoinCtx ctx;
2023-07-25 01:03:52 +00:00
SHJoinExecInfo execInfo;
2024-04-10 11:27:36 +00:00
int32_t blkThreshold;
2024-04-12 07:44:23 +00:00
hJoinImplFp joinFp;
2023-06-15 11:34:15 +00:00
} SHJoinOperatorInfo;
2023-06-20 11:34:16 +00:00
2024-04-10 11:27:36 +00:00
/*
 * Evaluate `c` exactly once; if the result is not TSDB_CODE_SUCCESS, store it
 * in terrno and return it from the enclosing function. On success control
 * falls through and terrno is left untouched.
 * The enclosing function must be able to return an int32_t value.
 */
#define HJ_ERR_RET(c)                        \
  do {                                       \
    int32_t hjCode_ = (c);                   \
    if (hjCode_ != TSDB_CODE_SUCCESS) {      \
      terrno = hjCode_;                      \
      return hjCode_;                        \
    }                                        \
  } while (0)
/*
 * Assign the value of `c` to the caller's `code` variable; if it is not
 * TSDB_CODE_SUCCESS, copy it into terrno and jump to the caller's `_return`
 * label. On success control falls through with `code` holding the result.
 * Requires an assignable `code` and a `_return:` label in the enclosing
 * function.
 */
#define HJ_ERR_JRET(c)                       \
  do {                                       \
    code = (c);                              \
    if (code != TSDB_CODE_SUCCESS) {         \
      terrno = code;                         \
      goto _return;                          \
    }                                        \
  } while (0)
2024-04-12 07:44:23 +00:00
/*
 * Hash-join entry points and helpers. Only the declarations are visible here.
 * NOTE(review): every note below is inferred from names and signatures —
 * confirm against the implementations. The int32_t results are presumably
 * TSDB_CODE_* status values.
 */
// Join drivers; both have the shape of hJoinImplFp (int32_t (*)(SOperatorInfo*)).
int32_t hInnerJoinDo(struct SOperatorInfo* pOperator);
int32_t hLeftJoinDo(struct SOperatorInfo* pOperator);
// presumably marks the operator as finished
void hJoinSetDone(struct SOperatorInfo* pOperator);
// presumably appends matched rows to pRes; *allFetched is an output flag
void hJoinAppendResToBlock(struct SOperatorInfo* pOperator, SSDataBlock* pRes, bool* allFetched);
// presumably serializes the key columns of row rowIdx; *pBufLen receives a length.
// The meaning of the bool result is not visible here.
bool hJoinCopyKeyColsDataToBuf(SHJoinTableCtx* pTable, int32_t rowIdx, size_t *pBufLen);
// presumably moves or merges rows between the intermediate and final blocks;
// both block pointers are passed by address, so they may be replaced
int32_t hJoinCopyMergeMidBlk(SHJoinCtx* pCtx, SSDataBlock** ppMid, SSDataBlock** ppFin);
// presumably handles rows left in the intermediate block (see SHJoinCtx.midRemains)
int32_t hJoinHandleMidRemains(SHJoinOperatorInfo* pJoin, SHJoinCtx* pCtx);
// presumably compares blkRows against SHJoinOperatorInfo.blkThreshold
bool hJoinBlkReachThreshold(SHJoinOperatorInfo* pInfo, int64_t blkRows);
// presumably copies `rows` probe rows starting at startIdx into pRes
int32_t hJoinCopyNMatchRowsToBlock(SHJoinOperatorInfo* pJoin, SSDataBlock* pRes, int32_t startIdx, int32_t rows);
2024-04-10 11:27:36 +00:00
2023-06-15 11:34:15 +00:00
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_HASHJOIN_H