TDengine/source/libs/executor/inc/hashjoin.h

189 lines
4.9 KiB
C
Raw Normal View History

2023-06-15 11:34:15 +00:00
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef TDENGINE_HASHJOIN_H
#define TDENGINE_HASHJOIN_H
#ifdef __cplusplus
extern "C" {
#endif
2023-06-21 11:33:27 +00:00
// Default hash-join page size: 10 * 1024 * 1024 = 10485760.
// NOTE(review): the unit is presumably bytes - confirm at the use site.
#define HASH_JOIN_DEFAULT_PAGE_SIZE (10 * 1024 * 1024)
2024-04-10 11:27:36 +00:00
// Tunables for hash-join result blocks.
// NOTE(review): exact use sites are in the implementation file, not visible
// from this header; the notes below are inferred from the names.

// Default number of rows per block.
#define HJOIN_DEFAULT_BLK_ROWS_NUM 4096
// Upper bound on a block's size: 10 * 1024 * 1024 = 10485760.
#define HJOIN_BLK_SIZE_LIMIT       (10 * 1024 * 1024)
// Size reserved for a row bitmap: 2 * 1024 * 1024 = 2097152.
#define HJOIN_ROW_BITMAP_SIZE      (2 * 1024 * 1024)
// Fill ratio; presumably used to derive SHJoinOperatorInfo.blkThreshold
// (see hJoinBlkReachThreshold) - TODO confirm.
#define HJOIN_BLK_THRESHOLD_RATIO  0.9
2023-06-21 11:33:27 +00:00
2024-04-12 07:44:23 +00:00
// Pointer to a join implementation routine: takes the operator and returns an
// int32_t status. Stored in SHJoinOperatorInfo.joinFp; hInnerJoinDo and
// hLeftJoinDo are declared with this shape.
typedef int32_t (*hJoinImplFp)(SOperatorInfo* pOperator);
2023-06-25 09:31:04 +00:00
// Locator for one buffered row. Declared under 1-byte packing, so there is no
// padding between members: sizeof == sizeof(void*) + 2 + 4.
#pragma pack(push, 1)
typedef struct SBufRowInfo {
  void*    next;    // untyped link; presumably the next SBufRowInfo in a chain - confirm
  uint16_t pageId;  // page identifier; presumably an index into SHJoinOperatorInfo.pRowBufs - confirm
  int32_t  offset;  // offset of the row; presumably within that page's data - confirm
} SBufRowInfo;
#pragma pack(pop)
2024-04-12 07:44:23 +00:00
// Phase marker kept in SHJoinCtx.probePhase. Values start at 1, so a
// zero-initialized context holds none of these phases.
// NOTE(review): PRE / CUR / POST presumably refer to probe rows before, inside
// and after the current range of interest - confirm in the implementation.
typedef enum EHJoinPhase {
  E_JOIN_PHASE_PRE  = 1,
  E_JOIN_PHASE_CUR  = 2,
  E_JOIN_PHASE_POST = 3
} EHJoinPhase;
2023-06-20 11:34:16 +00:00
typedef struct SHJoinCtx {
bool rowRemains;
2024-04-12 07:44:23 +00:00
bool midRemains;
2024-04-10 11:27:36 +00:00
int64_t limit;
2023-06-20 11:34:16 +00:00
SBufRowInfo* pBuildRow;
SSDataBlock* pProbeData;
2024-04-12 07:44:23 +00:00
EHJoinPhase probePhase;
int32_t probePreIdx;
int32_t probeStartIdx;
int32_t probeEndIdx;
int32_t probePostIdx;
bool readMatch;
2023-06-20 11:34:16 +00:00
} SHJoinCtx;
2023-06-15 11:34:15 +00:00
2023-06-21 11:33:27 +00:00
typedef struct SHJoinColInfo {
2023-12-12 11:31:12 +00:00
int32_t srcSlot;
int32_t dstSlot;
bool keyCol;
bool vardata;
int32_t* offset;
int32_t bytes;
char* data;
char* bitMap;
SColumnInfoData* colData;
2023-06-21 11:33:27 +00:00
} SHJoinColInfo;
2023-06-15 11:34:15 +00:00
2023-06-19 11:40:15 +00:00
// Bookkeeping for one buffer page.
// NOTE(review): presumably the element type of SHJoinOperatorInfo.pRowBufs,
// addressed through SBufRowInfo.pageId - confirm.
typedef struct SBufPageInfo {
  int32_t pageSize;  // size of the page
  int32_t offset;    // offset within the page; presumably the next free position - confirm
  char*   data;      // page memory
} SBufPageInfo;
2023-06-20 11:34:16 +00:00
typedef struct SGroupData {
2023-06-19 11:40:15 +00:00
SBufRowInfo* rows;
2023-06-20 11:34:16 +00:00
} SGroupData;
2023-06-19 11:40:15 +00:00
2024-04-12 07:44:23 +00:00
// Source-to-destination column mapping; referenced from
// SHJoinTableCtx.primCol.
// NOTE(review): field notes are inferred from member names - confirm.
typedef struct SHJoinColMap {
  int32_t srcSlot;  // slot id on the source side (presumed)
  int32_t dstSlot;  // slot id on the destination side (presumed)
  bool    vardata;  // presumably: column holds variable-length data
  int32_t bytes;    // presumably: column width in bytes
} SHJoinColMap;
// Parameters of the expression applied to the primary column; stored as
// SHJoinTableCtx.primCtx. For now only timetruncate is supported.
typedef struct SHJoinPrimExprCtx {
  int64_t truncateUnit;
  int64_t timezoneUnit;
  int32_t targetSlotId;  // presumably: slot receiving the expression result - confirm
} SHJoinPrimExprCtx;
typedef struct SHJoinTableCtx {
2023-07-05 03:07:08 +00:00
int32_t downStreamIdx;
2023-06-19 11:40:15 +00:00
SOperatorInfo* downStream;
int32_t blkId;
SQueryStat inputStat;
2024-04-12 07:44:23 +00:00
bool hasTimeRange;
SHJoinColMap* primCol;
SNode* primExpr;
SHJoinPrimExprCtx primCtx;
2023-06-19 11:40:15 +00:00
int32_t keyNum;
2023-06-21 11:33:27 +00:00
SHJoinColInfo* keyCols;
2023-06-19 11:40:15 +00:00
char* keyBuf;
2023-06-21 11:33:27 +00:00
char* keyData;
2023-06-19 11:40:15 +00:00
int32_t valNum;
2023-06-21 11:33:27 +00:00
SHJoinColInfo* valCols;
char* valData;
int32_t valBitMapSize;
2023-06-19 11:40:15 +00:00
int32_t valBufSize;
2023-06-21 11:33:27 +00:00
SArray* valVarCols;
bool valColExist;
2024-04-12 07:44:23 +00:00
} SHJoinTableCtx;
2023-06-15 11:34:15 +00:00
2023-07-25 01:03:52 +00:00
// Execution counters of a hash join; embedded as SHJoinOperatorInfo.execInfo.
// NOTE(review): meanings are inferred from member names - confirm.
typedef struct SHJoinExecInfo {
  int64_t buildBlkNum;   // blocks consumed from the build side (presumed)
  int64_t buildBlkRows;  // rows consumed from the build side (presumed)
  int64_t probeBlkNum;   // blocks consumed from the probe side (presumed)
  int64_t probeBlkRows;  // rows consumed from the probe side (presumed)
  int64_t resRows;       // rows produced (presumed)
  int64_t expectRows;
} SHJoinExecInfo;
2023-06-15 11:34:15 +00:00
typedef struct SHJoinOperatorInfo {
2024-04-12 07:44:23 +00:00
EJoinType joinType;
EJoinSubType subType;
SHJoinTableCtx tbs[2];
SHJoinTableCtx* pBuild;
SHJoinTableCtx* pProbe;
2024-04-10 11:27:36 +00:00
SFilterInfo* pPreFilter;
SFilterInfo* pFinFilter;
SSDataBlock* finBlk;
SSDataBlock* midBlk;
2024-04-12 07:44:23 +00:00
STimeWindow tblTimeRange;
2023-06-21 11:33:27 +00:00
int32_t pResColNum;
int8_t* pResColMap;
SArray* pRowBufs;
SSHashObj* pKeyHash;
2023-07-06 11:33:31 +00:00
bool keyHashBuilt;
2023-06-21 11:33:27 +00:00
SHJoinCtx ctx;
2023-07-25 01:03:52 +00:00
SHJoinExecInfo execInfo;
2024-04-10 11:27:36 +00:00
int32_t blkThreshold;
2024-04-12 07:44:23 +00:00
hJoinImplFp joinFp;
2023-06-15 11:34:15 +00:00
} SHJoinOperatorInfo;
2023-06-20 11:34:16 +00:00
2024-04-10 11:27:36 +00:00
// Evaluate `c` exactly once; if the result is not TSDB_CODE_SUCCESS, store it
// in terrno and return it from the enclosing function. On success nothing is
// written and execution continues.
// Requires TSDB_CODE_SUCCESS and an assignable terrno in scope, and must be
// used in a function whose return type accepts an int32_t.
#define HJ_ERR_RET(c)                 \
  do {                                \
    int32_t _code = (c);              \
    if (TSDB_CODE_SUCCESS != _code) { \
      terrno = _code;                 \
      return _code;                   \
    }                                 \
  } while (0)
// Assign the result of `c` to the caller's `code` variable; if it is not
// TSDB_CODE_SUCCESS, copy it to terrno and jump to the caller's `_return`
// label. Note that `code` is overwritten on success as well.
// Requires in the enclosing function: an assignable `code`, a `_return:`
// label, plus TSDB_CODE_SUCCESS and terrno in scope.
#define HJ_ERR_JRET(c)               \
  do {                               \
    code = (c);                      \
    if (TSDB_CODE_SUCCESS != code) { \
      terrno = code;                 \
      goto _return;                  \
    }                                \
  } while (0)
2024-04-12 07:44:23 +00:00
// Hash-join routines implemented outside this header.
// NOTE(review): only the signatures are visible here; the behavioural notes
// are inferred from names and must be confirmed against the implementation.
// The int32_t results are presumably TSDB status codes (TSDB_CODE_SUCCESS on
// success).

// Join implementations; both are declared with the shape of hJoinImplFp.
int32_t hInnerJoinDo(struct SOperatorInfo* pOperator);
int32_t hLeftJoinDo(struct SOperatorInfo* pOperator);

// Presumably marks the operator as finished.
void hJoinSetDone(struct SOperatorInfo* pOperator);

// Appends results to pRes; *allFetched is an output flag.
void hJoinAppendResToBlock(struct SOperatorInfo* pOperator, SSDataBlock* pRes, bool* allFetched);

// Copies the key column data of row rowIdx of pTable into a buffer and reports
// a length through *pBufLen. The meaning of the bool result is not visible
// from here.
bool hJoinCopyKeyColsDataToBuf(SHJoinTableCtx* pTable, int32_t rowIdx, size_t* pBufLen);

// Intermediate-block ("mid") helpers; the blocks are passed by address.
int32_t hJoinCopyMergeMidBlk(SHJoinCtx* pCtx, SSDataBlock** ppMid, SSDataBlock** ppFin);
int32_t hJoinHandleMidRemains(SHJoinOperatorInfo* pJoin, SHJoinCtx* pCtx);

// Presumably compares blkRows with pInfo->blkThreshold.
bool hJoinBlkReachThreshold(SHJoinOperatorInfo* pInfo, int64_t blkRows);

// Copies `rows` matched rows, starting at startIdx, into pRes.
int32_t hJoinCopyNMatchRowsToBlock(SHJoinOperatorInfo* pJoin, SSDataBlock* pRes, int32_t startIdx,
                                   int32_t rows);
2024-04-10 11:27:36 +00:00
2023-06-15 11:34:15 +00:00
#ifdef __cplusplus
}
#endif
#endif // TDENGINE_HASHJOIN_H