TDengine/source/libs/stream/src/tstreamFileState.c

743 lines
24 KiB
C
Raw Normal View History

2023-04-03 06:31:37 +00:00
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "tstreamFileState.h"
2023-05-11 09:32:44 +00:00
#include "query.h"
2023-04-07 07:26:15 +00:00
#include "streamBackendRocksdb.h"
2023-04-03 06:31:37 +00:00
#include "taos.h"
2023-04-10 04:51:00 +00:00
#include "tcommon.h"
2023-04-03 06:31:37 +00:00
#include "thash.h"
#include "tsimplehash.h"
2023-04-21 09:47:00 +00:00
// Fraction of the currently allocated row buffers that one flush round tries to release.
#define FLUSH_RATIO 0.5
// Lower bound on rows released per flush round; the row-buffer cap is never below twice this.
#define FLUSH_NUM 4
// Memory budget used when the caller passes a non-positive memSize.
#define DEFAULT_MAX_STREAM_BUFFER_SIZE (128 * 1024 * 1024)
// Upper bound for the initial index capacity and for the number of rows recovered at init.
#define MIN_NUM_OF_ROW_BUFF 10240

// Key (and key prefix, as "<TASK_KEY>:<checkpointId>") of checkpoint records in the default store.
#define TASK_KEY "streamFileState"
// Key under which the persisted flushMark is stored.
#define STREAM_STATE_INFO_NAME "StreamStateCheckPoint"
2023-11-07 06:11:08 +00:00
2023-04-03 06:31:37 +00:00
struct SStreamFileState {
2023-09-22 01:40:08 +00:00
SList* usedBuffs;
SList* freeBuffs;
void* rowStateBuff;
void* pFileStore;
int32_t rowSize;
int32_t selectivityRowSize;
int32_t keyLen;
uint64_t preCheckPointVersion;
uint64_t checkPointVersion;
TSKEY maxTs;
TSKEY deleteMark;
TSKEY flushMark;
uint64_t maxRowCount;
uint64_t curRowCount;
GetTsFun getTs;
char* id;
char* cfName;
2023-10-07 09:43:50 +00:00
_state_buff_cleanup_fn stateBuffCleanupFn;
_state_buff_remove_fn stateBuffRemoveFn;
_state_buff_remove_by_pos_fn stateBuffRemoveByPosFn;
_state_buff_create_statekey_fn stateBuffCreateStateKeyFn;
2023-09-22 01:40:08 +00:00
_state_file_remove_fn stateFileRemoveFn;
_state_file_get_fn stateFileGetFn;
_state_file_clear_fn stateFileClearFn;
2023-04-03 06:31:37 +00:00
};
typedef SRowBuffPos SRowBuffInfo;
2023-10-10 06:35:02 +00:00
int32_t stateHashBuffRemoveFn(void* pBuff, const void* pKey, size_t keyLen) {
2023-10-07 09:43:50 +00:00
SRowBuffPos** pos = tSimpleHashGet(pBuff, pKey, keyLen);
if (pos) {
(*pos)->beFlushed = true;
2023-09-26 09:19:21 +00:00
}
2023-09-20 07:06:08 +00:00
return tSimpleHashRemove(pBuff, pKey, keyLen);
}
2023-10-07 09:43:50 +00:00
// Removes the index entry for pPos->pKey, but only when that entry still refers to pPos
// itself. Returns TSDB_CODE_SUCCESS when nothing had to be removed.
int32_t stateHashBuffRemoveByPosFn(SStreamFileState* pFileState, SRowBuffPos* pPos) {
  void*         pIndex = pFileState->rowStateBuff;
  size_t        len = pFileState->keyLen;
  SRowBuffPos** ppFound = tSimpleHashGet(pIndex, pPos->pKey, len);

  if (ppFound == NULL || *ppFound != pPos) {
    return TSDB_CODE_SUCCESS;
  }
  return tSimpleHashRemove(pIndex, pPos->pKey, len);
}
2023-10-10 06:35:02 +00:00
// Empties the hash index without destroying it.
void stateHashBuffClearFn(void* pBuff) {
  tSimpleHashClear(pBuff);
}
2023-09-20 07:06:08 +00:00
2023-10-10 06:35:02 +00:00
// Destroys the hash index.
void stateHashBuffCleanupFn(void* pBuff) {
  tSimpleHashCleanup(pBuff);
}
2023-09-20 07:06:08 +00:00
// Deletes pKey from the persistent store (interval-window flavour).
int32_t intervalFileRemoveFn(SStreamFileState* pFileState, const void* pKey) {
  int32_t code = streamStateDel_rocksdb(pFileState->pFileStore, pKey);
  return code;
}
// Reads the value stored for pKey (interval-window flavour). `data` and `pDataLen` are
// forwarded unchanged to streamStateGet_rocksdb().
int32_t intervalFileGetFn(SStreamFileState* pFileState, void* pKey, void* data, int32_t* pDataLen) {
  int32_t code = streamStateGet_rocksdb(pFileState->pFileStore, pKey, data, pDataLen);
  return code;
}
2023-09-22 01:40:08 +00:00
// Builds a heap-allocated SStateKey from the window key held by pPos and the operator
// number `num`. The caller owns the result and must free it.
// Returns NULL when the allocation fails.
void* intervalCreateStateKey(SRowBuffPos* pPos, int64_t num) {
  SStateKey* pStateKey = taosMemoryCalloc(1, sizeof(SStateKey));
  if (pStateKey == NULL) {
    return NULL;
  }

  SWinKey* pWinKey = pPos->pKey;
  pStateKey->key = *pWinKey;
  pStateKey->opNum = num;
  return pStateKey;
}
2023-09-20 07:06:08 +00:00
// Deletes pKey from the persistent store (session-window flavour).
int32_t sessionFileRemoveFn(SStreamFileState* pFileState, const void* pKey) {
  int32_t code = streamStateSessionDel_rocksdb(pFileState->pFileStore, pKey);
  return code;
}
// Reads the value stored for pKey (session-window flavour). `data` and `pDataLen` are
// forwarded unchanged to streamStateSessionGet_rocksdb().
int32_t sessionFileGetFn(SStreamFileState* pFileState, void* pKey, void* data, int32_t* pDataLen) {
  int32_t code = streamStateSessionGet_rocksdb(pFileState->pFileStore, pKey, data, pDataLen);
  return code;
}
2023-09-22 01:40:08 +00:00
// Builds a heap-allocated SStateSessionKey from the session key held by pPos and the
// operator number `num`. The caller owns the result and must free it.
// Returns NULL when the allocation fails.
void* sessionCreateStateKey(SRowBuffPos* pPos, int64_t num) {
  SStateSessionKey* pStateKey = taosMemoryCalloc(1, sizeof(SStateSessionKey));
  if (pStateKey == NULL) {
    return NULL;
  }

  SSessionKey* pWinKey = pPos->pKey;
  pStateKey->key = *pWinKey;
  pStateKey->opNum = num;
  return pStateKey;
}
2023-11-07 06:11:08 +00:00
// Decodes one fixed-width 64-bit value from pBuff into *pKey. `len` is not used.
static void streamFileStateDecode(TSKEY* pKey, void* pBuff, int32_t len) {
  (void)len;
  (void)taosDecodeFixedI64(pBuff, pKey);
}
// Encodes *pKey as one fixed-width 64-bit value into a freshly allocated buffer.
// On return *pVal points to the buffer (owned by the caller) and *pLen is sizeof(TSKEY).
// NOTE(review): the allocation result is not checked before it is written to.
static void streamFileStateEncode(TSKEY* pKey, void** pVal, int32_t* pLen) {
  int32_t encodedLen = sizeof(TSKEY);
  void*   pOut = taosMemoryCalloc(1, encodedLen);

  *pLen = encodedLen;
  *pVal = pOut;

  // taosEncodeFixedI64 takes the address of a cursor; use a copy so *pVal keeps the start.
  void* cursor = pOut;
  taosEncodeFixedI64(&cursor, *pKey);
}
2023-05-18 05:46:38 +00:00
SStreamFileState* streamFileStateInit(int64_t memSize, uint32_t keySize, uint32_t rowSize, uint32_t selectRowSize,
2023-10-10 06:35:02 +00:00
GetTsFun fp, void* pFile, TSKEY delMark, const char* taskId, int64_t checkpointId,
int8_t type) {
2023-04-03 06:31:37 +00:00
if (memSize <= 0) {
memSize = DEFAULT_MAX_STREAM_BUFFER_SIZE;
}
if (rowSize == 0) {
goto _error;
}
SStreamFileState* pFileState = taosMemoryCalloc(1, sizeof(SStreamFileState));
if (!pFileState) {
goto _error;
}
2023-05-15 08:26:24 +00:00
rowSize += selectRowSize;
2023-04-11 10:47:15 +00:00
pFileState->maxRowCount = TMAX((uint64_t)memSize / rowSize, FLUSH_NUM * 2);
2023-04-03 06:31:37 +00:00
pFileState->usedBuffs = tdListNew(POINTER_BYTES);
pFileState->freeBuffs = tdListNew(POINTER_BYTES);
_hash_fn_t hashFn = taosGetDefaultHashFunction(TSDB_DATA_TYPE_BINARY);
2023-09-06 08:34:25 +00:00
int32_t cap = TMIN(MIN_NUM_OF_ROW_BUFF, pFileState->maxRowCount);
2023-09-20 07:06:08 +00:00
if (type == STREAM_STATE_BUFF_HASH) {
pFileState->rowStateBuff = tSimpleHashInit(cap, hashFn);
pFileState->stateBuffCleanupFn = stateHashBuffCleanupFn;
pFileState->stateBuffRemoveFn = stateHashBuffRemoveFn;
2023-10-07 09:43:50 +00:00
pFileState->stateBuffRemoveByPosFn = stateHashBuffRemoveByPosFn;
2023-09-22 01:40:08 +00:00
pFileState->stateBuffCreateStateKeyFn = intervalCreateStateKey;
2023-09-20 07:06:08 +00:00
pFileState->stateFileRemoveFn = intervalFileRemoveFn;
pFileState->stateFileGetFn = intervalFileGetFn;
pFileState->stateFileClearFn = streamStateClear_rocksdb;
2023-09-22 01:40:08 +00:00
pFileState->cfName = taosStrdup("state");
2023-09-20 07:06:08 +00:00
} else {
pFileState->rowStateBuff = tSimpleHashInit(cap, hashFn);
pFileState->stateBuffCleanupFn = sessionWinStateCleanup;
2023-10-07 09:43:50 +00:00
pFileState->stateBuffRemoveFn = deleteSessionWinStateBuffFn;
pFileState->stateBuffRemoveByPosFn = deleteSessionWinStateBuffByPosFn;
2023-09-22 01:40:08 +00:00
pFileState->stateBuffCreateStateKeyFn = sessionCreateStateKey;
2023-09-20 07:06:08 +00:00
pFileState->stateFileRemoveFn = sessionFileRemoveFn;
pFileState->stateFileGetFn = sessionFileGetFn;
pFileState->stateFileClearFn = streamStateSessionClear_rocksdb;
2023-09-22 01:40:08 +00:00
pFileState->cfName = taosStrdup("sess");
2023-09-20 07:06:08 +00:00
}
if (!pFileState->usedBuffs || !pFileState->freeBuffs || !pFileState->rowStateBuff) {
2023-04-03 06:31:37 +00:00
goto _error;
}
2023-06-08 10:59:25 +00:00
2023-04-07 04:50:59 +00:00
pFileState->keyLen = keySize;
2023-04-03 06:31:37 +00:00
pFileState->rowSize = rowSize;
2023-05-15 08:26:24 +00:00
pFileState->selectivityRowSize = selectRowSize;
2023-04-03 06:31:37 +00:00
pFileState->preCheckPointVersion = 0;
pFileState->checkPointVersion = 1;
pFileState->pFileStore = pFile;
2023-04-04 09:08:24 +00:00
pFileState->getTs = fp;
2023-04-03 06:31:37 +00:00
pFileState->curRowCount = 0;
2023-04-04 09:08:24 +00:00
pFileState->deleteMark = delMark;
2023-04-21 09:47:00 +00:00
pFileState->flushMark = INT64_MIN;
pFileState->maxTs = INT64_MIN;
2023-07-12 02:48:58 +00:00
pFileState->id = taosStrdup(taskId);
2023-06-08 10:59:25 +00:00
2023-10-10 06:35:02 +00:00
// todo(liuyao) optimize
2023-10-07 11:24:21 +00:00
if (type == STREAM_STATE_BUFF_HASH) {
recoverSnapshot(pFileState, checkpointId);
2023-10-13 03:32:35 +00:00
} else {
recoverSesssion(pFileState, checkpointId);
2023-10-07 11:24:21 +00:00
}
2023-10-13 03:32:35 +00:00
2023-11-07 06:11:08 +00:00
void* valBuf = NULL;
int32_t len = 0;
2023-11-07 07:45:38 +00:00
int32_t code = streamDefaultGet_rocksdb(pFileState->pFileStore, STREAM_STATE_INFO_NAME, &valBuf, &len);
2023-11-07 06:11:08 +00:00
if (code == TSDB_CODE_SUCCESS) {
ASSERT(len == sizeof(TSKEY));
streamFileStateDecode(&pFileState->flushMark, valBuf, len);
2023-11-07 07:45:38 +00:00
qDebug("===stream===flushMark read:%" PRId64, pFileState->flushMark);
2023-11-07 06:11:08 +00:00
}
taosMemoryFreeClear(valBuf);
2023-04-03 06:31:37 +00:00
return pFileState;
_error:
2023-04-04 09:08:24 +00:00
streamFileStateDestroy(pFileState);
2023-04-03 06:31:37 +00:00
return NULL;
}
// Frees a position together with its key and its row buffer (if it still holds one).
void destroyRowBuffPos(SRowBuffPos* pPos) {
  taosMemoryFreeClear(pPos->pKey);
  taosMemoryFreeClear(pPos->pRowBuff);

  taosMemoryFree(pPos);
}
void destroyRowBuffPosPtr(void* ptr) {
if (!ptr) {
return;
}
2023-04-04 09:08:24 +00:00
SRowBuffPos* pPos = *(SRowBuffPos**)ptr;
2023-04-07 04:50:59 +00:00
if (!pPos->beUsed) {
destroyRowBuffPos(pPos);
}
2023-04-03 06:31:37 +00:00
}
2023-04-18 01:50:13 +00:00
// List-element destructor: `ptr` points at a stored SRowBuffPos*.
// Unlike destroyRowBuffPosPtr(), the position is freed regardless of its beUsed flag.
void destroyRowBuffAllPosPtr(void* ptr) {
  if (ptr != NULL) {
    destroyRowBuffPos(*(SRowBuffPos**)ptr);
  }
}
2023-04-04 09:08:24 +00:00
// List-element destructor: `ptr` points at a stored row-buffer pointer, which is freed.
void destroyRowBuff(void* ptr) {
  if (ptr != NULL) {
    void* pRow = *(void**)ptr;
    taosMemoryFree(pRow);
  }
}
// Releases a file state and everything it owns. A NULL argument is ignored.
void streamFileStateDestroy(SStreamFileState* pFileState) {
  if (pFileState == NULL) {
    return;
  }

  // owned strings
  taosMemoryFree(pFileState->id);
  taosMemoryFree(pFileState->cfName);

  // tracked positions (freed regardless of beUsed) and recycled row buffers
  tdListFreeP(pFileState->usedBuffs, destroyRowBuffAllPosPtr);
  tdListFreeP(pFileState->freeBuffs, destroyRowBuff);

  // in-memory index, via the callback chosen at init
  pFileState->stateBuffCleanupFn(pFileState->rowStateBuff);

  taosMemoryFree(pFileState);
}
2023-09-20 07:06:08 +00:00
// Moves the row buffer held by pPos (if any) onto the free list and detaches it from pPos.
void putFreeBuff(SStreamFileState* pFileState, SRowBuffPos* pPos) {
  if (pPos->pRowBuff == NULL) {
    return;
  }

  tdListAppend(pFileState->freeBuffs, &(pPos->pRowBuff));
  pPos->pRowBuff = NULL;
}
2023-04-04 09:08:24 +00:00
// Drops positions from usedBuffs and recycles their row buffers.
//   all == true : every position is dropped; the index is NOT touched.
//   all == false: only unused positions whose key timestamp is below `ts` are dropped,
//                 and each is also removed from the index.
void clearExpiredRowBuff(SStreamFileState* pFileState, TSKEY ts, bool all) {
  SListIter  it = {0};
  SListNode* pCur = NULL;

  tdListInitIter(pFileState->usedBuffs, &it, TD_LIST_FORWARD);
  while ((pCur = tdListNext(&it)) != NULL) {
    SRowBuffPos* pPos = *(SRowBuffPos**)(pCur->data);

    bool drop = all || (pFileState->getTs(pPos->pKey) < ts && !pPos->beUsed);
    if (!drop) {
      continue;
    }

    putFreeBuff(pFileState, pPos);
    if (!all) {
      pFileState->stateBuffRemoveByPosFn(pFileState, pPos);
    }
    destroyRowBuffPos(pPos);

    // The iterator already advanced past pCur, so unlinking it here is safe.
    tdListPopNode(pFileState->usedBuffs, pCur);
    taosMemoryFreeClear(pCur);
  }
}
2023-09-26 02:41:12 +00:00
// Moves unused positions whose timestamp is at or below flushMark from usedBuffs to
// pFlushList and removes them from the index. Stops once `max` moved positions held a row buffer.
void clearFlushedRowBuff(SStreamFileState* pFileState, SStreamSnapshot* pFlushList, uint64_t max) {
  uint64_t   moved = 0;
  SListIter  it = {0};
  SListNode* pCur = NULL;

  tdListInitIter(pFileState->usedBuffs, &it, TD_LIST_FORWARD);
  while ((pCur = tdListNext(&it)) != NULL && moved < max) {
    SRowBuffPos* pPos = *(SRowBuffPos**)pCur->data;
    TSKEY        keyTs = pFileState->getTs(pPos->pKey);

    if (!isFlushedState(pFileState, keyTs, 0) || pPos->beUsed) {
      continue;
    }

    tdListAppend(pFlushList, &pPos);
    pFileState->flushMark = TMAX(pFileState->flushMark, keyTs);
    pFileState->stateBuffRemoveByPosFn(pFileState, pPos);
    tdListPopNode(pFileState->usedBuffs, pCur);
    taosMemoryFreeClear(pCur);

    // Only positions that actually hold a row buffer count towards `max`.
    if (pPos->pRowBuff) {
      moved++;
    }
  }
}
2023-04-04 09:08:24 +00:00
// Resets the in-memory state: marks go back to INT64_MIN, the index is emptied and every
// tracked position is dropped (row buffers return to the free list).
void streamFileStateClear(SStreamFileState* pFileState) {
  pFileState->maxTs = INT64_MIN;
  pFileState->flushMark = INT64_MIN;

  tSimpleHashClear(pFileState->rowStateBuff);
  clearExpiredRowBuff(pFileState, 0, true);
}
2023-05-11 09:32:44 +00:00
// True when flushMark is positive.
bool needClearDiskBuff(SStreamFileState* pFileState) {
  TSKEY mark = pFileState->flushMark;
  return mark > 0;
}
2023-04-26 10:58:19 +00:00
2023-10-10 06:35:02 +00:00
// Sets the beUsed flag of pPos to `used`. pFileState is not consulted.
void streamFileStateReleaseBuff(SStreamFileState* pFileState, SRowBuffPos* pPos, bool used) {
  (void)pFileState;
  pPos->beUsed = used;
}
2023-09-20 07:06:08 +00:00
2023-04-07 04:50:59 +00:00
// Moves positions whose beUsed flag equals `used` from usedBuffs to pFlushList, removing
// them from the index and raising flushMark to their timestamps. Stops once `max` moved
// positions held a row buffer.
void popUsedBuffs(SStreamFileState* pFileState, SStreamSnapshot* pFlushList, uint64_t max, bool used) {
  uint64_t   moved = 0;
  SListIter  it = {0};
  SListNode* pCur = NULL;

  tdListInitIter(pFileState->usedBuffs, &it, TD_LIST_FORWARD);
  while ((pCur = tdListNext(&it)) != NULL && moved < max) {
    SRowBuffPos* pPos = *(SRowBuffPos**)pCur->data;
    if (pPos->beUsed != used) {
      continue;
    }

    tdListAppend(pFlushList, &pPos);
    pFileState->flushMark = TMAX(pFileState->flushMark, pFileState->getTs(pPos->pKey));
    pFileState->stateBuffRemoveByPosFn(pFileState, pPos);
    tdListPopNode(pFileState->usedBuffs, pCur);
    taosMemoryFreeClear(pCur);

    // Only positions that actually hold a row buffer count towards `max`.
    if (pPos->pRowBuff) {
      moved++;
    }
  }

  qInfo("stream state flush %d rows to disk. is used:%d", listNEles(pFlushList), used);
}
// Frees row buffers by flushing positions to the store.
// Candidates are picked in three stages, each tried only if the previous found nothing:
// already-flushed unused positions, any unused positions, then positions still in use.
// Returns TSDB_CODE_OUT_OF_MEMORY if the work list cannot be created, TSDB_CODE_SUCCESS otherwise.
int32_t flushRowBuff(SStreamFileState* pFileState) {
  SStreamSnapshot* pVictims = tdListNew(POINTER_BYTES);
  if (pVictims == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  // Target: FLUSH_RATIO of the allocated rows, but at least FLUSH_NUM.
  uint64_t target = (uint64_t)(pFileState->curRowCount * FLUSH_RATIO);
  target = TMAX(target, FLUSH_NUM);

  clearFlushedRowBuff(pFileState, pVictims, target);
  if (isListEmpty(pVictims)) {
    popUsedBuffs(pFileState, pVictims, target, false);
    if (isListEmpty(pVictims)) {
      popUsedBuffs(pFileState, pVictims, target, true);
    }
  }

  flushSnapshot(pFileState, pVictims, false);

  // Hand every victim's row buffer back to the free list.
  SListIter  it = {0};
  SListNode* pCur = NULL;
  tdListInitIter(pVictims, &it, TD_LIST_FORWARD);
  while ((pCur = tdListNext(&it)) != NULL) {
    putFreeBuff(pFileState, *(SRowBuffPos**)pCur->data);
  }

  // Positions still marked beUsed survive; the rest are destroyed.
  tdListFreeP(pVictims, destroyRowBuffPosPtr);
  return TSDB_CODE_SUCCESS;
}
// Makes row buffers available: first drops expired unused positions, then, if the free
// list is still empty, flushes positions to the store.
// Returns TSDB_CODE_SUCCESS or the result of flushRowBuff().
int32_t clearRowBuff(SStreamFileState* pFileState) {
  // `maxTs - deleteMark` overflows (undefined behaviour for signed integers) when maxTs is
  // near INT64_MIN, e.g. before any row has been seen. Clamp to INT64_MIN in that case,
  // matching what getSnapshot() does, so that nothing is considered expired.
  TSKEY mark;
  if (pFileState->deleteMark > 0 && pFileState->maxTs < INT64_MIN + pFileState->deleteMark) {
    mark = INT64_MIN;
  } else {
    mark = pFileState->maxTs - pFileState->deleteMark;
  }

  clearExpiredRowBuff(pFileState, mark, false);
  if (isListEmpty(pFileState->freeBuffs)) {
    return flushRowBuff(pFileState);
  }
  return TSDB_CODE_SUCCESS;
}
2023-09-20 07:06:08 +00:00
// Takes one row buffer from the free list, zeroes rowSize bytes of it and returns it.
// Returns NULL when the free list is empty.
void* getFreeBuff(SStreamFileState* pFileState) {
  SListNode* pHead = tdListPopHead(pFileState->freeBuffs);
  if (pHead == NULL) {
    return NULL;
  }

  void* pRow = *(void**)pHead->data;
  memset(pRow, 0, pFileState->rowSize);

  taosMemoryFree(pHead);
  return pRow;
}
2023-09-20 07:06:08 +00:00
// Zeroes the row buffer held by pPos.
// Returns TSDB_CODE_FAILED when pPos holds no row buffer, TSDB_CODE_SUCCESS otherwise.
int32_t streamFileStateClearBuff(SStreamFileState* pFileState, SRowBuffPos* pPos) {
  if (pPos->pRowBuff == NULL) {
    return TSDB_CODE_FAILED;
  }

  memset(pPos->pRowBuff, 0, pFileState->rowSize);
  return TSDB_CODE_SUCCESS;
}
2023-04-03 06:31:37 +00:00
// Creates a new position with a zeroed key and a row buffer, and appends it to usedBuffs.
// The row buffer comes from, in order: the free list, a fresh allocation (while below
// maxRowCount), or the free list again after clearRowBuff() has reclaimed buffers.
// Returns NULL if the position or its key cannot be allocated.
SRowBuffPos* getNewRowPos(SStreamFileState* pFileState) {
  SRowBuffPos* pPos = taosMemoryCalloc(1, sizeof(SRowBuffPos));
  if (pPos == NULL) {
    return NULL;
  }

  pPos->pKey = taosMemoryCalloc(1, pFileState->keyLen);
  // A zero-length allocation may legitimately yield NULL; only treat NULL as failure otherwise.
  if (pPos->pKey == NULL && pFileState->keyLen > 0) {
    taosMemoryFree(pPos);
    return NULL;
  }

  pPos->pRowBuff = getFreeBuff(pFileState);

  if (pPos->pRowBuff == NULL && pFileState->curRowCount < pFileState->maxRowCount) {
    pPos->pRowBuff = taosMemoryCalloc(1, pFileState->rowSize);
    if (pPos->pRowBuff != NULL) {
      pFileState->curRowCount++;
    }
  }

  if (pPos->pRowBuff == NULL) {
    // At the cap (or allocation failed): reclaim buffers and retry the free list.
    int32_t code = clearRowBuff(pFileState);
    ASSERT(code == 0);
    pPos->pRowBuff = getFreeBuff(pFileState);
  }

  tdListAppend(pFileState->usedBuffs, &pPos);
  ASSERT(pPos->pRowBuff != NULL);
  return pPos;
}
2023-09-20 07:06:08 +00:00
// Creates a new position via getNewRowPos() and marks it in use, not flushed and not
// separately freeable. Returns NULL if no position could be created.
SRowBuffPos* getNewRowPosForWrite(SStreamFileState* pFileState) {
  SRowBuffPos* newPos = getNewRowPos(pFileState);
  if (newPos == NULL) {
    return NULL;
  }

  newPos->beUsed = true;
  newPos->beFlushed = false;
  newPos->needFree = false;
  return newPos;
}
2023-04-03 06:31:37 +00:00
// Looks up (or creates) the position for pKey and marks it in use.
//   pVal  (optional) receives the SRowBuffPos*; when NULL, neither pVal nor pVLen is written
//   pVLen receives rowSize whenever pVal is written
// A newly created position is pre-filled from the store when its timestamp is not expired
// and is at or below flushMark.
// Returns TSDB_CODE_SUCCESS, or TSDB_CODE_OUT_OF_MEMORY if no position could be created.
int32_t getRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen, void** pVal, int32_t* pVLen) {
  TSKEY ts = pFileState->getTs(pKey);
  pFileState->maxTs = TMAX(pFileState->maxTs, ts);

  SRowBuffPos** pos = tSimpleHashGet(pFileState->rowStateBuff, pKey, keyLen);
  if (pos) {
    (*pos)->beUsed = true;
    (*pos)->beFlushed = false;
    // Same optional-output contract as the miss path below.
    if (pVal) {
      *pVLen = pFileState->rowSize;
      *pVal = *pos;
    }
    return TSDB_CODE_SUCCESS;
  }

  SRowBuffPos* pNewPos = getNewRowPosForWrite(pFileState);
  if (pNewPos == NULL) {
    return TSDB_CODE_OUT_OF_MEMORY;
  }
  ASSERT(pNewPos->pRowBuff);
  memcpy(pNewPos->pKey, pKey, keyLen);

  if (!isDeteled(pFileState, ts) && isFlushedState(pFileState, ts, 0)) {
    int32_t len = 0;
    void*   p = NULL;
    int32_t code = streamStateGet_rocksdb(pFileState->pFileStore, pKey, &p, &len);
    qDebug("===stream===get %" PRId64 " from disc, res %d", ts, code);
    if (code == TSDB_CODE_SUCCESS && p != NULL && len > 0) {
      // The row buffer holds rowSize bytes; never copy more than that.
      memcpy(pNewPos->pRowBuff, p, TMIN(len, pFileState->rowSize));
    }
    taosMemoryFree(p);
  }

  tSimpleHashPut(pFileState->rowStateBuff, pKey, keyLen, &pNewPos, POINTER_BYTES);
  if (pVal) {
    *pVLen = pFileState->rowSize;
    *pVal = pNewPos;
  }
  return TSDB_CODE_SUCCESS;
}
2023-04-04 09:08:24 +00:00
// Removes pKey from both the in-memory index and the persistent store.
// Both removals are always attempted; the call succeeds if at least one of them does.
int32_t deleteRowBuff(SStreamFileState* pFileState, const void* pKey, int32_t keyLen) {
  int32_t buffCode = pFileState->stateBuffRemoveFn(pFileState->rowStateBuff, pKey, keyLen);
  int32_t fileCode = pFileState->stateFileRemoveFn(pFileState, pKey);

  bool removed = (buffCode == TSDB_CODE_SUCCESS) || (fileCode == TSDB_CODE_SUCCESS);
  return removed ? TSDB_CODE_SUCCESS : TSDB_CODE_FAILED;
}
2023-09-22 01:40:08 +00:00
// Reloads the row content for pPos->pKey from the store into pPos->pRowBuff.
// When the store returns no data, the row buffer is left unchanged.
static void recoverSessionRowBuff(SStreamFileState* pFileState, SRowBuffPos* pPos) {
  int32_t len = 0;
  void*   pBuff = NULL;

  pFileState->stateFileGetFn(pFileState, pPos->pKey, &pBuff, &len);
  // memcpy from a NULL source is undefined even for zero bytes, so copy only real data.
  if (pBuff != NULL && len > 0) {
    memcpy(pPos->pRowBuff, pBuff, len);
  }
  taosMemoryFree(pBuff);
}
2023-04-04 09:08:24 +00:00
// Returns, through *pVal, the row buffer belonging to pPos.
// If pPos already holds a buffer it is returned as is (reloaded from the store first when
// pPos->needFree is set). Otherwise a buffer is obtained, filled from the store, and pPos
// is put at the head of usedBuffs unless needFree is set.
// Always returns TSDB_CODE_SUCCESS.
int32_t getRowBuffByPos(SStreamFileState* pFileState, SRowBuffPos* pPos, void** pVal) {
  if (pPos->pRowBuff == NULL) {
    pPos->pRowBuff = getFreeBuff(pFileState);
    if (pPos->pRowBuff == NULL) {
      if (pFileState->curRowCount >= pFileState->maxRowCount) {
        // At the cap: reclaim buffers, then take one from the free list.
        int32_t code = clearRowBuff(pFileState);
        ASSERT(code == 0);
        pPos->pRowBuff = getFreeBuff(pFileState);
      } else {
        pPos->pRowBuff = taosMemoryCalloc(1, pFileState->rowSize);
        pFileState->curRowCount++;
      }
      ASSERT(pPos->pRowBuff);
    }

    recoverSessionRowBuff(pFileState, pPos);
    (*pVal) = pPos->pRowBuff;
    if (!pPos->needFree) {
      tdListPrepend(pFileState->usedBuffs, &pPos);
    }
    return TSDB_CODE_SUCCESS;
  }

  if (pPos->needFree) {
    recoverSessionRowBuff(pFileState, pPos);
  }
  (*pVal) = pPos->pRowBuff;
  return TSDB_CODE_SUCCESS;
}
// True when the in-memory index holds an entry for pKey.
bool hasRowBuff(SStreamFileState* pFileState, void* pKey, int32_t keyLen) {
  SRowBuffPos** ppEntry = tSimpleHashGet(pFileState->rowStateBuff, pKey, keyLen);
  return ppEntry != NULL;
}
// Drops expired unused positions, then returns the list of positions still tracked.
// The returned list is pFileState->usedBuffs itself, not a copy.
SStreamSnapshot* getSnapshot(SStreamFileState* pFileState) {
  int64_t mark;
  if (INT64_MIN + pFileState->deleteMark >= pFileState->maxTs) {
    // maxTs - deleteMark would not be above INT64_MIN; clamp instead of subtracting.
    mark = INT64_MIN;
  } else {
    mark = pFileState->maxTs - pFileState->deleteMark;
  }

  clearExpiredRowBuff(pFileState, mark, false);
  return pFileState->usedBuffs;
}
2023-04-06 08:33:00 +00:00
// Writes every not-yet-flushed position of pSnapshot that holds a row buffer to the store,
// in batches of up to BATCH_LIMIT entries, and raises flushMark accordingly.
// When flushState is true, flushMark itself is also persisted under STREAM_STATE_INFO_NAME.
// Returns TSDB_CODE_SUCCESS, TSDB_CODE_OUT_OF_MEMORY on allocation failure, or the first
// error reported while adding a row to the batch.
int32_t flushSnapshot(SStreamFileState* pFileState, SStreamSnapshot* pSnapshot, bool flushState) {
  int32_t   code = TSDB_CODE_SUCCESS;
  SListIter iter = {0};
  tdListInitIter(pSnapshot, &iter, TD_LIST_FORWARD);

  const int32_t BATCH_LIMIT = 256;

  int64_t    st = taosGetTimestampMs();
  int32_t    numOfElems = listNEles(pSnapshot);
  SListNode* pNode = NULL;

  int     idx = streamStateGetCfIdx(pFileState->pFileStore, pFileState->cfName);
  int32_t len = pFileState->rowSize + sizeof(uint64_t) + sizeof(int32_t) + 1;
  char*   buf = taosMemoryCalloc(1, len);
  void*   batch = streamStateCreateBatch();
  if (buf == NULL || batch == NULL) {
    // Nothing has been marked flushed yet, so bailing out here loses no state.
    taosMemoryFree(buf);
    if (batch != NULL) {
      streamStateDestroyBatch(batch);
    }
    return TSDB_CODE_OUT_OF_MEMORY;
  }

  while ((pNode = tdListNext(&iter)) != NULL && code == TSDB_CODE_SUCCESS) {
    SRowBuffPos* pPos = *(SRowBuffPos**)pNode->data;
    if (pPos->beFlushed || !pPos->pRowBuff) {
      continue;
    }

    // Build the store key first, so a row is never marked flushed when it cannot be written.
    void* pSKey = pFileState->stateBuffCreateStateKeyFn(pPos, ((SStreamState*)pFileState->pFileStore)->number);
    if (pSKey == NULL) {
      code = TSDB_CODE_OUT_OF_MEMORY;
      break;
    }

    pPos->beFlushed = true;
    pFileState->flushMark = TMAX(pFileState->flushMark, pFileState->getTs(pPos->pKey));

    qDebug("===stream===flushed start:%" PRId64, pFileState->getTs(pPos->pKey));
    if (streamStateGetBatchSize(batch) >= BATCH_LIMIT) {
      streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
      streamStateClearBatch(batch);
    }

    code = streamStatePutBatchOptimize(pFileState->pFileStore, idx, batch, pSKey, pPos->pRowBuff, pFileState->rowSize,
                                       0, buf);
    taosMemoryFreeClear(pSKey);
    // todo handle failure
    memset(buf, 0, len);
  }
  taosMemoryFree(buf);

  // Write whatever is left in the last, partially filled batch.
  if (streamStateGetBatchSize(batch) > 0) {
    streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
  }

  streamStateClearBatch(batch);

  int64_t elapsed = taosGetTimestampMs() - st;
  qDebug("%s flush to disk in batch model completed, rows:%d, batch size:%d, elapsed time:%" PRId64 "ms",
         pFileState->id, numOfElems, BATCH_LIMIT, elapsed);

  if (flushState) {
    void*   valBuf = NULL;
    int32_t valLen = 0;
    streamFileStateEncode(&pFileState->flushMark, &valBuf, &valLen);
    qDebug("===stream===flushMark write:%" PRId64, pFileState->flushMark);
    streamStatePutBatch(pFileState->pFileStore, "default", batch, STREAM_STATE_INFO_NAME, valBuf, valLen, 0);
    taosMemoryFree(valBuf);
    streamStatePutBatch_rocksdb(pFileState->pFileStore, batch);
  }

  streamStateDestroyBatch(batch);
  return code;
}
2023-04-12 09:33:58 +00:00
2023-04-12 03:03:51 +00:00
// Deletes the checkpoint record "<TASK_KEY>:<checkpointId>" from the default store.
// Returns the result of streamDefaultDel_rocksdb().
int32_t forceRemoveCheckpoint(SStreamFileState* pFileState, int64_t checkpointId) {
  char keyBuf[128] = {0};
  // Bounded formatting: the key can never run past keyBuf.
  snprintf(keyBuf, sizeof(keyBuf), "%s:%" PRId64 "", TASK_KEY, checkpointId);
  return streamDefaultDel_rocksdb(pFileState->pFileStore, keyBuf);
}
2023-04-12 09:33:58 +00:00
2023-04-12 03:03:51 +00:00
// Collects into `list` what streamDefaultIterGet_rocksdb() yields for the TASK_KEY prefix.
int32_t getSnapshotIdList(SStreamFileState* pFileState, SArray* list) {
  void* pStore = pFileState->pFileStore;
  return streamDefaultIterGet_rocksdb(pStore, TASK_KEY, NULL, list);
}
2023-04-12 09:33:58 +00:00
// Removes the newest checkpoint record whose stored timestamp is below `mark`.
// The record under TASK_KEY holds the highest checkpoint id as decimal text; records
// "<TASK_KEY>:<id>" hold a timestamp as decimal text. Ids are scanned from highest to
// lowest and the scan stops at the first record removed.
// Returns TSDB_CODE_FAILED when a record is missing or has an unusable length, otherwise
// the code of the last store read.
int32_t deleteExpiredCheckPoint(SStreamFileState* pFileState, TSKEY mark) {
  int32_t code = TSDB_CODE_SUCCESS;
  int64_t maxCheckPointId = 0;
  {
    char    buf[128] = {0};
    void*   val = NULL;
    int32_t len = 0;
    memcpy(buf, TASK_KEY, strlen(TASK_KEY));
    code = streamDefaultGet_rocksdb(pFileState->pFileStore, buf, &val, &len);
    // The value is copied into buf and NUL-terminated, so it must fit in sizeof(buf) - 1 bytes.
    if (code != 0 || val == NULL || len <= 0 || len >= (int32_t)sizeof(buf)) {
      taosMemoryFree(val);
      return TSDB_CODE_FAILED;
    }
    memcpy(buf, val, len);
    buf[len] = 0;
    maxCheckPointId = atol((char*)buf);
    taosMemoryFree(val);
  }

  for (int64_t i = maxCheckPointId; i > 0; i--) {
    char    buf[128] = {0};
    void*   val = NULL;
    int32_t len = 0;
    snprintf(buf, sizeof(buf), "%s:%" PRId64 "", TASK_KEY, i);
    code = streamDefaultGet_rocksdb(pFileState->pFileStore, buf, &val, &len);
    if (code != 0 || len < 0 || len >= (int32_t)sizeof(buf)) {
      taosMemoryFree(val);
      return TSDB_CODE_FAILED;
    }
    // An empty value parses as timestamp 0, as before; only copy when there is data.
    if (val != NULL && len > 0) {
      memcpy(buf, val, len);
    }
    buf[len] = 0;
    taosMemoryFree(val);

    TSKEY ts = atol((char*)buf);
    if (ts < mark) {
      // statekey winkey.ts < mark
      forceRemoveCheckpoint(pFileState, i);
      break;
    }
  }
  return code;
}
2023-04-11 14:04:39 +00:00
2023-10-10 07:12:21 +00:00
// Reloads session rows from the store into memory, walking from the last stored entry
// backwards until min(MIN_NUM_OF_ROW_BUFF, maxRowCount) row buffers exist or the store
// runs out. `ckId` is not used.
// Returns -1 when no cursor can be opened, otherwise the code of the last store call.
int32_t recoverSesssion(SStreamFileState* pFileState, int64_t ckId) {
  int code = TSDB_CODE_SUCCESS;

  if (pFileState->maxTs != INT64_MIN) {
    int64_t mark;
    if (INT64_MIN + pFileState->deleteMark >= pFileState->maxTs) {
      mark = INT64_MIN;
    } else {
      mark = pFileState->maxTs - pFileState->deleteMark;
    }
    deleteExpiredCheckPoint(pFileState, mark);
  }

  SStreamStateCur* pCur = streamStateSessionSeekToLast_rocksdb(pFileState->pFileStore);
  if (pCur == NULL) {
    return -1;
  }

  int32_t recoverNum = TMIN(MIN_NUM_OF_ROW_BUFF, pFileState->maxRowCount);
  while (code == TSDB_CODE_SUCCESS && pFileState->curRowCount < recoverNum) {
    void*       pVal = NULL;
    int32_t     vlen = 0;
    SSessionKey key = {0};

    code = streamStateSessionGetKVByCur_rocksdb(pCur, &key, &pVal, &vlen);
    if (code != 0) {
      break;
    }

    SRowBuffPos* pPos = createSessionWinBuff(pFileState, &key, pVal, &vlen);
    putSessionWinResultBuff(pFileState, pPos);

    code = streamStateSessionCurPrev_rocksdb(pCur);
  }

  streamStateFreeCur(pCur);
  return code;
}
2023-10-13 03:32:35 +00:00
2023-07-12 02:48:58 +00:00
// Reloads interval rows from the store into memory, walking from the last stored entry
// backwards until min(MIN_NUM_OF_ROW_BUFF, maxRowCount) row buffers exist, the store runs
// out, or a key older than flushMark is met. `ckId` is not used.
// Returns -1 when no cursor can be opened, TSDB_CODE_SUCCESS otherwise.
int32_t recoverSnapshot(SStreamFileState* pFileState, int64_t ckId) {
  int32_t code = TSDB_CODE_SUCCESS;
  if (pFileState->maxTs != INT64_MIN) {
    int64_t mark = (INT64_MIN + pFileState->deleteMark >= pFileState->maxTs)
                       ? INT64_MIN
                       : pFileState->maxTs - pFileState->deleteMark;
    deleteExpiredCheckPoint(pFileState, mark);
  }

  SStreamStateCur* pCur = streamStateSeekToLast_rocksdb(pFileState->pFileStore);
  if (pCur == NULL) {
    return -1;
  }

  int32_t recoverNum = TMIN(MIN_NUM_OF_ROW_BUFF, pFileState->maxRowCount);
  while (code == TSDB_CODE_SUCCESS) {
    if (pFileState->curRowCount >= recoverNum) {
      break;
    }

    void*        pVal = NULL;
    int32_t      vlen = 0;
    SRowBuffPos* pNewPos = getNewRowPosForWrite(pFileState);
    if (pNewPos == NULL) {
      break;
    }

    code = streamStateGetKVByCur_rocksdb(pCur, pNewPos->pKey, (const void**)&pVal, &vlen);
    if (code != TSDB_CODE_SUCCESS || pFileState->getTs(pNewPos->pKey) < pFileState->flushMark) {
      // pNewPos sits at the tail of usedBuffs (appended by getNewRowPos); unlink it as well.
      destroyRowBuffPos(pNewPos);
      SListNode* pNode = tdListPopTail(pFileState->usedBuffs);
      taosMemoryFreeClear(pNode);
      taosMemoryFreeClear(pVal);
      break;
    }

    ASSERT(vlen == pFileState->rowSize);
    memcpy(pNewPos->pRowBuff, pVal, vlen);
    taosMemoryFreeClear(pVal);
    pNewPos->beFlushed = true;

    code = tSimpleHashPut(pFileState->rowStateBuff, pNewPos->pKey, pFileState->keyLen, &pNewPos, POINTER_BYTES);
    if (code != TSDB_CODE_SUCCESS) {
      // Same cleanup as above: without unlinking, usedBuffs would keep a dangling pointer
      // that is freed a second time when the list is destroyed.
      destroyRowBuffPos(pNewPos);
      SListNode* pNode = tdListPopTail(pFileState->usedBuffs);
      taosMemoryFreeClear(pNode);
      break;
    }

    code = streamStateCurPrev_rocksdb(pCur);
  }
  streamStateFreeCur(pCur);

  return TSDB_CODE_SUCCESS;
}
2023-05-18 05:46:38 +00:00
// Returns the selectivity row size recorded at init.
int32_t streamFileStateGeSelectRowSize(SStreamFileState* pFileState) {
  int32_t size = pFileState->selectivityRowSize;
  return size;
}
2023-06-15 06:13:17 +00:00
// Raises flushMark and maxTs to `ts` where `ts` is larger; neither is ever lowered.
void streamFileStateReloadInfo(SStreamFileState* pFileState, TSKEY ts) {
  if (ts > pFileState->flushMark) {
    pFileState->flushMark = ts;
  }
  if (ts > pFileState->maxTs) {
    pFileState->maxTs = ts;
  }
}
2023-09-20 07:06:08 +00:00
2023-10-10 06:35:02 +00:00
// Returns the in-memory index handle.
void* getRowStateBuff(SStreamFileState* pFileState) {
  void* pIndex = pFileState->rowStateBuff;
  return pIndex;
}
2023-09-20 07:06:08 +00:00
2023-10-10 06:35:02 +00:00
// Returns the persistent store handle.
void* getStateFileStore(SStreamFileState* pFileState) {
  void* pStore = pFileState->pFileStore;
  return pStore;
}
2023-09-20 07:06:08 +00:00
// True when a positive delete mark is configured and `ts` is older than maxTs - deleteMark.
bool isDeteled(SStreamFileState* pFileState, TSKEY ts) {
  TSKEY delMark = pFileState->deleteMark;
  if (delMark <= 0) {
    return false;
  }
  // `maxTs - delMark` overflows (undefined behaviour for signed integers) when maxTs is near
  // INT64_MIN, e.g. before any row has been seen. In that range the threshold would be
  // below every representable timestamp, so nothing counts as deleted.
  if (pFileState->maxTs < INT64_MIN + delMark) {
    return false;
  }
  return ts < (pFileState->maxTs - delMark);
}
2023-10-10 06:35:02 +00:00
// True when `ts` is at or below flushMark + gap.
bool isFlushedState(SStreamFileState* pFileState, TSKEY ts, TSKEY gap) {
  TSKEY limit = pFileState->flushMark + gap;
  return ts <= limit;
}
2023-09-20 07:06:08 +00:00
2023-10-10 06:35:02 +00:00
// Returns the size in bytes of one row buffer.
int32_t getRowStateRowSize(SStreamFileState* pFileState) {
  int32_t size = pFileState->rowSize;
  return size;
}