TDengine/source/libs/sync/src/syncMain.c

2698 lines
88 KiB
C
Raw Normal View History

2022-02-22 03:28:15 +00:00
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2022-11-01 03:45:58 +00:00
#define _DEFAULT_SOURCE
2022-02-26 10:30:58 +00:00
#include "sync.h"
2022-03-07 08:06:07 +00:00
#include "syncAppendEntries.h"
#include "syncAppendEntriesReply.h"
2022-03-18 03:29:03 +00:00
#include "syncCommit.h"
2022-03-14 09:02:43 +00:00
#include "syncElection.h"
2022-02-28 08:36:57 +00:00
#include "syncEnv.h"
2022-03-09 06:51:02 +00:00
#include "syncIndexMgr.h"
2022-02-22 03:28:15 +00:00
#include "syncInt.h"
2022-03-14 10:22:39 +00:00
#include "syncMessage.h"
#include "syncPipeline.h"
2022-04-18 13:50:56 +00:00
#include "syncRaftCfg.h"
2022-03-09 06:51:02 +00:00
#include "syncRaftLog.h"
2022-03-08 02:52:18 +00:00
#include "syncRaftStore.h"
2022-03-14 09:02:43 +00:00
#include "syncReplication.h"
2022-03-07 08:06:07 +00:00
#include "syncRequestVote.h"
#include "syncRequestVoteReply.h"
2022-04-18 13:50:56 +00:00
#include "syncRespMgr.h"
2022-06-01 03:28:33 +00:00
#include "syncSnapshot.h"
2022-03-07 06:42:04 +00:00
#include "syncTimeout.h"
2022-02-28 09:47:47 +00:00
#include "syncUtil.h"
2022-03-08 02:52:18 +00:00
#include "syncVoteMgr.h"
#include "tglobal.h"
2022-03-21 08:28:50 +00:00
#include "tref.h"
2023-03-18 03:16:52 +00:00
#include "syncUtil.h"
2022-02-22 03:28:15 +00:00
2022-03-24 09:30:50 +00:00
static void syncNodeEqPingTimer(void* param, void* tmrId);
static void syncNodeEqElectTimer(void* param, void* tmrId);
static void syncNodeEqHeartbeatTimer(void* param, void* tmrId);
static int32_t syncNodeAppendNoop(SSyncNode* ths);
static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId);
2022-10-31 11:57:27 +00:00
static bool syncIsConfigChanged(const SSyncCfg* pOldCfg, const SSyncCfg* pNewCfg);
2022-11-07 15:31:01 +00:00
static int32_t syncHbTimerInit(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer, SRaftId destId);
static int32_t syncHbTimerStart(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer);
static int32_t syncHbTimerStop(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer);
2022-11-13 10:18:46 +00:00
static int32_t syncNodeUpdateNewConfigIndex(SSyncNode* ths, SSyncCfg* pNewCfg);
static bool syncNodeInConfig(SSyncNode* pSyncNode, const SSyncCfg* config);
static void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* newConfig, SyncIndex lastConfigChangeIndex);
static bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg);
static bool syncNodeCanChange(SSyncNode* pSyncNode);
static int32_t syncNodeLeaderTransfer(SSyncNode* pSyncNode);
static int32_t syncNodeLeaderTransferTo(SSyncNode* pSyncNode, SNodeInfo newLeader);
static int32_t syncDoLeaderTransfer(SSyncNode* ths, SRpcMsg* pRpcMsg, SSyncRaftEntry* pEntry);
static ESyncStrategy syncNodeStrategy(SSyncNode* pSyncNode);
2022-02-22 03:28:15 +00:00
// Opens a sync node from pSyncInfo and registers it through syncNodeAdd().
// Returns the id produced by syncNodeAdd(), or -1 when syncNodeOpen() fails
// or the returned id is negative (the node is closed again in that case).
int64_t syncOpen(SSyncInfo* pSyncInfo) {
  SSyncNode* pNode = syncNodeOpen(pSyncInfo);
  if (pNode == NULL) {
    sError("vgId:%d, failed to open sync node", pSyncInfo->vgId);
    return -1;
  }

  pNode->rid = syncNodeAdd(pNode);
  if (pNode->rid < 0) {
    syncNodeClose(pNode);
    return -1;
  }

  // Copy timing parameters and the message callback from the caller's info.
  pNode->msgcb = pSyncInfo->msgcb;
  pNode->pingBaseLine = pSyncInfo->pingMs;
  pNode->pingTimerMS = pSyncInfo->pingMs;
  pNode->electBaseLine = pSyncInfo->electMs;
  pNode->hbBaseLine = pSyncInfo->heartbeatMs;
  pNode->heartbeatTimerMS = pSyncInfo->heartbeatMs;

  return pNode->rid;
}
2022-02-22 03:28:15 +00:00
2022-10-31 04:59:42 +00:00
int32_t syncStart(int64_t rid) {
2022-11-01 03:45:58 +00:00
SSyncNode* pSyncNode = syncNodeAcquire(rid);
2022-04-18 13:50:56 +00:00
if (pSyncNode == NULL) {
2023-02-06 16:12:05 +00:00
sError("failed to acquire rid:%" PRId64 " of tsNodeReftId for pSyncNode", rid);
2022-10-31 04:59:42 +00:00
return -1;
}
if (syncNodeRestore(pSyncNode) < 0) {
sError("vgId:%d, failed to restore sync log buffer since %s", pSyncNode->vgId, terrstr());
2022-11-17 09:21:51 +00:00
goto _err;
2022-04-18 13:50:56 +00:00
}
2022-05-25 10:32:34 +00:00
2022-11-17 12:27:23 +00:00
if (syncNodeStart(pSyncNode) < 0) {
sError("vgId:%d, failed to start sync node since %s", pSyncNode->vgId, terrstr());
goto _err;
}
2022-04-18 13:50:56 +00:00
2022-11-17 12:27:23 +00:00
syncNodeRelease(pSyncNode);
return 0;
2022-05-13 04:12:37 +00:00
2022-11-17 09:21:51 +00:00
_err:
syncNodeRelease(pSyncNode);
return -1;
2022-05-13 04:12:37 +00:00
}
2022-03-09 08:34:34 +00:00
void syncStop(int64_t rid) {
2022-10-31 15:40:43 +00:00
SSyncNode* pSyncNode = syncNodeAcquire(rid);
2022-11-01 03:45:58 +00:00
if (pSyncNode != NULL) {
pSyncNode->isStart = false;
2022-10-31 15:40:43 +00:00
syncNodeRelease(pSyncNode);
2022-11-01 03:45:58 +00:00
syncNodeRemove(rid);
2022-05-26 07:08:20 +00:00
}
}
2022-11-03 01:39:20 +00:00
void syncPreStop(int64_t rid) {
SSyncNode* pSyncNode = syncNodeAcquire(rid);
2022-11-07 12:04:55 +00:00
if (pSyncNode != NULL) {
syncNodePreClose(pSyncNode);
syncNodeRelease(pSyncNode);
2022-06-20 09:48:56 +00:00
}
}
// Runs syncNodePostClose() on the node registered under rid, if it exists.
void syncPostStop(int64_t rid) {
  SSyncNode* pNode = syncNodeAcquire(rid);
  if (pNode == NULL) {
    return;
  }

  syncNodePostClose(pNode);
  syncNodeRelease(pNode);
}
2022-11-01 03:45:58 +00:00
// A new config is accepted only if syncNodeInConfig() holds for it and its
// replicaNum differs from the node's current replicaNum by at most one.
static bool syncNodeCheckNewConfig(SSyncNode* pSyncNode, const SSyncCfg* pCfg) {
  if (syncNodeInConfig(pSyncNode, pCfg)) {
    int delta = pCfg->replicaNum - pSyncNode->replicaNum;
    return abs(delta) <= 1;
  }
  return false;
}
2022-10-20 10:32:19 +00:00
// Applies the configuration pNewCfg to the sync node registered under rid.
//
// Returns 0 when the configuration was applied, or when the node's recorded
// raftCfg.lastConfigIndex is already >= pNewCfg->lastIndex (nothing to do).
// Returns -1 when rid cannot be acquired, or when syncNodeCheckNewConfig()
// rejects the configuration; in the latter case terrno is set to
// TSDB_CODE_SYN_NEW_CONFIG_ERROR.
int32_t syncReconfig(int64_t rid, SSyncCfg* pNewCfg) {
  SSyncNode* pSyncNode = syncNodeAcquire(rid);
  if (pSyncNode == NULL) return -1;

  if (pSyncNode->raftCfg.lastConfigIndex >= pNewCfg->lastIndex) {
    // Log while the reference is still held; the node must not be read after release.
    sInfo("vgId:%d, no need Reconfig, current index:%" PRId64 ", new index:%" PRId64, pSyncNode->vgId,
          pSyncNode->raftCfg.lastConfigIndex, pNewCfg->lastIndex);
    syncNodeRelease(pSyncNode);
    return 0;
  }

  if (!syncNodeCheckNewConfig(pSyncNode, pNewCfg)) {
    // Same ordering rule as above: read vgId before dropping the reference.
    sError("vgId:%d, failed to reconfig since invalid new config", pSyncNode->vgId);
    syncNodeRelease(pSyncNode);
    terrno = TSDB_CODE_SYN_NEW_CONFIG_ERROR;
    return -1;
  }

  syncNodeUpdateNewConfigIndex(pSyncNode, pNewCfg);
  syncNodeDoConfigChange(pSyncNode, pNewCfg, pNewCfg->lastIndex);

  if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
    // On a leader, stop the heartbeat timer, re-initialise every per-peer
    // heartbeat timer slot against replicasId[], then start it again.
    syncNodeStopHeartbeatTimer(pSyncNode);

    for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
      syncHbTimerInit(pSyncNode, &pSyncNode->peerHeartbeatTimerArr[i], pSyncNode->replicasId[i]);
    }

    syncNodeStartHeartbeatTimer(pSyncNode);
  }

  syncNodeRelease(pSyncNode);
  return 0;
}
2022-02-22 03:28:15 +00:00
2022-11-01 03:45:58 +00:00
// Dispatches a sync message to the handler selected by pMsg->msgType, on the
// node registered under rid.
//
// Returns the handler's result code. Returns -1 when the sync module is not
// initialised (syncIsInit() is false), when rid cannot be acquired, or when
// the message type is not handled here (terrno = TSDB_CODE_MSG_NOT_PROCESSED).
int32_t syncProcessMsg(int64_t rid, SRpcMsg* pMsg) {
  int32_t code = -1;
  if (!syncIsInit()) return code;

  SSyncNode* pSyncNode = syncNodeAcquire(rid);
  if (pSyncNode == NULL) return code;

  switch (pMsg->msgType) {
    case TDMT_SYNC_HEARTBEAT:
      code = syncNodeOnHeartbeat(pSyncNode, pMsg);
      break;
    case TDMT_SYNC_HEARTBEAT_REPLY:
      code = syncNodeOnHeartbeatReply(pSyncNode, pMsg);
      break;
    case TDMT_SYNC_TIMEOUT:
    case TDMT_SYNC_TIMEOUT_ELECTION:
      // Both timeout message types are routed to the same handler.
      code = syncNodeOnTimeout(pSyncNode, pMsg);
      break;
    case TDMT_SYNC_CLIENT_REQUEST:
      code = syncNodeOnClientRequest(pSyncNode, pMsg, NULL);
      break;
    case TDMT_SYNC_REQUEST_VOTE:
      code = syncNodeOnRequestVote(pSyncNode, pMsg);
      break;
    case TDMT_SYNC_REQUEST_VOTE_REPLY:
      code = syncNodeOnRequestVoteReply(pSyncNode, pMsg);
      break;
    case TDMT_SYNC_APPEND_ENTRIES:
      code = syncNodeOnAppendEntries(pSyncNode, pMsg);
      break;
    case TDMT_SYNC_APPEND_ENTRIES_REPLY:
      code = syncNodeOnAppendEntriesReply(pSyncNode, pMsg);
      break;
    case TDMT_SYNC_SNAPSHOT_SEND:
      code = syncNodeOnSnapshot(pSyncNode, pMsg);
      break;
    case TDMT_SYNC_SNAPSHOT_RSP:
      code = syncNodeOnSnapshotRsp(pSyncNode, pMsg);
      break;
    case TDMT_SYNC_LOCAL_CMD:
      code = syncNodeOnLocalCmd(pSyncNode, pMsg);
      break;
    case TDMT_SYNC_FORCE_FOLLOWER:
      code = syncForceBecomeFollower(pSyncNode, pMsg);
      break;
    default:
      terrno = TSDB_CODE_MSG_NOT_PROCESSED;
      code = -1;
      break;
  }

  // Report the failure before releasing: pSyncNode->vgId must not be read
  // once the reference has been dropped.
  if (code != 0) {
    sDebug("vgId:%d, failed to process sync msg:%p type:%s since 0x%x", pSyncNode->vgId, pMsg, TMSG_INFO(pMsg->msgType),
           terrno);
  }
  syncNodeRelease(pSyncNode);
  return code;
}
2022-11-01 03:45:58 +00:00
int32_t syncLeaderTransfer(int64_t rid) {
2022-10-31 15:40:43 +00:00
SSyncNode* pSyncNode = syncNodeAcquire(rid);
2022-11-01 03:45:58 +00:00
if (pSyncNode == NULL) return -1;
2022-06-13 06:54:38 +00:00
2022-11-01 03:45:58 +00:00
int32_t ret = syncNodeLeaderTransfer(pSyncNode);
2022-10-31 15:40:43 +00:00
syncNodeRelease(pSyncNode);
2022-06-13 06:54:38 +00:00
return ret;
}
2023-03-24 01:13:20 +00:00
// Switches the node to follower via syncNodeBecomeFollower() and sends a
// response built from the request's info (rsp / rspLen). Always returns 0.
int32_t syncForceBecomeFollower(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
  syncNodeBecomeFollower(ths, "force election");

  SRpcMsg reply = {0};
  reply.code = 0;
  reply.pCont = pRpcMsg->info.rsp;
  reply.contLen = pRpcMsg->info.rspLen;
  reply.info = pRpcMsg->info;

  tmsgSendRsp(&reply);
  return 0;
}
int32_t syncSendTimeoutRsp(int64_t rid, int64_t seq) {
2022-12-14 01:14:40 +00:00
SSyncNode* pNode = syncNodeAcquire(rid);
if (pNode == NULL) return -1;
2022-12-14 01:14:40 +00:00
SRpcMsg rpcMsg = {0};
int32_t ret = syncRespMgrGetAndDel(pNode->pSyncRespMgr, seq, &rpcMsg.info);
2022-12-14 01:14:40 +00:00
rpcMsg.code = TSDB_CODE_SYN_TIMEOUT;
syncNodeRelease(pNode);
if (ret == 1) {
sInfo("send timeout response, seq:%" PRId64 " handle:%p ahandle:%p", seq, rpcMsg.info.handle, rpcMsg.info.ahandle);
rpcSendResponse(&rpcMsg);
return 0;
} else {
sError("no message handle to send timeout response, seq:%" PRId64, seq);
return -1;
}
2022-12-14 01:14:40 +00:00
}
2022-10-20 06:53:03 +00:00
// Returns the smallest match index (per syncIndexMgrGetIndex) over all peers,
// or SYNC_INDEX_INVALID when the node has no peers.
SyncIndex syncMinMatchIndex(SSyncNode* pSyncNode) {
  SyncIndex lowest = SYNC_INDEX_INVALID;

  for (int32_t peer = 0; peer < pSyncNode->peersNum; ++peer) {
    SyncIndex current = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId[peer]));
    // The first peer seeds the minimum; later peers only lower it.
    if (peer == 0 || current < lowest) {
      lowest = current;
    }
  }

  return lowest;
}
int32_t syncBeginSnapshot(int64_t rid, int64_t lastApplyIndex) {
2022-10-31 15:40:43 +00:00
SSyncNode* pSyncNode = syncNodeAcquire(rid);
if (pSyncNode == NULL) {
sError("sync begin snapshot error");
return -1;
}
SyncIndex beginIndex = pSyncNode->pLogStore->syncLogBeginIndex(pSyncNode->pLogStore);
SyncIndex endIndex = pSyncNode->pLogStore->syncLogEndIndex(pSyncNode->pLogStore);
bool isEmpty = pSyncNode->pLogStore->syncLogIsEmpty(pSyncNode->pLogStore);
if (isEmpty || !(lastApplyIndex >= beginIndex && lastApplyIndex <= endIndex)) {
sNTrace(pSyncNode, "new-snapshot-index:%" PRId64 ", empty:%d, do not delete wal", lastApplyIndex, isEmpty);
syncNodeRelease(pSyncNode);
return 0;
}
int32_t code = 0;
int64_t logRetention = 0;
2022-10-18 07:24:00 +00:00
if (syncNodeIsMnode(pSyncNode)) {
2022-10-20 06:53:03 +00:00
// mnode
logRetention = tsMndLogRetention;
2022-10-20 06:53:03 +00:00
} else {
// vnode
if (pSyncNode->replicaNum > 1) {
// multi replicas
logRetention = SYNC_VNODE_LOG_RETENTION;
}
}
2022-10-20 06:53:03 +00:00
2023-04-18 11:03:45 +00:00
if (pSyncNode->totalReplicaNum > 1) {
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER && pSyncNode->state != TAOS_SYNC_STATE_FOLLOWER
&& pSyncNode->state != TAOS_SYNC_STATE_LEARNER) {
sNTrace(pSyncNode, "new-snapshot-index:%" PRId64 " candidate or unknown state, do not delete wal",
lastApplyIndex);
syncNodeRelease(pSyncNode);
return 0;
}
logRetention = TMAX(logRetention, lastApplyIndex - pSyncNode->minMatchIndex + logRetention);
}
2022-10-20 06:53:03 +00:00
_DEL_WAL:
2022-10-20 06:53:03 +00:00
do {
SSyncLogStoreData* pData = pSyncNode->pLogStore->data;
SyncIndex snapshotVer = walGetSnapshotVer(pData->pWal);
SyncIndex walCommitVer = walGetCommittedVer(pData->pWal);
SyncIndex wallastVer = walGetLastVer(pData->pWal);
if (lastApplyIndex <= walCommitVer) {
SyncIndex snapshottingIndex = atomic_load_64(&pSyncNode->snapshottingIndex);
if (snapshottingIndex == SYNC_INDEX_INVALID) {
atomic_store_64(&pSyncNode->snapshottingIndex, lastApplyIndex);
pSyncNode->snapshottingTime = taosGetTimestampMs();
code = walBeginSnapshot(pData->pWal, lastApplyIndex, logRetention);
if (code == 0) {
sNTrace(pSyncNode, "wal snapshot begin, index:%" PRId64 ", last apply index:%" PRId64,
pSyncNode->snapshottingIndex, lastApplyIndex);
} else {
sNError(pSyncNode, "wal snapshot begin error since:%s, index:%" PRId64 ", last apply index:%" PRId64,
terrstr(terrno), pSyncNode->snapshottingIndex, lastApplyIndex);
atomic_store_64(&pSyncNode->snapshottingIndex, SYNC_INDEX_INVALID);
}
2022-10-20 06:53:03 +00:00
} else {
sNTrace(pSyncNode, "snapshotting for %" PRId64 ", do not delete wal for new-snapshot-index:%" PRId64,
snapshottingIndex, lastApplyIndex);
2022-10-20 06:53:03 +00:00
}
}
2022-10-20 06:53:03 +00:00
} while (0);
2022-10-31 15:40:43 +00:00
syncNodeRelease(pSyncNode);
return code;
}
int32_t syncEndSnapshot(int64_t rid) {
2022-10-31 15:40:43 +00:00
SSyncNode* pSyncNode = syncNodeAcquire(rid);
if (pSyncNode == NULL) {
sError("sync end snapshot error");
return -1;
}
int32_t code = 0;
if (atomic_load_64(&pSyncNode->snapshottingIndex) != SYNC_INDEX_INVALID) {
SSyncLogStoreData* pData = pSyncNode->pLogStore->data;
code = walEndSnapshot(pData->pWal);
2022-10-20 06:53:03 +00:00
if (code != 0) {
2022-11-07 12:04:55 +00:00
sNError(pSyncNode, "wal snapshot end error since:%s", terrstr());
2022-10-31 15:40:43 +00:00
syncNodeRelease(pSyncNode);
2022-10-20 06:53:03 +00:00
return -1;
} else {
2022-11-07 05:51:37 +00:00
sNTrace(pSyncNode, "wal snapshot end, index:%" PRId64, atomic_load_64(&pSyncNode->snapshottingIndex));
2022-10-20 06:53:03 +00:00
atomic_store_64(&pSyncNode->snapshottingIndex, SYNC_INDEX_INVALID);
}
}
2022-10-31 15:40:43 +00:00
syncNodeRelease(pSyncNode);
return code;
}
2022-10-31 06:17:26 +00:00
// Public wrapper: calls syncNodeStepDown(node, newTerm) on the node
// registered under rid. Returns 0, or -1 when rid cannot be acquired.
int32_t syncStepDown(int64_t rid, SyncTerm newTerm) {
  SSyncNode* pNode = syncNodeAcquire(rid);
  if (pNode != NULL) {
    syncNodeStepDown(pNode, newTerm);
    syncNodeRelease(pNode);
    return 0;
  }

  sError("sync step down error");
  return -1;
}
// Reports whether the node can serve reads: it must be non-NULL, in leader
// state, and have restoreFinish set. On a negative answer terrno is set to
// the code matching the first failed check.
bool syncNodeIsReadyForRead(SSyncNode* pSyncNode) {
  bool ready = false;

  if (pSyncNode == NULL) {
    terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
    sError("sync ready for read error");
  } else if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
    terrno = TSDB_CODE_SYN_NOT_LEADER;
  } else if (!pSyncNode->restoreFinish) {
    terrno = TSDB_CODE_SYN_RESTORING;
  } else {
    ready = true;
  }

  return ready;
}
// Public wrapper around syncNodeIsReadyForRead() for the node registered
// under rid. Returns false when rid cannot be acquired.
bool syncIsReadyForRead(int64_t rid) {
  SSyncNode* pNode = syncNodeAcquire(rid);
  if (pNode != NULL) {
    bool canRead = syncNodeIsReadyForRead(pNode);
    syncNodeRelease(pNode);
    return canRead;
  }

  sError("sync ready for read error");
  return false;
}
2022-03-18 09:46:40 +00:00
// Returns syncNodeSnapshotSending() for the node registered under rid, or
// false when rid cannot be acquired.
bool syncSnapshotSending(int64_t rid) {
  bool sending = false;

  SSyncNode* pNode = syncNodeAcquire(rid);
  if (pNode != NULL) {
    sending = syncNodeSnapshotSending(pNode);
    syncNodeRelease(pNode);
  }

  return sending;
}
// Returns syncNodeSnapshotRecving() for the node registered under rid, or
// false when rid cannot be acquired.
bool syncSnapshotRecving(int64_t rid) {
  bool receiving = false;

  SSyncNode* pNode = syncNodeAcquire(rid);
  if (pNode != NULL) {
    receiving = syncNodeSnapshotRecving(pNode);
    syncNodeRelease(pNode);
  }

  return receiving;
}
2022-07-12 03:08:01 +00:00
// Picks a peer and proposes a leader transfer to it.
// Returns 0 without doing anything when there are no peers, when this node is
// not the leader, or when replicaNum <= 1; otherwise returns the result of
// syncNodeLeaderTransferTo(). With exactly two peers, the second peer is
// chosen only if its match index is strictly greater than the first's.
int32_t syncNodeLeaderTransfer(SSyncNode* pSyncNode) {
  if (pSyncNode->peersNum == 0) {
    sDebug("vgId:%d, only one replica, cannot leader transfer", pSyncNode->vgId);
    return 0;
  }

  if (pSyncNode->state != TAOS_SYNC_STATE_LEADER || pSyncNode->replicaNum <= 1) {
    return 0;
  }

  SNodeInfo candidate = (pSyncNode->peersNodeInfo)[0];
  if (pSyncNode->peersNum == 2) {
    SyncIndex firstMatch = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId[0]));
    SyncIndex secondMatch = syncIndexMgrGetIndex(pSyncNode->pMatchIndex, &(pSyncNode->peersId[1]));
    if (secondMatch > firstMatch) {
      candidate = (pSyncNode->peersNodeInfo)[1];
    }
  }

  return syncNodeLeaderTransferTo(pSyncNode, candidate);
}
2022-07-12 03:08:01 +00:00
// Builds a leader-transfer message naming newLeader and proposes it through
// syncNodePropose().
//
// Returns -1 when the node has a single replica or when the message cannot be
// built; otherwise returns the result of syncNodePropose(). The message
// content is freed here with rpcFreeCont() after the proposal.
int32_t syncNodeLeaderTransferTo(SSyncNode* pSyncNode, SNodeInfo newLeader) {
  if (pSyncNode->replicaNum == 1) {
    sDebug("vgId:%d, only one replica, cannot leader transfer", pSyncNode->vgId);
    return -1;
  }

  sNTrace(pSyncNode, "begin leader transfer to %s:%u", newLeader.nodeFqdn, newLeader.nodePort);

  SRpcMsg rpcMsg = {0};
  int32_t code = syncBuildLeaderTransfer(&rpcMsg, pSyncNode->vgId);
  // NOTE(review): treats a non-zero result as failure, matching how this file
  // handles syncBuildClientRequest() — confirm against syncBuildLeaderTransfer().
  if (code != 0 || rpcMsg.pCont == NULL) {
    // Never dereference pCont when the build did not produce a message body.
    sError("vgId:%d, failed to build leader transfer msg since %s", pSyncNode->vgId, terrstr());
    if (rpcMsg.pCont != NULL) {
      rpcFreeCont(rpcMsg.pCont);
    }
    return -1;
  }

  SyncLeaderTransfer* pMsg = rpcMsg.pCont;
  pMsg->newLeaderId.addr = SYNC_ADDR(&newLeader);
  pMsg->newLeaderId.vgId = pSyncNode->vgId;
  pMsg->newNodeInfo = newLeader;

  int32_t ret = syncNodePropose(pSyncNode, &rpcMsg, false, NULL);
  rpcFreeCont(rpcMsg.pCont);
  return ret;
}
2022-11-02 02:24:55 +00:00
SSyncState syncGetState(int64_t rid) {
SSyncState state = {.state = TAOS_SYNC_STATE_ERROR};
2022-06-16 08:16:40 +00:00
2022-10-31 15:40:43 +00:00
SSyncNode* pSyncNode = syncNodeAcquire(rid);
2022-11-02 02:24:55 +00:00
if (pSyncNode != NULL) {
state.state = pSyncNode->state;
state.restored = pSyncNode->restoreFinish;
if (pSyncNode->vgId != 1) {
state.canRead = syncNodeIsReadyForRead(pSyncNode);
} else {
state.canRead = state.restored;
}
2022-11-02 02:24:55 +00:00
syncNodeRelease(pSyncNode);
2022-05-23 15:53:15 +00:00
}
2022-11-02 02:24:55 +00:00
return state;
2022-05-23 15:53:15 +00:00
}
// Scans raftCfg.configIndexArr and returns the largest entry that is greater
// than the first entry and <= snapshotLastApplyIndex; when no entry
// qualifies, the first entry is returned.
SyncIndex syncNodeGetSnapshotConfigIndex(SSyncNode* pSyncNode, SyncIndex snapshotLastApplyIndex) {
  ASSERT(pSyncNode->raftCfg.configIndexCount >= 1);

  SyncIndex best = (pSyncNode->raftCfg.configIndexArr)[0];
  // Entry 0 seeds the result and can never exceed itself, so start at 1.
  for (int32_t pos = 1; pos < pSyncNode->raftCfg.configIndexCount; ++pos) {
    bool withinSnapshot = (pSyncNode->raftCfg.configIndexArr)[pos] <= snapshotLastApplyIndex;
    if (withinSnapshot && (pSyncNode->raftCfg.configIndexArr)[pos] > best) {
      best = (pSyncNode->raftCfg.configIndexArr)[pos];
    }
  }

  sTrace("vgId:%d, sync get last config index, index:%" PRId64 " lcindex:%" PRId64, pSyncNode->vgId,
         snapshotLastApplyIndex, best);
  return best;
}
// Fills pEpSet with the endpoints (fqdn, port) of every configured node whose
// role is not TAOS_SYNC_ROLE_LEARNER.
//
// pEpSet->numOfEps is reset to 0 first and stays 0 when rid cannot be
// acquired. When at least one endpoint was written, inUse is set to
// (myIndex + 1) % numOfEps.
void syncGetRetryEpSet(int64_t rid, SEpSet* pEpSet) {
  pEpSet->numOfEps = 0;

  SSyncNode* pSyncNode = syncNodeAcquire(rid);
  if (pSyncNode == NULL) return;

  for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
    if (pSyncNode->raftCfg.cfg.nodeInfo[i].nodeRole == TAOS_SYNC_ROLE_LEARNER) continue;

    // Write into the next free slot rather than slot i, so that skipped
    // learners do not leave unfilled holes inside eps[0..numOfEps).
    SEp* pEp = &pEpSet->eps[pEpSet->numOfEps];
    tstrncpy(pEp->fqdn, pSyncNode->raftCfg.cfg.nodeInfo[i].nodeFqdn, TSDB_FQDN_LEN);
    pEp->port = (pSyncNode->raftCfg.cfg.nodeInfo)[i].nodePort;
    pEpSet->numOfEps++;
    sDebug("vgId:%d, sync get retry epset, index:%d %s:%d", pSyncNode->vgId, i, pEp->fqdn, pEp->port);
  }

  if (pEpSet->numOfEps > 0) {
    pEpSet->inUse = (pSyncNode->raftCfg.cfg.myIndex + 1) % pEpSet->numOfEps;
  }

  sInfo("vgId:%d, sync get retry epset numOfEps:%d inUse:%d", pSyncNode->vgId, pEpSet->numOfEps, pEpSet->inUse);
  syncNodeRelease(pSyncNode);
}
2022-12-14 01:14:40 +00:00
// Public wrapper: forwards to syncNodePropose() on the node registered under
// rid and returns its result, or -1 when rid cannot be acquired.
int32_t syncPropose(int64_t rid, SRpcMsg* pMsg, bool isWeak, int64_t* seq) {
  SSyncNode* pNode = syncNodeAcquire(rid);
  if (pNode != NULL) {
    int32_t code = syncNodePropose(pNode, pMsg, isWeak, seq);
    syncNodeRelease(pNode);
    return code;
  }

  sError("sync propose error");
  return -1;
}
2022-04-18 13:50:56 +00:00
2023-04-18 11:03:45 +00:00
// Reports whether the node's log buffer has caught up: totalIndex and
// commitIndex are both non-negative, totalIndex >= commitIndex, and their
// difference does not exceed SYNC_LEARNER_CATCHUP.
// Returns 1 when caught up, 0 when not, -1 when rid cannot be acquired.
int32_t syncIsCatchUp(int64_t rid) {
  SSyncNode* pNode = syncNodeAcquire(rid);
  if (pNode == NULL) {
    sError("sync Node Acquire error since %d", errno);
    return -1;
  }

  bool caughtUp = pNode->pLogBuf->totalIndex >= 0 && pNode->pLogBuf->commitIndex >= 0 &&
                  pNode->pLogBuf->totalIndex >= pNode->pLogBuf->commitIndex &&
                  pNode->pLogBuf->totalIndex - pNode->pLogBuf->commitIndex <= SYNC_LEARNER_CATCHUP;

  if (caughtUp) {
    sInfo("vgId:%d, Catch up, totalIndex:%" PRId64 " commitIndex:%" PRId64 " matchIndex:%" PRId64,
          pNode->vgId, pNode->pLogBuf->totalIndex, pNode->pLogBuf->commitIndex,
          pNode->pLogBuf->matchIndex);
  } else {
    sInfo("vgId:%d, Not catch up, wait one second, totalIndex:%" PRId64 " commitIndex:%" PRId64 " matchIndex:%" PRId64,
          pNode->vgId, pNode->pLogBuf->totalIndex, pNode->pLogBuf->commitIndex,
          pNode->pLogBuf->matchIndex);
  }

  syncNodeRelease(pNode);
  return caughtUp ? 1 : 0;
}
ESyncRole syncGetRole(int64_t rid) {
SSyncNode* pSyncNode = syncNodeAcquire(rid);
if (pSyncNode == NULL) {
sError("sync Node Acquire error since %d", errno);
return -1;
}
ESyncRole role = pSyncNode->raftCfg.cfg.nodeInfo[pSyncNode->raftCfg.cfg.myIndex].nodeRole;
syncNodeRelease(pSyncNode);
return role;
2023-04-18 11:03:45 +00:00
}
2022-12-14 01:14:40 +00:00
// Proposes pMsg on pSyncNode.
//
// Rejections (return -1 with terrno set):
//   - node is not leader                       -> TSDB_CODE_SYN_NOT_LEADER
//   - restore not finished and vgId != 1       -> TSDB_CODE_SYN_PROPOSE_NOT_READY
//   - syncNodeHeartbeatReplyTimeout() is true  -> TSDB_CODE_SYN_PROPOSE_NOT_READY
//
// When syncNodeIsOptimizedOneReplica() holds, the message is handed straight
// to syncNodeOnClientRequest(); on success applyIndex/applyTerm are stored in
// pMsg->info.conn and 1 is returned, on failure -1 is returned with
// terrno = TSDB_CODE_SYN_INTERNAL_ERROR.
//
// Otherwise a response stub is registered, a client request is built and
// enqueued through pSyncNode->syncEqMsg; the enqueue result is returned
// (-1 if the request could not be built). If seq is non-NULL it receives the
// stub's sequence number once the request was built, even if enqueueing failed.
int32_t syncNodePropose(SSyncNode* pSyncNode, SRpcMsg* pMsg, bool isWeak, int64_t* seq) {
  if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
    terrno = TSDB_CODE_SYN_NOT_LEADER;
    sNError(pSyncNode, "sync propose not leader, type:%s", TMSG_INFO(pMsg->msgType));
    return -1;
  }

  // not restored, vnode enable
  if (!pSyncNode->restoreFinish && pSyncNode->vgId != 1) {
    terrno = TSDB_CODE_SYN_PROPOSE_NOT_READY;
    sNError(pSyncNode, "failed to sync propose since not ready, type:%s, last:%" PRId64 ", cmt:%" PRId64,
            TMSG_INFO(pMsg->msgType), syncNodeGetLastIndex(pSyncNode), pSyncNode->commitIndex);
    return -1;
  }

  // heartbeat timeout
  if (syncNodeHeartbeatReplyTimeout(pSyncNode)) {
    terrno = TSDB_CODE_SYN_PROPOSE_NOT_READY;
    sNError(pSyncNode, "failed to sync propose since hearbeat timeout, type:%s, last:%" PRId64 ", cmt:%" PRId64,
            TMSG_INFO(pMsg->msgType), syncNodeGetLastIndex(pSyncNode), pSyncNode->commitIndex);
    return -1;
  }

  // optimized one replica
  if (syncNodeIsOptimizedOneReplica(pSyncNode, pMsg)) {
    // Initialised so the failure log below never prints an indeterminate value
    // if the handler returns before writing the index.
    SyncIndex retIndex = SYNC_INDEX_INVALID;
    int32_t   code = syncNodeOnClientRequest(pSyncNode, pMsg, &retIndex);
    if (code == 0) {
      pMsg->info.conn.applyIndex = retIndex;
      pMsg->info.conn.applyTerm = raftStoreGetTerm(pSyncNode);
      sTrace("vgId:%d, propose optimized msg, index:%" PRId64 " type:%s", pSyncNode->vgId, retIndex,
             TMSG_INFO(pMsg->msgType));
      return 1;
    } else {
      terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
      sError("vgId:%d, failed to propose optimized msg, index:%" PRId64 " type:%s", pSyncNode->vgId, retIndex,
             TMSG_INFO(pMsg->msgType));
      return -1;
    }
  } else {
    SRespStub stub = {.createTime = taosGetTimestampMs(), .rpcMsg = *pMsg};
    uint64_t  seqNum = syncRespMgrAdd(pSyncNode->pSyncRespMgr, &stub);
    SRpcMsg   rpcMsg = {0};
    int32_t   code = syncBuildClientRequest(&rpcMsg, pMsg, seqNum, isWeak, pSyncNode->vgId);
    if (code != 0) {
      sError("vgId:%d, failed to propose msg while serialize since %s", pSyncNode->vgId, terrstr());
      // Drop the stub registered above; nothing will ever answer it.
      (void)syncRespMgrDel(pSyncNode->pSyncRespMgr, seqNum);
      return -1;
    }

    sNTrace(pSyncNode, "propose msg, type:%s", TMSG_INFO(pMsg->msgType));
    code = (*pSyncNode->syncEqMsg)(pSyncNode->msgcb, &rpcMsg);
    if (code != 0) {
      sWarn("vgId:%d, failed to propose msg while enqueue since %s", pSyncNode->vgId, terrstr());
      (void)syncRespMgrDel(pSyncNode->pSyncRespMgr, seqNum);
    }

    if (seq != NULL) *seq = seqNum;
    return code;
  }
}
2022-11-07 15:31:01 +00:00
// Resets a heartbeat timer slot: no timer handle, zero counter and logic
// clock, interval taken from the node's hbBaseLine, callback
// syncNodeEqPeerHeartbeatTimer, and the given destination. Always returns 0.
static int32_t syncHbTimerInit(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer, SRaftId destId) {
  // Identity and behaviour of the timer.
  pSyncTimer->destId = destId;
  pSyncTimer->timerCb = syncNodeEqPeerHeartbeatTimer;
  pSyncTimer->timerMS = pSyncNode->hbBaseLine;

  // Runtime state starts from scratch.
  pSyncTimer->pTimer = NULL;
  pSyncTimer->counter = 0;
  pSyncTimer->timeStamp = taosGetTimestampMs();
  atomic_store_64(&pSyncTimer->logicClock, 0);

  return 0;
}
2022-11-07 15:31:01 +00:00
// Arms the heartbeat timer pSyncTimer for pSyncNode.
//
// The timer's SSyncHbTimerData is looked up by pSyncTimer->hbDataRid; when
// none is found a new one is allocated and registered through
// syncHbTimerDataAdd(). The data is then refreshed from the timer and the
// timer is (re)armed with taosTmrReset() at timerMS / HEARTBEAT_TICK_NUM.
//
// Returns 0 on success and also when the sync environment is not initialised
// (only an error is logged in that case). Returns -1 with
// terrno = TSDB_CODE_OUT_OF_MEMORY when the timer data cannot be allocated.
static int32_t syncHbTimerStart(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer) {
  int32_t ret = 0;
  int64_t tsNow = taosGetTimestampMs();

  if (syncIsInit()) {
    SSyncHbTimerData* pData = syncHbTimerDataAcquire(pSyncTimer->hbDataRid);
    if (pData == NULL) {
      pData = taosMemoryMalloc(sizeof(SSyncHbTimerData));
      if (pData == NULL) {
        // Allocation failed: bail out instead of dereferencing NULL below.
        terrno = TSDB_CODE_OUT_OF_MEMORY;
        sError("vgId:%d, failed to start hb timer since out of memory", pSyncNode->vgId);
        return -1;
      }
      pData->rid = syncHbTimerDataAdd(pData);
    }

    pSyncTimer->hbDataRid = pData->rid;
    pSyncTimer->timeStamp = tsNow;

    pData->syncNodeRid = pSyncNode->rid;
    pData->pTimer = pSyncTimer;
    pData->destId = pSyncTimer->destId;
    pData->logicClock = pSyncTimer->logicClock;
    pData->execTime = tsNow + pSyncTimer->timerMS;

    sTrace("vgId:%d, start hb timer, rid:%" PRId64 " addr:%" PRId64, pSyncNode->vgId, pData->rid, pData->destId.addr);

    // The callback receives the data's rid (not a pointer) as its parameter.
    taosTmrReset(pSyncTimer->timerCb, pSyncTimer->timerMS / HEARTBEAT_TICK_NUM, (void*)(pData->rid),
                 syncEnv()->pTimerManager, &pSyncTimer->pTimer);
  } else {
    sError("vgId:%d, start ctrl hb timer error, sync env is stop", pSyncNode->vgId);
  }

  return ret;
}
2022-11-07 15:31:01 +00:00
// Stops a heartbeat timer: bumps its logic clock, stops and clears the timer
// handle, removes the associated timer data and resets hbDataRid to -1.
// pSyncNode is not read here. Always returns 0.
static int32_t syncHbTimerStop(SSyncNode* pSyncNode, SSyncTimer* pSyncTimer) {
  atomic_add_fetch_64(&pSyncTimer->logicClock, 1);

  taosTmrStop(pSyncTimer->pTimer);
  pSyncTimer->pTimer = NULL;

  syncHbTimerDataRemove(pSyncTimer->hbDataRid);
  pSyncTimer->hbDataRid = -1;

  return 0;
}
// Restores the log store from the snapshot when the stored log range does not
// line up with the snapshot's lastApplyIndex, i.e. when the last log index is
// behind it or the first log index is more than one past it.
// Returns 0 when no restore is needed or it succeeded, -1 when
// syncLogRestoreFromSnapshot() reported a non-zero result.
int32_t syncNodeLogStoreRestoreOnNeed(SSyncNode* pNode) {
  ASSERTS(pNode->pLogStore != NULL, "log store not created");
  ASSERTS(pNode->pFsm != NULL, "pFsm not registered");
  ASSERTS(pNode->pFsm->FpGetSnapshotInfo != NULL, "FpGetSnapshotInfo not registered");

  SSnapshot snap = {0};
  pNode->pFsm->FpGetSnapshotInfo(pNode->pFsm, &snap);

  SyncIndex commitIndex = snap.lastApplyIndex;
  SyncIndex firstVer = pNode->pLogStore->syncLogBeginIndex(pNode->pLogStore);
  SyncIndex lastVer = pNode->pLogStore->syncLogLastIndex(pNode->pLogStore);

  bool logLinesUp = (lastVer >= commitIndex) && (firstVer <= commitIndex + 1);
  if (logLinesUp) {
    return 0;
  }

  if (pNode->pLogStore->syncLogRestoreFromSnapshot(pNode->pLogStore, commitIndex) != 0) {
    sError("vgId:%d, failed to restore log store from snapshot since %s. lastVer:%" PRId64 ", snapshotVer:%" PRId64,
           pNode->vgId, terrstr(), lastVer, commitIndex);
    return -1;
  }

  return 0;
}
2022-03-14 08:27:25 +00:00
// open/close --------------
2022-10-24 03:57:26 +00:00
SSyncNode* syncNodeOpen(SSyncInfo* pSyncInfo) {
SSyncNode* pSyncNode = taosMemoryCalloc(1, sizeof(SSyncNode));
if (pSyncNode == NULL) {
terrno = TSDB_CODE_OUT_OF_MEMORY;
goto _error;
}
2022-02-26 16:02:18 +00:00
2022-04-18 13:50:56 +00:00
if (!taosDirExist((char*)(pSyncInfo->path))) {
if (taosMkDir(pSyncInfo->path) != 0) {
terrno = TAOS_SYSTEM_ERROR(errno);
sError("failed to create dir:%s since %s", pSyncInfo->path, terrstr());
goto _error;
2022-04-18 13:50:56 +00:00
}
}
2022-04-18 13:50:56 +00:00
memcpy(pSyncNode->path, pSyncInfo->path, sizeof(pSyncNode->path));
snprintf(pSyncNode->raftStorePath, sizeof(pSyncNode->raftStorePath), "%s%sraft_store.json", pSyncInfo->path,
TD_DIRSEP);
2022-08-02 07:57:37 +00:00
snprintf(pSyncNode->configPath, sizeof(pSyncNode->configPath), "%s%sraft_config.json", pSyncInfo->path, TD_DIRSEP);
if (!taosCheckExistFile(pSyncNode->configPath)) {
2022-05-30 04:26:15 +00:00
// create a new raft config file
sInfo("vgId:%d, create a new raft config file", pSyncNode->vgId);
pSyncNode->raftCfg.isStandBy = pSyncInfo->isStandBy;
pSyncNode->raftCfg.snapshotStrategy = pSyncInfo->snapshotStrategy;
2023-04-18 11:03:45 +00:00
pSyncNode->raftCfg.lastConfigIndex = pSyncInfo->syncCfg.lastIndex;
pSyncNode->raftCfg.batchSize = pSyncInfo->batchSize;
pSyncNode->raftCfg.cfg = pSyncInfo->syncCfg;
pSyncNode->raftCfg.configIndexCount = 1;
pSyncNode->raftCfg.configIndexArr[0] = -1;
if (syncWriteCfgFile(pSyncNode) != 0) {
sError("vgId:%d, failed to create sync cfg file", pSyncNode->vgId);
2022-10-13 06:06:27 +00:00
goto _error;
}
} else {
// update syncCfg by raft_config.json
if (syncReadCfgFile(pSyncNode) != 0) {
sError("vgId:%d, failed to read sync cfg file", pSyncNode->vgId);
2022-10-13 06:06:27 +00:00
goto _error;
}
2022-10-31 13:33:06 +00:00
2023-04-18 11:03:45 +00:00
if (pSyncInfo->syncCfg.totalReplicaNum > 0 && syncIsConfigChanged(&pSyncNode->raftCfg.cfg, &pSyncInfo->syncCfg)) {
2022-10-24 04:42:18 +00:00
sInfo("vgId:%d, use sync config from input options and write to cfg file", pSyncNode->vgId);
pSyncNode->raftCfg.cfg = pSyncInfo->syncCfg;
if (syncWriteCfgFile(pSyncNode) != 0) {
sError("vgId:%d, failed to write sync cfg file", pSyncNode->vgId);
2022-10-24 04:42:18 +00:00
goto _error;
}
2022-10-24 03:57:26 +00:00
} else {
sInfo("vgId:%d, use sync config from sync cfg file", pSyncNode->vgId);
pSyncInfo->syncCfg = pSyncNode->raftCfg.cfg;
2022-10-24 03:57:26 +00:00
}
2022-03-16 07:09:56 +00:00
}
2022-03-08 02:52:18 +00:00
// init by SSyncInfo
2022-03-01 12:29:49 +00:00
pSyncNode->vgId = pSyncInfo->vgId;
2022-12-30 10:46:42 +00:00
SSyncCfg* pCfg = &pSyncNode->raftCfg.cfg;
bool updated = false;
2023-04-18 11:03:45 +00:00
sInfo("vgId:%d, start to open sync node, totalReplicaNum:%d replicaNum:%d selfIndex:%d",
pSyncNode->vgId, pCfg->totalReplicaNum, pCfg->replicaNum, pCfg->myIndex);
for (int32_t i = 0; i < pCfg->totalReplicaNum; ++i) {
2022-10-24 03:57:26 +00:00
SNodeInfo* pNode = &pCfg->nodeInfo[i];
if (tmsgUpdateDnodeInfo(&pNode->nodeId, &pNode->clusterId, pNode->nodeFqdn, &pNode->nodePort)) {
updated = true;
}
sInfo("vgId:%d, index:%d ep:%s:%u dnode:%d cluster:%" PRId64, pSyncNode->vgId, i, pNode->nodeFqdn, pNode->nodePort,
pNode->nodeId, pNode->clusterId);
2022-10-24 03:57:26 +00:00
}
if (updated) {
sInfo("vgId:%d, save config info since dnode info changed", pSyncNode->vgId);
if (syncWriteCfgFile(pSyncNode) != 0) {
sError("vgId:%d, failed to write sync cfg file on dnode info updated", pSyncNode->vgId);
goto _error;
}
}
2022-03-09 06:51:02 +00:00
pSyncNode->pWal = pSyncInfo->pWal;
2022-05-19 11:44:01 +00:00
pSyncNode->msgcb = pSyncInfo->msgcb;
2022-11-01 07:40:23 +00:00
pSyncNode->syncSendMSg = pSyncInfo->syncSendMSg;
pSyncNode->syncEqMsg = pSyncInfo->syncEqMsg;
pSyncNode->syncEqCtrlMsg = pSyncInfo->syncEqCtrlMsg;
2022-03-01 12:29:49 +00:00
2022-10-31 04:59:42 +00:00
// create raft log ring buffer
pSyncNode->pLogBuf = syncLogBufferCreate();
if (pSyncNode->pLogBuf == NULL) {
sError("failed to init sync log buffer since %s. vgId:%d", terrstr(), pSyncNode->vgId);
2022-10-31 04:59:42 +00:00
goto _error;
}
2022-03-08 02:52:18 +00:00
// init internal
pSyncNode->myNodeInfo = pSyncNode->raftCfg.cfg.nodeInfo[pSyncNode->raftCfg.cfg.myIndex];
2022-11-10 02:04:05 +00:00
if (!syncUtilNodeInfo2RaftId(&pSyncNode->myNodeInfo, pSyncNode->vgId, &pSyncNode->myRaftId)) {
2022-10-24 04:42:18 +00:00
sError("vgId:%d, failed to determine my raft member id", pSyncNode->vgId);
2022-10-13 06:06:27 +00:00
goto _error;
}
2022-03-01 12:29:49 +00:00
2022-03-08 02:52:18 +00:00
// init peersNum, peers, peersId
2023-04-18 11:03:45 +00:00
pSyncNode->peersNum = pSyncNode->raftCfg.cfg.totalReplicaNum - 1;
2022-11-07 15:31:01 +00:00
int32_t j = 0;
2023-04-18 11:03:45 +00:00
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
if (i != pSyncNode->raftCfg.cfg.myIndex) {
pSyncNode->peersNodeInfo[j] = pSyncNode->raftCfg.cfg.nodeInfo[i];
syncUtilNodeInfo2EpSet(&pSyncNode->peersNodeInfo[j], &pSyncNode->peersEpset[j]);
2022-03-01 12:29:49 +00:00
j++;
}
}
2022-11-07 15:31:01 +00:00
for (int32_t i = 0; i < pSyncNode->peersNum; ++i) {
2022-11-10 02:04:05 +00:00
if (!syncUtilNodeInfo2RaftId(&pSyncNode->peersNodeInfo[i], pSyncNode->vgId, &pSyncNode->peersId[i])) {
2022-10-24 04:42:18 +00:00
sError("vgId:%d, failed to determine raft member id, peer:%d", pSyncNode->vgId, i);
2022-10-13 06:06:27 +00:00
goto _error;
}
2022-03-08 02:52:18 +00:00
}
2022-03-01 12:29:49 +00:00
2022-03-08 02:52:18 +00:00
// init replicaNum, replicasId
pSyncNode->replicaNum = pSyncNode->raftCfg.cfg.replicaNum;
2023-04-18 11:03:45 +00:00
pSyncNode->totalReplicaNum = pSyncNode->raftCfg.cfg.totalReplicaNum;
for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
if (!syncUtilNodeInfo2RaftId(&pSyncNode->raftCfg.cfg.nodeInfo[i], pSyncNode->vgId, &pSyncNode->replicasId[i])) {
2022-10-24 04:42:18 +00:00
sError("vgId:%d, failed to determine raft member id, replica:%d", pSyncNode->vgId, i);
2022-10-13 06:06:27 +00:00
goto _error;
}
2022-03-08 02:52:18 +00:00
}
2022-03-08 09:07:29 +00:00
// init raft algorithm
2022-03-08 02:52:18 +00:00
pSyncNode->pFsm = pSyncInfo->pFsm;
pSyncInfo->pFsm = NULL;
pSyncNode->quorum = syncUtilQuorum(pSyncNode->raftCfg.cfg.replicaNum);
2022-03-08 02:52:18 +00:00
pSyncNode->leaderCache = EMPTY_RAFT_ID;
2022-03-21 08:28:50 +00:00
// init life cycle outside
2022-03-08 02:52:18 +00:00
2022-03-18 07:21:40 +00:00
// TLA+ Spec
// InitHistoryVars == /\ elections = {}
// /\ allLogs = {}
// /\ voterLog = [i \in Server |-> [j \in {} |-> <<>>]]
// InitServerVars == /\ currentTerm = [i \in Server |-> 1]
// /\ state = [i \in Server |-> Follower]
// /\ votedFor = [i \in Server |-> Nil]
// InitCandidateVars == /\ votesResponded = [i \in Server |-> {}]
// /\ votesGranted = [i \in Server |-> {}]
// \* The values nextIndex[i][i] and matchIndex[i][i] are never read, since the
// \* leader does not send itself messages. It's still easier to include these
// \* in the functions.
// InitLeaderVars == /\ nextIndex = [i \in Server |-> [j \in Server |-> 1]]
// /\ matchIndex = [i \in Server |-> [j \in Server |-> 0]]
// InitLogVars == /\ log = [i \in Server |-> << >>]
// /\ commitIndex = [i \in Server |-> 0]
// Init == /\ messages = [m \in {} |-> 0]
// /\ InitHistoryVars
// /\ InitServerVars
// /\ InitCandidateVars
// /\ InitLeaderVars
// /\ InitLogVars
//
2022-03-09 06:51:02 +00:00
// init TLA+ server vars
2022-03-03 09:28:00 +00:00
pSyncNode->state = TAOS_SYNC_STATE_FOLLOWER;
2023-02-13 11:00:10 +00:00
if (raftStoreOpen(pSyncNode) != 0) {
2022-10-24 04:42:18 +00:00
sError("vgId:%d, failed to open raft store at path %s", pSyncNode->vgId, pSyncNode->raftStorePath);
goto _error;
}
2022-03-08 02:52:18 +00:00
2022-03-09 06:51:02 +00:00
// init TLA+ candidate vars
2022-03-08 02:52:18 +00:00
pSyncNode->pVotesGranted = voteGrantedCreate(pSyncNode);
if (pSyncNode->pVotesGranted == NULL) {
2022-10-24 04:42:18 +00:00
sError("vgId:%d, failed to create VotesGranted", pSyncNode->vgId);
goto _error;
}
2022-03-08 02:52:18 +00:00
pSyncNode->pVotesRespond = votesRespondCreate(pSyncNode);
if (pSyncNode->pVotesRespond == NULL) {
2022-10-24 04:42:18 +00:00
sError("vgId:%d, failed to create VotesRespond", pSyncNode->vgId);
goto _error;
}
2022-03-08 02:52:18 +00:00
2022-03-09 06:51:02 +00:00
// init TLA+ leader vars
pSyncNode->pNextIndex = syncIndexMgrCreate(pSyncNode);
if (pSyncNode->pNextIndex == NULL) {
2022-10-24 04:42:18 +00:00
sError("vgId:%d, failed to create SyncIndexMgr", pSyncNode->vgId);
goto _error;
}
2022-03-09 06:51:02 +00:00
pSyncNode->pMatchIndex = syncIndexMgrCreate(pSyncNode);
if (pSyncNode->pMatchIndex == NULL) {
2022-10-24 04:42:18 +00:00
sError("vgId:%d, failed to create SyncIndexMgr", pSyncNode->vgId);
goto _error;
}
2022-03-09 06:51:02 +00:00
// init TLA+ log vars
pSyncNode->pLogStore = logStoreCreate(pSyncNode);
if (pSyncNode->pLogStore == NULL) {
2022-10-24 04:42:18 +00:00
sError("vgId:%d, failed to create SyncLogStore", pSyncNode->vgId);
goto _error;
}
SyncIndex commitIndex = SYNC_INDEX_INVALID;
if (pSyncNode->pFsm != NULL && pSyncNode->pFsm->FpGetSnapshotInfo != NULL) {
SSnapshot snapshot = {0};
pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot);
if (snapshot.lastApplyIndex > commitIndex) {
commitIndex = snapshot.lastApplyIndex;
2022-11-07 05:51:37 +00:00
sNTrace(pSyncNode, "reset commit index by snapshot");
}
}
pSyncNode->commitIndex = commitIndex;
sInfo("vgId:%d, sync node commitIndex initialized as %" PRId64, pSyncNode->vgId, pSyncNode->commitIndex);
2022-03-01 12:29:49 +00:00
// restore log store on need
if (syncNodeLogStoreRestoreOnNeed(pSyncNode) < 0) {
sError("vgId:%d, failed to restore log store since %s.", pSyncNode->vgId, terrstr());
goto _error;
}
2022-03-22 02:42:02 +00:00
// timer ms init
pSyncNode->pingBaseLine = PING_TIMER_MS;
pSyncNode->electBaseLine = tsElectInterval;
pSyncNode->hbBaseLine = tsHeartbeatInterval;
2022-03-22 02:42:02 +00:00
2022-03-07 06:18:46 +00:00
// init ping timer
2022-02-28 08:36:57 +00:00
pSyncNode->pPingTimer = NULL;
2022-03-22 02:42:02 +00:00
pSyncNode->pingTimerMS = pSyncNode->pingBaseLine;
2022-03-06 09:59:24 +00:00
atomic_store_64(&pSyncNode->pingTimerLogicClock, 0);
atomic_store_64(&pSyncNode->pingTimerLogicClockUser, 0);
2022-03-14 08:27:25 +00:00
pSyncNode->FpPingTimerCB = syncNodeEqPingTimer;
2022-02-28 08:36:57 +00:00
pSyncNode->pingTimerCounter = 0;
2022-02-27 02:22:15 +00:00
2022-03-07 06:18:46 +00:00
// init elect timer
pSyncNode->pElectTimer = NULL;
2022-03-22 02:42:02 +00:00
pSyncNode->electTimerMS = syncUtilElectRandomMS(pSyncNode->electBaseLine, 2 * pSyncNode->electBaseLine);
2022-03-07 06:18:46 +00:00
atomic_store_64(&pSyncNode->electTimerLogicClock, 0);
2022-03-14 08:27:25 +00:00
pSyncNode->FpElectTimerCB = syncNodeEqElectTimer;
2022-03-07 06:18:46 +00:00
pSyncNode->electTimerCounter = 0;
// init heartbeat timer
pSyncNode->pHeartbeatTimer = NULL;
2022-03-22 02:42:02 +00:00
pSyncNode->heartbeatTimerMS = pSyncNode->hbBaseLine;
2022-03-07 06:18:46 +00:00
atomic_store_64(&pSyncNode->heartbeatTimerLogicClock, 0);
atomic_store_64(&pSyncNode->heartbeatTimerLogicClockUser, 0);
2022-03-14 08:27:25 +00:00
pSyncNode->FpHeartbeatTimerCB = syncNodeEqHeartbeatTimer;
2022-03-07 06:18:46 +00:00
pSyncNode->heartbeatTimerCounter = 0;
// init peer heartbeat timer
2023-04-18 11:03:45 +00:00
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
syncHbTimerInit(pSyncNode, &(pSyncNode->peerHeartbeatTimerArr[i]), (pSyncNode->replicasId)[i]);
}
2022-04-18 13:50:56 +00:00
// tools
2022-07-12 08:57:19 +00:00
pSyncNode->pSyncRespMgr = syncRespMgrCreate(pSyncNode, SYNC_RESP_TTL_MS);
if (pSyncNode->pSyncRespMgr == NULL) {
2022-10-24 04:42:18 +00:00
sError("vgId:%d, failed to create SyncRespMgr", pSyncNode->vgId);
goto _error;
}
2022-04-18 13:50:56 +00:00
// restore state
pSyncNode->restoreFinish = false;
2022-06-01 03:28:33 +00:00
// snapshot senders
2023-04-18 11:03:45 +00:00
for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
2022-06-01 03:28:33 +00:00
SSyncSnapshotSender* pSender = snapshotSenderCreate(pSyncNode, i);
if (pSender == NULL) return NULL;
pSyncNode->senders[i] = pSender;
sSDebug(pSender, "snapshot sender create while open sync node, data:%p", pSender);
2022-06-01 03:28:33 +00:00
}
// snapshot receivers
2022-06-10 08:51:17 +00:00
pSyncNode->pNewNodeReceiver = snapshotReceiverCreate(pSyncNode, EMPTY_RAFT_ID);
if (pSyncNode->pNewNodeReceiver == NULL) return NULL;
sRDebug(pSyncNode->pNewNodeReceiver, "snapshot receiver create while open sync node, data:%p",
pSyncNode->pNewNodeReceiver);
2022-06-01 13:23:39 +00:00
2022-06-20 09:48:56 +00:00
// is config changing
pSyncNode->changing = false;
2022-11-11 14:55:21 +00:00
// replication mgr
if (syncNodeLogReplInit(pSyncNode) < 0) {
sError("vgId:%d, failed to init repl mgr since %s.", pSyncNode->vgId, terrstr());
goto _error;
}
2022-11-11 14:55:21 +00:00
2022-10-16 04:07:02 +00:00
// peer state
if (syncNodePeerStateInit(pSyncNode) < 0) {
sError("vgId:%d, failed to init peer stat since %s.", pSyncNode->vgId, terrstr());
goto _error;
}
2022-10-16 04:07:02 +00:00
2022-11-11 14:55:21 +00:00
//
2022-10-20 06:53:03 +00:00
// min match index
pSyncNode->minMatchIndex = SYNC_INDEX_INVALID;
2022-04-18 13:50:56 +00:00
// start in syncNodeStart
2022-03-16 07:09:56 +00:00
// start raft
2022-04-18 13:50:56 +00:00
// syncNodeBecomeFollower(pSyncNode);
2022-03-16 07:09:56 +00:00
2022-08-08 11:46:37 +00:00
int64_t timeNow = taosGetTimestampMs();
pSyncNode->startTime = timeNow;
pSyncNode->leaderTime = timeNow;
2022-08-08 11:46:37 +00:00
pSyncNode->lastReplicateTime = timeNow;
// snapshotting
atomic_store_64(&pSyncNode->snapshottingIndex, SYNC_INDEX_INVALID);
2022-10-31 04:59:42 +00:00
// init log buffer
if (syncLogBufferInit(pSyncNode->pLogBuf, pSyncNode) < 0) {
sError("vgId:%d, failed to init sync log buffer since %s", pSyncNode->vgId, terrstr());
goto _error;
2022-10-31 04:59:42 +00:00
}
pSyncNode->isStart = true;
pSyncNode->electNum = 0;
pSyncNode->becomeLeaderNum = 0;
pSyncNode->configChangeNum = 0;
pSyncNode->hbSlowNum = 0;
pSyncNode->hbrSlowNum = 0;
2022-11-30 03:20:03 +00:00
pSyncNode->tmrRoutineNum = 0;
sNInfo(pSyncNode, "sync open, node:%p electInterval:%d heartbeatInterval:%d heartbeatTimeout:%d", pSyncNode,
tsElectInterval, tsHeartbeatInterval, tsHeartbeatTimeout);
2022-02-26 16:02:18 +00:00
return pSyncNode;
_error:
if (pSyncInfo->pFsm) {
2022-10-13 06:06:27 +00:00
taosMemoryFree(pSyncInfo->pFsm);
pSyncInfo->pFsm = NULL;
}
syncNodeClose(pSyncNode);
pSyncNode = NULL;
return NULL;
2022-02-26 16:02:18 +00:00
}
2022-07-16 07:34:22 +00:00
// Raise pSyncNode->commitIndex to the FSM snapshot's lastApplyIndex when the
// snapshot is ahead. Does nothing when no FSM or no FpGetSnapshotInfo callback
// is installed.
void syncNodeMaybeUpdateCommitBySnapshot(SSyncNode* pSyncNode) {
  if (pSyncNode->pFsm == NULL || pSyncNode->pFsm->FpGetSnapshotInfo == NULL) {
    return;
  }

  SSnapshot snap = {0};
  pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snap);

  // only ever move the commit index forward
  if (snap.lastApplyIndex > pSyncNode->commitIndex) {
    pSyncNode->commitIndex = snap.lastApplyIndex;
  }
}
2022-10-31 04:59:42 +00:00
// Restore the commit state of a sync node from its log store and log buffer.
//
// Reads the last and commit indexes from the log store, verifies that the log
// buffer ends exactly one past the last stored index, advances commitIndex to
// at least the stored commit index and commits the log buffer up to it.
//
// Returns 0 on success; -1 when the buffer end does not line up with the log
// store (terrno is set to TSDB_CODE_WAL_LOG_INCOMPLETE) or when
// syncLogBufferCommit reports a failure.
int32_t syncNodeRestore(SSyncNode* pSyncNode) {
  ASSERTS(pSyncNode->pLogStore != NULL, "log store not created");
  ASSERTS(pSyncNode->pLogBuf != NULL, "ring log buffer not created");

  SyncIndex storeLastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore);
  SyncIndex storeCommitIndex = pSyncNode->pLogStore->syncLogCommitIndex(pSyncNode->pLogStore);
  SyncIndex bufEndIndex = pSyncNode->pLogBuf->endIndex;

  bool bufAligned = (bufEndIndex == storeLastIndex + 1);
  if (!bufAligned && storeLastIndex != -1) {
    terrno = TSDB_CODE_WAL_LOG_INCOMPLETE;
    sError("vgId:%d, failed to restore sync node since %s. expected lastLogIndex:%" PRId64 ", lastVer:%" PRId64,
           pSyncNode->vgId, terrstr(), bufEndIndex - 1, storeLastIndex);
    return -1;
  }

  ASSERT(bufEndIndex == storeLastIndex + 1);

  pSyncNode->commitIndex = TMAX(pSyncNode->commitIndex, storeCommitIndex);
  sInfo("vgId:%d, restore sync until commitIndex:%" PRId64, pSyncNode->vgId, pSyncNode->commitIndex);

  return (syncLogBufferCommit(pSyncNode->pLogBuf, pSyncNode, pSyncNode->commitIndex) < 0) ? -1 : 0;
}
// Start raft on this node and then start the ping timer.
//
// Role selection:
//   - this node is configured as a learner -> become learner
//   - exactly one replica                  -> bump the term, become leader and
//                                             append a noop entry
//   - otherwise                            -> become follower
//
// Returns the result of syncNodeStartPingTimer.
int32_t syncNodeStart(SSyncNode* pSyncNode) {
  const SSyncCfg* pCfg = &pSyncNode->raftCfg.cfg;

  if (pCfg->nodeInfo[pCfg->myIndex].nodeRole == TAOS_SYNC_ROLE_LEARNER) {
    syncNodeBecomeLearner(pSyncNode, "first start");
  } else if (pSyncNode->replicaNum == 1) {
    raftStoreNextTerm(pSyncNode);
    syncNodeBecomeLeader(pSyncNode, "one replica start");

    // Raft 3.6.2 Committing entries from previous terms
    syncNodeAppendNoop(pSyncNode);
  } else {
    syncNodeBecomeFollower(pSyncNode, "first start");
  }

  int32_t code = syncNodeStartPingTimer(pSyncNode);
  if (code != 0) {
    sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr());
  }
  return code;
}
2022-10-31 04:59:42 +00:00
// Start the node in stand-by mode: force follower state, stop heartbeat
// timers, re-arm the elect timer with TIMER_MAX_MS and start the ping timer.
//
// Returns -1 when either timer call reports a negative result, otherwise the
// result of syncNodeStartPingTimer.
int32_t syncNodeStartStandBy(SSyncNode* pSyncNode) {
  // state change
  pSyncNode->state = TAOS_SYNC_STATE_FOLLOWER;
  syncNodeStopHeartbeatTimer(pSyncNode);

  // reset elect timer, long enough
  int32_t code = syncNodeRestartElectTimer(pSyncNode, TIMER_MAX_MS);
  if (code < 0) {
    sError("vgId:%d, failed to restart elect timer since %s", pSyncNode->vgId, terrstr());
    return -1;
  }

  code = syncNodeStartPingTimer(pSyncNode);
  if (code < 0) {
    sError("vgId:%d, failed to start ping timer since %s", pSyncNode->vgId, terrstr());
    return -1;
  }

  return code;
}
2022-11-03 01:39:20 +00:00
// First stage of closing a sync node: stops the elect, heartbeat and ping
// timers (in that order) and then cleans the responses held by the response
// manager. The node, its FSM and the FSM's FpApplyQueueItems callback must all
// be non-NULL.
void syncNodePreClose(SSyncNode* pSyncNode) {
  ASSERT(pSyncNode != NULL);
  ASSERT(pSyncNode->pFsm != NULL);
  ASSERT(pSyncNode->pFsm->FpApplyQueueItems != NULL);

  // timers: elect, then heartbeat, then ping
  syncNodeStopElectTimer(pSyncNode);
  syncNodeStopHeartbeatTimer(pSyncNode);
  syncNodeStopPingTimer(pSyncNode);

  // clean rsp
  syncRespCleanRsp(pSyncNode->pSyncRespMgr);
}
// Post-close stage of a sync node: stops the new-node snapshot receiver if it
// is running, destroys it and clears the pointer. Does nothing when no
// receiver exists.
void syncNodePostClose(SSyncNode* pSyncNode) {
  if (pSyncNode->pNewNodeReceiver == NULL) {
    return;
  }

  if (snapshotReceiverIsStart(pSyncNode->pNewNodeReceiver)) {
    snapshotReceiverStop(pSyncNode->pNewNodeReceiver);
  }

  // the message names the stage this function actually implements (post close)
  sDebug("vgId:%d, snapshot receiver destroy while postclose sync node, data:%p", pSyncNode->vgId,
         pSyncNode->pNewNodeReceiver);
  snapshotReceiverDestroy(pSyncNode->pNewNodeReceiver);
  pSyncNode->pNewNodeReceiver = NULL;
}
// Release a heartbeat timer data object via taosMemoryFree.
void syncHbTimerDataFree(SSyncHbTimerData* pData) {
  taosMemoryFree(pData);
}
2022-03-14 08:27:25 +00:00
2022-02-26 16:02:18 +00:00
// Tear down a sync node and free it. A NULL node is ignored.
//
// Order of operations: clean responses, stop the ping/elect/heartbeat timers,
// destroy the log replication managers, destroy the helper objects owned by
// the node (clearing each pointer), stop and destroy every snapshot sender and
// the new-node receiver, free the FSM, close the raft store and finally free
// the node itself.
void syncNodeClose(SSyncNode* pSyncNode) {
  if (pSyncNode == NULL) {
    return;
  }
  sNInfo(pSyncNode, "sync close, node:%p", pSyncNode);

  syncRespCleanRsp(pSyncNode->pSyncRespMgr);

  // timers and replication
  syncNodeStopPingTimer(pSyncNode);
  syncNodeStopElectTimer(pSyncNode);
  syncNodeStopHeartbeatTimer(pSyncNode);
  syncNodeLogReplDestroy(pSyncNode);

  // helper objects owned by the node
  syncRespMgrDestroy(pSyncNode->pSyncRespMgr);
  pSyncNode->pSyncRespMgr = NULL;

  voteGrantedDestroy(pSyncNode->pVotesGranted);
  pSyncNode->pVotesGranted = NULL;

  votesRespondDestory(pSyncNode->pVotesRespond);
  pSyncNode->pVotesRespond = NULL;

  syncIndexMgrDestroy(pSyncNode->pNextIndex);
  pSyncNode->pNextIndex = NULL;

  syncIndexMgrDestroy(pSyncNode->pMatchIndex);
  pSyncNode->pMatchIndex = NULL;

  logStoreDestory(pSyncNode->pLogStore);
  pSyncNode->pLogStore = NULL;

  syncLogBufferDestroy(pSyncNode->pLogBuf);
  pSyncNode->pLogBuf = NULL;

  // snapshot senders
  for (int32_t idx = 0; idx < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++idx) {
    SSyncSnapshotSender* pSender = pSyncNode->senders[idx];
    if (pSender == NULL) {
      continue;
    }

    sDebug("vgId:%d, snapshot sender destroy while close, data:%p", pSyncNode->vgId, pSender);
    if (snapshotSenderIsStart(pSender)) {
      snapshotSenderStop(pSender, false);
    }
    snapshotSenderDestroy(pSender);
    pSyncNode->senders[idx] = NULL;
  }

  // snapshot receiver
  if (pSyncNode->pNewNodeReceiver != NULL) {
    if (snapshotReceiverIsStart(pSyncNode->pNewNodeReceiver)) {
      snapshotReceiverStop(pSyncNode->pNewNodeReceiver);
    }

    sDebug("vgId:%d, snapshot receiver destroy while close, data:%p", pSyncNode->vgId, pSyncNode->pNewNodeReceiver);
    snapshotReceiverDestroy(pSyncNode->pNewNodeReceiver);
    pSyncNode->pNewNodeReceiver = NULL;
  }

  if (pSyncNode->pFsm != NULL) {
    taosMemoryFree(pSyncNode->pFsm);
  }

  raftStoreClose(pSyncNode);

  taosMemoryFree(pSyncNode);
}
// Return the snapshot strategy stored in the node's raft config.
ESyncStrategy syncNodeStrategy(SSyncNode* pSyncNode) {
  return pSyncNode->raftCfg.snapshotStrategy;
}
2022-05-30 05:14:48 +00:00
2022-03-14 08:27:25 +00:00
// timer control --------------
// Arm (or re-arm) the ping timer with pingTimerMS and publish the user logic
// clock as the timer's logic clock. When the sync environment is not
// initialized only an error is logged. Always returns 0.
int32_t syncNodeStartPingTimer(SSyncNode* pSyncNode) {
  if (!syncIsInit()) {
    sError("vgId:%d, start ping timer error, sync env is stop", pSyncNode->vgId);
    return 0;
  }

  taosTmrReset(pSyncNode->FpPingTimerCB, pSyncNode->pingTimerMS, pSyncNode, syncEnv()->pTimerManager,
               &pSyncNode->pPingTimer);
  atomic_store_64(&pSyncNode->pingTimerLogicClock, pSyncNode->pingTimerLogicClockUser);
  return 0;
}
// Stop the ping timer: bump the user logic clock, stop the timer and clear the
// timer handle. Always returns 0.
int32_t syncNodeStopPingTimer(SSyncNode* pSyncNode) {
  atomic_add_fetch_64(&pSyncNode->pingTimerLogicClockUser, 1);

  taosTmrStop(pSyncNode->pPingTimer);
  pSyncNode->pPingTimer = NULL;

  return 0;
}
// Arm (or re-arm) the elect timer to fire after `ms` milliseconds.
//
// Records `ms` as electTimerMS, fills electTimerParam (execute time = now + ms,
// current elect logic clock, owning node, NULL data) and resets the timer,
// passing the node's rid as the callback parameter. When the sync environment
// is not initialized only an error is logged. Always returns 0.
int32_t syncNodeStartElectTimer(SSyncNode* pSyncNode, int32_t ms) {
  if (!syncIsInit()) {
    sError("vgId:%d, start elect timer error, sync env is stop", pSyncNode->vgId);
    return 0;
  }

  pSyncNode->electTimerMS = ms;

  int64_t fireAt = taosGetTimestampMs() + ms;
  atomic_store_64(&(pSyncNode->electTimerParam.executeTime), fireAt);
  atomic_store_64(&(pSyncNode->electTimerParam.logicClock), pSyncNode->electTimerLogicClock);
  pSyncNode->electTimerParam.pSyncNode = pSyncNode;
  pSyncNode->electTimerParam.pData = NULL;

  taosTmrReset(pSyncNode->FpElectTimerCB, pSyncNode->electTimerMS, (void*)(pSyncNode->rid), syncEnv()->pTimerManager,
               &pSyncNode->pElectTimer);
  return 0;
}
// Stop the elect timer: bump the elect logic clock, stop the timer and clear
// the timer handle. Always returns 0.
int32_t syncNodeStopElectTimer(SSyncNode* pSyncNode) {
  atomic_add_fetch_64(&pSyncNode->electTimerLogicClock, 1);

  taosTmrStop(pSyncNode->pElectTimer);
  pSyncNode->pElectTimer = NULL;

  return 0;
}
// Stop the elect timer and start it again with a timeout of `ms` milliseconds.
// The results of the stop/start calls are not propagated; always returns 0.
int32_t syncNodeRestartElectTimer(SSyncNode* pSyncNode, int32_t ms) {
  syncNodeStopElectTimer(pSyncNode);
  syncNodeStartElectTimer(pSyncNode, ms);
  return 0;
}
// Restart the elect timer with a fresh timeout: TIMER_MAX_MS for a stand-by
// node, otherwise a value from syncUtilElectRandomMS over
// [electBaseLine, 2 * electBaseLine].
void syncNodeResetElectTimer(SSyncNode* pSyncNode) {
  const int32_t baseLine = pSyncNode->electBaseLine;
  int32_t timeoutMS = pSyncNode->raftCfg.isStandBy ? TIMER_MAX_MS : syncUtilElectRandomMS(baseLine, 2 * baseLine);

  (void)syncNodeRestartElectTimer(pSyncNode, timeoutMS);

  sNTrace(pSyncNode, "reset elect timer, min:%d, max:%d, ms:%d", pSyncNode->electBaseLine, 2 * pSyncNode->electBaseLine,
          timeoutMS);
}
2022-08-08 11:46:37 +00:00
// Arm (or re-arm) the node-level heartbeat timer with heartbeatTimerMS and
// publish the user logic clock as the timer's logic clock. When the sync
// environment is not initialized only an error is logged. A trace line is
// emitted in both cases. Always returns 0.
static int32_t syncNodeDoStartHeartbeatTimer(SSyncNode* pSyncNode) {
  if (!syncIsInit()) {
    sError("vgId:%d, start heartbeat timer error, sync env is stop", pSyncNode->vgId);
  } else {
    taosTmrReset(pSyncNode->FpHeartbeatTimerCB, pSyncNode->heartbeatTimerMS, pSyncNode, syncEnv()->pTimerManager,
                 &pSyncNode->pHeartbeatTimer);
    atomic_store_64(&pSyncNode->heartbeatTimerLogicClock, pSyncNode->heartbeatTimerLogicClockUser);
  }

  sNTrace(pSyncNode, "start heartbeat timer, ms:%d", pSyncNode->heartbeatTimerMS);
  return 0;
}
2022-08-08 11:46:37 +00:00
// Start the per-peer heartbeat timers. Peers for which syncNodeGetHbTimer
// returns NULL are skipped. The node-level heartbeat timer path is compiled
// out. Always returns 0.
int32_t syncNodeStartHeartbeatTimer(SSyncNode* pSyncNode) {
  int32_t ret = 0;

#if 0
  pSyncNode->heartbeatTimerMS = pSyncNode->hbBaseLine;
  ret = syncNodeDoStartHeartbeatTimer(pSyncNode);
#endif

  for (int32_t peer = 0; peer < pSyncNode->peersNum; ++peer) {
    SSyncTimer* pHbTimer = syncNodeGetHbTimer(pSyncNode, &(pSyncNode->peersId[peer]));
    if (pHbTimer == NULL) {
      continue;
    }
    syncHbTimerStart(pSyncNode, pHbTimer);
  }

  return ret;
}
2022-03-14 08:27:25 +00:00
// Stop the per-peer heartbeat timers. Peers for which syncNodeGetHbTimer
// returns NULL are skipped. The node-level heartbeat timer path is compiled
// out. Always returns 0.
int32_t syncNodeStopHeartbeatTimer(SSyncNode* pSyncNode) {
  int32_t ret = 0;

#if 0
  atomic_add_fetch_64(&pSyncNode->heartbeatTimerLogicClockUser, 1);
  taosTmrStop(pSyncNode->pHeartbeatTimer);
  pSyncNode->pHeartbeatTimer = NULL;
#endif

  for (int32_t peer = 0; peer < pSyncNode->peersNum; ++peer) {
    SSyncTimer* pHbTimer = syncNodeGetHbTimer(pSyncNode, &(pSyncNode->peersId[peer]));
    if (pHbTimer == NULL) {
      continue;
    }
    syncHbTimerStop(pSyncNode, pHbTimer);
  }

  return ret;
}
// Stop and then start the heartbeat timers again. The results of both calls
// are discarded; always returns 0.
int32_t syncNodeRestartHeartbeatTimer(SSyncNode* pSyncNode) {
  (void)syncNodeStopHeartbeatTimer(pSyncNode);
  (void)syncNodeStartHeartbeatTimer(pSyncNode);
  return 0;
}
// Send pMsg to the peer identified by destRaftId.
//
// The peer's endpoint set is looked up by matching destRaftId->addr against
// pNode->peersId. When a matching peer exists and a send callback is
// installed, the message content is converted with syncUtilMsgHtoN, marked as
// "no response" and handed to pNode->syncSendMSg.
//
// Returns the callback's result, or -1 when no peer matched or no callback is
// installed. On a negative result the message content is released with
// rpcFreeCont, pMsg->pCont is cleared so that the caller cannot reuse or
// re-free the released buffer, and terrno is set to
// TSDB_CODE_SYN_INTERNAL_ERROR.
int32_t syncNodeSendMsgById(const SRaftId* destRaftId, SSyncNode* pNode, SRpcMsg* pMsg) {
  SEpSet* epSet = NULL;
  for (int32_t i = 0; i < pNode->peersNum; ++i) {
    if (destRaftId->addr == pNode->peersId[i].addr) {
      epSet = &pNode->peersEpset[i];
      break;
    }
  }

  int32_t code = -1;
  if (pNode->syncSendMSg != NULL && epSet != NULL) {
    syncUtilMsgHtoN(pMsg->pCont);
    pMsg->info.noResp = 1;
    code = pNode->syncSendMSg(epSet, pMsg);
  }

  if (code < 0) {
    // log the pending terrno before it is overwritten below
    sError("vgId:%d, sync send msg by id error, epset:%p dnode:%d addr:%" PRId64 " err:0x%x", pNode->vgId, epSet,
           DID(destRaftId), destRaftId->addr, terrno);
    rpcFreeCont(pMsg->pCont);
    pMsg->pCont = NULL;  // do not leave a dangling pointer behind in the caller's message
    terrno = TSDB_CODE_SYN_INTERNAL_ERROR;
  }

  return code;
}
// Tell whether this node is a member of the configuration pCfg.
//
// Membership is determined twice: once by comparing fqdn and port against
// pNode->myNodeInfo, and once by comparing the raft id built from each entry
// (SYNC_ADDR plus the node's vgId) against pNode->myRaftId. Both answers are
// asserted to agree; the endpoint-based answer is returned.
inline bool syncNodeInConfig(SSyncNode* pNode, const SSyncCfg* pCfg) {
  bool foundByEndpoint = false;
  bool foundByRaftId = false;

  // lookup by fqdn + port
  for (int32_t k = 0; k < pCfg->totalReplicaNum && !foundByEndpoint; ++k) {
    if (strcmp(pCfg->nodeInfo[k].nodeFqdn, pNode->myNodeInfo.nodeFqdn) != 0) {
      continue;
    }
    if (pCfg->nodeInfo[k].nodePort == pNode->myNodeInfo.nodePort) {
      foundByEndpoint = true;
    }
  }

  // lookup by raft id
  for (int32_t k = 0; k < pCfg->totalReplicaNum && !foundByRaftId; ++k) {
    SRaftId candidate = {
        .addr = SYNC_ADDR(&pCfg->nodeInfo[k]),
        .vgId = pNode->vgId,
    };
    if (syncUtilSameId(&candidate, &pNode->myRaftId)) {
      foundByRaftId = true;
    }
  }

  ASSERT(foundByEndpoint == foundByRaftId);
  return foundByEndpoint;
}
2022-10-21 07:33:06 +00:00
static bool syncIsConfigChanged(const SSyncCfg* pOldCfg, const SSyncCfg* pNewCfg) {
2023-04-18 11:03:45 +00:00
if (pOldCfg->totalReplicaNum != pNewCfg->totalReplicaNum) return true;
2022-10-21 07:33:06 +00:00
if (pOldCfg->myIndex != pNewCfg->myIndex) return true;
2023-04-18 11:03:45 +00:00
for (int32_t i = 0; i < pOldCfg->totalReplicaNum; ++i) {
2022-10-21 07:33:06 +00:00
const SNodeInfo* pOldInfo = &pOldCfg->nodeInfo[i];
const SNodeInfo* pNewInfo = &pNewCfg->nodeInfo[i];
if (strcmp(pOldInfo->nodeFqdn, pNewInfo->nodeFqdn) != 0) return true;
if (pOldInfo->nodePort != pNewInfo->nodePort) return true;
2023-04-18 11:03:45 +00:00
if(pOldInfo->nodeRole != pNewInfo->nodeRole) return true;
2022-10-21 07:33:06 +00:00
}
return false;
}
2022-06-20 12:07:36 +00:00
// Apply a new raft configuration to the sync node.
//
// pSyncNode             node to reconfigure
// pNewConfig            configuration to install; copied into raftCfg.cfg
// lastConfigChangeIndex index recorded as raftCfg.lastConfigIndex and appended
//                       through syncAddCfgIndex
//
// Nothing happens when syncIsConfigChanged reports no difference. Otherwise:
//   - the new config is stored and configChangeNum is incremented;
//   - isStandBy is cleared when the node is part of the new config and set when
//     the node was in the old config but is not in the new one;
//   - when the node is in the new config, its own identity, peers, replica ids,
//     quorum, index managers and vote managers are rebuilt from the new config,
//     snapshot senders are re-homed to their new replica index (unmatched old
//     senders are destroyed, missing ones are created), the config file is
//     written, and the node re-enters its current role (leader -> become leader
//     and append a noop entry, anything else -> become follower);
//   - when the node is not in the new config only the config file is written.
//
// Failures of syncWriteCfgFile and syncUtilNodeInfo2RaftId are logged; the
// function has no way to report them to the caller.
void syncNodeDoConfigChange(SSyncNode* pSyncNode, SSyncCfg* pNewConfig, SyncIndex lastConfigChangeIndex) {
  SSyncCfg oldConfig = pSyncNode->raftCfg.cfg;
  if (!syncIsConfigChanged(&oldConfig, pNewConfig)) {
    sInfo("vgId:%d, sync not reconfig since not changed", pSyncNode->vgId);
    return;
  }

  pSyncNode->raftCfg.cfg = *pNewConfig;
  pSyncNode->raftCfg.lastConfigIndex = lastConfigChangeIndex;

  pSyncNode->configChangeNum++;

  // membership is evaluated against the identity the node had before this change
  bool IamInOld = syncNodeInConfig(pSyncNode, &oldConfig);
  bool IamInNew = syncNodeInConfig(pSyncNode, pNewConfig);
  bool isDrop = IamInOld && !IamInNew;

  // log begin config change (every conversion matches the type of its argument)
  sNInfo(pSyncNode,
         "begin do config change, vgId:%d, totalReplicaNum from %d to %d, lastIndex from %" PRId64 " to %" PRId64,
         pSyncNode->vgId, oldConfig.totalReplicaNum, pNewConfig->totalReplicaNum, (int64_t)oldConfig.lastIndex,
         (int64_t)pNewConfig->lastIndex);

  if (IamInNew) {
    pSyncNode->raftCfg.isStandBy = 0;  // change isStandBy to normal
  }
  if (isDrop) {
    pSyncNode->raftCfg.isStandBy = 1;  // set standby
  }

  // add last config index
  syncAddCfgIndex(pSyncNode, lastConfigChangeIndex);

  if (IamInNew) {
    // save snapshot senders together with the replica ids they belonged to
    SRaftId oldReplicasId[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
    memcpy(oldReplicasId, pSyncNode->replicasId, sizeof(oldReplicasId));

    SSyncSnapshotSender* oldSenders[TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA];
    for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
      oldSenders[i] = pSyncNode->senders[i];
      // a slot may be empty if an earlier sender creation failed
      if (oldSenders[i] != NULL) {
        sSTrace(oldSenders[i], "snapshot sender save old");
      }
    }

    // init internal
    pSyncNode->myNodeInfo = pSyncNode->raftCfg.cfg.nodeInfo[pSyncNode->raftCfg.cfg.myIndex];
    if (!syncUtilNodeInfo2RaftId(&pSyncNode->myNodeInfo, pSyncNode->vgId, &pSyncNode->myRaftId)) {
      sError("vgId:%d, failed to determine my raft member id", pSyncNode->vgId);
    }

    // init peersNum, peers, peersId
    pSyncNode->peersNum = pSyncNode->raftCfg.cfg.totalReplicaNum - 1;
    int32_t peerIdx = 0;
    for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
      if (i != pSyncNode->raftCfg.cfg.myIndex) {
        pSyncNode->peersNodeInfo[peerIdx] = pSyncNode->raftCfg.cfg.nodeInfo[i];
        syncUtilNodeInfo2EpSet(&pSyncNode->peersNodeInfo[peerIdx], &pSyncNode->peersEpset[peerIdx]);
        peerIdx++;
      }
    }
    for (int32_t i = 0; i < pSyncNode->peersNum; ++i) {
      if (!syncUtilNodeInfo2RaftId(&pSyncNode->peersNodeInfo[i], pSyncNode->vgId, &pSyncNode->peersId[i])) {
        sError("vgId:%d, failed to determine raft member id, peer:%d", pSyncNode->vgId, i);
      }
    }

    // init replicaNum, replicasId
    pSyncNode->replicaNum = pSyncNode->raftCfg.cfg.replicaNum;
    pSyncNode->totalReplicaNum = pSyncNode->raftCfg.cfg.totalReplicaNum;
    for (int32_t i = 0; i < pSyncNode->raftCfg.cfg.totalReplicaNum; ++i) {
      if (!syncUtilNodeInfo2RaftId(&pSyncNode->raftCfg.cfg.nodeInfo[i], pSyncNode->vgId, &pSyncNode->replicasId[i])) {
        sError("vgId:%d, failed to determine raft member id, replica:%d", pSyncNode->vgId, i);
      }
    }

    // update quorum first
    pSyncNode->quorum = syncUtilQuorum(pSyncNode->raftCfg.cfg.replicaNum);

    syncIndexMgrUpdate(pSyncNode->pNextIndex, pSyncNode);
    syncIndexMgrUpdate(pSyncNode->pMatchIndex, pSyncNode);
    voteGrantedUpdate(pSyncNode->pVotesGranted, pSyncNode);
    votesRespondUpdate(pSyncNode->pVotesRespond, pSyncNode);

    // reset snapshot senders

    // clear new
    for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
      pSyncNode->senders[i] = NULL;
    }

    // reset new: move each surviving sender to the index its replica has now
    for (int32_t i = 0; i < pSyncNode->totalReplicaNum; ++i) {
      for (int32_t k = 0; k < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++k) {
        if (syncUtilSameId(&(pSyncNode->replicasId)[i], &oldReplicasId[k]) && oldSenders[k] != NULL) {
          sNTrace(pSyncNode, "snapshot sender reset for:%" PRId64 ", newIndex:%d, dnode:%d, %p",
                  (pSyncNode->replicasId)[i].addr, i, DID(&pSyncNode->replicasId[i]), oldSenders[k]);

          pSyncNode->senders[i] = oldSenders[k];
          oldSenders[k] = NULL;  // ownership moved; keeps it out of the "free old" pass

          // reset replicaIndex
          int32_t oldreplicaIndex = pSyncNode->senders[i]->replicaIndex;
          pSyncNode->senders[i]->replicaIndex = i;

          sNTrace(pSyncNode, "snapshot sender udpate replicaIndex from %d to %d, dnode:%d, %p, reset:%d",
                  oldreplicaIndex, i, DID(&pSyncNode->replicasId[i]), pSyncNode->senders[i], 1);

          break;
        }
      }
    }

    // create new
    for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
      if (pSyncNode->senders[i] == NULL) {
        pSyncNode->senders[i] = snapshotSenderCreate(pSyncNode, i);
        if (pSyncNode->senders[i] == NULL) {
          // will be created later while send snapshot
          // (logged through the node: there is no sender object to log through)
          sError("vgId:%d, snapshot sender create failed while reconfig", pSyncNode->vgId);
        } else {
          sSDebug(pSyncNode->senders[i], "snapshot sender create while reconfig, data:%p", pSyncNode->senders[i]);
        }
      } else {
        sSDebug(pSyncNode->senders[i], "snapshot sender already exist, data:%p", pSyncNode->senders[i]);
      }
    }

    // free old
    for (int32_t i = 0; i < TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA; ++i) {
      if (oldSenders[i] != NULL) {
        sSDebug(oldSenders[i], "snapshot sender destroy old, data:%p replica-index:%d", oldSenders[i], i);
        snapshotSenderDestroy(oldSenders[i]);
        oldSenders[i] = NULL;
      }
    }

    // persist cfg
    if (syncWriteCfgFile(pSyncNode) != 0) {
      sError("vgId:%d, failed to write sync cfg file on config change", pSyncNode->vgId);
    }

    // change isStandBy to normal (election timeout)
    if (pSyncNode->state == TAOS_SYNC_STATE_LEADER) {
      syncNodeBecomeLeader(pSyncNode, "");

      // Raft 3.6.2 Committing entries from previous terms
      syncNodeAppendNoop(pSyncNode);
      // syncMaybeAdvanceCommitIndex(pSyncNode);

    } else {
      syncNodeBecomeFollower(pSyncNode, "");
    }
  } else {
    // persist cfg
    if (syncWriteCfgFile(pSyncNode) != 0) {
      sError("vgId:%d, failed to write sync cfg file on config change", pSyncNode->vgId);
    }
    sNInfo(pSyncNode, "do not config change from %d to %d", oldConfig.totalReplicaNum, pNewConfig->totalReplicaNum);
  }

  // log end config change
  sNInfo(pSyncNode, "end do config change, from %d to %d", oldConfig.totalReplicaNum, pNewConfig->totalReplicaNum);
}
2022-03-14 10:44:53 +00:00
// raft state change --------------
// Adopt a newer term: store it, become follower, then clear the recorded vote.
// Nothing happens when `term` is not greater than the stored term.
void syncNodeUpdateTerm(SSyncNode* pSyncNode, SyncTerm term) {
  if (term <= raftStoreGetTerm(pSyncNode)) {
    return;
  }

  raftStoreSetTerm(pSyncNode, term);

  // the reason string ends up in the "become follower" trace line
  char reason[64];
  snprintf(reason, sizeof(reason), "update term to %" PRId64, term);
  syncNodeBecomeFollower(pSyncNode, reason);

  raftStoreClearVote(pSyncNode);
}
// Store `term` when it is greater than the stored term; this function itself
// performs no role transition.
void syncNodeUpdateTermWithoutStepDown(SSyncNode* pSyncNode, SyncTerm term) {
  if (term <= raftStoreGetTerm(pSyncNode)) {
    return;
  }
  raftStoreSetTerm(pSyncNode, term);
}
2022-10-19 08:08:42 +00:00
// Step down in response to `newTerm`.
//   newTerm <  stored term : ignored (traced only)
//   newTerm == stored term : become follower unless already one
//   newTerm >  stored term : store the term, become follower, clear the vote
void syncNodeStepDown(SSyncNode* pSyncNode, SyncTerm newTerm) {
  SyncTerm storedTerm = raftStoreGetTerm(pSyncNode);

  if (newTerm < storedTerm) {
    sNTrace(pSyncNode, "step down, ignore, new-term:%" PRId64 ", current-term:%" PRId64, newTerm, storedTerm);
    return;
  }

  sNTrace(pSyncNode, "step down, new-term:%" PRId64 ", current-term:%" PRId64, newTerm, storedTerm);

  if (newTerm == storedTerm) {
    // same term: only the role may need to change
    if (pSyncNode->state != TAOS_SYNC_STATE_FOLLOWER) {
      syncNodeBecomeFollower(pSyncNode, "step down");
    }
    return;
  }

  // strictly newer term
  raftStoreSetTerm(pSyncNode, newTerm);

  char reason[64];
  snprintf(reason, sizeof(reason), "step down, update term to %" PRId64, newTerm);
  syncNodeBecomeFollower(pSyncNode, reason);

  raftStoreClearVote(pSyncNode);
}
// Hand the node's response manager to syncRespCleanRsp; what gets cleaned is
// decided there.
void syncNodeLeaderChangeRsp(SSyncNode* pSyncNode) {
  syncRespCleanRsp(pSyncNode->pSyncRespMgr);
}
2022-06-10 07:19:11 +00:00
// Switch the node to the follower role.
// `debugStr` is appended to the "become follower" trace line.
void syncNodeBecomeFollower(SSyncNode* pSyncNode, const char* debugStr) {
  // the leader cache is cleared only when the previous role was leader
  const bool wasLeader = (pSyncNode->state == TAOS_SYNC_STATE_LEADER);
  if (wasLeader) {
    pSyncNode->leaderCache = EMPTY_RAFT_ID;
  }

  pSyncNode->hbSlowNum = 0;

  // change role, then stop the heartbeat timer
  pSyncNode->state = TAOS_SYNC_STATE_FOLLOWER;
  syncNodeStopHeartbeatTimer(pSyncNode);

  sNTrace(pSyncNode, "become follower %s", debugStr);

  // respond to clients through the response manager
  syncNodeLeaderChangeRsp(pSyncNode);

  // notify the state machine when it registered a hook
  if (pSyncNode->pFsm != NULL && pSyncNode->pFsm->FpBecomeFollowerCb != NULL) {
    pSyncNode->pFsm->FpBecomeFollowerCb(pSyncNode->pFsm);
  }

  // invalidate the min match index, reset the log buffer, re-arm the election timer
  pSyncNode->minMatchIndex = SYNC_INDEX_INVALID;
  syncLogBufferReset(pSyncNode->pLogBuf, pSyncNode);
  syncNodeResetElectTimer(pSyncNode);
}
2023-04-18 11:03:45 +00:00
// Switch the node to the learner role.
// `debugStr` is appended to the "become learner" trace line.
void syncNodeBecomeLearner(SSyncNode* pSyncNode, const char* debugStr) {
  pSyncNode->hbSlowNum = 0;
  pSyncNode->state = TAOS_SYNC_STATE_LEARNER;

  sNTrace(pSyncNode, "become learner %s", debugStr);

  // notify the state machine when it registered a hook
  if (pSyncNode->pFsm != NULL && pSyncNode->pFsm->FpBecomeLearnerCb != NULL) {
    pSyncNode->pFsm->FpBecomeLearnerCb(pSyncNode->pFsm);
  }

  // invalidate the min match index and reset the log buffer
  pSyncNode->minMatchIndex = SYNC_INDEX_INVALID;
  syncLogBufferReset(pSyncNode->pLogBuf, pSyncNode);
}
2022-03-14 10:44:53 +00:00
// TLA+ Spec
// \* Candidate i transitions to leader.
// BecomeLeader(i) ==
// /\ state[i] = Candidate
// /\ votesGranted[i] \in Quorum
// /\ state' = [state EXCEPT ![i] = Leader]
// /\ nextIndex' = [nextIndex EXCEPT ![i] =
// [j \in Server |-> Len(log[i]) + 1]]
// /\ matchIndex' = [matchIndex EXCEPT ![i] =
// [j \in Server |-> 0]]
// /\ elections' = elections \cup
// {[eterm |-> currentTerm[i],
// eleader |-> i,
// elog |-> log[i],
// evotes |-> votesGranted[i],
// evoterLog |-> voterLog[i]]}
// /\ UNCHANGED <<messages, currentTerm, votedFor, candidateVars, logVars>>
//
2022-06-10 07:19:11 +00:00
// Switch the node to the leader role.
// `debugStr` is appended to the "become leader" info line.
void syncNodeBecomeLeader(SSyncNode* pSyncNode, const char* debugStr) {
  // leadership bookkeeping
  pSyncNode->leaderTime = taosGetTimestampMs();
  pSyncNode->becomeLeaderNum++;
  pSyncNode->hbrSlowNum = 0;
  pSyncNode->restoreFinish = false;

  // change role; the cached leader becomes this node
  pSyncNode->state = TAOS_SYNC_STATE_LEADER;
  pSyncNode->leaderCache = pSyncNode->myRaftId;

  // every next-index slot starts at (last index + 1)
  for (int32_t slot = 0; slot < pSyncNode->pNextIndex->replicaNum; ++slot) {
    SyncIndex lastIndex;
    SyncTerm  lastTerm;
    int32_t   code = syncNodeGetLastIndexTerm(pSyncNode, &lastIndex, &lastTerm);
    ASSERT(code == 0);
    pSyncNode->pNextIndex->index[slot] = lastIndex + 1;
  }

  // every match-index slot starts invalid, including this node's own slot
  for (int32_t slot = 0; slot < pSyncNode->pMatchIndex->replicaNum; ++slot) {
    pSyncNode->pMatchIndex->index[slot] = SYNC_INDEX_INVALID;
  }

  // per-peer send state
  syncNodePeerStateInit(pSyncNode);

#if 0
  // update sender private term
  SSyncSnapshotSender* pMySender = syncNodeGetSnapshotSender(pSyncNode, &(pSyncNode->myRaftId));
  if (pMySender != NULL) {
    for (int32_t i = 0; i < pSyncNode->pMatchIndex->replicaNum; ++i) {
      if (pSyncNode->senders[i]->privateTerm > pMySender->privateTerm) {
        pMySender->privateTerm = pSyncNode->senders[i]->privateTerm;
      }
    }
    (pMySender->privateTerm) += 100;
  }
#endif

  // stop the snapshot receiver if it is running
  if (snapshotReceiverIsStart(pSyncNode->pNewNodeReceiver)) {
    snapshotReceiverStop(pSyncNode->pNewNodeReceiver);
  }

  // timers: election off, heartbeat on, and one heartbeat round right away
  syncNodeStopElectTimer(pSyncNode);
  syncNodeStartHeartbeatTimer(pSyncNode);
  syncNodeHeartbeatPeers(pSyncNode);

  // notify the state machine when it registered a hook
  if (pSyncNode->pFsm != NULL && pSyncNode->pFsm->FpBecomeLeaderCb != NULL) {
    pSyncNode->pFsm->FpBecomeLeaderCb(pSyncNode->pFsm);
  }

  // invalidate the min match index and reset the log buffer
  pSyncNode->minMatchIndex = SYNC_INDEX_INVALID;
  syncLogBufferReset(pSyncNode->pLogBuf, pSyncNode);

  sNInfo(pSyncNode, "become leader %s", debugStr);
}
// Candidate -> leader transition. Requires the candidate state; returns early
// (with an error log) when the votes granted do not form a majority.
void syncNodeCandidate2Leader(SSyncNode* pSyncNode) {
  ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);

  if (!voteGrantedMajority(pSyncNode->pVotesGranted)) {
    sError("vgId:%d, not granted by majority.", pSyncNode->vgId);
    return;
  }

  syncNodeBecomeLeader(pSyncNode, "candidate to leader");
  sNTrace(pSyncNode, "state change syncNodeCandidate2Leader");

  // a failed noop append is logged but does not undo the transition
  if (syncNodeAppendNoop(pSyncNode) < 0) {
    sError("vgId:%d, failed to append noop entry since %s", pSyncNode->vgId, terrstr());
  }

  SyncIndex lastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore);
  ASSERT(lastIndex >= 0);

  sInfo("vgId:%d, become leader. term:%" PRId64 ", commit index:%" PRId64 ", last index:%" PRId64 "", pSyncNode->vgId,
        raftStoreGetTerm(pSyncNode), pSyncNode->commitIndex, lastIndex);
}
2022-10-15 01:28:55 +00:00
// Report whether this node's vgId is 1, the id this check treats as the mnode group.
bool syncNodeIsMnode(SSyncNode* pSyncNode) {
  const bool isMnode = (pSyncNode->vgId == 1);
  return isMnode;
}
2022-10-16 04:07:02 +00:00
// Reset the send bookkeeping of every peer-state slot (voters plus learners).
// Always returns 0.
int32_t syncNodePeerStateInit(SSyncNode* pSyncNode) {
  const int32_t slotCount = TSDB_MAX_REPLICA + TSDB_MAX_LEARNER_REPLICA;

  for (int32_t slot = 0; slot < slotCount; ++slot) {
    pSyncNode->peerStates[slot].lastSendIndex = SYNC_INDEX_INVALID;
    pSyncNode->peerStates[slot].lastSendTime = 0;
  }

  return 0;
}
// Follower -> candidate transition. Only the state field changes here; the
// rest is logging.
void syncNodeFollower2Candidate(SSyncNode* pSyncNode) {
  ASSERT(pSyncNode->state == TAOS_SYNC_STATE_FOLLOWER);
  pSyncNode->state = TAOS_SYNC_STATE_CANDIDATE;

  SyncIndex logLastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore);
  sInfo("vgId:%d, become candidate from follower. term:%" PRId64 ", commit index:%" PRId64 ", last index:%" PRId64,
        pSyncNode->vgId, raftStoreGetTerm(pSyncNode), pSyncNode->commitIndex, logLastIndex);

  sNTrace(pSyncNode, "follower to candidate");
}
// Leader -> follower transition. Requires the leader state.
void syncNodeLeader2Follower(SSyncNode* pSyncNode) {
  ASSERT(pSyncNode->state == TAOS_SYNC_STATE_LEADER);
  syncNodeBecomeFollower(pSyncNode, "leader to follower");

  SyncIndex logLastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore);
  sInfo("vgId:%d, become follower from leader. term:%" PRId64 ", commit index:%" PRId64 ", last index:%" PRId64,
        pSyncNode->vgId, raftStoreGetTerm(pSyncNode), pSyncNode->commitIndex, logLastIndex);

  sNTrace(pSyncNode, "leader to follower");
}
// Candidate -> follower transition. Requires the candidate state.
void syncNodeCandidate2Follower(SSyncNode* pSyncNode) {
  ASSERT(pSyncNode->state == TAOS_SYNC_STATE_CANDIDATE);
  syncNodeBecomeFollower(pSyncNode, "candidate to follower");

  SyncIndex logLastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore);
  sInfo("vgId:%d, become follower from candidate. term:%" PRId64 ", commit index:%" PRId64 ", last index:%" PRId64,
        pSyncNode->vgId, raftStoreGetTerm(pSyncNode), pSyncNode->commitIndex, logLastIndex);

  sNTrace(pSyncNode, "candidate to follower");
}
2022-03-16 08:20:01 +00:00
// just called by syncNodeVoteForSelf
// need assert
2022-03-14 10:44:53 +00:00
// Record a vote for `pRaftId`.
// Asserted preconditions: `term` equals the stored term and no vote has been
// recorded yet.
void syncNodeVoteForTerm(SSyncNode* pSyncNode, SyncTerm term, SRaftId* pRaftId) {
  ASSERT(term == raftStoreGetTerm(pSyncNode));

  bool voted = raftStoreHasVoted(pSyncNode);
  ASSERT(!voted);

  raftStoreVote(pSyncNode, pRaftId);
}
2022-03-16 08:20:01 +00:00
// simulate get vote from outside
2023-02-13 11:00:10 +00:00
// Vote for this node in `currentTerm`, then feed a locally built, granted
// RequestVoteReply (src == dest == this node) into the vote trackers.
void syncNodeVoteForSelf(SSyncNode* pSyncNode, SyncTerm currentTerm) {
  syncNodeVoteForTerm(pSyncNode, currentTerm, &pSyncNode->myRaftId);

  SRpcMsg selfReply = {0};
  if (syncBuildRequestVoteReply(&selfReply, pSyncNode->vgId) != 0) {
    return;
  }

  SyncRequestVoteReply* pReply = selfReply.pCont;
  pReply->srcId = pSyncNode->myRaftId;
  pReply->destId = pSyncNode->myRaftId;
  pReply->term = currentTerm;
  pReply->voteGranted = true;

  voteGrantedVote(pSyncNode->pVotesGranted, pReply);
  votesRespondAdd(pSyncNode->pVotesRespond, pReply);

  // the message was only built to carry the vote; release it here
  rpcFreeCont(selfReply.pCont);
}
2022-06-29 08:40:36 +00:00
// return if has a snapshot
2022-06-05 11:47:54 +00:00
// True when the FSM provides FpGetSnapshotInfo and it reports a lastApplyIndex
// of at least SYNC_INDEX_BEGIN.
bool syncNodeHasSnapshot(SSyncNode* pSyncNode) {
  if (pSyncNode->pFsm->FpGetSnapshotInfo == NULL) {
    return false;
  }

  SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0, .lastConfigIndex = -1};
  pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot);

  return snapshot.lastApplyIndex >= SYNC_INDEX_BEGIN;
}
2022-06-29 08:40:36 +00:00
// return max(logLastIndex, snapshotLastIndex)
// if no snapshot and log, return -1
SyncIndex syncNodeGetLastIndex(const SSyncNode* pSyncNode) {
2022-06-29 08:40:36 +00:00
SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0, .lastConfigIndex = -1};
if (pSyncNode->pFsm->FpGetSnapshotInfo != NULL) {
pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot);
2022-06-05 11:47:54 +00:00
}
SyncIndex logLastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore);
SyncIndex lastIndex = logLastIndex > snapshot.lastApplyIndex ? logLastIndex : snapshot.lastApplyIndex;
return lastIndex;
}
2022-06-29 08:40:36 +00:00
// return the last term of snapshot and log
// if error, return SYNC_TERM_INVALID (by syncLogLastTerm)
2022-06-05 11:47:54 +00:00
// Term belonging to the last index across the snapshot and the log.
// Without a snapshot, or when the log extends past it, the log store's last
// term is returned; otherwise the snapshot's lastApplyTerm.
SyncTerm syncNodeGetLastTerm(SSyncNode* pSyncNode) {
  if (!syncNodeHasSnapshot(pSyncNode)) {
    // no snapshot: the log alone decides
    return pSyncNode->pLogStore->syncLogLastTerm(pSyncNode->pLogStore);
  }

  SSnapshot snapshot = {.data = NULL, .lastApplyIndex = -1, .lastApplyTerm = 0, .lastConfigIndex = -1};
  if (pSyncNode->pFsm->FpGetSnapshotInfo != NULL) {
    pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot);
  }

  SyncIndex logLastIndex = pSyncNode->pLogStore->syncLogLastIndex(pSyncNode->pLogStore);
  if (logLastIndex > snapshot.lastApplyIndex) {
    return pSyncNode->pLogStore->syncLogLastTerm(pSyncNode->pLogStore);
  }
  return snapshot.lastApplyTerm;
}
// get last index and term along with snapshot
// Write the last index and last term (snapshot taken into account) to the
// output pointers. Always returns 0.
int32_t syncNodeGetLastIndexTerm(SSyncNode* pSyncNode, SyncIndex* pLastIndex, SyncTerm* pLastTerm) {
  const SyncIndex lastIndex = syncNodeGetLastIndex(pSyncNode);
  *pLastIndex = lastIndex;

  const SyncTerm lastTerm = syncNodeGetLastTerm(pSyncNode);
  *pLastTerm = lastTerm;

  return 0;
}
2022-05-30 09:31:55 +00:00
2022-06-29 08:40:36 +00:00
// return append-entries first try index
2022-06-06 03:24:25 +00:00
// One past the node's last index.
SyncIndex syncNodeSyncStartIndex(SSyncNode* pSyncNode) {
  return syncNodeGetLastIndex(pSyncNode) + 1;
}
2022-06-29 08:40:36 +00:00
// if index > 0, return index - 1
// else, return -1
// index - 1, clamped from below at SYNC_INDEX_INVALID. `pSyncNode` is not read.
SyncIndex syncNodeGetPreIndex(SSyncNode* pSyncNode, SyncIndex index) {
  const SyncIndex candidate = index - 1;
  return (candidate < SYNC_INDEX_INVALID) ? SYNC_INDEX_INVALID : candidate;
}
2022-06-29 08:40:36 +00:00
// if index < 0, return SYNC_TERM_INVALID
// if index == 0, return 0
// if index > 0, return preTerm
// if error, return SYNC_TERM_INVALID
// Term of the entry at index - 1.
//   index <  SYNC_INDEX_BEGIN : SYNC_TERM_INVALID
//   index == SYNC_INDEX_BEGIN : 0
// Otherwise the entry is looked up in the LRU cache, then in the log store.
// If neither has it, the snapshot's lastApplyTerm is used when the snapshot
// ends exactly at index - 1. Anything else is logged and yields
// SYNC_TERM_INVALID.
SyncTerm syncNodeGetPreTerm(SSyncNode* pSyncNode, SyncIndex index) {
  if (index < SYNC_INDEX_BEGIN) return SYNC_TERM_INVALID;
  if (index == SYNC_INDEX_BEGIN) return 0;

  SyncIndex       preIndex = index - 1;
  SSyncRaftEntry* pPreEntry = NULL;
  SLRUCache*      pEntryCache = pSyncNode->pLogStore->pCache;
  int32_t         code = 0;

  LRUHandle* cacheHandle = taosLRUCacheLookup(pEntryCache, &preIndex, sizeof(preIndex));
  if (cacheHandle != NULL) {
    pPreEntry = (SSyncRaftEntry*)taosLRUCacheValue(pEntryCache, cacheHandle);
    pSyncNode->pLogStore->cacheHit++;
    sNTrace(pSyncNode, "hit cache index:%" PRId64 ", bytes:%u, %p", preIndex, pPreEntry->bytes, pPreEntry);
  } else {
    pSyncNode->pLogStore->cacheMiss++;
    sNTrace(pSyncNode, "miss cache index:%" PRId64, preIndex);
    code = pSyncNode->pLogStore->syncLogGetEntry(pSyncNode->pLogStore, preIndex, &pPreEntry);
  }

  if (code == 0) {
    ASSERT(pPreEntry != NULL);
    SyncTerm preTerm = pPreEntry->term;

    // a cached entry is handed back to the cache; one read from the log store is destroyed
    if (cacheHandle != NULL) {
      taosLRUCacheRelease(pEntryCache, cacheHandle, false);
    } else {
      syncEntryDestroy(pPreEntry);
    }
    return preTerm;
  }

  // entry unavailable: fall back to the snapshot boundary
  SSnapshot snapshot = {.data = NULL,
                        .lastApplyIndex = SYNC_INDEX_INVALID,
                        .lastApplyTerm = SYNC_TERM_INVALID,
                        .lastConfigIndex = SYNC_INDEX_INVALID};
  if (pSyncNode->pFsm->FpGetSnapshotInfo != NULL) {
    pSyncNode->pFsm->FpGetSnapshotInfo(pSyncNode->pFsm, &snapshot);
    if (snapshot.lastApplyIndex == preIndex) {
      return snapshot.lastApplyTerm;
    }
  }

  sNError(pSyncNode, "sync node get pre term error, index:%" PRId64 ", snap-index:%" PRId64 ", snap-term:%" PRId64,
          index, snapshot.lastApplyIndex, snapshot.lastApplyTerm);
  return SYNC_TERM_INVALID;
}
2022-06-05 11:47:54 +00:00
// get pre index and term of "index"
// Write the index and term preceding `index` to the output pointers.
// Always returns 0; a failed term lookup shows up as the value stored in
// *pPreTerm by syncNodeGetPreTerm.
int32_t syncNodeGetPreIndexTerm(SSyncNode* pSyncNode, SyncIndex index, SyncIndex* pPreIndex, SyncTerm* pPreTerm) {
  const SyncIndex preIndex = syncNodeGetPreIndex(pSyncNode, index);
  *pPreIndex = preIndex;

  const SyncTerm preTerm = syncNodeGetPreTerm(pSyncNode, index);
  *pPreTerm = preTerm;

  return 0;
}
2022-03-05 04:28:34 +00:00
static void syncNodeEqPingTimer(void* param, void* tmrId) {
2022-11-12 02:08:28 +00:00
if (!syncIsInit()) return;
2022-03-14 06:05:40 +00:00
2022-11-12 02:08:28 +00:00
SSyncNode* pNode = param;
if (atomic_load_64(&pNode->pingTimerLogicClockUser) <= atomic_load_64(&pNode->pingTimerLogicClock)) {
SRpcMsg rpcMsg = {0};
2022-11-12 12:29:49 +00:00
int32_t code = syncBuildTimeout(&rpcMsg, SYNC_TIMEOUT_PING, atomic_load_64(&pNode->pingTimerLogicClock),
2022-11-12 02:08:28 +00:00
pNode->pingTimerMS, pNode);
if (code != 0) {
2022-11-14 09:33:48 +00:00
sError("failed to build ping msg");
2022-11-12 02:08:28 +00:00
rpcFreeCont(rpcMsg.pCont);
return;
2022-04-18 13:50:56 +00:00
}
2022-03-14 06:05:40 +00:00
2022-11-25 10:19:25 +00:00
// sTrace("enqueue ping msg");
2022-11-12 02:08:28 +00:00
code = pNode->syncEqMsg(pNode->msgcb, &rpcMsg);
if (code != 0) {
2022-11-14 09:33:48 +00:00
sError("failed to sync enqueue ping msg since %s", terrstr());
2022-11-12 02:08:28 +00:00
rpcFreeCont(rpcMsg.pCont);
return;
2022-06-15 03:42:04 +00:00
}
2022-03-14 06:05:40 +00:00
2022-11-12 02:08:28 +00:00
taosTmrReset(syncNodeEqPingTimer, pNode->pingTimerMS, pNode, syncEnv()->pTimerManager, &pNode->pPingTimer);
2022-06-10 08:51:17 +00:00
}
2022-03-14 06:05:40 +00:00
}
2022-03-07 06:18:46 +00:00
static void syncNodeEqElectTimer(void* param, void* tmrId) {
2022-11-15 05:43:30 +00:00
if (!syncIsInit()) return;
2022-06-21 07:09:23 +00:00
2022-11-28 10:21:46 +00:00
int64_t rid = (int64_t)param;
SSyncNode* pNode = syncNodeAcquire(rid);
2022-03-07 06:18:46 +00:00
if (pNode == NULL) return;
2022-11-28 10:21:46 +00:00
if (pNode->syncEqMsg == NULL) {
syncNodeRelease(pNode);
return;
}
2022-06-15 03:42:04 +00:00
2022-11-16 06:05:34 +00:00
int64_t tsNow = taosGetTimestampMs();
2022-11-28 10:21:46 +00:00
if (tsNow < pNode->electTimerParam.executeTime) {
syncNodeRelease(pNode);
return;
}
2022-03-07 06:18:46 +00:00
2022-11-12 02:08:28 +00:00
SRpcMsg rpcMsg = {0};
2022-11-16 06:05:34 +00:00
int32_t code =
syncBuildTimeout(&rpcMsg, SYNC_TIMEOUT_ELECTION, pNode->electTimerParam.logicClock, pNode->electTimerMS, pNode);
2022-11-16 03:13:53 +00:00
2022-11-12 02:08:28 +00:00
if (code != 0) {
2022-11-14 09:33:48 +00:00
sError("failed to build elect msg");
2022-11-28 10:21:46 +00:00
syncNodeRelease(pNode);
2022-11-12 02:08:28 +00:00
return;
2022-10-25 10:03:22 +00:00
}
2022-11-12 02:08:28 +00:00
SyncTimeout* pTimeout = rpcMsg.pCont;
2022-11-16 03:13:53 +00:00
sNTrace(pNode, "enqueue elect msg lc:%" PRId64, pTimeout->logicClock);
2022-11-12 02:08:28 +00:00
code = pNode->syncEqMsg(pNode->msgcb, &rpcMsg);
if (code != 0) {
2022-11-14 09:33:48 +00:00
sError("failed to sync enqueue elect msg since %s", terrstr());
2022-11-12 02:08:28 +00:00
rpcFreeCont(rpcMsg.pCont);
2022-11-28 10:21:46 +00:00
syncNodeRelease(pNode);
2022-11-16 06:05:34 +00:00
return;
2022-03-05 04:28:34 +00:00
}
2022-11-28 10:21:46 +00:00
syncNodeRelease(pNode);
2022-03-05 04:28:34 +00:00
}
2022-03-08 06:19:50 +00:00
static void syncNodeEqHeartbeatTimer(void* param, void* tmrId) {
2022-11-12 02:08:28 +00:00
if (!syncIsInit()) return;
2022-11-12 02:08:28 +00:00
SSyncNode* pNode = param;
2023-04-18 11:03:45 +00:00
if (pNode->totalReplicaNum > 1) {
2022-11-12 02:08:28 +00:00
if (atomic_load_64(&pNode->heartbeatTimerLogicClockUser) <= atomic_load_64(&pNode->heartbeatTimerLogicClock)) {
SRpcMsg rpcMsg = {0};
2022-11-12 12:29:49 +00:00
int32_t code = syncBuildTimeout(&rpcMsg, SYNC_TIMEOUT_HEARTBEAT, atomic_load_64(&pNode->heartbeatTimerLogicClock),
2022-11-12 02:08:28 +00:00
pNode->heartbeatTimerMS, pNode);
if (code != 0) {
2022-11-14 09:33:48 +00:00
sError("failed to build heartbeat msg");
2022-11-12 02:08:28 +00:00
return;
}
2022-03-08 06:19:50 +00:00
sTrace("vgId:%d, enqueue heartbeat timer", pNode->vgId);
2022-11-12 02:08:28 +00:00
code = pNode->syncEqMsg(pNode->msgcb, &rpcMsg);
if (code != 0) {
2022-11-14 09:33:48 +00:00
sError("failed to enqueue heartbeat msg since %s", terrstr());
2022-11-12 02:08:28 +00:00
rpcFreeCont(rpcMsg.pCont);
return;
}
2022-11-12 02:08:28 +00:00
taosTmrReset(syncNodeEqHeartbeatTimer, pNode->heartbeatTimerMS, pNode, syncEnv()->pTimerManager,
&pNode->pHeartbeatTimer);
} else {
2022-11-12 02:08:28 +00:00
sTrace("==syncNodeEqHeartbeatTimer== heartbeatTimerLogicClock:%" PRId64 ", heartbeatTimerLogicClockUser:%" PRId64,
pNode->heartbeatTimerLogicClock, pNode->heartbeatTimerLogicClockUser);
}
2022-03-08 06:19:50 +00:00
}
}
static void syncNodeEqPeerHeartbeatTimer(void* param, void* tmrId) {
int64_t hbDataRid = (int64_t)param;
int64_t tsNow = taosGetTimestampMs();
SSyncHbTimerData* pData = syncHbTimerDataAcquire(hbDataRid);
if (pData == NULL) {
2023-04-24 08:06:56 +00:00
sError("hb timer get pData NULL, %" PRId64, hbDataRid);
return;
}
SSyncNode* pSyncNode = syncNodeAcquire(pData->syncNodeRid);
2022-11-09 09:38:46 +00:00
if (pSyncNode == NULL) {
syncHbTimerDataRelease(pData);
2022-11-18 03:06:35 +00:00
sError("hb timer get pSyncNode NULL");
return;
}
SSyncTimer* pSyncTimer = pData->pTimer;
if (!pSyncNode->isStart) {
syncNodeRelease(pSyncNode);
syncHbTimerDataRelease(pData);
2022-11-18 03:06:35 +00:00
sError("vgId:%d, hb timer sync node already stop", pSyncNode->vgId);
2022-11-09 09:38:46 +00:00
return;
}
2022-10-20 06:53:03 +00:00
if (pSyncNode->state != TAOS_SYNC_STATE_LEADER) {
syncNodeRelease(pSyncNode);
syncHbTimerDataRelease(pData);
2022-11-18 03:06:35 +00:00
sError("vgId:%d, hb timer sync node not leader", pSyncNode->vgId);
2022-10-20 06:53:03 +00:00
return;
}
2023-04-18 11:03:45 +00:00
sTrace("vgId:%d, eq peer hb timer, rid:%" PRId64 " addr:%" PRId64, pSyncNode->vgId, hbDataRid, pData->destId.addr);
2023-04-18 11:03:45 +00:00
if (pSyncNode->totalReplicaNum > 1) {
2022-11-18 03:06:35 +00:00
int64_t timerLogicClock = atomic_load_64(&pSyncTimer->logicClock);
int64_t msgLogicClock = atomic_load_64(&pData->logicClock);
if (timerLogicClock == msgLogicClock) {
if (tsNow > pData->execTime) {
pData->execTime += pSyncTimer->timerMS;
SRpcMsg rpcMsg = {0};
(void)syncBuildHeartbeat(&rpcMsg, pSyncNode->vgId);
pSyncNode->minMatchIndex = syncMinMatchIndex(pSyncNode);
SyncHeartbeat* pSyncMsg = rpcMsg.pCont;
pSyncMsg->srcId = pSyncNode->myRaftId;
pSyncMsg->destId = pData->destId;
2023-02-13 11:00:10 +00:00
pSyncMsg->term = raftStoreGetTerm(pSyncNode);
pSyncMsg->commitIndex = pSyncNode->commitIndex;
pSyncMsg->minMatchIndex = pSyncNode->minMatchIndex;
pSyncMsg->privateTerm = 0;
pSyncMsg->timeStamp = tsNow;
// update reset time
int64_t timerElapsed = tsNow - pSyncTimer->timeStamp;
pSyncTimer->timeStamp = tsNow;
// send msg
2023-04-18 11:03:45 +00:00
sTrace("vgId:%d, send heartbeat to dnode:%d", pSyncNode->vgId, DID(&(pSyncMsg->destId)));
syncLogSendHeartbeat(pSyncNode, pSyncMsg, false, timerElapsed, pData->execTime);
syncNodeSendHeartbeat(pSyncNode, &pSyncMsg->destId, &rpcMsg);
} else {
}
2022-11-18 03:06:35 +00:00
if (syncIsInit()) {
// sTrace("vgId:%d, reset peer hb timer", pSyncNode->vgId);
taosTmrReset(syncNodeEqPeerHeartbeatTimer, pSyncTimer->timerMS / HEARTBEAT_TICK_NUM, (void*)hbDataRid,
syncEnv()->pTimerManager, &pSyncTimer->pTimer);
2022-11-18 03:06:35 +00:00
} else {
sError("sync env is stop, reset peer hb timer error");
}
} else {
2022-11-18 03:06:35 +00:00
sTrace("vgId:%d, do not send hb, timerLogicClock:%" PRId64 ", msgLogicClock:%" PRId64 "", pSyncNode->vgId,
timerLogicClock, msgLogicClock);
}
}
syncHbTimerDataRelease(pData);
syncNodeRelease(pSyncNode);
}
2022-09-20 11:57:02 +00:00
// Deleter passed to the LRU cache by syncCacheEntry: frees the cached value.
// The key and its length are not used.
static void deleteCacheEntry(const void* key, size_t keyLen, void* value) {
  (void)key;
  (void)keyLen;
  taosMemoryFree(value);
}
// Insert `pEntry` into the log store's LRU cache, keyed by its index.
// On return `*h` is whatever handle taosLRUCacheInsert produced.
// Returns 0 when the insert status is TAOS_LRU_STATUS_OK, -1 otherwise.
int32_t syncCacheEntry(SSyncLogStore* pLogStore, SSyncRaftEntry* pEntry, LRUHandle** h) {
  SSyncLogStoreData* pStoreData = pLogStore->data;
  sNTrace(pStoreData->pSyncNode, "in cache index:%" PRId64 ", bytes:%u, %p", pEntry->index, pEntry->bytes, pEntry);

  // charge passed to the cache: entry header plus payload
  int32_t entryLen = sizeof(*pEntry) + pEntry->dataLen;

  LRUStatus status = taosLRUCacheInsert(pLogStore->pCache, &pEntry->index, sizeof(pEntry->index), pEntry, entryLen,
                                        deleteCacheEntry, h, TAOS_LRU_PRIORITY_LOW);

  return (status == TAOS_LRU_STATUS_OK) ? 0 : -1;
}
2022-10-31 04:59:42 +00:00
// Append `pEntry` to the node's log buffer and advance replication.
// On the two failure paths below the entry is destroyed here.
// With more than one replica the function returns after the append; with a
// single replica it also updates the commit index and commits the buffer.
// Returns 0 on success, -1 on failure.
int32_t syncNodeAppend(SSyncNode* ths, SSyncRaftEntry* pEntry) {
  // reject payloads that cannot even hold a message head
  if (pEntry->dataLen < sizeof(SMsgHead)) {
    sError("vgId:%d, cannot append an invalid client request with no msg head. type:%s, dataLen:%d", ths->vgId,
           TMSG_INFO(pEntry->originalRpcType), pEntry->dataLen);
    syncEntryDestroy(pEntry);
    return -1;
  }

  // append to log buffer; on failure report terrno through the FSM before dropping the entry
  int32_t appendCode = syncLogBufferAppend(ths->pLogBuf, ths, pEntry);
  if (appendCode < 0) {
    sError("vgId:%d, failed to enqueue sync log buffer, index:%" PRId64, ths->vgId, pEntry->index);
    ASSERT(terrno != 0);
    (void)syncFsmExecute(ths, ths->pFsm, ths->state, raftStoreGetTerm(ths), pEntry, terrno);
    syncEntryDestroy(pEntry);
    return -1;
  }

  // proceed match index, replicating as needed
  SyncIndex matchIndex = syncLogBufferProceed(ths->pLogBuf, ths, NULL);

  sTrace("vgId:%d, append raft entry. index:%" PRId64 ", term:%" PRId64 " pBuf: [%" PRId64 " %" PRId64 " %" PRId64
         ", %" PRId64 ")",
         ths->vgId, pEntry->index, pEntry->term, ths->pLogBuf->startIndex, ths->pLogBuf->commitIndex,
         ths->pLogBuf->matchIndex, ths->pLogBuf->endIndex);

  // multi replica: nothing more to do in this function
  const bool multiReplica = (ths->replicaNum > 1);
  if (multiReplica) {
    return 0;
  }

  // single replica: commit right away
  (void)syncNodeUpdateCommitIndex(ths, matchIndex);
  int32_t commitCode = syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex);
  if (commitCode < 0) {
    sError("vgId:%d, failed to commit until commitIndex:%" PRId64 "", ths->vgId, ths->commitIndex);
    return -1;
  }

  return 0;
}
// True when at least `quorum` peers have a recorded receive time older than
// tsHeartbeatTimeout. Peers whose receive time is 0 or -1 are not counted.
// Always false when totalReplicaNum is 1.
bool syncNodeHeartbeatReplyTimeout(SSyncNode* pSyncNode) {
  if (pSyncNode->totalReplicaNum == 1) {
    return false;
  }

  int32_t timedOutPeers = 0;
  int64_t tsNow = taosGetTimestampMs();

  for (int32_t peer = 0; peer < pSyncNode->peersNum; ++peer) {
    int64_t recvTime = syncIndexMgrGetRecvTime(pSyncNode->pMatchIndex, &(pSyncNode->peersId[peer]));
    if (recvTime != 0 && recvTime != -1 && tsNow - recvTime > tsHeartbeatTimeout) {
      timedOutPeers++;
    }
  }

  return timedOutPeers >= pSyncNode->quorum;
}
// True when any of the first totalReplicaNum snapshot senders exists and has
// its `start` flag set. A NULL node yields false.
bool syncNodeSnapshotSending(SSyncNode* pSyncNode) {
  if (pSyncNode == NULL) return false;

  for (int32_t slot = 0; slot < pSyncNode->totalReplicaNum; ++slot) {
    if (pSyncNode->senders[slot] != NULL && pSyncNode->senders[slot]->start) {
      return true;
    }
  }

  return false;
}
// True when the node has a snapshot receiver whose `start` flag is set.
// A NULL node or a missing receiver yields false.
bool syncNodeSnapshotRecving(SSyncNode* pSyncNode) {
  return pSyncNode != NULL && pSyncNode->pNewNodeReceiver != NULL && pSyncNode->pNewNodeReceiver->start;
}
2022-03-24 09:30:50 +00:00
// Build a noop entry at the log buffer's end index for the current term and
// append it through syncNodeAppend.
//
// Returns -1 with terrno = TSDB_CODE_OUT_OF_MEMORY when the entry cannot be
// built; otherwise returns the result of syncNodeAppend. The previous version
// discarded that result and always returned 0, so callers that test for a
// negative return could never observe an append failure.
static int32_t syncNodeAppendNoop(SSyncNode* ths) {
  SyncIndex index = syncLogBufferGetEndIndex(ths->pLogBuf);
  SyncTerm  term = raftStoreGetTerm(ths);

  SSyncRaftEntry* pEntry = syncEntryBuildNoop(term, index, ths->vgId);
  if (pEntry == NULL) {
    terrno = TSDB_CODE_OUT_OF_MEMORY;
    return -1;
  }

  // syncNodeAppend destroys the entry itself on its failure paths
  return syncNodeAppend(ths, pEntry);
}
// Legacy noop append: build a noop entry at the log store's write index and,
// when this node is leader, append it to the log store and cache it.
//
// Returns 0 on success (including the non-leader case, where the entry is
// simply discarded) and -1 when the log store append fails. The failure path
// now destroys the entry; previously it leaked.
static int32_t syncNodeAppendNoopOld(SSyncNode* ths) {
  SyncIndex index = ths->pLogStore->syncLogWriteIndex(ths->pLogStore);
  SyncTerm  term = raftStoreGetTerm(ths);

  SSyncRaftEntry* pEntry = syncEntryBuildNoop(term, index, ths->vgId);
  ASSERT(pEntry != NULL);

  LRUHandle* h = NULL;

  if (ths->state == TAOS_SYNC_STATE_LEADER) {
    int32_t code = ths->pLogStore->syncLogAppendEntry(ths->pLogStore, pEntry, false);
    if (code != 0) {
      sError("append noop error");
      // the entry was never handed to the cache, so it is still owned here
      syncEntryDestroy(pEntry);
      return -1;
    }

    // best effort: a failed insert leaves h NULL and the entry is destroyed below
    (void)syncCacheEntry(ths->pLogStore, pEntry, &h);
  }

  if (h) {
    // the cache holds the entry; only drop this function's handle
    taosLRUCacheRelease(ths->pLogStore->pCache, h, false);
  } else {
    syncEntryDestroy(pEntry);
  }

  return 0;
}
2022-11-12 12:37:15 +00:00
// Handle a heartbeat message from a peer.
//
// Flow:
//   1. Log the receipt; drop the message (return 0) when the sender is not in
//      this node's raft group.
//   2. Build the heartbeat reply up front.
//   3. Same term and this node is not leader: record the receive time, adopt
//      the sender's minMatchIndex, schedule an election timer reset, and, for
//      followers and learners, enqueue a local commit command.
//   4. Term >= current and this node is leader: enqueue a local step-down
//      command.
//   5. Send the reply, then reset the election timer if scheduled.
//
// Returns 0 on the normal paths and -1 when the reply cannot be built.
//
// Changes from the previous version:
//   - the results of syncBuildHeartbeatReply / syncBuildLocalCmd are checked
//     before the message content is dereferenced;
//   - a built local command is freed when there is no enqueue callback;
//     previously it leaked.
int32_t syncNodeOnHeartbeat(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
  SyncHeartbeat* pMsg = pRpcMsg->pCont;
  bool           resetElect = false;

  const STraceId* trace = &pRpcMsg->info.traceId;
  char            tbuf[40] = {0};
  TRACE_TO_STR(trace, tbuf);

  int64_t tsMs = taosGetTimestampMs();
  int64_t timeDiff = tsMs - pMsg->timeStamp;
  syncLogRecvHeartbeat(ths, pMsg, timeDiff, tbuf);

  if (!syncNodeInRaftGroup(ths, &pMsg->srcId)) {
    sWarn(
        "vgId:%d, drop heartbeat msg from dnode:%d, because it come from another cluster:%d, differ from current "
        "cluster:%d",
        ths->vgId, DID(&(pMsg->srcId)), CID(&(pMsg->srcId)), CID(&(ths->myRaftId)));
    return 0;
  }

  SRpcMsg rpcMsg = {0};
  int32_t buildCode = syncBuildHeartbeatReply(&rpcMsg, ths->vgId);
  if (buildCode != 0 || rpcMsg.pCont == NULL) {
    sError("vgId:%d, failed to build heartbeat reply, code:%d", ths->vgId, buildCode);
    if (rpcMsg.pCont != NULL) {
      rpcFreeCont(rpcMsg.pCont);
    }
    return -1;
  }

  SyncTerm currentTerm = raftStoreGetTerm(ths);

  SyncHeartbeatReply* pMsgReply = rpcMsg.pCont;
  pMsgReply->destId = pMsg->srcId;
  pMsgReply->srcId = ths->myRaftId;
  pMsgReply->term = currentTerm;
  pMsgReply->privateTerm = 8864;  // magic number
  pMsgReply->startTime = ths->startTime;
  pMsgReply->timeStamp = tsMs;

  sTrace(
      "vgId:%d, heartbeat msg from dnode:%d, cluster:%d, Msgterm:%" PRId64 " currentTerm:%" PRId64,
      ths->vgId, DID(&(pMsg->srcId)), CID(&(pMsg->srcId)), pMsg->term, currentTerm);

  if (pMsg->term == currentTerm && ths->state != TAOS_SYNC_STATE_LEADER) {
    syncIndexMgrSetRecvTime(ths->pNextIndex, &(pMsg->srcId), tsMs);
    resetElect = true;

    ths->minMatchIndex = pMsg->minMatchIndex;

    if (ths->state == TAOS_SYNC_STATE_FOLLOWER || ths->state == TAOS_SYNC_STATE_LEARNER) {
      SRpcMsg rpcMsgLocalCmd = {0};
      int32_t cmdCode = syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId);
      if (cmdCode != 0 || rpcMsgLocalCmd.pCont == NULL) {
        sError("vgId:%d, failed to build commit msg from heartbeat, code:%d", ths->vgId, cmdCode);
        if (rpcMsgLocalCmd.pCont != NULL) {
          rpcFreeCont(rpcMsgLocalCmd.pCont);
        }
      } else {
        SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont;
        pSyncMsg->cmd =
            (ths->state == TAOS_SYNC_STATE_LEARNER) ? SYNC_LOCAL_CMD_LEARNER_CMT : SYNC_LOCAL_CMD_FOLLOWER_CMT;
        pSyncMsg->commitIndex = pMsg->commitIndex;
        pSyncMsg->currentTerm = pMsg->term;

        if (ths->syncEqMsg != NULL && ths->msgcb != NULL) {
          int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd);
          if (code != 0) {
            sError("vgId:%d, failed to enqueue commit msg from heartbeat since %s, code:%d", ths->vgId, terrstr(), code);
            rpcFreeCont(rpcMsgLocalCmd.pCont);
          } else {
            sTrace("vgId:%d, enqueue commit msg from heartbeat, commit-index:%" PRId64 ", term:%" PRId64, ths->vgId,
                   pMsg->commitIndex, pMsg->term);
          }
        } else {
          // nobody will consume the command; release it here
          rpcFreeCont(rpcMsgLocalCmd.pCont);
        }
      }
    }
  }

  if (pMsg->term >= currentTerm && ths->state == TAOS_SYNC_STATE_LEADER) {
    SRpcMsg rpcMsgLocalCmd = {0};
    int32_t cmdCode = syncBuildLocalCmd(&rpcMsgLocalCmd, ths->vgId);
    if (cmdCode != 0 || rpcMsgLocalCmd.pCont == NULL) {
      sError("vgId:%d, failed to build step-down msg, code:%d", ths->vgId, cmdCode);
      if (rpcMsgLocalCmd.pCont != NULL) {
        rpcFreeCont(rpcMsgLocalCmd.pCont);
      }
    } else {
      SyncLocalCmd* pSyncMsg = rpcMsgLocalCmd.pCont;
      pSyncMsg->cmd = SYNC_LOCAL_CMD_STEP_DOWN;
      pSyncMsg->currentTerm = pMsg->term;
      pSyncMsg->commitIndex = pMsg->commitIndex;

      if (ths->syncEqMsg != NULL && ths->msgcb != NULL) {
        int32_t code = ths->syncEqMsg(ths->msgcb, &rpcMsgLocalCmd);
        if (code != 0) {
          sError("vgId:%d, sync enqueue step-down msg error, code:%d", ths->vgId, code);
          rpcFreeCont(rpcMsgLocalCmd.pCont);
        } else {
          sTrace("vgId:%d, sync enqueue step-down msg, new-term:%" PRId64, ths->vgId, pMsg->term);
        }
      } else {
        // nobody will consume the command; release it here
        rpcFreeCont(rpcMsgLocalCmd.pCont);
      }
    }
  }

  // reply
  syncNodeSendMsgById(&pMsgReply->destId, ths, &rpcMsg);

  if (resetElect) syncNodeResetElectTimer(ths);
  return 0;
}
2022-11-12 13:31:01 +00:00
// Handle a heartbeat reply carried in pRpcMsg.
//
// Steps:
//   1. render the rpc trace id into a local text buffer for logging;
//   2. look up the log replication manager for the replying peer;
//   3. log the reply together with (local now - pMsg->timeStamp);
//   4. record the receive time for the peer in ths->pMatchIndex;
//   5. hand the reply over to the log replication manager.
//
// Returns -1 when no log replication manager is found for the sender,
// otherwise whatever syncLogReplProcessHeartbeatReply() returns.
int32_t syncNodeOnHeartbeatReply(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
  const STraceId* trace = &pRpcMsg->info.traceId;
  char            tbuf[40] = {0};
  TRACE_TO_STR(trace, tbuf);

  SyncHeartbeatReply* pMsg = pRpcMsg->pCont;
  SSyncLogReplMgr*    pMgr = syncNodeGetLogReplMgr(ths, &pMsg->srcId);
  if (pMgr == NULL) {
    // "%016" PRIx64: zero-padded 16-digit hex address (the width/flag must follow the '%').
    sError("vgId:%d, failed to get log repl mgr for the peer at addr 0x%016" PRIx64 "", ths->vgId, pMsg->srcId.addr);
    return -1;
  }

  // a single timestamp is used for both the log line and the receive-time bookkeeping
  int64_t tsMs = taosGetTimestampMs();
  syncLogRecvHeartbeatReply(ths, pMsg, tsMs - pMsg->timeStamp, tbuf);

  syncIndexMgrSetRecvTime(ths->pMatchIndex, &pMsg->srcId, tsMs);
  return syncLogReplProcessHeartbeatReply(pMgr, ths, pMsg);
}
2022-11-17 09:21:51 +00:00
// Older heartbeat-reply handler: logs the reply and refreshes the sender's
// receive time in ths->pMatchIndex. Always returns 0.
int32_t syncNodeOnHeartbeatReplyOld(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
  // textual form of the rpc trace id, used only for logging
  char            tbuf[40] = {0};
  const STraceId* trace = &pRpcMsg->info.traceId;
  TRACE_TO_STR(trace, tbuf);

  SyncHeartbeatReply* pReply = pRpcMsg->pCont;
  int64_t             nowMs = taosGetTimestampMs();

  // the logged delta is local "now" minus the timestamp carried in the reply
  syncLogRecvHeartbeatReply(ths, pReply, nowMs - pReply->timeStamp, tbuf);

  // update last reply time, make decision whether the other node is alive or not
  syncIndexMgrSetRecvTime(ths->pMatchIndex, &pReply->srcId, nowMs);
  return 0;
}
2022-11-13 09:14:03 +00:00
// Execute a local command message taken from pRpcMsg->pCont.
//
//   SYNC_LOCAL_CMD_STEP_DOWN     -> syncNodeStepDown() with the term from the message.
//   SYNC_LOCAL_CMD_FOLLOWER_CMT,
//   SYNC_LOCAL_CMD_LEARNER_CMT   -> update the commit index (only when the message term
//                                   equals the log buffer's last match term), then
//                                   commit the log buffer up to ths->commitIndex.
//   anything else                -> logged as an error.
//
// Always returns 0; failures are only logged.
int32_t syncNodeOnLocalCmd(SSyncNode* ths, const SRpcMsg* pRpcMsg) {
  SyncLocalCmd* pCmd = pRpcMsg->pCont;
  syncLogRecvLocalCmd(ths, pCmd, "");

  if (pCmd->cmd == SYNC_LOCAL_CMD_STEP_DOWN) {
    syncNodeStepDown(ths, pCmd->currentTerm);
    return 0;
  }

  if (pCmd->cmd != SYNC_LOCAL_CMD_FOLLOWER_CMT && pCmd->cmd != SYNC_LOCAL_CMD_LEARNER_CMT) {
    sError("error local cmd");
    return 0;
  }

  // commit request: nothing to do while the log buffer is empty
  if (syncLogBufferIsEmpty(ths->pLogBuf)) {
    sError("vgId:%d, sync log buffer is empty.", ths->vgId);
    return 0;
  }

  // adopt the commit index from the message only if the terms agree
  SyncTerm lastMatchTerm = syncLogBufferGetLastMatchTerm(ths->pLogBuf);
  if (lastMatchTerm == pCmd->currentTerm) {
    (void)syncNodeUpdateCommitIndex(ths, pCmd->commitIndex);
  }

  // the commit is attempted with the current ths->commitIndex, updated or not
  if (syncLogBufferCommit(ths->pLogBuf, ths, ths->commitIndex) < 0) {
    sError("vgId:%d, failed to commit raft log since %s. commit index:%" PRId64 "", ths->vgId, terrstr(),
           ths->commitIndex);
  }
  return 0;
}
2022-03-18 07:21:40 +00:00
// TLA+ Spec
// ClientRequest(i, v) ==
// /\ state[i] = Leader
// /\ LET entry == [term |-> currentTerm[i],
// value |-> v]
// newLog == Append(log[i], entry)
// IN log' = [log EXCEPT ![i] = newLog]
// /\ UNCHANGED <<messages, serverVars, candidateVars,
// leaderVars, commitIndex>>
//
2022-03-21 08:28:50 +00:00
// Handle a client request (see the ClientRequest step of the TLA+ spec).
//
// A raft entry is built from pMsg using the current term and the end index of
// the log buffer. If this node is the leader, the entry is handed to
// syncNodeAppend() and the chosen index is reported through pRetIndex.
//
// Params:
//   ths       - the sync node.
//   pMsg      - the request; TDMT_SYNC_CLIENT_REQUEST messages are converted with
//               syncEntryBuildFromClientRequest(), all others with
//               syncEntryBuildFromRpcMsg().
//   pRetIndex - optional out-parameter (may be NULL); receives the entry index,
//               written only on the leader path, before the append is attempted.
//
// Returns the result of syncNodeAppend() on the leader; -1 if the entry could
// not be built or this node is not the leader.
int32_t syncNodeOnClientRequest(SSyncNode* ths, SRpcMsg* pMsg, SyncIndex* pRetIndex) {
  sNTrace(ths, "on client request");

  SyncIndex index = syncLogBufferGetEndIndex(ths->pLogBuf);
  SyncTerm  term = raftStoreGetTerm(ths);

  SSyncRaftEntry* pEntry = NULL;
  if (pMsg->msgType == TDMT_SYNC_CLIENT_REQUEST) {
    pEntry = syncEntryBuildFromClientRequest(pMsg->pCont, term, index);
  } else {
    pEntry = syncEntryBuildFromRpcMsg(pMsg, term, index);
  }

  if (pEntry == NULL) {
    sError("vgId:%d, failed to process client request since %s.", ths->vgId, terrstr());
    return -1;
  }

  if (ths->state != TAOS_SYNC_STATE_LEADER) {
    // only the leader may append; release the entry built above
    syncEntryDestroy(pEntry);
    return -1;
  }

  if (pRetIndex != NULL) {
    *pRetIndex = index;
  }

  // NOTE(review): pEntry is not released here after the call, so syncNodeAppend()
  // presumably takes ownership of it - confirm against its implementation.
  return syncNodeAppend(ths, pEntry);
}
2022-04-19 13:39:42 +00:00
// Map a sync state to its lower-case display name.
// States without a dedicated name map to "unknown".
const char* syncStr(ESyncState state) {
  const char* name = "unknown";

  switch (state) {
    case TAOS_SYNC_STATE_FOLLOWER:
      name = "follower";
      break;
    case TAOS_SYNC_STATE_CANDIDATE:
      name = "candidate";
      break;
    case TAOS_SYNC_STATE_LEADER:
      name = "leader";
      break;
    case TAOS_SYNC_STATE_ERROR:
      name = "error";
      break;
    case TAOS_SYNC_STATE_OFFLINE:
      name = "offline";
      break;
    case TAOS_SYNC_STATE_LEARNER:
      name = "learner";
      break;
    default:
      break;
  }

  return name;
}
2022-06-06 08:02:25 +00:00
// Locate this node inside pNewCfg and store its position in pNewCfg->myIndex.
//
// Each of the first totalReplicaNum entries of pNewCfg->nodeInfo is turned into
// a raft id (address from SYNC_ADDR, vgId from ths) and compared with
// ths->myRaftId.
//
// Returns 0 when a matching entry is found (myIndex updated), -1 otherwise
// (pNewCfg left unmodified).
int32_t syncNodeUpdateNewConfigIndex(SSyncNode* ths, SSyncCfg* pNewCfg) {
  for (int32_t idx = 0; idx < pNewCfg->totalReplicaNum; idx++) {
    SRaftId candidate = {
        .addr = SYNC_ADDR(&pNewCfg->nodeInfo[idx]),
        .vgId = ths->vgId,
    };

    if (!syncUtilSameId(&(ths->myRaftId), &candidate)) {
      continue;
    }

    pNewCfg->myIndex = idx;
    return 0;
  }

  return -1;
}
2022-06-25 12:31:42 +00:00
// True only when all three conditions hold: the node has exactly one replica,
// syncUtilUserCommit() accepts the message type, and the vgId is not 1.
bool syncNodeIsOptimizedOneReplica(SSyncNode* ths, SRpcMsg* pMsg) {
  if (ths->replicaNum != 1) {
    return false;
  }
  if (!syncUtilUserCommit(pMsg->msgType)) {
    return false;
  }
  return ths->vgId != 1;
}
// Report whether pRaftId equals one of the first totalReplicaNum ids in
// ths->replicasId.
bool syncNodeInRaftGroup(SSyncNode* ths, SRaftId* pRaftId) {
  bool isMember = false;

  // stop scanning as soon as a match is seen
  for (int32_t idx = 0; !isMember && idx < ths->totalReplicaNum; ++idx) {
    if (syncUtilSameId(&((ths->replicasId)[idx]), pRaftId)) {
      isMember = true;
    }
  }

  return isMember;
}
// Return the snapshot sender stored at the same position as pDestId in
// ths->replicasId, or NULL when pDestId matches none of the first
// totalReplicaNum replica ids.
SSyncSnapshotSender* syncNodeGetSnapshotSender(SSyncNode* ths, SRaftId* pDestId) {
  SSyncSnapshotSender* pFound = NULL;
  int32_t              idx = 0;

  // the whole array is scanned; if several ids matched, the last one would win
  while (idx < ths->totalReplicaNum) {
    if (syncUtilSameId(pDestId, &((ths->replicasId)[idx]))) {
      pFound = (ths->senders)[idx];
    }
    ++idx;
  }

  return pFound;
}
2022-06-20 09:48:56 +00:00
// Return a pointer to the heartbeat timer slot that shares its position with
// pDestId in ths->replicasId, or NULL when pDestId matches none of the first
// totalReplicaNum replica ids.
SSyncTimer* syncNodeGetHbTimer(SSyncNode* ths, SRaftId* pDestId) {
  SSyncTimer* pFound = NULL;
  int32_t     idx = 0;

  // the whole array is scanned; if several ids matched, the last one would win
  while (idx < ths->totalReplicaNum) {
    if (syncUtilSameId(pDestId, &((ths->replicasId)[idx]))) {
      pFound = &((ths->peerHeartbeatTimerArr)[idx]);
    }
    ++idx;
  }

  return pFound;
}
2022-10-16 04:07:02 +00:00
// Return a pointer to the peer-state slot that shares its position with
// pDestId in ths->replicasId, or NULL when pDestId matches none of the first
// totalReplicaNum replica ids.
SPeerState* syncNodeGetPeerState(SSyncNode* ths, const SRaftId* pDestId) {
  SPeerState* pFound = NULL;
  int32_t     idx = 0;

  // the whole array is scanned; if several ids matched, the last one would win
  while (idx < ths->totalReplicaNum) {
    if (syncUtilSameId(pDestId, &((ths->replicasId)[idx]))) {
      pFound = &((ths->peerStates)[idx]);
    }
    ++idx;
  }

  return pFound;
}
// Decide whether the append-entries message pMsg should be sent to pDestId.
//
// Returns false when no peer state exists for pDestId, or when the index that
// would be sent (pMsg->prevLogIndex + 1) equals the peer's lastSendIndex and
// less than SYNC_APPEND_ENTRIES_TIMEOUT_MS has elapsed since lastSendTime.
// Returns true otherwise.
bool syncNodeNeedSendAppendEntries(SSyncNode* ths, const SRaftId* pDestId, const SyncAppendEntries* pMsg) {
  SPeerState* pPeer = syncNodeGetPeerState(ths, pDestId);
  if (pPeer == NULL) {
    sError("vgId:%d, replica maybe dropped", ths->vgId);
    return false;
  }

  SyncIndex nextToSend = pMsg->prevLogIndex + 1;
  int64_t   nowMs = taosGetTimestampMs();

  // send when the index differs from the last one sent, or the last send has timed out
  return pPeer->lastSendIndex != nextToSend || nowMs - pPeer->lastSendTime >= SYNC_APPEND_ENTRIES_TIMEOUT_MS;
}
2022-06-20 09:48:56 +00:00
// Check whether the node is currently allowed to change.
//
// The answer is false (and an error is logged) when any of these holds:
//   - pSyncNode->changing is already set;
//   - commitIndex is at least SYNC_INDEX_BEGIN and differs from the last index;
//   - a snapshot sender for one of the peers exists and has its start flag set.
// Otherwise the answer is true.
bool syncNodeCanChange(SSyncNode* pSyncNode) {
  if (pSyncNode->changing) {
    sError("sync cannot change");
    return false;
  }

  if (pSyncNode->commitIndex >= SYNC_INDEX_BEGIN) {
    SyncIndex lastIdx = syncNodeGetLastIndex(pSyncNode);
    if (lastIdx != pSyncNode->commitIndex) {
      sError("sync cannot change2");
      return false;
    }
  }

  for (int32_t idx = 0; idx < pSyncNode->peersNum; idx++) {
    SSyncSnapshotSender* pSender = syncNodeGetSnapshotSender(pSyncNode, &(pSyncNode->peersId)[idx]);
    if (pSender == NULL || !pSender->start) {
      continue;
    }

    sError("sync cannot change3");
    return false;
  }

  return true;
}