2024-10-17 09:04:34 +00:00
package cmd
import (
"bytes"
"context"
"crypto/tls"
"fmt"
"io"
"net"
"net/http"
"net/url"
"strconv"
"sync"
"time"
"github.com/sirupsen/logrus"
"github.com/taosdata/taoskeeper/db"
"github.com/taosdata/taoskeeper/infrastructure/config"
"github.com/taosdata/taoskeeper/infrastructure/log"
"github.com/taosdata/taoskeeper/util"
"github.com/taosdata/taoskeeper/util/pool"
)
// Package-level state shared by the commands in this file.
var (
	// logger is the logger returned by log.GetLogger for the "CMD" name.
	logger = log.GetLogger("CMD")

	// MAX_SQL_LEN is the buffered size, in bytes, at which the transfer
	// routines flush the pending insert statement or line-protocol batch.
	MAX_SQL_LEN = 1000000
)
// Command carries the connections and settings used by the one-off
// maintenance commands (transfer / drop) implemented in this file.
type Command struct {
	// fromTime is the start of the time range to transfer; it is filled in by
	// ProcessTransfer from the configured FromTime.
	fromTime time.Time

	// client sends the line-protocol write requests.
	client *http.Client

	// conn runs SQL statements (queries, inserts, drops).
	conn *db.Connector

	// username and password are sent as HTTP basic-auth credentials on writes.
	username string
	password string

	// url is the endpoint that line-protocol batches are POSTed to.
	url *url.URL
}
func NewCommand ( conf * config . Config ) * Command {
client := & http . Client {
Transport : & http . Transport {
Proxy : http . ProxyFromEnvironment ,
DialContext : ( & net . Dialer {
Timeout : 30 * time . Second ,
KeepAlive : 30 * time . Second ,
} ) . DialContext ,
IdleConnTimeout : 90 * time . Second ,
TLSHandshakeTimeout : 10 * time . Second ,
ExpectContinueTimeout : 1 * time . Second ,
DisableCompression : true ,
TLSClientConfig : & tls . Config {
InsecureSkipVerify : true ,
} ,
} ,
}
conn , err := db . NewConnectorWithDb ( conf . TDengine . Username , conf . TDengine . Password , conf . TDengine . Host , conf . TDengine . Port , conf . Metrics . Database . Name , conf . TDengine . Usessl )
if err != nil {
logger . Errorf ( "init db connect error, msg:%s" , err )
panic ( err )
}
2025-01-06 14:35:08 +00:00
return & Command {
2024-10-17 09:04:34 +00:00
client : client ,
conn : conn ,
username : conf . TDengine . Username ,
password : conf . TDengine . Password ,
url : & url . URL {
Scheme : "http" ,
Host : fmt . Sprintf ( "%s:%d" , conf . TDengine . Host , conf . TDengine . Port ) ,
Path : "/influxdb/v1/write" ,
RawQuery : fmt . Sprintf ( "db=%s&precision=ms" , conf . Metrics . Database . Name ) ,
} ,
}
}
// Process dispatches to the command selected in conf.
//
// Exactly one of conf.Transfer and conf.Drop may be set. The only supported
// values are "old_taosd_metric" for Transfer and "old_taosd_metric_stables"
// for Drop; anything else is logged as an error and ignored. When neither is
// set, Process does nothing.
func (cmd *Command) Process(conf *config.Config) {
	transferSet := len(conf.Transfer) > 0
	dropSet := len(conf.Drop) > 0

	switch {
	case transferSet && dropSet:
		logger.Errorf("transfer and drop can't be set at the same time")
	case conf.Transfer == "old_taosd_metric":
		cmd.ProcessTransfer(conf)
	case transferSet:
		logger.Errorf("transfer only support old_taosd_metric")
	case conf.Drop == "old_taosd_metric_stables":
		cmd.ProcessDrop(conf)
	case dropSet:
		logger.Errorf("drop only support old_taosd_metric_stables")
	}
}
func ( cmd * Command ) ProcessTransfer ( conf * config . Config ) {
2025-01-06 14:35:08 +00:00
fromTime , err := time . Parse ( time . RFC3339 , conf . FromTime )
2024-10-17 09:04:34 +00:00
if err != nil {
logger . Errorf ( "parse fromTime error, msg:%s" , err )
return
}
cmd . fromTime = fromTime
funcs := [ ] func ( ) error {
cmd . TransferTaosdClusterBasicInfo ,
cmd . TransferTaosdClusterInfo ,
cmd . TransferTaosdVgroupsInfo ,
cmd . TransferTaosdDnodesInfo ,
cmd . TransferTaosdDnodesStatus ,
cmd . TransferTaosdDnodesLogDirs1 ,
cmd . TransferTaosdDnodesLogDirs2 ,
cmd . TransferTaosdDnodesDataDirs ,
cmd . TransferTaosdMnodesInfo ,
cmd . TransferTaosdVnodesInfo ,
}
wg := sync . WaitGroup { }
wg . Add ( len ( funcs ) )
for i := range funcs {
index := i
err := pool . GoroutinePool . Submit ( func ( ) {
defer wg . Done ( )
funcs [ index ] ( )
} )
if err != nil {
panic ( err )
}
}
wg . Wait ( )
logger . Info ( "transfer all old taosd metric success!!" )
}
// TransferTaosdClusterInfo transfers rows joined from cluster_info and
// grants_info (matched on ts and cluster_id) into taosd_cluster_info, with the
// first selected column (cluster_id) as the tag. The time-range predicate is
// appended by TransferTableToDst.
func (cmd *Command) TransferTaosdClusterInfo() error {
	const (
		dstTable = "taosd_cluster_info"
		tagNum   = 1
		query    = "select a.cluster_id, master_uptime * 3600 * 24 as cluster_uptime, dbs_total, tbs_total, stbs_total, dnodes_total, dnodes_alive, mnodes_total, mnodes_alive, vgroups_total, vgroups_alive, vnodes_total, vnodes_alive, connections_total, topics_total, streams_total, b.expire_time as grants_expire_time, b.timeseries_used as grants_timeseries_used, b.timeseries_total as grants_timeseries_total, a.ts from cluster_info a, grants_info b where a.ts = b.ts and a.cluster_id = b.cluster_id and"
	)
	return cmd.TransferTableToDst(query, dstTable, tagNum)
}
// TransferTaosdVgroupsInfo transfers vgroups_info rows into
// taosd_vgroups_info. The first three selected columns are tags, and status is
// mapped to 1 for 'ready' and 0 otherwise. The time-range predicate is
// appended by TransferTableToDst.
func (cmd *Command) TransferTaosdVgroupsInfo() error {
	const (
		dstTable = "taosd_vgroups_info"
		tagNum   = 3
		query    = "select cluster_id, vgroup_id, database_name, tables_num, CASE status WHEN 'ready' THEN 1 ELSE 0 END as status, ts from vgroups_info a where "
	)
	return cmd.TransferTableToDst(query, dstTable, tagNum)
}
// TransferTaosdDnodesInfo transfers rows joined from dnodes_info and
// log_summary (matched on ts, dnode_id and dnode_ep) into taosd_dnodes_info,
// with the first three selected columns as tags. The time-range predicate is
// appended by TransferTableToDst.
func (cmd *Command) TransferTaosdDnodesInfo() error {
	const (
		dstTable = "taosd_dnodes_info"
		tagNum   = 3
		query    = "select a.cluster_id, a.dnode_id, a.dnode_ep, uptime * 3600 * 24 as uptime, cpu_engine, cpu_system, cpu_cores, mem_engine, mem_system as mem_free, mem_total, disk_used, disk_total, disk_engine, net_in as system_net_in, net_out as system_net_out, io_read, io_write, io_read_disk, io_write_disk, vnodes_num, masters, has_mnode, has_qnode, has_snode, has_bnode, errors, b.error as error_log_count, b.info as info_log_count, b.debug as debug_log_count, b.trace as trace_log_count, a.ts as ts from dnodes_info a, log_summary b where a.ts = b.ts and a.dnode_id = b.dnode_id and a. dnode_ep = b.dnode_ep and "
	)
	return cmd.TransferTableToDst(query, dstTable, tagNum)
}
2025-01-16 01:17:16 +00:00
2024-10-17 09:04:34 +00:00
// TransferTaosdDnodesStatus transfers d_info rows into taosd_dnodes_status.
// The first three selected columns are tags, and status is mapped to 1 for
// 'ready' and 0 otherwise. The time-range predicate is appended by
// TransferTableToDst.
func (cmd *Command) TransferTaosdDnodesStatus() error {
	const (
		dstTable = "taosd_dnodes_status"
		tagNum   = 3
		query    = "select cluster_id, dnode_id, dnode_ep, CASE status WHEN 'ready' THEN 1 ELSE 0 END as status, ts from d_info a where "
	)
	return cmd.TransferTableToDst(query, dstTable, tagNum)
}
// TransferTaosdDnodesLogDirs1 transfers log_dir rows into
// taosd_dnodes_log_dirs, with the first four selected columns (including the
// directory name, aliased to log_dir_name) as tags. The time-range predicate
// is appended by TransferTableToDst.
func (cmd *Command) TransferTaosdDnodesLogDirs1() error {
	const (
		dstTable = "taosd_dnodes_log_dirs"
		tagNum   = 4
		query    = "select cluster_id, dnode_id, dnode_ep, name as log_dir_name, avail, used, total, ts from log_dir a where "
	)
	return cmd.TransferTableToDst(query, dstTable, tagNum)
}
2025-01-16 01:17:16 +00:00
2024-10-17 09:04:34 +00:00
// TransferTaosdDnodesLogDirs2 transfers temp_dir rows into
// taosd_dnodes_log_dirs, the same destination as TransferTaosdDnodesLogDirs1.
// The first four selected columns (including the directory name, aliased to
// log_dir_name) are tags. The time-range predicate is appended by
// TransferTableToDst.
func (cmd *Command) TransferTaosdDnodesLogDirs2() error {
	const (
		dstTable = "taosd_dnodes_log_dirs"
		tagNum   = 4
		query    = "select cluster_id, dnode_id, dnode_ep, name as log_dir_name, avail, used, total, ts from temp_dir a where "
	)
	return cmd.TransferTableToDst(query, dstTable, tagNum)
}
// TransferTaosdDnodesDataDirs transfers data_dir rows into
// taosd_dnodes_data_dirs, with the first five selected columns (through
// data_dir_level) as tags. The time-range predicate is appended by
// TransferTableToDst.
func (cmd *Command) TransferTaosdDnodesDataDirs() error {
	const (
		dstTable = "taosd_dnodes_data_dirs"
		tagNum   = 5
		query    = "select cluster_id, dnode_id, dnode_ep, name as data_dir_name, `level` as data_dir_level, avail, used, total, ts from data_dir a where "
	)
	return cmd.TransferTableToDst(query, dstTable, tagNum)
}
// TransferTaosdMnodesInfo transfers m_info rows into taosd_mnodes_info, with
// the first three selected columns as tags. The textual role is mapped to a
// number: offline=0, follower=100, candidate=101, leader=102, learner=104,
// anything else=103. The time-range predicate is appended by
// TransferTableToDst.
func (cmd *Command) TransferTaosdMnodesInfo() error {
	const (
		dstTable = "taosd_mnodes_info"
		tagNum   = 3
		query    = "select cluster_id, mnode_id, mnode_ep, CASE role WHEN 'offline' THEN 0 WHEN 'follower' THEN 100 WHEN 'candidate' THEN 101 WHEN 'leader' THEN 102 WHEN 'learner' THEN 104 ELSE 103 END as role, ts from m_info a where "
	)
	return cmd.TransferTableToDst(query, dstTable, tagNum)
}
// TransferTaosdVnodesInfo transfers vnodes_role rows into taosd_vnodes_info,
// with the first four selected columns as tags. The query supplies the
// constants 0 for vgroup_id and 'UNKNOWN' for database_name. vnode_role is
// mapped to a number: offline=0, follower=100, candidate=101, leader=102,
// learner=104, anything else=103. The time-range predicate is appended by
// TransferTableToDst.
func (cmd *Command) TransferTaosdVnodesInfo() error {
	const (
		dstTable = "taosd_vnodes_info"
		tagNum   = 4
		query    = "select cluster_id, 0 as vgroup_id, 'UNKNOWN' as database_name, dnode_id, CASE vnode_role WHEN 'offline' THEN 0 WHEN 'follower' THEN 100 WHEN 'candidate' THEN 101 WHEN 'leader' THEN 102 WHEN 'learner' THEN 104 ELSE 103 END as role, ts from vnodes_role a where "
	)
	return cmd.TransferTableToDst(query, dstTable, tagNum)
}
func ( cmd * Command ) ProcessDrop ( conf * config . Config ) {
var dropStableList = [ ] string {
"log_dir" ,
"dnodes_info" ,
"data_dir" ,
"log_summary" ,
"m_info" ,
"vnodes_role" ,
"cluster_info" ,
"temp_dir" ,
"grants_info" ,
"vgroups_info" ,
"d_info" ,
"taosadapter_system_cpu_percent" ,
"taosadapter_restful_http_request_in_flight" ,
"taosadapter_restful_http_request_summary_milliseconds" ,
"taosadapter_restful_http_request_fail" ,
"taosadapter_system_mem_percent" ,
"taosadapter_restful_http_request_total" ,
}
ctx := context . Background ( )
logger . Infof ( "use database:%s" , conf . Metrics . Database . Name )
for _ , stable := range dropStableList {
2025-06-01 11:30:02 +00:00
if _ , err := cmd . conn . Exec ( ctx , "DROP STABLE IF EXISTS " + stable , util . GetQidOwn ( config . Conf . InstanceID ) ) ; err != nil {
2024-10-17 09:04:34 +00:00
logger . Errorf ( "drop stable %s, error:%s" , stable , err )
panic ( err )
}
}
logger . Info ( "drop old taosd metric stables success!!" )
}
func ( cmd * Command ) TransferDataToDest ( data * db . Data , dstTable string , tagNum int ) {
if len ( data . Data ) < 1 {
return
}
2025-01-16 01:17:16 +00:00
var buf bytes . Buffer
2024-10-17 09:04:34 +00:00
for _ , row := range data . Data {
// get one row here
buf . WriteString ( dstTable )
// write tags
var tag string
for j := 0 ; j < tagNum ; j ++ {
switch v := row [ j ] . ( type ) {
case int :
tag = fmt . Sprint ( v )
case int32 :
tag = fmt . Sprint ( v )
case int64 :
tag = fmt . Sprint ( v )
case string :
tag = v
default :
panic ( fmt . Sprintf ( "Unexpected type for row[%d]: %T" , j , row [ j ] ) )
}
if tag != "" {
buf . WriteString ( fmt . Sprintf ( ",%s=%s" , data . Head [ j ] , util . EscapeInfluxProtocol ( tag ) ) )
} else {
buf . WriteString ( fmt . Sprintf ( ",%s=%s" , data . Head [ j ] , "unknown" ) )
logger . Errorf ( "tag value is empty, tag_name:%s" , data . Head [ j ] )
}
}
buf . WriteString ( " " )
// write metrics
for j := tagNum ; j < len ( row ) - 1 ; j ++ {
switch v := row [ j ] . ( type ) {
case int :
buf . WriteString ( fmt . Sprintf ( "%s=%ff64" , data . Head [ j ] , float64 ( v ) ) )
case int32 :
buf . WriteString ( fmt . Sprintf ( "%s=%ff64" , data . Head [ j ] , float64 ( v ) ) )
case int64 :
buf . WriteString ( fmt . Sprintf ( "%s=%ff64" , data . Head [ j ] , float64 ( v ) ) )
case float32 :
buf . WriteString ( fmt . Sprintf ( "%s=%sf64" , data . Head [ j ] , strconv . FormatFloat ( float64 ( v ) , 'f' , - 1 , 64 ) ) )
case float64 :
buf . WriteString ( fmt . Sprintf ( "%s=%sf64" , data . Head [ j ] , strconv . FormatFloat ( v , 'f' , - 1 , 64 ) ) )
default :
panic ( fmt . Sprintf ( "Unexpected type for row[%d]: %T" , j , row [ j ] ) )
}
if j != len ( row ) - 2 {
buf . WriteString ( "," )
}
}
// write timestamp
buf . WriteString ( " " )
buf . WriteString ( fmt . Sprintf ( "%v" , row [ len ( row ) - 1 ] . ( time . Time ) . UnixMilli ( ) ) )
buf . WriteString ( "\n" )
if buf . Len ( ) >= MAX_SQL_LEN {
if logger . Logger . IsLevelEnabled ( logrus . TraceLevel ) {
logger . Tracef ( "buf:%v" , buf . String ( ) )
}
2025-01-16 01:17:16 +00:00
if err := cmd . lineWriteBody ( & buf ) ; err != nil {
2024-10-17 09:04:34 +00:00
logger . Errorf ( "insert data error, msg:%s" , err )
panic ( err )
}
buf . Reset ( )
}
}
if buf . Len ( ) > 0 {
if logger . Logger . IsLevelEnabled ( logrus . TraceLevel ) {
logger . Tracef ( "buf:%v" , buf . String ( ) )
}
2025-01-16 01:17:16 +00:00
if err := cmd . lineWriteBody ( & buf ) ; err != nil {
2024-10-17 09:04:34 +00:00
logger . Errorf ( "insert data error, msg:%s" , err )
panic ( err )
}
}
}
// cluster_info
func ( cmd * Command ) TransferTaosdClusterBasicInfo ( ) error {
ctx := context . Background ( )
endTime := time . Now ( )
delta := time . Hour * 24 * 10
var createTableSql = "create stable if not exists taosd_cluster_basic " +
2024-12-03 03:45:57 +00:00
"(ts timestamp, first_ep varchar(255), first_ep_dnode_id INT, cluster_version varchar(20)) " +
2024-10-17 09:04:34 +00:00
"tags (cluster_id varchar(50))"
2025-06-01 11:30:02 +00:00
if _ , err := cmd . conn . Exec ( ctx , createTableSql , util . GetQidOwn ( config . Conf . InstanceID ) ) ; err != nil {
2024-10-17 09:04:34 +00:00
logger . Errorf ( "create taosd_cluster_basic error, msg:%s" , err )
return err
}
logger . Tracef ( "fromeTime:%d" , cmd . fromTime . UnixMilli ( ) )
for current := cmd . fromTime ; current . Before ( endTime ) ; current = current . Add ( time . Duration ( delta ) ) {
querySql := fmt . Sprintf ( "select cluster_id, first_ep, first_ep_dnode_id, `version` as cluster_version, ts from cluster_info where ts > %d and ts <= %d" ,
current . UnixMilli ( ) , current . Add ( time . Duration ( delta ) ) . UnixMilli ( ) )
logger . Tracef ( "query sql:%s" , querySql )
2025-06-01 11:30:02 +00:00
data , err := cmd . conn . Query ( ctx , querySql , util . GetQidOwn ( config . Conf . InstanceID ) )
2024-10-17 09:04:34 +00:00
if err != nil {
logger . Errorf ( "query cluster_info error, msg:%s" , err )
return err
}
// transfer data to new table, only this table need use insert statement
var buf bytes . Buffer
2025-08-26 07:07:51 +00:00
// Use a map to split the 2D array into multiple 2D arrays
2024-10-17 09:04:34 +00:00
result := make ( map [ string ] [ ] [ ] interface { } )
for _ , row := range data . Data {
key := row [ 0 ] . ( string ) // 使用第一列的值作为 key
result [ key ] = append ( result [ key ] , row )
}
2025-08-26 07:07:51 +00:00
// Migrate data according to different tags
2024-10-17 09:04:34 +00:00
for _ , dataByCluster := range result {
buf . Reset ( )
for _ , row := range dataByCluster {
if len ( buf . Bytes ( ) ) == 0 {
sql := fmt . Sprintf (
"insert into taosd_cluster_basic_%s using taosd_cluster_basic tags ('%s') values " ,
row [ 0 ] . ( string ) , row [ 0 ] . ( string ) )
buf . WriteString ( sql )
}
sql := fmt . Sprintf (
"(%d, '%s', %d, '%s')" ,
row [ 4 ] . ( time . Time ) . UnixMilli ( ) , row [ 1 ] . ( string ) , row [ 2 ] . ( int32 ) , row [ 3 ] . ( string ) )
buf . WriteString ( sql )
if buf . Len ( ) >= MAX_SQL_LEN {
2025-06-01 11:30:02 +00:00
rowsAffected , err := cmd . conn . Exec ( context . Background ( ) , buf . String ( ) , util . GetQidOwn ( config . Conf . InstanceID ) )
2024-10-17 09:04:34 +00:00
if err != nil {
logger . Errorf ( "insert taosd_cluster_basic error, msg:%s" , err )
return err
}
if rowsAffected <= 0 {
logger . Errorf ( "insert taosd_cluster_basic failed, rowsAffected:%d" , rowsAffected )
}
buf . Reset ( )
}
}
if buf . Len ( ) > 0 {
2025-06-01 11:30:02 +00:00
rowsAffected , err := cmd . conn . Exec ( context . Background ( ) , buf . String ( ) , util . GetQidOwn ( config . Conf . InstanceID ) )
2024-10-17 09:04:34 +00:00
if err != nil {
logger . Errorf ( "insert taosd_cluster_basic error, msg:%s" , err )
return err
}
if rowsAffected <= 0 {
logger . Errorf ( "insert taosd_cluster_basic failed, rowsAffected:%d" , rowsAffected )
}
}
}
}
logger . Info ( "transfer stable taosd_cluster_basic success!!" )
return nil
}
// cluster_info
func ( cmd * Command ) TransferTableToDst ( sql string , dstTable string , tagNum int ) error {
ctx := context . Background ( )
endTime := time . Now ( )
delta := time . Hour * 24 * 10
logger . Tracef ( "fromTime:%d" , cmd . fromTime . UnixMilli ( ) )
for current := cmd . fromTime ; current . Before ( endTime ) ; current = current . Add ( time . Duration ( delta ) ) {
querySql := fmt . Sprintf ( sql + " a.ts > %d and a.ts <= %d" ,
current . UnixMilli ( ) , current . Add ( time . Duration ( delta ) ) . UnixMilli ( ) )
logger . Tracef ( "query sql:%s" , querySql )
2025-06-01 11:30:02 +00:00
data , err := cmd . conn . Query ( ctx , querySql , util . GetQidOwn ( config . Conf . InstanceID ) )
2024-10-17 09:04:34 +00:00
if err != nil {
logger . Errorf ( "query cluster_info error, msg:%s" , err )
return err
}
// transfer data to new table, only this table need use insert statement
cmd . TransferDataToDest ( data , dstTable , tagNum )
}
logger . Info ( "transfer stable " + dstTable + " success!!" )
return nil
}
func ( cmd * Command ) lineWriteBody ( buf * bytes . Buffer ) error {
header := map [ string ] [ ] string {
"Connection" : { "keep-alive" } ,
}
req := & http . Request {
Method : http . MethodPost ,
URL : cmd . url ,
Proto : "HTTP/1.1" ,
ProtoMajor : 1 ,
ProtoMinor : 1 ,
Header : header ,
Host : cmd . url . Host ,
}
req . SetBasicAuth ( cmd . username , cmd . password )
req . Body = io . NopCloser ( buf )
resp , err := cmd . client . Do ( req )
if err != nil {
logger . Errorf ( "writing metrics exception, msg:%s" , err )
return err
}
defer resp . Body . Close ( )
2025-01-16 01:17:16 +00:00
2024-10-17 09:04:34 +00:00
if resp . StatusCode != http . StatusNoContent {
body , _ := io . ReadAll ( resp . Body )
return fmt . Errorf ( "unexpected status code %d:body:%s" , resp . StatusCode , string ( body ) )
}
return nil
}