###################################################################
#           Copyright (c) 2016 by TAOS Technologies, Inc.
#                     All rights reserved.
#
#  This file is proprietary and confidential to TAOS Technologies.
#  No part of this file may be reproduced, stored, transmitted,
#  disclosed or used in any form or by any means other than as
#  expressly provided by the written permission from Jianhui Tao
#
###################################################################

# -*- coding: utf-8 -*-
import os
import json
import csv
import datetime
import shutil

import frame
import frame.eos
import frame.etool
from frame.log import *
from frame.cases import *
from frame.sql import *
from frame.caseBase import *
from frame import *


class TDTestCase(TBase):
    """Checks the csv files exported by taosBenchmark against its json config."""

    def caseDescription(self):
        """
        [TS-5089] taosBenchmark support exporting csv
        """

    def clear_directory(self, target_dir: str = 'csv'):
        """Remove every entry inside ``target_dir`` while keeping the directory.

        Does nothing when ``target_dir`` does not exist. Any ``OSError`` is
        reported through ``tdLog.exit``.
        """
        try:
            if not os.path.exists(target_dir):
                return
            for entry in os.listdir(target_dir):
                entry_path = os.path.join(target_dir, entry)
                if os.path.isfile(entry_path) or os.path.islink(entry_path):
                    os.unlink(entry_path)
                else:
                    shutil.rmtree(entry_path)

            tdLog.debug("clear succ, dir: %s " % (target_dir))
        except OSError as e:
            # Report the underlying cause instead of silently dropping it.
            tdLog.exit("clear fail, dir: %s, error: %s" % (target_dir, e))

    def convert_timestamp(self, ts, ts_format):
        """Format ``ts`` (treated as milliseconds since the epoch) with ``ts_format``.

        The conversion uses ``datetime.fromtimestamp`` without a tz argument,
        so the result is expressed in the local time zone of the machine.
        """
        dt_object = datetime.datetime.fromtimestamp(ts / 1000)
        return dt_object.strftime(ts_format)

    @staticmethod
    def _ceil_div(numerator, denominator):
        """Return ceil(numerator / denominator) using integer arithmetic only."""
        return -(-numerator // denominator)

    def calc_time_slice_partitions(self, total_start_ts, total_end_ts, ts_step, ts_format, ts_interval):
        """Split ``[total_start_ts, total_end_ts)`` into fixed-width time slices.

        Args:
            total_start_ts: timestamp of the first row (used as milliseconds).
            total_end_ts: exclusive upper bound of the row timestamps.
            ts_step: distance between two consecutive row timestamps.
            ts_format: strftime pattern; the first slice starts at
                ``total_start_ts`` truncated to the resolution of this pattern.
            ts_interval: slice width as ``"<number><unit>"``. The trailing
                unit character is dropped and the number is always treated
                as a count of days.

        Returns:
            A list of dicts with keys ``start_ts``, ``end_ts``, ``start_time``,
            ``end_time`` and ``count`` (rows per table whose timestamp falls
            inside the slice). Slices with ``count == 0`` are kept.
        """
        interval_days = int(ts_interval[:-1])
        n_days_millis = interval_days * 24 * 60 * 60 * 1000

        # Round-trip through ts_format to truncate the start to the format's
        # resolution (e.g. "%Y%m%d" -> local midnight).
        dt_start = datetime.datetime.fromtimestamp(total_start_ts / 1000.0)
        formatted_str = dt_start.strftime(ts_format)
        s0_dt = datetime.datetime.strptime(formatted_str, ts_format)
        s0 = int(s0_dt.timestamp() * 1000)

        partitions = []
        current_s = s0

        while current_s <= total_end_ts:
            current_end = current_s + n_days_millis
            start_actual = max(current_s, total_start_ts)
            end_actual = min(current_end, total_end_ts)

            if start_actual >= end_actual:
                count = 0
            else:
                # Rows sit at total_start_ts + k * ts_step. Count the k with
                # start_actual <= ts < end_actual. This stays exact even when
                # the slice boundary is not aligned to the row grid.
                delta_start = start_actual - total_start_ts
                delta_end = end_actual - total_start_ts
                count = (self._ceil_div(delta_end, ts_step)
                         - self._ceil_div(delta_start, ts_step))

            partitions.append({
                "start_ts": current_s,
                "end_ts": current_end,
                "start_time": self.convert_timestamp(current_s, ts_format),
                "end_time": self.convert_timestamp(current_end, ts_format),
                "count": count
            })

            current_s += n_days_millis

        # partitions = [p for p in partitions if p['count'] > 0]
        return partitions

    def check_stb_csv_correct(self, csv_file_name, all_rows, interlace_rows):
        """Validate row count and per-table run length of one exported csv file.

        The first line is skipped as a header. Column 14 of every data row is
        read as the child table name; each run of consecutive rows with the
        same name must contain exactly ``interlace_rows`` rows, and the number
        of data rows must equal ``all_rows``. Mismatches go to ``tdLog.exit``.
        """
        tbname_idx = 14
        count = 0
        batch = 0
        name = ""

        with open(csv_file_name) as file:
            rows = csv.reader(file)
            # skip the header line
            next(rows, None)
            for row in rows:
                tbname = row[tbname_idx]
                if name == "":
                    name = tbname
                    batch = 1
                elif name == tbname:
                    batch += 1
                else:
                    # switch to another child table
                    if batch != interlace_rows:
                        tdLog.exit(f"interlace rows is not as expected. tbname={name}, actual: {batch}, expected: {interlace_rows}, count: {count}, csv_file_name: {csv_file_name}")
                    batch = 1
                    name = tbname
                count += 1

        # the last run is never followed by a table switch, check it here
        if batch != interlace_rows:
            tdLog.exit(f"interlace rows is not as expected. tbname={name}, actual: {batch}, expected: {interlace_rows}, count: {count}, csv_file_name: {csv_file_name}")

        # check all rows
        if count != all_rows:
            tdLog.exit(f"total rows is not as expected. actual: {count}, expected: {all_rows}, csv_file_name: {csv_file_name}")

        tdLog.info(f"check generate csv file successfully. csv_file_name: {csv_file_name}, count: {count}, interlace_rows: {interlace_rows}")

    # check correct
    def check_stb_correct(self, data, db, stb):
        """Check every csv file expected for one super table definition.

        Args:
            data: the parsed json config; ``output_dir`` is read from it.
            db: the database entry of the config (not used here).
            stb: the super table entry describing the export.

        With ``interlace_rows > 0`` a single file (or one file per time slice)
        is expected; otherwise one file per thread (optionally per time slice)
        is expected.
        """
        filepath = data["output_dir"]
        child_count = stb["childtable_to"] - stb["childtable_from"]
        insert_rows = stb["insert_rows"]
        interlace_rows = stb["interlace_rows"]
        csv_file_prefix = stb["csv_file_prefix"]
        csv_ts_format = stb.get("csv_ts_format", None)
        csv_ts_interval = stb.get("csv_ts_interval", None)

        ts_step = stb["timestamp_step"]
        total_start_ts = stb["start_timestamp"]
        total_end_ts = total_start_ts + ts_step * insert_rows

        all_rows = child_count * insert_rows
        if interlace_rows > 0:
            # interlace
            if not csv_ts_format:
                # normal
                csv_file_name = f"{filepath}{csv_file_prefix}.csv"
                self.check_stb_csv_correct(csv_file_name, all_rows, interlace_rows)
            else:
                # time slice
                partitions = self.calc_time_slice_partitions(total_start_ts, total_end_ts, ts_step, csv_ts_format, csv_ts_interval)
                for part in partitions:
                    csv_file_name = f"{filepath}{csv_file_prefix}_{part['start_time']}_{part['end_time']}.csv"
                    self.check_stb_csv_correct(csv_file_name, part['count'] * child_count, interlace_rows)
        else:
            # batch
            thread_count = stb["thread_count"]
            interlace_rows = insert_rows
            # The first `extra_tables` threads are expected to hold one more
            # child table than the remaining ones.
            base_tables, extra_tables = divmod(child_count, thread_count)
            if not csv_ts_format:
                # normal
                for i in range(thread_count):
                    csv_file_name = f"{filepath}{csv_file_prefix}_{i + 1}.csv"
                    tables = base_tables + 1 if i < extra_tables else base_tables
                    self.check_stb_csv_correct(csv_file_name, insert_rows * tables, interlace_rows)
            else:
                # time slice; the partitions do not depend on the thread index
                partitions = self.calc_time_slice_partitions(total_start_ts, total_end_ts, ts_step, csv_ts_format, csv_ts_interval)
                for i in range(thread_count):
                    tables = base_tables + 1 if i < extra_tables else base_tables
                    for part in partitions:
                        csv_file_name = f"{filepath}{csv_file_prefix}_{i + 1}_{part['start_time']}_{part['end_time']}.csv"
                        slice_rows = part['count'] * tables
                        self.check_stb_csv_correct(csv_file_name, slice_rows, part['count'])

    # check result
    def check_result(self, json_file):
        """Load ``json_file`` and check each super table of its first database."""
        with open(json_file) as file:
            data = json.load(file)

        # read json
        database = data["databases"][0]
        stables = database["super_tables"]

        for stable in stables:
            # check csv context correct
            self.check_stb_correct(data, database, stable)

    def exec_benchmark(self, benchmark, json_file, options=""):
        """Run ``benchmark`` with ``options`` and ``-f json_file`` via ``eos.exe``."""
        cmd = f"{benchmark} {options} -f {json_file}"
        eos.exe(cmd)

    def check_export_csv_main(self, benchmark, json_file, options=""):
        """Clear the csv directory, run the benchmark and verify its output."""
        # clear
        self.clear_directory()

        # exec
        self.exec_benchmark(benchmark, json_file, options)

        # check result
        self.check_result(json_file)

    def _check_export_variant(self, benchmark, json_file, options, modifier,
                              filename, expected_lines, post_cmd=None):
        """Run the benchmark on a modified json and check one output file.

        ``modifier`` is handed to ``self.genNewJson`` together with
        ``json_file``; the returned file is executed, ``post_cmd`` (if any) is
        run through ``eos.exe``, the line count of ``filename`` is checked and
        the generated json is removed with ``self.deleteFile``.
        """
        new_json_file = self.genNewJson(json_file, modifier)
        self.exec_benchmark(benchmark, new_json_file, options)
        if post_cmd:
            eos.exe(post_cmd)
        self.check_file_line_count(filename, expected_lines)
        self.deleteFile(new_json_file)

    def check_export_csv_others(self, benchmark, json_file, options=""):
        """Exercise the export variants derived from the template json.

        The csv directory is cleared once up front and not between variants.
        """
        # clear
        self.clear_directory()

        # (modifier, file to check, expected line count, command run before the check)
        variants = [
            # file ts interval second
            (self.func_csv_ts_interval_second, "./csv/data_20231115061320_20231115061321.csv", 10001, None),
            # file ts interval minute
            (self.func_csv_ts_interval_minute, "./csv/data_202311150613_202311150614.csv", 10001, None),
            # file ts interval hour
            (self.func_csv_ts_interval_hour, "./csv/data_2023111506_2023111507.csv", 10001, None),
            # db precision us
            (self.func_db_precision_us, "./csv/data_20231115_20231116.csv", 10001, None),
            # db precision ns
            (self.func_db_precision_ns, "./csv/data_20231115_20231116.csv", 10001, None),
            # thread num
            (self.func_thread_num, "./csv/data_10.csv", 1001, None),
            # create sql
            (self.func_create_sql, "./csv/create_stmt.txt", 2, None),
            # gzip: decompress before counting lines
            (self.func_gzip, "./csv/data.csv", 10001, "gunzip ./csv/data.csv.gz"),
        ]
        for modifier, filename, expected_lines, post_cmd in variants:
            self._check_export_variant(benchmark, json_file, options, modifier,
                                       filename, expected_lines, post_cmd)

    def func_csv_ts_interval_second(self, data):
        """Set step/format/interval of the first super table for 1-second slices."""
        db = data['databases'][0]
        stb = db["super_tables"][0]
        stb['timestamp_step'] = '10'
        stb['csv_ts_format'] = '%Y%m%d%H%M%S'
        stb['csv_ts_interval'] = '1s'

    def func_csv_ts_interval_minute(self, data):
        """Set step/format/interval of the first super table for 1-minute slices."""
        db = data['databases'][0]
        stb = db["super_tables"][0]
        stb['timestamp_step'] = '600'
        stb['csv_ts_format'] = '%Y%m%d%H%M'
        stb['csv_ts_interval'] = '1m'

    def func_csv_ts_interval_hour(self, data):
        """Set step/format/interval of the first super table for 1-hour slices."""
        db = data['databases'][0]
        stb = db["super_tables"][0]
        stb['timestamp_step'] = '36000'
        stb['csv_ts_format'] = '%Y%m%d%H'
        stb['csv_ts_interval'] = '1h'

    def func_db_precision_us(self, data):
        """Switch the first database to 'us' precision with a matching start timestamp."""
        db = data['databases'][0]
        db['dbinfo']['precision'] = 'us'
        stb = db["super_tables"][0]
        stb['start_timestamp'] = 1700000000000000

    def func_db_precision_ns(self, data):
        """Switch the first database to 'ns' precision with a matching start timestamp."""
        db = data['databases'][0]
        db['dbinfo']['precision'] = 'ns'
        stb = db["super_tables"][0]
        stb['start_timestamp'] = 1700000000000000000

    def func_thread_num(self, data):
        """Use 12 threads and drop the interlace / time-slice settings."""
        data['thread_count'] = 12
        db = data['databases'][0]
        stb = db["super_tables"][0]
        stb.pop('interlace_rows', None)
        stb.pop('csv_ts_format', None)
        stb.pop('csv_ts_interval', None)

    def func_create_sql(self, data):
        """Add database/table/column options to the first database and super table."""
        db = data['databases'][0]
        dbinfo = db['dbinfo']
        dbinfo['buffer'] = 256
        dbinfo['cachemode'] = 'none'
        stb = db["super_tables"][0]
        stb['primary_key'] = 1
        stb['columns'][0] = {"type": "bool", "name": "bc", "encode": 'simple8b', 'compress': 'lz4', 'level': 'medium'}
        stb['comment'] = "csv export sample"
        stb['delay'] = 10
        stb['file_factor'] = 20
        stb['rollup'] = 'min'
        stb['max_delay'] = '300s'
        stb['watermark'] = '10m'
        stb['columns'][1] = {"type": "float", "name": "fc", "min": 1, "sma": "yes"}
        stb['columns'][2] = {"type": "double", "name": "dc", "min": 10, "max": 10, "sma": "yes"}

    def func_gzip(self, data):
        """Drop the time-slice settings and set ``csv_compress_level`` to "fast"."""
        db = data['databases'][0]
        stb = db["super_tables"][0]
        stb.pop('csv_ts_format', None)
        stb.pop('csv_ts_interval', None)
        stb['csv_compress_level'] = "fast"

    def check_file_line_count(self, filename, expected_lines):
        """Check that ``filename`` exists and has ``expected_lines`` lines.

        A negative ``expected_lines`` only checks that the file can be read.
        Failures are reported through ``tdLog.exit``.
        """
        try:
            with open(filename, 'r', encoding='utf-8') as file:
                actual_lines = sum(1 for _ in file)
        except FileNotFoundError:
            tdLog.exit(f"check csv data failed, file not exists. filename: {filename}")
            return

        if expected_lines >= 0 and actual_lines != expected_lines:
            tdLog.exit(f"check csv data failed, actual: {actual_lines}, expected: {expected_lines}, filename: {filename}")

    def run(self):
        """Entry point: run the main export check, then the variant checks."""
        # path
        benchmark = etool.benchMarkFile()

        # check normal
        json_file = "tools/benchmark/basic/json/csv-export.json"
        self.check_export_csv_main(benchmark, json_file)

        # check others
        json_file = "tools/benchmark/basic/json/csv-export-template.json"
        self.check_export_csv_others(benchmark, json_file)

    def stop(self):
        """Close the sql connection and report success."""
        tdSql.close()
        tdLog.success("%s successfully executed" % __file__)


tdCases.addWindows(__file__, TDTestCase())
tdCases.addLinux(__file__, TDTestCase())