TDengine/include/util/tcompression.h

393 lines
17 KiB
C
Raw Normal View History

2019-07-11 08:36:16 +00:00
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2022-02-28 02:47:44 +00:00
#ifndef _TD_UTIL_COMPRESSION_H_
#define _TD_UTIL_COMPRESSION_H_
#include "os.h"
#include "taos.h"
#include "tutil.h"
2019-07-11 08:36:16 +00:00
#ifdef __cplusplus
extern "C" {
#endif
2020-04-27 09:40:57 +00:00
#define COMP_OVERFLOW_BYTES 2
#define BITS_PER_BYTE       8

// Masks: each macro yields a value whose low _x bits are set.
// The argument is parenthesized so that expressions containing operators that
// bind looser than << (e.g. `n & 7`, `a ? b : c`) expand as intended.
// _x must be smaller than the width of the shifted operand; a larger shift
// count is undefined behavior in C.
#define INT64MASK(_x) ((((uint64_t)1) << (_x)) - 1)
#define INT32MASK(_x) (((uint32_t)1 << (_x)) - 1)
#define INT8MASK(_x)  (((uint8_t)1 << (_x)) - 1)
2019-07-11 08:36:16 +00:00
// Compression algorithm
#define NO_COMPRESSION 0
#define ONE_STAGE_COMP 1
#define TWO_STAGE_COMP 2

//
// compressed data first byte format
//   ------ 7 bit ---- | ---- 1 bit ----
//        algorithm           mode
//

// compression data mode, saved in the lowest bit of the first byte
#define MODE_NOCOMPRESS 0  // original data
#define MODE_COMPRESS   1  // compatible old compress

// compression algorithm, saved in the upper 7 bits of the first byte
#define ALGO_SZ_LOSSY 1  // SZ compress

// Split a header byte into its mode (lowest bit) and algorithm (remaining bits).
// Both the argument and the whole expansion are parenthesized so the macros can
// be used with compound arguments and inside larger expressions.
// NOTE(review): x is evaluated with its own type; for a signed char with the top
// bit set, % and / produce negative results - confirm callers never pass such a byte.
#define HEAD_MODE(x) ((x) % 2)
#define HEAD_ALGO(x) ((x) / 2)
// Lossless codec routines. They are only declared here; the definitions live
// outside this header. The inline wrappers further down call them either
// directly (ONE_STAGE_COMP) or chained with the String codec (TWO_STAGE_COMP).
//
// What the wrappers in this header rely on:
//   - the `type` argument of the INT codec receives a TSDB_DATA_TYPE_* constant;
//   - the value returned by a tsCompress*Imp call is forwarded as `inputSize`
//     to tsCompressStringImp in two-stage mode;
//   - a negative return from tsDecompressStringImp is treated as failure.
// NOTE(review): the exact meaning of the other return values is not visible
// from this header - confirm against the implementation.
extern int32_t tsCompressINTImp(const char *const input, const int32_t nelements, char *const output, const char type);
extern int32_t tsDecompressINTImp(const char *const input, const int32_t nelements, char *const output,
const char type);
extern int32_t tsCompressBoolImp(const char *const input, const int32_t nelements, char *const output);
extern int32_t tsDecompressBoolImp(const char *const input, const int32_t nelements, char *const output);
extern int32_t tsCompressStringImp(const char *const input, int32_t inputSize, char *const output, int32_t outputSize);
extern int32_t tsDecompressStringImp(const char *const input, int32_t compressedSize, char *const output,
int32_t outputSize);
extern int32_t tsCompressTimestampImp(const char *const input, const int32_t nelements, char *const output);
extern int32_t tsDecompressTimestampImp(const char *const input, const int32_t nelements, char *const output);
extern int32_t tsCompressDoubleImp(const char *const input, const int32_t nelements, char *const output);
extern int32_t tsDecompressDoubleImp(const char *const input, const int32_t nelements, char *const output);
extern int32_t tsCompressFloatImp(const char *const input, const int32_t nelements, char *const output);
extern int32_t tsDecompressFloatImp(const char *const input, const int32_t nelements, char *const output);
2021-06-28 09:51:57 +00:00
// lossy
2022-02-28 02:47:44 +00:00
// Lossy float/double codec routines, declared here and defined elsewhere.
// Unlike the lossless decompressors, the lossy decompressors also take the
// compressed size. The wrappers in this header only call them when TD_TSZ
// is defined.
extern int32_t tsCompressFloatLossyImp(const char *input, const int32_t nelements, char *const output);
extern int32_t tsDecompressFloatLossyImp(const char *input, int32_t compressedSize, const int32_t nelements,
char *const output);
extern int32_t tsCompressDoubleLossyImp(const char *input, const int32_t nelements, char *const output);
extern int32_t tsDecompressDoubleLossyImp(const char *input, int32_t compressedSize, const int32_t nelements,
char *const output);
2020-04-30 07:07:51 +00:00
2021-07-19 12:22:34 +00:00
#ifdef TD_TSZ
// Switches read by tsCompressFloat / tsCompressDouble: when set, those wrappers
// call the lossy codec instead of the lossless one.
extern bool lossyFloat;
extern bool lossyDouble;

// Declared with (void) so that C compilers see a real prototype and reject
// calls that pass arguments; an empty () list leaves the parameters unchecked.
int32_t tsCompressInit(void);
void    tsCompressExit(void);
#endif
2021-07-02 02:08:31 +00:00
2022-02-28 02:47:44 +00:00
// Compresses TINYINT-typed input according to `algorithm`.
//   ONE_STAGE_COMP: tsCompressINTImp writes straight into `output`.
//   TWO_STAGE_COMP: tsCompressINTImp writes into `buffer`; its return value is
//                   then used as the input size for tsCompressStringImp, which
//                   writes into `output` (outputSize is forwarded to it).
// Returns the result of the last stage that ran. Any other algorithm trips the
// assert and yields -1. `inputSize` and `bufferSize` are not used here.
static FORCE_INLINE int32_t tsCompressTinyint(const char *const input, int32_t inputSize, const int32_t nelements,
                                              char *const output, int32_t outputSize, char algorithm,
                                              char *const buffer, int32_t bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_TINYINT);
    case TWO_STAGE_COMP: {
      const int32_t stageOneSize = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_TINYINT);
      return tsCompressStringImp(buffer, stageOneSize, output, outputSize);
    }
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Decompresses TINYINT data according to `algorithm`.
//   ONE_STAGE_COMP: tsDecompressINTImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` (compressedSize)
//                   into `buffer` (bufferSize); a negative result aborts with -1,
//                   otherwise tsDecompressINTImp decodes `buffer` into `output`.
// Any other algorithm trips the assert and yields -1. `outputSize` is not used here.
static FORCE_INLINE int32_t tsDecompressTinyint(const char *const input, int32_t compressedSize,
                                                const int32_t nelements, char *const output, int32_t outputSize,
                                                char algorithm, char *const buffer, int32_t bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_TINYINT);
    case TWO_STAGE_COMP:
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) {
        return -1;
      }
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_TINYINT);
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Compresses SMALLINT-typed input according to `algorithm`.
//   ONE_STAGE_COMP: tsCompressINTImp writes straight into `output`.
//   TWO_STAGE_COMP: tsCompressINTImp writes into `buffer`; its return value is
//                   then used as the input size for tsCompressStringImp, which
//                   writes into `output` (outputSize is forwarded to it).
// Returns the result of the last stage that ran. Any other algorithm trips the
// assert and yields -1. `inputSize` and `bufferSize` are not used here.
static FORCE_INLINE int32_t tsCompressSmallint(const char *const input, int32_t inputSize, const int32_t nelements,
                                               char *const output, int32_t outputSize, char algorithm,
                                               char *const buffer, int32_t bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_SMALLINT);
    case TWO_STAGE_COMP: {
      const int32_t stageOneSize = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_SMALLINT);
      return tsCompressStringImp(buffer, stageOneSize, output, outputSize);
    }
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Decompresses SMALLINT data according to `algorithm`.
//   ONE_STAGE_COMP: tsDecompressINTImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` (compressedSize)
//                   into `buffer` (bufferSize); a negative result aborts with -1,
//                   otherwise tsDecompressINTImp decodes `buffer` into `output`.
// Any other algorithm trips the assert and yields -1. `outputSize` is not used here.
static FORCE_INLINE int32_t tsDecompressSmallint(const char *const input, int32_t compressedSize,
                                                 const int32_t nelements, char *const output, int32_t outputSize,
                                                 char algorithm, char *const buffer, int32_t bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_SMALLINT);
    case TWO_STAGE_COMP:
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) {
        return -1;
      }
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_SMALLINT);
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Compresses INT-typed input according to `algorithm`.
//   ONE_STAGE_COMP: tsCompressINTImp writes straight into `output`.
//   TWO_STAGE_COMP: tsCompressINTImp writes into `buffer`; its return value is
//                   then used as the input size for tsCompressStringImp, which
//                   writes into `output` (outputSize is forwarded to it).
// Returns the result of the last stage that ran. Any other algorithm trips the
// assert and yields -1. `inputSize` and `bufferSize` are not used here.
static FORCE_INLINE int32_t tsCompressInt(const char *const input, int32_t inputSize, const int32_t nelements,
                                          char *const output, int32_t outputSize, char algorithm, char *const buffer,
                                          int32_t bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_INT);
    case TWO_STAGE_COMP: {
      const int32_t stageOneSize = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_INT);
      return tsCompressStringImp(buffer, stageOneSize, output, outputSize);
    }
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Decompresses INT data according to `algorithm`.
//   ONE_STAGE_COMP: tsDecompressINTImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` (compressedSize)
//                   into `buffer` (bufferSize); a negative result aborts with -1,
//                   otherwise tsDecompressINTImp decodes `buffer` into `output`.
// Any other algorithm trips the assert and yields -1. `outputSize` is not used here.
static FORCE_INLINE int32_t tsDecompressInt(const char *const input, int32_t compressedSize, const int32_t nelements,
                                            char *const output, int32_t outputSize, char algorithm, char *const buffer,
                                            int32_t bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_INT);
    case TWO_STAGE_COMP:
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) {
        return -1;
      }
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_INT);
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Compresses BIGINT-typed input according to `algorithm`.
//   ONE_STAGE_COMP: tsCompressINTImp writes straight into `output`.
//   TWO_STAGE_COMP: tsCompressINTImp writes into `buffer`; its return value is
//                   then used as the input size for tsCompressStringImp, which
//                   writes into `output` (outputSize is forwarded to it).
// Returns the result of the last stage that ran. Any other algorithm trips the
// assert and yields -1. `inputSize` and `bufferSize` are not used here.
static FORCE_INLINE int32_t tsCompressBigint(const char *const input, int32_t inputSize, const int32_t nelements,
                                             char *const output, int32_t outputSize, char algorithm, char *const buffer,
                                             int32_t bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_BIGINT);
    case TWO_STAGE_COMP: {
      const int32_t stageOneSize = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_BIGINT);
      return tsCompressStringImp(buffer, stageOneSize, output, outputSize);
    }
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Decompresses BIGINT data according to `algorithm`.
//   ONE_STAGE_COMP: tsDecompressINTImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` (compressedSize)
//                   into `buffer` (bufferSize); a negative result aborts with -1,
//                   otherwise tsDecompressINTImp decodes `buffer` into `output`.
// Any other algorithm trips the assert and yields -1. `outputSize` is not used here.
static FORCE_INLINE int32_t tsDecompressBigint(const char *const input, int32_t compressedSize, const int32_t nelements,
                                               char *const output, int32_t outputSize, char algorithm,
                                               char *const buffer, int32_t bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_BIGINT);
    case TWO_STAGE_COMP:
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) {
        return -1;
      }
      return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_BIGINT);
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Compresses boolean input according to `algorithm`.
//   ONE_STAGE_COMP: tsCompressBoolImp writes straight into `output`.
//   TWO_STAGE_COMP: tsCompressBoolImp writes into `buffer`; its return value is
//                   then used as the input size for tsCompressStringImp, which
//                   writes into `output` (outputSize is forwarded to it).
// Returns the result of the last stage that ran. Any other algorithm trips the
// assert and yields -1. `inputSize` and `bufferSize` are not used here.
static FORCE_INLINE int32_t tsCompressBool(const char *const input, int32_t inputSize, const int32_t nelements,
                                           char *const output, int32_t outputSize, char algorithm, char *const buffer,
                                           int32_t bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressBoolImp(input, nelements, output);
    case TWO_STAGE_COMP: {
      const int32_t stageOneSize = tsCompressBoolImp(input, nelements, buffer);
      return tsCompressStringImp(buffer, stageOneSize, output, outputSize);
    }
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Decompresses boolean data according to `algorithm`.
//   ONE_STAGE_COMP: tsDecompressBoolImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` (compressedSize)
//                   into `buffer` (bufferSize); a negative result aborts with -1,
//                   otherwise tsDecompressBoolImp decodes `buffer` into `output`.
// Any other algorithm trips the assert and yields -1. `outputSize` is not used here.
static FORCE_INLINE int32_t tsDecompressBool(const char *const input, int32_t compressedSize, const int32_t nelements,
                                             char *const output, int32_t outputSize, char algorithm, char *const buffer,
                                             int32_t bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressBoolImp(input, nelements, output);
    case TWO_STAGE_COMP:
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) {
        return -1;
      }
      return tsDecompressBoolImp(buffer, nelements, output);
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Compresses string data by handing `input`/`inputSize` and `output`/`outputSize`
// to tsCompressStringImp and returning its result unchanged.
// The signature mirrors the other tsCompress* wrappers; `nelements`, `algorithm`,
// `buffer` and `bufferSize` play no role here.
static FORCE_INLINE int32_t tsCompressString(const char *const input, int32_t inputSize, const int32_t nelements,
                                             char *const output, int32_t outputSize, char algorithm, char *const buffer,
                                             int32_t bufferSize) {
  (void)nelements;
  (void)algorithm;
  (void)buffer;
  (void)bufferSize;

  return tsCompressStringImp(input, inputSize, output, outputSize);
}
2022-02-28 02:47:44 +00:00
// Decompresses string data by handing `input`/`compressedSize` and
// `output`/`outputSize` to tsDecompressStringImp and returning its result unchanged.
// The signature mirrors the other tsDecompress* wrappers; `nelements`, `algorithm`,
// `buffer` and `bufferSize` play no role here.
static FORCE_INLINE int32_t tsDecompressString(const char *const input, int32_t compressedSize, const int32_t nelements,
                                               char *const output, int32_t outputSize, char algorithm,
                                               char *const buffer, int32_t bufferSize) {
  (void)nelements;
  (void)algorithm;
  (void)buffer;
  (void)bufferSize;

  return tsDecompressStringImp(input, compressedSize, output, outputSize);
}
2022-02-28 02:47:44 +00:00
// Compresses float input.
// With TD_TSZ defined and `lossyFloat` set, tsCompressFloatLossyImp is used and
// `algorithm` is not consulted. Otherwise the lossless path applies:
//   ONE_STAGE_COMP: tsCompressFloatImp writes straight into `output`.
//   TWO_STAGE_COMP: tsCompressFloatImp writes into `buffer`; its return value is
//                   then used as the input size for tsCompressStringImp, which
//                   writes into `output` (outputSize is forwarded to it).
// Any other algorithm trips the assert and yields -1.
// `inputSize` and `bufferSize` are not used here.
static FORCE_INLINE int32_t tsCompressFloat(const char *const input, int32_t inputSize, const int32_t nelements,
                                            char *const output, int32_t outputSize, char algorithm, char *const buffer,
                                            int32_t bufferSize) {
#ifdef TD_TSZ
  // lossy mode
  if (lossyFloat) {
    return tsCompressFloatLossyImp(input, nelements, output);
  }
#endif

  // lossless mode
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressFloatImp(input, nelements, output);
    case TWO_STAGE_COMP: {
      const int32_t stageOneSize = tsCompressFloatImp(input, nelements, buffer);
      return tsCompressStringImp(buffer, stageOneSize, output, outputSize);
    }
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Decompresses float data.
// With TD_TSZ defined, the first input byte is inspected: if its algorithm field
// (HEAD_ALGO) equals ALGO_SZ_LOSSY, tsDecompressFloatLossyImp handles the data.
// Otherwise the lossless path applies:
//   ONE_STAGE_COMP: tsDecompressFloatImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` (compressedSize)
//                   into `buffer` (bufferSize); a negative result aborts with -1,
//                   otherwise tsDecompressFloatImp decodes `buffer` into `output`.
// Any other algorithm trips the assert and yields -1. `outputSize` is not used here.
static FORCE_INLINE int32_t tsDecompressFloat(const char *const input, int32_t compressedSize, const int32_t nelements,
                                              char *const output, int32_t outputSize, char algorithm,
                                              char *const buffer, int32_t bufferSize) {
#ifdef TD_TSZ
  // decompress lossy
  if (HEAD_ALGO(input[0]) == ALGO_SZ_LOSSY) {
    return tsDecompressFloatLossyImp(input, compressedSize, nelements, output);
  }
#endif

  // decompress lossless
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressFloatImp(input, nelements, output);
    case TWO_STAGE_COMP:
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) {
        return -1;
      }
      return tsDecompressFloatImp(buffer, nelements, output);
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Compresses double input.
// With TD_TSZ defined and `lossyDouble` set, tsCompressDoubleLossyImp is used and
// `algorithm` is not consulted. Otherwise the lossless path applies:
//   ONE_STAGE_COMP: tsCompressDoubleImp writes straight into `output`.
//   TWO_STAGE_COMP: tsCompressDoubleImp writes into `buffer`; its return value is
//                   then used as the input size for tsCompressStringImp, which
//                   writes into `output` (outputSize is forwarded to it).
// Any other algorithm trips the assert and yields -1.
// `inputSize` and `bufferSize` are not used here.
static FORCE_INLINE int32_t tsCompressDouble(const char *const input, int32_t inputSize, const int32_t nelements,
                                             char *const output, int32_t outputSize, char algorithm, char *const buffer,
                                             int32_t bufferSize) {
#ifdef TD_TSZ
  // lossy mode
  if (lossyDouble) {
    return tsCompressDoubleLossyImp(input, nelements, output);
  }
#endif

  // lossless mode
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressDoubleImp(input, nelements, output);
    case TWO_STAGE_COMP: {
      const int32_t stageOneSize = tsCompressDoubleImp(input, nelements, buffer);
      return tsCompressStringImp(buffer, stageOneSize, output, outputSize);
    }
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Decompresses double data.
// With TD_TSZ defined, the first input byte is inspected: if its algorithm field
// (HEAD_ALGO) equals ALGO_SZ_LOSSY, tsDecompressDoubleLossyImp handles the data.
// Otherwise the lossless path applies:
//   ONE_STAGE_COMP: tsDecompressDoubleImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` (compressedSize)
//                   into `buffer` (bufferSize); a negative result aborts with -1,
//                   otherwise tsDecompressDoubleImp decodes `buffer` into `output`.
// Any other algorithm trips the assert and yields -1. `outputSize` is not used here.
static FORCE_INLINE int32_t tsDecompressDouble(const char *const input, int32_t compressedSize, const int32_t nelements,
                                               char *const output, int32_t outputSize, char algorithm,
                                               char *const buffer, int32_t bufferSize) {
#ifdef TD_TSZ
  // decompress lossy
  if (HEAD_ALGO(input[0]) == ALGO_SZ_LOSSY) {
    return tsDecompressDoubleLossyImp(input, compressedSize, nelements, output);
  }
#endif

  // decompress lossless
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressDoubleImp(input, nelements, output);
    case TWO_STAGE_COMP:
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) {
        return -1;
      }
      return tsDecompressDoubleImp(buffer, nelements, output);
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
#ifdef TD_TSZ
2021-06-28 09:51:57 +00:00
//
// lossy float double
//
2022-02-28 02:47:44 +00:00
// Lossy float compression with the common tsCompress* wrapper signature.
// Forwards `input`, `nelements` and `output` to tsCompressFloatLossyImp and
// returns its result; the remaining parameters are not used.
static FORCE_INLINE int32_t tsCompressFloatLossy(const char *const input, int32_t inputSize, const int32_t nelements,
                                                 char *const output, int32_t outputSize, char algorithm,
                                                 char *const buffer, int32_t bufferSize) {
  (void)inputSize;
  (void)outputSize;
  (void)algorithm;
  (void)buffer;
  (void)bufferSize;

  return tsCompressFloatLossyImp(input, nelements, output);
}
2021-06-24 11:58:45 +00:00
2022-02-28 02:47:44 +00:00
// Lossy float decompression with the common tsDecompress* wrapper signature.
// Forwards `input`, `compressedSize`, `nelements` and `output` to
// tsDecompressFloatLossyImp and returns its result; the remaining parameters
// are not used.
static FORCE_INLINE int32_t tsDecompressFloatLossy(const char *const input, int32_t compressedSize,
                                                   const int32_t nelements, char *const output, int32_t outputSize,
                                                   char algorithm, char *const buffer, int32_t bufferSize) {
  (void)outputSize;
  (void)algorithm;
  (void)buffer;
  (void)bufferSize;

  return tsDecompressFloatLossyImp(input, compressedSize, nelements, output);
}
2021-06-24 11:58:45 +00:00
2022-02-28 02:47:44 +00:00
// Lossy double compression with the common tsCompress* wrapper signature.
// Forwards `input`, `nelements` and `output` to tsCompressDoubleLossyImp and
// returns its result; the remaining parameters are not used.
static FORCE_INLINE int32_t tsCompressDoubleLossy(const char *const input, int32_t inputSize, const int32_t nelements,
                                                  char *const output, int32_t outputSize, char algorithm,
                                                  char *const buffer, int32_t bufferSize) {
  (void)inputSize;
  (void)outputSize;
  (void)algorithm;
  (void)buffer;
  (void)bufferSize;

  return tsCompressDoubleLossyImp(input, nelements, output);
}
2021-06-24 11:58:45 +00:00
2022-02-28 02:47:44 +00:00
// Lossy double decompression with the common tsDecompress* wrapper signature.
// Forwards `input`, `compressedSize`, `nelements` and `output` to
// tsDecompressDoubleLossyImp and returns its result; the remaining parameters
// are not used.
static FORCE_INLINE int32_t tsDecompressDoubleLossy(const char *const input, int32_t compressedSize,
                                                    const int32_t nelements, char *const output, int32_t outputSize,
                                                    char algorithm, char *const buffer, int32_t bufferSize) {
  (void)outputSize;
  (void)algorithm;
  (void)buffer;
  (void)bufferSize;

  return tsDecompressDoubleLossyImp(input, compressedSize, nelements, output);
}
2021-06-24 11:58:45 +00:00
#endif
2021-06-24 11:58:45 +00:00
2022-02-28 02:47:44 +00:00
// Compresses timestamp input according to `algorithm`.
//   ONE_STAGE_COMP: tsCompressTimestampImp writes straight into `output`.
//   TWO_STAGE_COMP: tsCompressTimestampImp writes into `buffer`; its return value
//                   is then used as the input size for tsCompressStringImp, which
//                   writes into `output` (outputSize is forwarded to it).
// Returns the result of the last stage that ran. Any other algorithm trips the
// assert and yields -1. `inputSize` and `bufferSize` are not used here.
static FORCE_INLINE int32_t tsCompressTimestamp(const char *const input, int32_t inputSize, const int32_t nelements,
                                                char *const output, int32_t outputSize, char algorithm,
                                                char *const buffer, int32_t bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsCompressTimestampImp(input, nelements, output);
    case TWO_STAGE_COMP: {
      const int32_t stageOneSize = tsCompressTimestampImp(input, nelements, buffer);
      return tsCompressStringImp(buffer, stageOneSize, output, outputSize);
    }
    default:
      assert(0);
      return -1;
  }
}
2022-02-28 02:47:44 +00:00
// Decompresses timestamp data according to `algorithm`.
//   ONE_STAGE_COMP: tsDecompressTimestampImp decodes `input` directly into `output`.
//   TWO_STAGE_COMP: tsDecompressStringImp first expands `input` (compressedSize)
//                   into `buffer` (bufferSize); a negative result aborts with -1,
//                   otherwise tsDecompressTimestampImp decodes `buffer` into `output`.
// Any other algorithm trips the assert and yields -1. `outputSize` is not used here.
static FORCE_INLINE int32_t tsDecompressTimestamp(const char *const input, int32_t compressedSize,
                                                  const int32_t nelements, char *const output, int32_t outputSize,
                                                  char algorithm, char *const buffer, int32_t bufferSize) {
  switch (algorithm) {
    case ONE_STAGE_COMP:
      return tsDecompressTimestampImp(input, nelements, output);
    case TWO_STAGE_COMP:
      if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) {
        return -1;
      }
      return tsDecompressTimestampImp(buffer, nelements, output);
    default:
      assert(0);
      return -1;
  }
}
2019-07-11 08:36:16 +00:00
#ifdef __cplusplus
}
#endif
2022-02-28 02:47:44 +00:00
#endif /*_TD_UTIL_COMPRESSION_H_*/