TDengine/include/util/tcompression.h

369 lines
15 KiB
C
Raw Normal View History

2019-07-11 08:36:16 +00:00
/*
* Copyright (c) 2019 TAOS Data, Inc. <jhtao@taosdata.com>
*
* This program is free software: you can use, redistribute, and/or modify
* it under the terms of the GNU Affero General Public License, version 3
* or later ("AGPL"), as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
2021-10-04 12:42:53 +00:00
#ifndef _TD_UTIL_COMPRESSION_H
#define _TD_UTIL_COMPRESSION_H
2019-07-11 08:36:16 +00:00
#ifdef __cplusplus
extern "C" {
#endif
2021-09-30 13:21:51 +00:00
#include "tdef.h"
2020-04-30 07:07:51 +00:00
#include "tutil.h"
2019-07-11 08:36:16 +00:00
2020-04-27 09:40:57 +00:00
#define COMP_OVERFLOW_BYTES 2
#define BITS_PER_BYTE 8

// Masks: a value whose low (_x) bits are set.
// The argument is parenthesized so compound expressions such as INT32MASK(a | b)
// are shifted as a whole. (_x) must be smaller than the bit width of the shifted
// operand; a larger shift count is undefined behavior in C.
#define INT64MASK(_x) ((((uint64_t)1) << (_x)) - 1)
#define INT32MASK(_x) (((uint32_t)1 << (_x)) - 1)
#define INT8MASK(_x) (((uint8_t)1 << (_x)) - 1)

// Compression algorithm
#define NO_COMPRESSION 0
#define ONE_STAGE_COMP 1
#define TWO_STAGE_COMP 2

//
// compressed data first byte format
// ------ 7 bit ---- | ---- 1 bit ----
//      algorithm           mode
//
// compression data mode: stored in the lowest 1 bit of the first byte
#define MODE_NOCOMPRESS 0  // original data
#define MODE_COMPRESS 1    // compatible with the old compression
// compression algorithm: stored in the higher 7 bits of the first byte
#define ALGO_SZ_LOSSY 1  // SZ compress

// Extract mode / algorithm from the first byte. Both the argument and the whole
// expansion are parenthesized so the macros are safe inside larger expressions.
#define HEAD_MODE(x) ((x) % 2)
#define HEAD_ALGO(x) ((x) / 2)
2020-04-30 07:07:51 +00:00
// Low-level codec entry points; the definitions live outside this header.
// The inline wrappers in this header use the int result of a compress call as a
// byte length (it is passed on as the input size of tsCompressStringImp) and treat
// a negative result of tsDecompressStringImp as failure.
extern int tsCompressINTImp(const char *const input, const int nelements, char *const output, const char type);
extern int tsDecompressINTImp(const char *const input, const int nelements, char *const output, const char type);
extern int tsCompressBoolImp(const char *const input, const int nelements, char *const output);
extern int tsDecompressBoolImp(const char *const input, const int nelements, char *const output);
extern int tsCompressStringImp(const char *const input, int inputSize, char *const output, int outputSize);
extern int tsDecompressStringImp(const char *const input, int compressedSize, char *const output, int outputSize);
extern int tsCompressTimestampImp(const char *const input, const int nelements, char *const output);
extern int tsDecompressTimestampImp(const char *const input, const int nelements, char *const output);
extern int tsCompressDoubleImp(const char *const input, const int nelements, char *const output);
extern int tsDecompressDoubleImp(const char *const input, const int nelements, char *const output);
extern int tsCompressFloatImp(const char *const input, const int nelements, char *const output);
extern int tsDecompressFloatImp(const char *const input, const int nelements, char *const output);

// lossy variants for float / double; the decompress side also needs the compressed size
extern int tsCompressFloatLossyImp(const char * input, const int nelements, char *const output);
extern int tsDecompressFloatLossyImp(const char * input, int compressedSize, const int nelements, char *const output);
extern int tsCompressDoubleLossyImp(const char * input, const int nelements, char *const output);
extern int tsDecompressDoubleLossyImp(const char * input, int compressedSize, const int nelements, char *const output);
2020-04-30 07:07:51 +00:00
2021-07-19 12:22:34 +00:00
#ifdef TD_TSZ
// Global switches read by tsCompressFloat / tsCompressDouble: when set, those
// wrappers route the data to the lossy implementation.
extern bool lossyFloat;
extern bool lossyDouble;
// init call (explicit void: takes no arguments)
int tsCompressInit(void);
// exit call (explicit void: takes no arguments)
void tsCompressExit(void);
#endif
2021-07-02 02:08:31 +00:00
2020-04-30 07:07:51 +00:00
/**
 * Compress a TINYINT column.
 *
 * ONE_STAGE_COMP: integer-encode `input` straight into `output`.
 * TWO_STAGE_COMP: integer-encode into the scratch `buffer`, then run the result
 *                 through tsCompressStringImp into `output` (bounded by outputSize).
 *
 * inputSize and bufferSize are accepted for signature uniformity but not used here.
 *
 * @return the value returned by the last stage that ran; -1 (after assert(0))
 *         when `algorithm` is neither of the two supported values.
 */
static FORCE_INLINE int tsCompressTinyint(const char *const input, int inputSize, const int nelements, char *const output,
                                          int outputSize, char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_TINYINT);
  }
  if (algorithm == TWO_STAGE_COMP) {
    int len = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_TINYINT);
    return tsCompressStringImp(buffer, len, output, outputSize);
  }
  assert(0);  // unsupported algorithm
  return -1;
}
/**
 * Decompress a TINYINT column.
 *
 * ONE_STAGE_COMP: integer-decode `input` straight into `output`.
 * TWO_STAGE_COMP: first undo the string stage into the scratch `buffer`
 *                 (bounded by bufferSize), then integer-decode `buffer` into `output`.
 *
 * outputSize is accepted for signature uniformity but not used here.
 *
 * @return the value returned by tsDecompressINTImp; -1 if the string stage
 *         reports a negative result or `algorithm` is unsupported (after assert(0)).
 */
static FORCE_INLINE int tsDecompressTinyint(const char *const input, int compressedSize, const int nelements, char *const output,
                                            int outputSize, char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_TINYINT);
  }
  if (algorithm == TWO_STAGE_COMP) {
    if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
    return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_TINYINT);
  }
  assert(0);  // unsupported algorithm
  return -1;
}
/**
 * Compress a SMALLINT column.
 *
 * ONE_STAGE_COMP: integer-encode `input` straight into `output`.
 * TWO_STAGE_COMP: integer-encode into the scratch `buffer`, then run the result
 *                 through tsCompressStringImp into `output` (bounded by outputSize).
 *
 * inputSize and bufferSize are accepted for signature uniformity but not used here.
 *
 * @return the value returned by the last stage that ran; -1 (after assert(0))
 *         when `algorithm` is neither of the two supported values.
 */
static FORCE_INLINE int tsCompressSmallint(const char *const input, int inputSize, const int nelements, char *const output,
                                           int outputSize, char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_SMALLINT);
  }
  if (algorithm == TWO_STAGE_COMP) {
    int len = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_SMALLINT);
    return tsCompressStringImp(buffer, len, output, outputSize);
  }
  assert(0);  // unsupported algorithm
  return -1;
}
/**
 * Decompress a SMALLINT column.
 *
 * ONE_STAGE_COMP: integer-decode `input` straight into `output`.
 * TWO_STAGE_COMP: first undo the string stage into the scratch `buffer`
 *                 (bounded by bufferSize), then integer-decode `buffer` into `output`.
 *
 * outputSize is accepted for signature uniformity but not used here.
 *
 * @return the value returned by tsDecompressINTImp; -1 if the string stage
 *         reports a negative result or `algorithm` is unsupported (after assert(0)).
 */
static FORCE_INLINE int tsDecompressSmallint(const char *const input, int compressedSize, const int nelements, char *const output,
                                             int outputSize, char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_SMALLINT);
  }
  if (algorithm == TWO_STAGE_COMP) {
    if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
    return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_SMALLINT);
  }
  assert(0);  // unsupported algorithm
  return -1;
}
/**
 * Compress an INT column.
 *
 * ONE_STAGE_COMP: integer-encode `input` straight into `output`.
 * TWO_STAGE_COMP: integer-encode into the scratch `buffer`, then run the result
 *                 through tsCompressStringImp into `output` (bounded by outputSize).
 *
 * inputSize and bufferSize are accepted for signature uniformity but not used here.
 *
 * @return the value returned by the last stage that ran; -1 (after assert(0))
 *         when `algorithm` is neither of the two supported values.
 */
static FORCE_INLINE int tsCompressInt(const char *const input, int inputSize, const int nelements, char *const output,
                                      int outputSize, char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_INT);
  }
  if (algorithm == TWO_STAGE_COMP) {
    int len = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_INT);
    return tsCompressStringImp(buffer, len, output, outputSize);
  }
  assert(0);  // unsupported algorithm
  return -1;
}
/**
 * Decompress an INT column.
 *
 * ONE_STAGE_COMP: integer-decode `input` straight into `output`.
 * TWO_STAGE_COMP: first undo the string stage into the scratch `buffer`
 *                 (bounded by bufferSize), then integer-decode `buffer` into `output`.
 *
 * outputSize is accepted for signature uniformity but not used here.
 *
 * @return the value returned by tsDecompressINTImp; -1 if the string stage
 *         reports a negative result or `algorithm` is unsupported (after assert(0)).
 */
static FORCE_INLINE int tsDecompressInt(const char *const input, int compressedSize, const int nelements, char *const output,
                                        int outputSize, char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_INT);
  }
  if (algorithm == TWO_STAGE_COMP) {
    if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
    return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_INT);
  }
  assert(0);  // unsupported algorithm
  return -1;
}
/**
 * Compress a BIGINT column.
 *
 * ONE_STAGE_COMP: integer-encode `input` straight into `output`.
 * TWO_STAGE_COMP: integer-encode into the scratch `buffer`, then run the result
 *                 through tsCompressStringImp into `output` (bounded by outputSize).
 *
 * inputSize and bufferSize are accepted for signature uniformity but not used here.
 *
 * @return the value returned by the last stage that ran; -1 (after assert(0))
 *         when `algorithm` is neither of the two supported values.
 */
static FORCE_INLINE int tsCompressBigint(const char *const input, int inputSize, const int nelements, char *const output,
                                         int outputSize, char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsCompressINTImp(input, nelements, output, TSDB_DATA_TYPE_BIGINT);
  }
  if (algorithm == TWO_STAGE_COMP) {
    int len = tsCompressINTImp(input, nelements, buffer, TSDB_DATA_TYPE_BIGINT);
    return tsCompressStringImp(buffer, len, output, outputSize);
  }
  assert(0);  // unsupported algorithm
  return -1;
}
/**
 * Decompress a BIGINT column.
 *
 * ONE_STAGE_COMP: integer-decode `input` straight into `output`.
 * TWO_STAGE_COMP: first undo the string stage into the scratch `buffer`
 *                 (bounded by bufferSize), then integer-decode `buffer` into `output`.
 *
 * outputSize is accepted for signature uniformity but not used here.
 *
 * @return the value returned by tsDecompressINTImp; -1 if the string stage
 *         reports a negative result or `algorithm` is unsupported (after assert(0)).
 */
static FORCE_INLINE int tsDecompressBigint(const char *const input, int compressedSize, const int nelements, char *const output,
                                           int outputSize, char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsDecompressINTImp(input, nelements, output, TSDB_DATA_TYPE_BIGINT);
  }
  if (algorithm == TWO_STAGE_COMP) {
    if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
    return tsDecompressINTImp(buffer, nelements, output, TSDB_DATA_TYPE_BIGINT);
  }
  assert(0);  // unsupported algorithm
  return -1;
}
/**
 * Compress a BOOL column.
 *
 * ONE_STAGE_COMP: bool-encode `input` straight into `output`.
 * TWO_STAGE_COMP: bool-encode into the scratch `buffer`, then run the result
 *                 through tsCompressStringImp into `output` (bounded by outputSize).
 *
 * inputSize and bufferSize are accepted for signature uniformity but not used here.
 *
 * @return the value returned by the last stage that ran; -1 (after assert(0))
 *         when `algorithm` is neither of the two supported values.
 */
static FORCE_INLINE int tsCompressBool(const char *const input, int inputSize, const int nelements, char *const output,
                                       int outputSize, char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsCompressBoolImp(input, nelements, output);
  }
  if (algorithm == TWO_STAGE_COMP) {
    int len = tsCompressBoolImp(input, nelements, buffer);
    return tsCompressStringImp(buffer, len, output, outputSize);
  }
  assert(0);  // unsupported algorithm
  return -1;
}
/**
 * Decompress a BOOL column.
 *
 * ONE_STAGE_COMP: bool-decode `input` straight into `output`.
 * TWO_STAGE_COMP: first undo the string stage into the scratch `buffer`
 *                 (bounded by bufferSize), then bool-decode `buffer` into `output`.
 *
 * outputSize is accepted for signature uniformity but not used here.
 *
 * @return the value returned by tsDecompressBoolImp; -1 if the string stage
 *         reports a negative result or `algorithm` is unsupported (after assert(0)).
 */
static FORCE_INLINE int tsDecompressBool(const char *const input, int compressedSize, const int nelements, char *const output,
                                         int outputSize, char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsDecompressBoolImp(input, nelements, output);
  }
  if (algorithm == TWO_STAGE_COMP) {
    if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
    return tsDecompressBoolImp(buffer, nelements, output);
  }
  assert(0);  // unsupported algorithm
  return -1;
}
/**
 * Compress a string/binary column.
 *
 * Thin wrapper: forwards input/inputSize/output/outputSize to tsCompressStringImp.
 * nelements, algorithm, buffer and bufferSize are ignored; they exist so the
 * signature matches the other tsCompress* wrappers.
 *
 * @return whatever tsCompressStringImp returns.
 */
static FORCE_INLINE int tsCompressString(const char *const input, int inputSize, const int nelements,
                                         char *const output, int outputSize, char algorithm,
                                         char *const buffer, int bufferSize) {
  const int result = tsCompressStringImp(input, inputSize, output, outputSize);
  return result;
}
/**
 * Decompress a string/binary column.
 *
 * Thin wrapper: forwards input/compressedSize/output/outputSize to
 * tsDecompressStringImp. nelements, algorithm, buffer and bufferSize are ignored;
 * they exist so the signature matches the other tsDecompress* wrappers.
 *
 * @return whatever tsDecompressStringImp returns.
 */
static FORCE_INLINE int tsDecompressString(const char *const input, int compressedSize, const int nelements,
                                           char *const output, int outputSize, char algorithm,
                                           char *const buffer, int bufferSize) {
  const int result = tsDecompressStringImp(input, compressedSize, output, outputSize);
  return result;
}
static FORCE_INLINE int tsCompressFloat(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
char algorithm, char *const buffer, int bufferSize) {
2021-07-19 12:22:34 +00:00
#ifdef TD_TSZ
2021-07-02 02:08:31 +00:00
// lossy mode
2021-07-07 12:39:49 +00:00
if(lossyFloat) {
2021-07-02 02:08:31 +00:00
return tsCompressFloatLossyImp(input, nelements, output);
// lossless mode
2020-04-30 07:07:51 +00:00
} else {
#endif
2021-07-02 02:08:31 +00:00
if (algorithm == ONE_STAGE_COMP) {
return tsCompressFloatImp(input, nelements, output);
} else if (algorithm == TWO_STAGE_COMP) {
int len = tsCompressFloatImp(input, nelements, buffer);
2021-07-21 06:51:20 +00:00
return tsCompressStringImp(buffer, len, output, outputSize);
2021-07-02 02:08:31 +00:00
} else {
assert(0);
return -1;
}
2021-07-19 12:22:34 +00:00
#ifdef TD_TSZ
2020-04-30 07:07:51 +00:00
}
#endif
2020-04-30 07:07:51 +00:00
}
static FORCE_INLINE int tsDecompressFloat(const char *const input, int compressedSize, const int nelements, char *const output,
int outputSize, char algorithm, char *const buffer, int bufferSize) {
2021-07-19 12:22:34 +00:00
#ifdef TD_TSZ
2021-07-02 02:08:31 +00:00
if(HEAD_ALGO(input[0]) == ALGO_SZ_LOSSY){
// decompress lossy
return tsDecompressFloatLossyImp(input, compressedSize, nelements, output);
2020-04-30 07:07:51 +00:00
} else {
#endif
2021-07-02 02:08:31 +00:00
// decompress lossless
if (algorithm == ONE_STAGE_COMP) {
return tsDecompressFloatImp(input, nelements, output);
} else if (algorithm == TWO_STAGE_COMP) {
if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
return tsDecompressFloatImp(buffer, nelements, output);
} else {
assert(0);
return -1;
}
2021-07-19 12:22:34 +00:00
#ifdef TD_TSZ
2020-04-30 07:07:51 +00:00
}
#endif
2020-04-30 07:07:51 +00:00
}
2020-04-30 07:07:51 +00:00
static FORCE_INLINE int tsCompressDouble(const char *const input, int inputSize, const int nelements, char *const output, int outputSize,
char algorithm, char *const buffer, int bufferSize) {
2021-07-19 12:22:34 +00:00
#ifdef TD_TSZ
2021-07-07 12:39:49 +00:00
if(lossyDouble){
2021-07-02 02:08:31 +00:00
// lossy mode
return tsCompressDoubleLossyImp(input, nelements, output);
2020-04-30 07:07:51 +00:00
} else {
#endif
2021-07-02 02:08:31 +00:00
// lossless mode
if (algorithm == ONE_STAGE_COMP) {
return tsCompressDoubleImp(input, nelements, output);
} else if (algorithm == TWO_STAGE_COMP) {
int len = tsCompressDoubleImp(input, nelements, buffer);
return tsCompressStringImp(buffer, len, output, outputSize);
} else {
assert(0);
return -1;
}
2021-07-19 12:22:34 +00:00
#ifdef TD_TSZ
2020-04-30 07:07:51 +00:00
}
#endif
2020-04-30 07:07:51 +00:00
}
static FORCE_INLINE int tsDecompressDouble(const char *const input, int compressedSize, const int nelements, char *const output,
int outputSize, char algorithm, char *const buffer, int bufferSize) {
2021-07-19 12:22:34 +00:00
#ifdef TD_TSZ
2021-07-02 02:08:31 +00:00
if(HEAD_ALGO(input[0]) == ALGO_SZ_LOSSY){
// decompress lossy
return tsDecompressDoubleLossyImp(input, compressedSize, nelements, output);
2020-04-30 07:07:51 +00:00
} else {
#endif
2021-07-02 02:08:31 +00:00
// decompress lossless
if (algorithm == ONE_STAGE_COMP) {
return tsDecompressDoubleImp(input, nelements, output);
} else if (algorithm == TWO_STAGE_COMP) {
if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
return tsDecompressDoubleImp(buffer, nelements, output);
} else {
assert(0);
return -1;
}
2021-07-19 12:22:34 +00:00
#ifdef TD_TSZ
2020-04-30 07:07:51 +00:00
}
#endif
2020-04-30 07:07:51 +00:00
}
2021-07-19 12:22:34 +00:00
#ifdef TD_TSZ
2021-06-28 09:51:57 +00:00
//
// lossy float double
//
2021-06-24 11:58:45 +00:00
/**
 * Lossy-compress a FLOAT column unconditionally.
 *
 * Forwards input/nelements/output to tsCompressFloatLossyImp. inputSize,
 * outputSize, algorithm, buffer and bufferSize are ignored; they exist so the
 * signature matches the other tsCompress* wrappers.
 *
 * @return whatever tsCompressFloatLossyImp returns.
 */
static FORCE_INLINE int tsCompressFloatLossy(const char *const input, int inputSize, const int nelements, char *const output,
                                             int outputSize, char algorithm, char *const buffer, int bufferSize) {
  return tsCompressFloatLossyImp(input, nelements, output);
}
2021-06-24 11:58:45 +00:00
/**
 * Decompress a lossy-compressed FLOAT column unconditionally.
 *
 * Forwards input/compressedSize/nelements/output to tsDecompressFloatLossyImp.
 * outputSize, algorithm, buffer and bufferSize are ignored; they exist so the
 * signature matches the other tsDecompress* wrappers.
 *
 * @return whatever tsDecompressFloatLossyImp returns.
 */
static FORCE_INLINE int tsDecompressFloatLossy(const char *const input, int compressedSize, const int nelements, char *const output,
                                               int outputSize, char algorithm, char *const buffer, int bufferSize) {
  return tsDecompressFloatLossyImp(input, compressedSize, nelements, output);
}
2021-06-24 11:58:45 +00:00
/**
 * Lossy-compress a DOUBLE column unconditionally.
 *
 * Forwards input/nelements/output to tsCompressDoubleLossyImp. inputSize,
 * outputSize, algorithm, buffer and bufferSize are ignored; they exist so the
 * signature matches the other tsCompress* wrappers.
 *
 * @return whatever tsCompressDoubleLossyImp returns.
 */
static FORCE_INLINE int tsCompressDoubleLossy(const char *const input, int inputSize, const int nelements, char *const output,
                                              int outputSize, char algorithm, char *const buffer, int bufferSize) {
  return tsCompressDoubleLossyImp(input, nelements, output);
}
2021-06-24 11:58:45 +00:00
/**
 * Decompress a lossy-compressed DOUBLE column unconditionally.
 *
 * Forwards input/compressedSize/nelements/output to tsDecompressDoubleLossyImp.
 * outputSize, algorithm, buffer and bufferSize are ignored; they exist so the
 * signature matches the other tsDecompress* wrappers.
 *
 * @return whatever tsDecompressDoubleLossyImp returns.
 */
static FORCE_INLINE int tsDecompressDoubleLossy(const char *const input, int compressedSize, const int nelements, char *const output,
                                                int outputSize, char algorithm, char *const buffer, int bufferSize) {
  return tsDecompressDoubleLossyImp(input, compressedSize, nelements, output);
}
2021-06-24 11:58:45 +00:00
#endif
2021-06-24 11:58:45 +00:00
2020-04-30 07:07:51 +00:00
/**
 * Compress a TIMESTAMP column.
 *
 * ONE_STAGE_COMP: timestamp-encode `input` straight into `output`.
 * TWO_STAGE_COMP: timestamp-encode into the scratch `buffer`, then run the result
 *                 through tsCompressStringImp into `output` (bounded by outputSize).
 *
 * inputSize and bufferSize are accepted for signature uniformity but not used here.
 *
 * @return the value returned by the last stage that ran; -1 (after assert(0))
 *         when `algorithm` is neither of the two supported values.
 */
static FORCE_INLINE int tsCompressTimestamp(const char *const input, int inputSize, const int nelements, char *const output,
                                            int outputSize, char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsCompressTimestampImp(input, nelements, output);
  }
  if (algorithm == TWO_STAGE_COMP) {
    int len = tsCompressTimestampImp(input, nelements, buffer);
    return tsCompressStringImp(buffer, len, output, outputSize);
  }
  assert(0);  // unsupported algorithm
  return -1;
}
/**
 * Decompress a TIMESTAMP column.
 *
 * ONE_STAGE_COMP: timestamp-decode `input` straight into `output`.
 * TWO_STAGE_COMP: first undo the string stage into the scratch `buffer`
 *                 (bounded by bufferSize), then timestamp-decode `buffer` into `output`.
 *
 * outputSize is accepted for signature uniformity but not used here.
 *
 * @return the value returned by tsDecompressTimestampImp; -1 if the string stage
 *         reports a negative result or `algorithm` is unsupported (after assert(0)).
 */
static FORCE_INLINE int tsDecompressTimestamp(const char *const input, int compressedSize, const int nelements, char *const output,
                                              int outputSize, char algorithm, char *const buffer, int bufferSize) {
  if (algorithm == ONE_STAGE_COMP) {
    return tsDecompressTimestampImp(input, nelements, output);
  }
  if (algorithm == TWO_STAGE_COMP) {
    if (tsDecompressStringImp(input, compressedSize, buffer, bufferSize) < 0) return -1;
    return tsDecompressTimestampImp(buffer, nelements, output);
  }
  assert(0);  // unsupported algorithm
  return -1;
}
2019-07-11 08:36:16 +00:00
#ifdef __cplusplus
}
#endif
2021-10-04 12:42:53 +00:00
#endif /*_TD_UTIL_COMPRESSION_H*/