From 1627c6baa603e2d44b0cb62562bf655ab974ec52 Mon Sep 17 00:00:00 2001 From: Lukas Fittl Date: Fri, 22 Apr 2022 17:36:56 -0700 Subject: [PATCH] hypo_estimate_index: Fix size estimation of IndexTupleData/ItemIdData The previous code assumed that both IndexTupleData and ItemIdData exist once per index key (by multiplying the struct size with entry->ncolumns), when in fact these structs only exist once per index tuple. This would have resulted in assuming a larger than actual average index tuple, and ultimately a larger hypothetical index size. In passing, rewrite an adjacent comment that explains this calculation for easier readability, and note that Postgres 13 deduplication is not yet considered. --- hypopg_index.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/hypopg_index.c b/hypopg_index.c index bf7ca38..ec5b387 100644 --- a/hypopg_index.c +++ b/hypopg_index.c @@ -1760,23 +1760,28 @@ hypo_estimate_index(hypoIndex * entry, RelOptInfo *rel) if (entry->relam == BTREE_AM_OID) { /* ------------------------------- - * quick estimating of index size: + * quick estimating of index size. Each B-tree index tuple contains: * - * sizeof(PageHeader) : 24 (1 per page) - * sizeof(BTPageOpaqueData): 16 (1 per page) - * sizeof(IndexTupleData): 8 (1 per tuple, referencing heap) - * sizeof(ItemIdData): 4 (1 per tuple, storing the index item) - * default fillfactor: 90% - * no NULL handling + * - sizeof(ItemIdData): 4 (after page header, references index tuple) + * - sizeof(IndexTupleData): 8 (index tuple header, includes heap TID) + * - actual data size based on each key's average attribute width + * + * For this estimation it is assumed all values are not NULL. 
+ * + * Additionally, the following data is present once in each page: + * + * - SizeOfPageHeaderData: 24 + * - sizeof(BTPageOpaqueData): 16 + * + * For calculating the fill of index pages, this uses: + * + * fillfactor parameter, or default fillfactor (90%) * fixed additional bloat: 20% * - * I'll also need to read more carefully nbtree code to check if - * this is accurate enough. - * + * Note that this does not yet consider B-tree deduplication added in + * Postgres 13. */ - line_size = ind_avg_width + - +(sizeof(IndexTupleData) * entry->ncolumns) - + MAXALIGN(sizeof(ItemIdData) * entry->ncolumns); + line_size = ind_avg_width + MAXALIGN(sizeof(IndexTupleData)) + sizeof(ItemIdData); usable_page_size = BLCKSZ - SizeOfPageHeaderData - sizeof(BTPageOpaqueData); bloat_factor = (200.0