Merge pull request #28 from yuzupy/hash_estimate

Fix for hash partitioning estimate
2026-05-24 09:38:21 +00:00 · 2018-10-10 07:11:09 +02:00 · 2018-10-10 07:11:09 +02:00 · 5c5dac6dd4
commit 5c5dac6dd4
parent 33ea28b538 473749cc41
4 changed files with 76 additions and 51 deletions
--- a/expected/hypo_table.out
+++ b/expected/hypo_table.out
@ -327,6 +327,8 @@ SELECT * FROM hypopg_analyze('hypo_part_list',100);
 
 (1 row)

+SELECT * FROM hypopg_analyze('hypo_part_hash',100);
+ERROR:  hypopg: hypopg_analyze() on hypothetical hash partitioning is not supported
 SELECT * FROM hypopg_analyze('hypo_part_multi',100);
 hypopg_analyze 
 ----------------
@ -962,33 +964,33 @@ EXPLAIN (COSTS OFF) SELECT * FROM part_list t1, hypo_part_list t2 WHERE t1.id_ke
 (12 rows)

 EXPLAIN (COSTS OFF) SELECT * FROM part_hash t1, hypo_part_hash t2 WHERE t1.id = t2.id;
-                       QUERY PLAN                        
---------------------------------------------------------
+                          QUERY PLAN                           
+---------------------------------------------------------------
 Hash Join
-   Hash Cond: (hypo_part_hash_0.id = t1.id)
+   Hash Cond: (t1.id = hypo_part_hash_0.id)
   ->  Append
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_0
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_1
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_2
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_3
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_4
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_5
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_6
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_7
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_8
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_9
+         ->  Seq Scan on part_hash_0 t1
+         ->  Seq Scan on part_hash_1 t1_1
+         ->  Seq Scan on part_hash_2 t1_2
+         ->  Seq Scan on part_hash_3 t1_3
+         ->  Seq Scan on part_hash_4 t1_4
+         ->  Seq Scan on part_hash_5 t1_5
+         ->  Seq Scan on part_hash_6 t1_6
+         ->  Seq Scan on part_hash_7 t1_7
+         ->  Seq Scan on part_hash_8 t1_8
+         ->  Seq Scan on part_hash_9 t1_9
   ->  Hash
         ->  Append
-               ->  Seq Scan on part_hash_0 t1
-               ->  Seq Scan on part_hash_1 t1_1
-               ->  Seq Scan on part_hash_2 t1_2
-               ->  Seq Scan on part_hash_3 t1_3
-               ->  Seq Scan on part_hash_4 t1_4
-               ->  Seq Scan on part_hash_5 t1_5
-               ->  Seq Scan on part_hash_6 t1_6
-               ->  Seq Scan on part_hash_7 t1_7
-               ->  Seq Scan on part_hash_8 t1_8
-               ->  Seq Scan on part_hash_9 t1_9
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_0
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_1
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_2
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_3
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_4
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_5
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_6
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_7
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_8
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_9
 (25 rows)

 EXPLAIN (COSTS OFF) SELECT * FROM part_multi t1, hypo_part_multi t2 WHERE t1.dpt = t2.dpt and t1.dpt = 2;
@ -1191,8 +1193,8 @@ EXPLAIN (COSTS OFF) SELECT * FROM part_range t1, hypo_part_range t2 WHERE t1.id
 (13 rows)

 EXPLAIN (COSTS OFF) SELECT * FROM part_hash t1, hypo_part_hash t2 WHERE t1.id = t2.id;
-                       QUERY PLAN                        
---------------------------------------------------------
+                          QUERY PLAN                           
+---------------------------------------------------------------
 Append
   ->  Hash Join
         Hash Cond: (hypo_part_hash_0.id = t1.id)
@ -1200,35 +1202,35 @@ EXPLAIN (COSTS OFF) SELECT * FROM part_hash t1, hypo_part_hash t2 WHERE t1.id =
         ->  Hash
               ->  Seq Scan on part_hash_0 t1
   ->  Hash Join
-         Hash Cond: (hypo_part_hash_1.id = t1_1.id)
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_1
+         Hash Cond: (t1_1.id = hypo_part_hash_1.id)
+         ->  Seq Scan on part_hash_1 t1_1
         ->  Hash
-               ->  Seq Scan on part_hash_1 t1_1
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_1
   ->  Hash Join
-         Hash Cond: (hypo_part_hash_2.id = t1_2.id)
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_2
+         Hash Cond: (t1_2.id = hypo_part_hash_2.id)
+         ->  Seq Scan on part_hash_2 t1_2
         ->  Hash
-               ->  Seq Scan on part_hash_2 t1_2
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_2
   ->  Hash Join
         Hash Cond: (hypo_part_hash_3.id = t1_3.id)
         ->  Seq Scan on hypo_part_hash hypo_part_hash_3
         ->  Hash
               ->  Seq Scan on part_hash_3 t1_3
   ->  Hash Join
-         Hash Cond: (hypo_part_hash_4.id = t1_4.id)
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_4
+         Hash Cond: (t1_4.id = hypo_part_hash_4.id)
+         ->  Seq Scan on part_hash_4 t1_4
         ->  Hash
-               ->  Seq Scan on part_hash_4 t1_4
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_4
   ->  Hash Join
         Hash Cond: (hypo_part_hash_5.id = t1_5.id)
         ->  Seq Scan on hypo_part_hash hypo_part_hash_5
         ->  Hash
               ->  Seq Scan on part_hash_5 t1_5
   ->  Hash Join
-         Hash Cond: (hypo_part_hash_6.id = t1_6.id)
-         ->  Seq Scan on hypo_part_hash hypo_part_hash_6
+         Hash Cond: (t1_6.id = hypo_part_hash_6.id)
+         ->  Seq Scan on part_hash_6 t1_6
         ->  Hash
-               ->  Seq Scan on part_hash_6 t1_6
+               ->  Seq Scan on hypo_part_hash hypo_part_hash_6
   ->  Hash Join
         Hash Cond: (hypo_part_hash_7.id = t1_7.id)
         ->  Seq Scan on hypo_part_hash hypo_part_hash_7
@ -1318,14 +1320,14 @@ EXPLAIN (COSTS OFF) SELECT * FROM hypo_part_range WHERE id = 42;
 -- =====================
 -- simple UPDATE and DELETE on hypothetically partitioned table
 EXPLAIN (COSTS OFF) UPDATE hypo_part_range set id = id;
-ERROR:  hypopg: UPDATE and DELETE on hypothetical partitioned tables are not supported
+ERROR:  hypopg: UPDATE and DELETE on hypothetically partitioned tables are not supported
 EXPLAIN DELETE FROM hypo_part_range WHERE id = 42;
-ERROR:  hypopg: UPDATE and DELETE on hypothetical partitioned tables are not supported
+ERROR:  hypopg: UPDATE and DELETE on hypothetically partitioned tables are not supported
 -- UPDATE and DELETE on hypothetically partitioned table inside CTE
 EXPLAIN (COSTS OFF) WITH s AS (UPDATE hypo_part_range set id = id returning *) SELECT 1;
-ERROR:  hypopg: UPDATE and DELETE on hypothetical partitioned tables are not supported
+ERROR:  hypopg: UPDATE and DELETE on hypothetically partitioned tables are not supported
 EXPLAIN (COSTS OFF) WITH s AS (DELETE FROM hypo_part_range WHERE id = 42 returning *) SELECT 1;
-ERROR:  hypopg: UPDATE and DELETE on hypothetical partitioned tables are not supported
+ERROR:  hypopg: UPDATE and DELETE on hypothetically partitioned tables are not supported
 -- UPDATE and DELETE involving hypothetically partitioned table, but on regular
 -- tables
 CREATE TABLE foo(id integer);
--- a/hypopg_analyze.c
+++ b/hypopg_analyze.c
@ -43,6 +43,7 @@
 #include "utils/guc.h"
 #include "utils/lsyscache.h"
 #if PG_VERSION_NUM >= 100000
+#include "utils/partcache.h"
 #include "utils/ruleutils.h"
 #endif
 #include "utils/selfuncs.h"
@ -568,6 +569,9 @@ HYPO_PARTITION_NOT_SUPPORTED();
 			|| fraction <= 0 || fraction > 100)
 		elog(ERROR, "hypopg: invalid fraction: %f", fraction);

+	if (root_entry->partkey->strategy == PARTITION_STRATEGY_HASH)
+		elog(ERROR, "hypopg: hypopg_analyze() on hypothetical hash partitioning is not supported");
+
 	/* Connect to SPI manager */
 	if ((ret = SPI_connect()) < 0)
 		/* internal error */
--- a/hypopg_table.c
+++ b/hypopg_table.c
@ -1996,22 +1996,25 @@ hypo_injectHypotheticalPartitioning(PlannerInfo *root,
 	}

 	/*
-	 * If this rel is partitioned by hash, we should rewrite the rel->pages
-	 * and the rel->pages here according to the number of partitions.
+	 * If this rel is a partition, we will estimate pages and tuples according
+	 * to its partition bound and root table's pages and tuples.
 	 *
-	 * If this rel is partitioned list or range, we add the partition constraints
-	 * to the rte->securityQuals so that the rel->rows is computed correctly at
-	 * the set_baserel_size_estimates(). We shouldn't rewrite the rel->pages
-	 * and the rel->tuples here, because they will be rewritten at the later hook.
+	 * In the case of RANGE/LIST partitioning, we compute the selectivity
+	 * from the partition constraints, including those of its ancestors.
+	 * In the case of HASH partitioning, we instead multiply together the
+	 * number of partitions of every hash-partitioned ancestor.  Finally, the
+	 * pages and tuples are scaled by the selectivity and divided by that
+	 * product.
 	 */
 	if (rel->reloptkind != RELOPT_BASEREL
 		&&HYPO_RTI_IS_TAGGED(rel->relid,root))
 	{
-		Oid partoid;
+		Oid partoid, parentid;
 		hypoTable *part;
 		RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
 		Selectivity selectivity;
 		double pages;
+		int nparts = 1;

 		Assert(rte->values_lists);
 		partoid = linitial_oid(rte->values_lists);
@ -2039,9 +2042,23 @@ hypo_injectHypotheticalPartitioning(PlannerInfo *root,
 							root)->values_lists), false)->tablename,
 				selectivity);

-		pages = ceil(rel->pages * selectivity);
+		/* walk up the ancestors and multiply the nparts of each hash-partitioned one */
+		parentid = part->parentid;
+		do
+		{
+			hypoTable *parent = hypo_find_table(parentid, false);
+			if (parent->partkey->strategy == PARTITION_STRATEGY_HASH)
+			{
+				PartitionDesc partdesc = hypo_generate_partitiondesc(parent);
+				nparts *= partdesc->nparts;
+			}
+			parentid = parent->parentid;
+		} while (OidIsValid(parentid));
+
+		/* compute pages and tuples using selectivity and nparts */
+		pages = ceil(rel->pages * selectivity / nparts);
 		rel->pages = (BlockNumber) pages;
-		rel->tuples = clamp_row_est(rel->tuples * selectivity);
+		rel->tuples = clamp_row_est(rel->tuples * selectivity / nparts);
 	}

 	/*
@ -2191,7 +2208,8 @@ hypo_get_qual_from_partbound(hypoTable *parent, PartitionBoundSpec *spec)

 	case PARTITION_STRATEGY_HASH:
 		Assert(spec->strategy == PARTITION_STRATEGY_HASH);
-		my_qual = hypo_get_qual_for_hash(parent, spec);
+		//my_qual = hypo_get_qual_for_hash(parent, spec);
+		/* Do not add the list */
 		break;

 	case PARTITION_STRATEGY_LIST:
--- a/test/sql/hypo_table.sql
+++ b/test/sql/hypo_table.sql
@ -117,6 +117,7 @@ SELECT tablename FROM hypopg_add_partition('hypo_part_multi_3_q4', 'PARTITION OF
 VACUUM ANALYZE;
 SELECT * FROM hypopg_analyze('hypo_part_range',100);
 SELECT * FROM hypopg_analyze('hypo_part_list',100);
+SELECT * FROM hypopg_analyze('hypo_part_hash',100);
 SELECT * FROM hypopg_analyze('hypo_part_multi',100);