Merge pull request #28 from yuzupy/hash_estimate

Fix for hash partitioning estimate
This commit is contained in:
Julien Rouhaud 2018-10-10 07:11:09 +02:00 committed by GitHub
commit 5c5dac6dd4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 76 additions and 51 deletions

View file

@ -327,6 +327,8 @@ SELECT * FROM hypopg_analyze('hypo_part_list',100);
(1 row)
SELECT * FROM hypopg_analyze('hypo_part_hash',100);
ERROR: hypopg: hypopg_analyze() on hypothetical hash partitioning is not supported
SELECT * FROM hypopg_analyze('hypo_part_multi',100);
hypopg_analyze
----------------
@ -962,33 +964,33 @@ EXPLAIN (COSTS OFF) SELECT * FROM part_list t1, hypo_part_list t2 WHERE t1.id_ke
(12 rows)
EXPLAIN (COSTS OFF) SELECT * FROM part_hash t1, hypo_part_hash t2 WHERE t1.id = t2.id;
QUERY PLAN
---------------------------------------------------------
QUERY PLAN
---------------------------------------------------------------
Hash Join
Hash Cond: (hypo_part_hash_0.id = t1.id)
Hash Cond: (t1.id = hypo_part_hash_0.id)
-> Append
-> Seq Scan on hypo_part_hash hypo_part_hash_0
-> Seq Scan on hypo_part_hash hypo_part_hash_1
-> Seq Scan on hypo_part_hash hypo_part_hash_2
-> Seq Scan on hypo_part_hash hypo_part_hash_3
-> Seq Scan on hypo_part_hash hypo_part_hash_4
-> Seq Scan on hypo_part_hash hypo_part_hash_5
-> Seq Scan on hypo_part_hash hypo_part_hash_6
-> Seq Scan on hypo_part_hash hypo_part_hash_7
-> Seq Scan on hypo_part_hash hypo_part_hash_8
-> Seq Scan on hypo_part_hash hypo_part_hash_9
-> Seq Scan on part_hash_0 t1
-> Seq Scan on part_hash_1 t1_1
-> Seq Scan on part_hash_2 t1_2
-> Seq Scan on part_hash_3 t1_3
-> Seq Scan on part_hash_4 t1_4
-> Seq Scan on part_hash_5 t1_5
-> Seq Scan on part_hash_6 t1_6
-> Seq Scan on part_hash_7 t1_7
-> Seq Scan on part_hash_8 t1_8
-> Seq Scan on part_hash_9 t1_9
-> Hash
-> Append
-> Seq Scan on part_hash_0 t1
-> Seq Scan on part_hash_1 t1_1
-> Seq Scan on part_hash_2 t1_2
-> Seq Scan on part_hash_3 t1_3
-> Seq Scan on part_hash_4 t1_4
-> Seq Scan on part_hash_5 t1_5
-> Seq Scan on part_hash_6 t1_6
-> Seq Scan on part_hash_7 t1_7
-> Seq Scan on part_hash_8 t1_8
-> Seq Scan on part_hash_9 t1_9
-> Seq Scan on hypo_part_hash hypo_part_hash_0
-> Seq Scan on hypo_part_hash hypo_part_hash_1
-> Seq Scan on hypo_part_hash hypo_part_hash_2
-> Seq Scan on hypo_part_hash hypo_part_hash_3
-> Seq Scan on hypo_part_hash hypo_part_hash_4
-> Seq Scan on hypo_part_hash hypo_part_hash_5
-> Seq Scan on hypo_part_hash hypo_part_hash_6
-> Seq Scan on hypo_part_hash hypo_part_hash_7
-> Seq Scan on hypo_part_hash hypo_part_hash_8
-> Seq Scan on hypo_part_hash hypo_part_hash_9
(25 rows)
EXPLAIN (COSTS OFF) SELECT * FROM part_multi t1, hypo_part_multi t2 WHERE t1.dpt = t2.dpt and t1.dpt = 2;
@ -1191,8 +1193,8 @@ EXPLAIN (COSTS OFF) SELECT * FROM part_range t1, hypo_part_range t2 WHERE t1.id
(13 rows)
EXPLAIN (COSTS OFF) SELECT * FROM part_hash t1, hypo_part_hash t2 WHERE t1.id = t2.id;
QUERY PLAN
---------------------------------------------------------
QUERY PLAN
---------------------------------------------------------------
Append
-> Hash Join
Hash Cond: (hypo_part_hash_0.id = t1.id)
@ -1200,35 +1202,35 @@ EXPLAIN (COSTS OFF) SELECT * FROM part_hash t1, hypo_part_hash t2 WHERE t1.id =
-> Hash
-> Seq Scan on part_hash_0 t1
-> Hash Join
Hash Cond: (hypo_part_hash_1.id = t1_1.id)
-> Seq Scan on hypo_part_hash hypo_part_hash_1
Hash Cond: (t1_1.id = hypo_part_hash_1.id)
-> Seq Scan on part_hash_1 t1_1
-> Hash
-> Seq Scan on part_hash_1 t1_1
-> Seq Scan on hypo_part_hash hypo_part_hash_1
-> Hash Join
Hash Cond: (hypo_part_hash_2.id = t1_2.id)
-> Seq Scan on hypo_part_hash hypo_part_hash_2
Hash Cond: (t1_2.id = hypo_part_hash_2.id)
-> Seq Scan on part_hash_2 t1_2
-> Hash
-> Seq Scan on part_hash_2 t1_2
-> Seq Scan on hypo_part_hash hypo_part_hash_2
-> Hash Join
Hash Cond: (hypo_part_hash_3.id = t1_3.id)
-> Seq Scan on hypo_part_hash hypo_part_hash_3
-> Hash
-> Seq Scan on part_hash_3 t1_3
-> Hash Join
Hash Cond: (hypo_part_hash_4.id = t1_4.id)
-> Seq Scan on hypo_part_hash hypo_part_hash_4
Hash Cond: (t1_4.id = hypo_part_hash_4.id)
-> Seq Scan on part_hash_4 t1_4
-> Hash
-> Seq Scan on part_hash_4 t1_4
-> Seq Scan on hypo_part_hash hypo_part_hash_4
-> Hash Join
Hash Cond: (hypo_part_hash_5.id = t1_5.id)
-> Seq Scan on hypo_part_hash hypo_part_hash_5
-> Hash
-> Seq Scan on part_hash_5 t1_5
-> Hash Join
Hash Cond: (hypo_part_hash_6.id = t1_6.id)
-> Seq Scan on hypo_part_hash hypo_part_hash_6
Hash Cond: (t1_6.id = hypo_part_hash_6.id)
-> Seq Scan on part_hash_6 t1_6
-> Hash
-> Seq Scan on part_hash_6 t1_6
-> Seq Scan on hypo_part_hash hypo_part_hash_6
-> Hash Join
Hash Cond: (hypo_part_hash_7.id = t1_7.id)
-> Seq Scan on hypo_part_hash hypo_part_hash_7
@ -1318,14 +1320,14 @@ EXPLAIN (COSTS OFF) SELECT * FROM hypo_part_range WHERE id = 42;
-- =====================
-- simple UPDATE and DELETE on hypothetically partitioned table
EXPLAIN (COSTS OFF) UPDATE hypo_part_range set id = id;
ERROR: hypopg: UPDATE and DELETE on hypothetical partitioned tables are not supported
ERROR: hypopg: UPDATE and DELETE on hypothetically partitioned tables are not supported
EXPLAIN DELETE FROM hypo_part_range WHERE id = 42;
ERROR: hypopg: UPDATE and DELETE on hypothetical partitioned tables are not supported
ERROR: hypopg: UPDATE and DELETE on hypothetically partitioned tables are not supported
-- UPDATE and DELETE on hypothetically partitioned table inside CTE
EXPLAIN (COSTS OFF) WITH s AS (UPDATE hypo_part_range set id = id returning *) SELECT 1;
ERROR: hypopg: UPDATE and DELETE on hypothetical partitioned tables are not supported
ERROR: hypopg: UPDATE and DELETE on hypothetically partitioned tables are not supported
EXPLAIN (COSTS OFF) WITH s AS (DELETE FROM hypo_part_range WHERE id = 42 returning *) SELECT 1;
ERROR: hypopg: UPDATE and DELETE on hypothetical partitioned tables are not supported
ERROR: hypopg: UPDATE and DELETE on hypothetically partitioned tables are not supported
-- UPDATE and DELETE involving hypothetically partitioned table, but on regular
-- tables
CREATE TABLE foo(id integer);

View file

@ -43,6 +43,7 @@
#include "utils/guc.h"
#include "utils/lsyscache.h"
#if PG_VERSION_NUM >= 100000
#include "utils/partcache.h"
#include "utils/ruleutils.h"
#endif
#include "utils/selfuncs.h"
@ -568,6 +569,9 @@ HYPO_PARTITION_NOT_SUPPORTED();
|| fraction <= 0 || fraction > 100)
elog(ERROR, "hypopg: invalid fraction: %f", fraction);
if (root_entry->partkey->strategy == PARTITION_STRATEGY_HASH)
elog(ERROR, "hypopg: hypopg_analyze() on hypothetical hash partitioning is not supported");
/* Connect to SPI manager */
if ((ret = SPI_connect()) < 0)
/* internal error */

View file

@ -1996,22 +1996,25 @@ hypo_injectHypotheticalPartitioning(PlannerInfo *root,
}
/*
* If this rel is partitioned by hash, we should rewrite the rel->pages
* and the rel->pages here according to the number of partitions.
* If this rel is a partition, we will estimate pages and tuples according
* to its partition bound and root table's pages and tuples.
*
* If this rel is partitioned list or range, we add the partition constraints
* to the rte->securityQuals so that the rel->rows is computed correctly at
* the set_baserel_size_estimates(). We shouldn't rewrite the rel->pages
* and the rel->tuples here, because they will be rewritten at the later hook.
* In the case of RANGE/LIST partitioning, we will compute selectivity
* according to the partition constraints including its ancestors'.
* On the other hand, in the case of HASH partitioning, we will multiply
* the number of partitions including its ancestors'. After that we will
* compute pages and tuples using the selectivity and the product of the
* number of partitions.
*/
if (rel->reloptkind != RELOPT_BASEREL
&&HYPO_RTI_IS_TAGGED(rel->relid,root))
{
Oid partoid;
Oid partoid, parentid;
hypoTable *part;
RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
Selectivity selectivity;
double pages;
int nparts = 1;
Assert(rte->values_lists);
partoid = linitial_oid(rte->values_lists);
@ -2039,9 +2042,23 @@ hypo_injectHypotheticalPartitioning(PlannerInfo *root,
root)->values_lists), false)->tablename,
selectivity);
pages = ceil(rel->pages * selectivity);
/* retrieve the all its ancestors' nparts and multiply them */
parentid = part->parentid;
do
{
hypoTable *parent = hypo_find_table(parentid, false);
if (parent->partkey->strategy == PARTITION_STRATEGY_HASH)
{
PartitionDesc partdesc = hypo_generate_partitiondesc(parent);
nparts *= partdesc->nparts;
}
parentid = parent->parentid;
} while (OidIsValid(parentid));
/* compute pages and tuples using selectivity and nparts */
pages = ceil(rel->pages * selectivity / nparts);
rel->pages = (BlockNumber) pages;
rel->tuples = clamp_row_est(rel->tuples * selectivity);
rel->tuples = clamp_row_est(rel->tuples * selectivity / nparts);
}
/*
@ -2191,7 +2208,8 @@ hypo_get_qual_from_partbound(hypoTable *parent, PartitionBoundSpec *spec)
case PARTITION_STRATEGY_HASH:
Assert(spec->strategy == PARTITION_STRATEGY_HASH);
my_qual = hypo_get_qual_for_hash(parent, spec);
//my_qual = hypo_get_qual_for_hash(parent, spec);
/* Do not add the list */
break;
case PARTITION_STRATEGY_LIST:

View file

@ -117,6 +117,7 @@ SELECT tablename FROM hypopg_add_partition('hypo_part_multi_3_q4', 'PARTITION OF
VACUUM ANALYZE;
SELECT * FROM hypopg_analyze('hypo_part_range',100);
SELECT * FROM hypopg_analyze('hypo_part_list',100);
SELECT * FROM hypopg_analyze('hypo_part_hash',100);
SELECT * FROM hypopg_analyze('hypo_part_multi',100);