Merge pull request #18 from yuzupy/size_estimation

Size estimation
This commit is contained in:
Julien Rouhaud 2018-04-19 08:19:11 +02:00 committed by GitHub
commit e1007c32ee
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 195 additions and 78 deletions

View file

@ -356,16 +356,16 @@ hypo_get_relation_info_hook(PlannerInfo *root,
*/
static void
hypo_set_rel_pathlist_hook(PlannerInfo *root,
RelOptInfo *rel,
Index rti,
RangeTblEntry *rte)
RelOptInfo *rel,
Index rti,
RangeTblEntry *rte)
{
/*
* When HYPO_ENABLED() holds and the RTE is a relkind 'r' relation whose
* relid is hypothetical, run the hypopg-specific handling for it first.
*/
if(HYPO_ENABLED() && hypo_table_oid_is_hypothetical(rte->relid) &&
rte->relkind == 'r')
hypo_markDummyIfExcluded(root,rel,rti,rte);
if(HYPO_ENABLED() && hypo_table_oid_is_hypothetical(rte->relid)
&& rte->relkind == 'r')
hypo_setPartitionPathlist(root,rel,rti,rte);
/* then hand over to the previously installed set_rel_pathlist hook */
if (prev_set_rel_pathlist_hook)
prev_set_rel_pathlist_hook(root, rel, rti, rte);
prev_set_rel_pathlist_hook(root, rel, rti, rte);
}

View file

@ -51,7 +51,7 @@
#include "include/hypopg_import.h"
int max_parallel_workers_per_gather = 2;
@ -1565,4 +1565,60 @@ make_inh_translation_list(Relation oldrelation, Relation newrelation,
*translated_vars = vars;
}
/*
 * Copied from src/backend/optimizer/path/allpaths.c, not exported
 *
 * set_plain_rel_pathlist
 *    Generate the candidate access paths for an ordinary relation, i.e. one
 *    that is neither a subquery nor an inheritance parent.
 */
void
set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
    /*
     * Join clauses are never pushed into the quals of a seqscan, yet the
     * scan can still require parameterization because of LATERAL references
     * in the relation's tlist.
     */
    Relids      lateral_outer = rel->lateral_relids;

    /* A sequential scan is always a candidate. */
    add_path(rel, create_seqscan_path(root, rel, lateral_outer, 0));

    /* Try a parallel sequential scan only when no outer rels are required. */
    if (lateral_outer == NULL && rel->consider_parallel)
        create_plain_partial_paths(root, rel);

    /* Index scans come next ... */
    create_index_paths(root, rel);

    /* ... followed by TID scans. */
    create_tidscan_paths(root, rel);
}
/*
 * Copied from src/backend/optimizer/path/allpaths.c, not exported
 *
 * create_plain_partial_paths
 *    Add the partial (parallel-scan) access path of a plain relation.
 */
void
create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel)
{
    int         nworkers = compute_parallel_worker(rel, rel->pages, -1,
                                                   max_parallel_workers_per_gather);

    /*
     * A non-positive worker count means some limit was set to zero, i.e. the
     * user doesn't want a parallel scan; otherwise add an unordered partial
     * path based on a parallel sequential scan.
     */
    if (nworkers > 0)
        add_partial_path(rel, create_seqscan_path(root, rel, NULL, nworkers));
}
#endif

View file

@ -35,8 +35,11 @@
#include "nodes/nodes.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/predtest.h"
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/var.h"
#include "parser/parsetree.h"
#include "parser/parse_utilcmd.h"
#include "rewrite/rewriteManip.h"
@ -1848,99 +1851,153 @@ hypo_injectHypotheticalPartitioning(PlannerInfo *root,
pcinfo->parent_relid = oldsize;
pcinfo->child_rels = partitioned_child_rels;
root->pcinfo_list = lappend(root->pcinfo_list, pcinfo);
/* add partition info to this rel */
hypo_partition_table(root, rel, parent);
}
/*
* if this rel is partition, rewrite tuples and pages using selectivity
* which is computed according to its partition constraints
/*
* If this rel is a partition, we add the partition constraints to
* rte->securityQuals so that a relation which need not be scanned
* is marked as dummy in set_append_rel_size() and rel->rows is
* computed correctly in set_baserel_size_estimates(). We shouldn't
* rewrite rel->pages and rel->tuples here, because they will be
* rewritten in a later hook.
*
* TODO: should confirm that the tuples will not be referred to until
* set_baserel_size_estimates(), and think about rel->reltarget->width
*
*/
if (rel->reloptkind != RELOPT_BASEREL
&&HYPO_RTI_IS_TAGGED(rel->relid,root))
{
&&HYPO_RTI_IS_TAGGED(rel->relid,root))
{
List *constraints;
PlannerInfo *root_dummy;
Selectivity selectivity;
/* get its partition constraints */
constraints = hypo_get_partition_constraints(root, rel, parent);
/*
* to compute selectivity, make dummy PlannerInfo and then rewrite
* tuples and pages using this selectivity
* to compute rel->rows at set_baserel_size_estimates using parent's
* statistics, parent's tuples and baserestrictinfo, we add the partition
* constraints to its rte->securityQuals
*/
root_dummy = makeNode(PlannerInfo);
root_dummy = root;
root_dummy->simple_rel_array[rel->relid] = rel;
selectivity = clauselist_selectivity(root_dummy,
constraints,
0,
JOIN_INNER,
NULL);
rel->pages = rint(rel->pages * selectivity);
rel->tuples = clamp_row_est(rel->tuples * selectivity);
}
planner_rt_fetch(rel->relid, root)->securityQuals = list_make1(constraints);
}
}
/*
* If this rel need not be scanned, we have to mark it as dummy to omit it
* from the appendrel
* If this rel is a partition, we remove the partition constraints from
* its rel->baserestrictinfo and rewrite some items of its RelOptInfo:
* rel->pages, rel->tuples and rel->baserestrictcost. After that
* we call set_plain_rel_pathlist() to re-create its pathlist using
* the new RelOptInfo.
*
* It is inspired by relation_excluded_by_constraints
*/
void hypo_markDummyIfExcluded(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
void hypo_setPartitionPathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
hypoTable *parent;
List *constraints;
List *safe_constraints = NIL;
ListCell *lc;
ListCell *l;
Index parentRTindex;
RelOptInfo *parentrel;
hypoTable *parent = hypo_find_table(rte->relid);
List *constraints = hypo_get_partition_constraints(root, rel, parent);
PlannerInfo *root_dummy;
Selectivity selectivity;
double pages;
Assert(HYPO_ENABLED());
Assert(hypo_table_oid_is_hypothetical(rte->relid));
Assert(rte->relkind == 'r');
parent = hypo_find_table(rte->relid);
/* get its partition constraints */
constraints = hypo_get_partition_constraints(root, rel, parent);
/*
* We do not currently enforce that CHECK constraints contain only
* immutable functions, so it's necessary to check here. We daren't draw
* conclusions from plan-time evaluation of non-immutable functions. Since
* they're ANDed, we can just ignore any mutable constraints in the list,
* and reason about the rest.
/*
* Get the parent's rel and copy its rel->baserestrictinfo to
* this rel's own rel->baserestrictinfo.
* This part is inspired by set_append_rel_size().
*/
foreach(lc, constraints)
foreach(l, root->append_rel_list)
{
Node *pred = (Node *) lfirst(lc);
if (!contain_mutable_functions(pred))
safe_constraints = lappend(safe_constraints, pred);
}
/* if this partition need not be scanned, we call set_dummy_rel_pathlist()
* to mark it as dummy */
if (predicate_refuted_by(safe_constraints, rel->baserestrictinfo, false))
set_dummy_rel_pathlist(rel);
/*
TODO: re-estimate parent size just like set_append_rel_size()
*/
AppendRelInfo *appinfo = (AppendRelInfo *)lfirst(l);
List *childquals = NIL;
/* start at the maximum so that Min() below keeps the lowest level seen */
Index cq_min_security = UINT_MAX;
ListCell *lc;
/* only the AppendRelInfo whose child is this rel (rti) is processed */
if(appinfo->child_relid == rti)
{
parentRTindex = appinfo->parent_relid;
parentrel = root->simple_rel_array[parentRTindex];
foreach(lc, parentrel->baserestrictinfo)
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
Node *childqual;
ListCell *lc2;
Assert(IsA(rinfo, RestrictInfo));
/* translate the parent's clause for this child, then simplify it */
childqual = adjust_appendrel_attrs(root,
(Node *) rinfo->clause,
1, &appinfo);
childqual = eval_const_expressions(root, childqual);
/* might have gotten an AND clause, if so flatten it */
foreach(lc2, make_ands_implicit((Expr *) childqual))
{
Node *onecq = (Node *) lfirst(lc2);
bool pseudoconstant;
/* check for pseudoconstant (no Vars or volatile functions) */
pseudoconstant =
!contain_vars_of_level(onecq, 0) &&
!contain_volatile_functions(onecq);
if (pseudoconstant)
{
/* tell createplan.c to check for gating quals */
root->hasPseudoConstantQuals = true;
}
/* reconstitute RestrictInfo with appropriate properties */
childquals = lappend(childquals,
make_restrictinfo((Expr *) onecq,
rinfo->is_pushed_down,
rinfo->outerjoin_delayed,
pseudoconstant,
rinfo->security_level,
NULL, NULL, NULL));
/* track minimum security level among child quals */
cq_min_security = Min(cq_min_security, rinfo->security_level);
}
}
/* replace this rel's restriction list with the translated parent quals */
rel->baserestrictinfo = childquals;
rel->baserestrict_min_security = cq_min_security;
/* the matching entry has been handled; stop scanning append_rel_list */
break;
}
}
/*
* make dummy PlannerInfo to compute the selectivity, and then rewrite
* tuples and pages using this selectivity
*
* NOTE(review): the node allocated by makeNode() below is immediately
* replaced by the assignment root_dummy = root, so root_dummy aliases
* root and the simple_rel_array[rti] store writes into root itself.
*/
root_dummy = makeNode(PlannerInfo);
root_dummy = root;
root_dummy->simple_rel_array[rti] = rel;
selectivity = clauselist_selectivity(root_dummy,
constraints,
0,
JOIN_INNER,
NULL);
/* round the scaled page count up, then narrow it to BlockNumber */
pages = ceil(rel->pages * selectivity);
rel->pages = (BlockNumber)pages;
rel->tuples = clamp_row_est(rel->tuples * selectivity);
/* recompute the rel->baserestrictcost */
cost_qual_eval(&rel->baserestrictcost, rel->baserestrictinfo, root);
/*
* call set_plain_rel_pathlist() to re-create its pathlist using
* the new RelOptInfo
*/
set_plain_rel_pathlist(root, rel, rte);
}
/*
* If this is the table we want to hypothetically partition, modifies its
* metadata to add partitioning information

View file

@ -14,6 +14,7 @@
#include "nodes/pg_list.h"
#include "optimizer/planner.h"
#include "optimizer/pathnode.h"
#include "utils/rel.h"
@ -132,7 +133,8 @@ List *get_range_nulltest(PartitionKey key);
void make_inh_translation_list(Relation oldrelation, Relation newrelation,
Index newvarno,
List **translated_vars);
void set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte);
void create_plain_partial_paths(PlannerInfo *root, RelOptInfo *rel);
/* Copied from src/backend/catalog/partition.c, not exported */
#define partition_bound_accepts_nulls(bi) ((bi)->null_index != -1)

View file

@ -73,5 +73,7 @@ void hypo_injectHypotheticalPartitioning(PlannerInfo *root,
RelOptInfo *rel);
void hypo_markDummyIfExcluded(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
void hypo_setPartitionPathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte);
#endif