From 01aa5accc84f70e58233100fffeaef2f505b7816 Mon Sep 17 00:00:00 2001 From: yuzupy Date: Tue, 20 Mar 2018 15:12:32 +0000 Subject: [PATCH] Add infrastructure to inject hypothetical partitioning --- hypopg.c | 49 +- hypopg_table.c | 1366 +++++++++++++++++++++++++++++++++++++++- include/hypopg_table.h | 7 + 3 files changed, 1410 insertions(+), 12 deletions(-) diff --git a/hypopg.c b/hypopg.c index a368c8b..a0894a6 100644 --- a/hypopg.c +++ b/hypopg.c @@ -20,6 +20,7 @@ #include "include/hypopg.h" #include "include/hypopg_import.h" #include "include/hypopg_index.h" +#include "include/hypopg_table.h" PG_MODULE_MAGIC; @@ -71,6 +72,12 @@ static void hypo_get_relation_info_hook(PlannerInfo *root, RelOptInfo *rel); static get_relation_info_hook_type prev_get_relation_info_hook = NULL; +static void hypo_set_rel_pathlist_hook(PlannerInfo *root, + RelOptInfo *rel, + Index rti, + RangeTblEntry *rte); +static set_rel_pathlist_hook_type prev_set_rel_pathlist_hook = NULL; + static bool hypo_query_walker(Node *node); void @@ -89,6 +96,9 @@ _PG_init(void) prev_explain_get_index_name_hook = explain_get_index_name_hook; explain_get_index_name_hook = hypo_explain_get_index_name_hook; + prev_set_rel_pathlist_hook = set_rel_pathlist_hook; + set_rel_pathlist_hook = hypo_set_rel_pathlist_hook; + isExplain = false; hypoIndexes = NIL; @@ -124,7 +134,7 @@ _PG_fini(void) ExecutorEnd_hook = prev_ExecutorEnd_hook; get_relation_info_hook = prev_get_relation_info_hook; explain_get_index_name_hook = prev_explain_get_index_name_hook; - + set_rel_pathlist_hook = prev_set_rel_pathlist_hook; } /*--------------------------------- @@ -187,12 +197,12 @@ hypo_utility_hook( { isExplain = query_or_expression_tree_walker( #if PG_VERSION_NUM >= 100000 - (Node *) pstmt, + (Node *) pstmt, #else - parsetree, + parsetree, #endif - hypo_query_walker, - NULL, 0); + hypo_query_walker, + NULL, 0); if (prev_utility_hook) prev_utility_hook( @@ -318,8 +328,8 @@ hypo_get_relation_info_hook(PlannerInfo *root, * hypothetical index found, add it to the relation's * indextlist */ - hypo_injectHypotheticalIndex(root, relationObjectId, - inhparent, rel, relation, entry); + hypo_injectHypotheticalIndex(root, relationObjectId, + inhparent, rel, relation, entry); } } } @@ -328,10 +338,35 @@ hypo_get_relation_info_hook(PlannerInfo *root, heap_close(relation, AccessShareLock); } + + hypo_createHypotheticalTable(root, relationObjectId, rel); + + if (prev_get_relation_info_hook) prev_get_relation_info_hook(root, relationObjectId, inhparent, rel); } +/* + * if this child relation is excluded by constraints, call set_dummy_rel_pathlist + */ +static void +hypo_set_rel_pathlist_hook(PlannerInfo *root, + RelOptInfo *rel, + Index rti, + RangeTblEntry *rte) +{ + if(isExplain && hypo_is_enabled) + { + + hypo_setHypotheticalDummyrel(root,rel,rti,rte); + + } + + if (prev_set_rel_pathlist_hook) + prev_set_rel_pathlist_hook(root, rel, rti, rte); +} + + /* * Reset statistics. */ diff --git a/hypopg_table.c b/hypopg_table.c index 1209120..021dd0b 100644 --- a/hypopg_table.c +++ b/hypopg_table.c @@ -13,8 +13,11 @@ *------------------------------------------------------------------------- */ +#include + #include "postgres.h" #include "fmgr.h" +//#include "port.h" #include "funcapi.h" #include "miscadmin.h" @@ -24,10 +27,18 @@ #include "access/htup_details.h" #include "access/nbtree.h" #include "catalog/namespace.h" +#include "catalog/pg_class.h" #include "catalog/pg_inherits_fn.h" #include "catalog/pg_opclass.h" #include "catalog/pg_type.h" #include "nodes/makefuncs.h" +#include "nodes/relation.h" +#include "nodes/nodes.h" +#include "optimizer/clauses.h" +#include "optimizer/cost.h" +#include "optimizer/predtest.h" +#include "optimizer/prep.h" +#include "parser/parsetree.h" #include "parser/parse_utilcmd.h" #include "rewrite/rewriteManip.h" #include "tcop/utility.h" @@ -81,6 +92,38 @@ static const hypoTable *hypo_table_store_parsetree(CreateStmt *node, const char *queryString, Oid parentid); static PartitionBoundSpec *hypo_transformPartitionBound(ParseState *pstate, hypoTable *parent, PartitionBoundSpec *spec); +static Oid get_partition_operator(PartitionKey key, int col, StrategyNumber strategy, + bool *need_relabel); +static Expr *make_partition_op_expr(PartitionKey key, int keynum, + uint16 strategy, Expr *arg1, Expr *arg2); +static void hypo_partition_table(PlannerInfo *root, RelOptInfo *rel, + hypoTable *entry); +static List *hypo_get_partition_constraints(PlannerInfo *root, RelOptInfo *rel, + hypoTable *parent); +static List *hypo_get_qual_from_partbound(hypoTable *parent, PartitionBoundSpec *spec); +static List *hypo_get_qual_for_hash(hypoTable *parent, PartitionBoundSpec *spec); +static List *hypo_get_qual_for_list(hypoTable *parent, PartitionBoundSpec *spec); +static void get_range_key_properties(PartitionKey key, int keynum, + PartitionRangeDatum *ldatum, + PartitionRangeDatum *udatum, + ListCell **partexprs_item, + Expr **keyCol, + Const **lower_val, Const **upper_val); +static List *get_range_nulltest(PartitionKey key); +static List *hypo_get_qual_for_range(hypoTable *parent, PartitionBoundSpec *spec, + bool for_default); + + +static void make_inh_translation_list(Relation oldrelation, Relation newrelation, + Index newvarno, + List **translated_vars); + + +#define F_SATISFIES_HASH_PARTITION 5028 +#define rtiIsHandled(rti, root) (planner_rt_fetch(rti, root)->security_barrier) +#define handleRti(rti, root) (planner_rt_fetch(rti, root)->security_barrier = true) +/* imported from src/backend/catalog/partition.c */ +#define partition_bound_accepts_nulls(bi) ((bi)->null_index != -1) /* Add an hypoTable to hypoTables */ @@ -651,6 +694,7 @@ hypo_generate_partitiondesc(hypoTable *parent) elog(ERROR, "unexpected partition strategy: %d", (int) key->strategy); } + result->boundinfo = boundinfo; /* * Now assign OIDs from the original array into mapped indexes of the @@ -805,7 +849,7 @@ hypo_generate_partkey(CreateStmt *stmt, Oid parentid, hypoTable *entry) key->parttypid[i] = exprType(lfirst(partexprs_item)); key->parttypmod[i] = exprTypmod(lfirst(partexprs_item)); key->parttypcoll[i] = exprCollation(lfirst(partexprs_item)); - + partexprs_item = lnext(partexprs_item); } get_typlenbyvalalign(key->parttypid[i], @@ -879,10 +923,13 @@ hypo_generate_part_scheme(CreateStmt *stmt, PartitionKey partkey, Oid memcpy(part_scheme->partopcintype, partkey->partopcintype, sizeof(Oid) * partnatts); - part_scheme->parttypcoll = (Oid *) palloc(sizeof(Oid) * partnatts); - memcpy(part_scheme->parttypcoll, partkey->parttypcoll, + part_scheme->partcollation = (Oid *) palloc(sizeof(Oid) * partnatts); + memcpy(part_scheme->partcollation, partkey->partcollation, sizeof(Oid) * partnatts); - + //part_scheme->parttypcoll = (Oid *) palloc(sizeof(Oid) * partnatts); + //memcpy(part_scheme->parttypcoll, partkey->parttypcoll, + // sizeof(Oid) * partnatts); + part_scheme->parttyplen = (int16 *) palloc(sizeof(int16) * partnatts); memcpy(part_scheme->parttyplen, partkey->parttyplen, sizeof(int16) * partnatts); @@ -1332,7 +1379,8 @@ hypo_table_pfree(hypoTable *entry) { pfree(entry->part_scheme->partopfamily); pfree(entry->part_scheme->partopcintype); - pfree(entry->part_scheme->parttypcoll); + //pfree(entry->part_scheme->parttypcoll); + pfree(entry->part_scheme->partcollation); pfree(entry->part_scheme->parttyplen); pfree(entry->part_scheme); } @@ -1352,6 +1400,7 @@ hypo_table_pfree(hypoTable *entry) pfree(entry->partkey->parttypbyval); pfree(entry->partkey->parttypalign); pfree(entry->partkey->parttypcoll); + pfree(entry->partkey->partcollation); pfree(entry->partkey); } @@ -1696,6 +1745,1310 @@ hypo_transformPartitionBound(ParseState *pstate, hypoTable *parent, return result_spec; } + + +/* + * If this rel is the table we want to partition hypothetically, we inject + * the hypothetical partitioning to this rel + */ +void +hypo_createHypotheticalTable(PlannerInfo *root, + Oid relationObjectId, + RelOptInfo *rel) +{ + if (isExplain && hypo_is_enabled && + hypo_table_oid_is_hypothetical(relationObjectId)) + { + hypoTable *parent = hypo_find_table(relationObjectId); + + /* + * if this rel is parent, prepare some structures to inject + * hypothetical partitioning + */ + if(!rtiIsHandled(rel->relid,root)) + { + List *inhoids; + int nparts, i; + int oldsize = root->simple_rel_array_size; + RangeTblEntry *rte; + List *partitioned_child_rels = NIL; + ListCell *cell; + AppendRelInfo *appinfo; + PartitionedChildRelInfo *pcinfo; + + /* get all of the partition oids */ + inhoids = hypo_find_inheritance_children(parent); + nparts = list_length(inhoids); + + /* resize and clean rte and rel arrays */ + root->simple_rel_array_size = oldsize + nparts + 1; + root->simple_rel_array = (RelOptInfo **) + repalloc(root->simple_rel_array, + root->simple_rel_array_size * + sizeof(RelOptInfo *)); + root->simple_rte_array = (RangeTblEntry **) + repalloc(root->simple_rte_array, + root->simple_rel_array_size * + sizeof(RangeTblEntry *)); + + for (i=oldsize; isimple_rel_array_size; i++) + { + root->simple_rel_array[i] = NULL; + root->simple_rte_array[i] = NULL; + } + + /* rewrite and restore this rel's rte */ + rte = root->simple_rte_array[rel->relid]; + rte->relkind = RELKIND_PARTITIONED_TABLE; + rte->inh = true; + + root->simple_rte_array[rel->relid] = rte; + root->simple_rte_array[oldsize] = rte; + + partitioned_child_rels = lappend_int(partitioned_child_rels, + oldsize); + root->parse->rtable = lappend(root->parse->rtable, + root->simple_rte_array[oldsize]); + + handleRti(rel->relid, root); + handleRti(oldsize, root); + + + /* + * create RangeTblEntries and AppendRelInfos hypothetically + * for all hypothetical partitions + */ + i = 1; + foreach(cell, inhoids) + { + int newrelid; + Oid childOid = lfirst_oid(cell); + hypoTable *child; + RangeTblEntry *childrte; + Relation parentrel; + + child = hypo_find_table(childOid); + newrelid = oldsize + i; + + childrte = copyObject(rte); + childrte->rtekind = RTE_RELATION; + childrte->relid = relationObjectId; //originalOID; + childrte->relkind = RELKIND_RELATION; + childrte->inh = false; + if(!childrte->alias) + childrte->alias = makeNode(Alias); + childrte->alias->aliasname = child->tablename; + /* FIXME maybe use a mapping array here instead of rte->values_lists*/ + childrte->values_lists = list_make1_oid(child->oid); //partitionOID + root->simple_rte_array[newrelid] = childrte; + handleRti(newrelid, root); + + appinfo = makeNode(AppendRelInfo); + appinfo->parent_relid = rel->relid; + appinfo->child_relid = newrelid; + parentrel = heap_open(relationObjectId, NoLock); + appinfo->parent_reltype = parentrel->rd_rel->reltype; + appinfo->child_reltype = parentrel->rd_rel->reltype; + make_inh_translation_list(parentrel, parentrel, newrelid, + &appinfo->translated_vars); + heap_close(parentrel, NoLock); + appinfo->parent_reloid = relationObjectId; + root->append_rel_list = lappend(root->append_rel_list, + appinfo); + root->parse->rtable = lappend(root->parse->rtable, + root->simple_rte_array[newrelid]); + + i++; + } + + /* create pcinfo hypothetically for this rel */ + pcinfo = makeNode(PartitionedChildRelInfo); + pcinfo->parent_relid = oldsize; + pcinfo->child_rels = partitioned_child_rels; + root->pcinfo_list = lappend(root->pcinfo_list, pcinfo); + + /* add partition info to this rel */ + hypo_partition_table(root, rel, parent); + } + + + /* + * if this rel is partition, rewrite tuples and pages using selectivity + * which is computed according to its partition constraints + */ + if (rel->reloptkind != RELOPT_BASEREL + &&rtiIsHandled(rel->relid,root)) + { + List *constraints; + PlannerInfo *root_dummy; + Selectivity selectivity; + + /* get its partition constraints */ + constraints = hypo_get_partition_constraints(root, rel, parent); + + /* + * to compute selectivity, make dummy PlannerInfo and then rewrite + * tuples and pages using this selectivity + */ + root_dummy = makeNode(PlannerInfo); + root_dummy = root; + root_dummy->simple_rel_array[rel->relid] = rel; + + selectivity = clauselist_selectivity(root_dummy, + constraints, + 0, + JOIN_INNER, + NULL); + + rel->pages = rint(rel->pages * selectivity); + rel->tuples = clamp_row_est(rel->tuples * selectivity); + + } + } +} + +/* + * If this rel is need not be scanned, we have to mark it as dummy to omit it + * from the appendrel + * + * It is inspired on relation_excluded_by_constraints + */ +void hypo_setHypotheticalDummyrel(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte) +{ + if(hypo_table_oid_is_hypothetical(rte->relid) + && rte->relkind == 'r') + { + hypoTable *parent = hypo_find_table(rte->relid); + List *constraints; + List *safe_constraints = NIL; + ListCell *lc; + + /* get its partition constraints */ + constraints = hypo_get_partition_constraints(root, rel, parent); + + /* + * We do not currently enforce that CHECK constraints contain only + * immutable functions, so it's necessary to check here. We daren't draw + * conclusions from plan-time evaluation of non-immutable functions. Since + * they're ANDed, we can just ignore any mutable constraints in the list, + * and reason about the rest. + */ + foreach(lc, constraints) + { + Node *pred = (Node *) lfirst(lc); + + if (!contain_mutable_functions(pred)) + safe_constraints = lappend(safe_constraints, pred); + } + + /* if this partition need not be scanned, we call the set_dummy_rel_pathlist() + * to mark it as dummy */ + if (predicate_refuted_by(safe_constraints, rel->baserestrictinfo, false)) + set_dummy_rel_pathlist(rel); + + + /* + TODO: re-estimate parent size just like set_append_rel_size() + */ + + } +} + + +/* + * If this is the table we want to hypothetically partition, modifies its + * metadata to add partitioning information + */ +static void +hypo_partition_table(PlannerInfo *root, RelOptInfo *rel, hypoTable *entry) +{ + PartitionDesc partdesc; + PartitionKey partkey; + + partdesc = hypo_generate_partitiondesc(entry); + partkey = entry->partkey; + rel->part_scheme = entry->part_scheme; + rel->boundinfo = partition_bounds_copy(partdesc->boundinfo, partkey); + rel->nparts = partdesc->nparts; + hypo_generate_partition_key_exprs(entry, rel); +} + + +/* + * Returns a List of partition constraints from its partition bound spec + * + * It is inspired on get_relation_constraints() + */ +static List * +hypo_get_partition_constraints(PlannerInfo *root, RelOptInfo *rel, hypoTable *parent) +{ + ListCell *lc; + Oid childOid; + hypoTable *child; + PartitionBoundSpec *spec; + List *constraints; + + /* FIXME maybe use a mapping array here instead of rte->values_lists*/ + lc = list_head(planner_rt_fetch(rel->relid, root)->values_lists); + childOid = lfirst_oid(lc); + child = hypo_find_table(childOid); + spec = child->boundspec; + + /* get its partition constraints */ + constraints = hypo_get_qual_from_partbound(parent,spec); + + if (constraints) + { + /* + * Run each expression through const-simplification and + * canonicalization similar to check constraints. + */ + constraints = (List *) eval_const_expressions(root, (Node *) constraints); + /* FIXME this func will be modified at pg11 */ + constraints = (List *) canonicalize_qual((Expr *) constraints); + + /* Fix Vars to have the desired varno */ + if (rel->relid != 1) + ChangeVarNodes((Node *) constraints, 1, rel->relid, 0); + + } + return constraints; +} + + + +/* + * Return the list of executable expressions as partition constraint + * + * Heavily inspired on get_qual_from_partbound + */ +static List * +hypo_get_qual_from_partbound(hypoTable *parent, PartitionBoundSpec *spec) +{ + PartitionKey key = parent->partkey; + List *my_qual = NIL; + + Assert(key != NULL); + + switch (key->strategy) + { + + case PARTITION_STRATEGY_HASH: + Assert(spec->strategy == PARTITION_STRATEGY_HASH); + my_qual = hypo_get_qual_for_hash(parent, spec); + break; + + case PARTITION_STRATEGY_LIST: + Assert(spec->strategy == PARTITION_STRATEGY_LIST); + my_qual = hypo_get_qual_for_list(parent, spec); + break; + + case PARTITION_STRATEGY_RANGE: + Assert(spec->strategy == PARTITION_STRATEGY_RANGE); + my_qual = hypo_get_qual_for_range(parent, spec, false); + break; + + default: + elog(ERROR, "unexpected partition strategy: %d", + (int) key->strategy); + } + + return my_qual; +} + + +/* + * Imported from src/backend/catalog/partition.c + * + * get_partition_operator + * + * Return oid of the operator of given strategy for a given partition key + * column. + */ +static Oid +get_partition_operator(PartitionKey key, int col, StrategyNumber strategy, + bool *need_relabel) +{ + Oid operoid; + + /* + * First check if there exists an operator of the given strategy, with + * this column's type as both its lefttype and righttype, in the + * partitioning operator family specified for the column. + */ + operoid = get_opfamily_member(key->partopfamily[col], + key->parttypid[col], + key->parttypid[col], + strategy); + + /* + * If one doesn't exist, we must resort to using an operator in the same + * operator family but with the operator class declared input type. It is + * OK to do so, because the column's type is known to be binary-coercible + * with the operator class input type (otherwise, the operator class in + * question would not have been accepted as the partitioning operator + * class). We must however inform the caller to wrap the non-Const + * expression with a RelabelType node to denote the implicit coercion. It + * ensures that the resulting expression structurally matches similarly + * processed expressions within the optimizer. + */ + if (!OidIsValid(operoid)) + { + operoid = get_opfamily_member(key->partopfamily[col], + key->partopcintype[col], + key->partopcintype[col], + strategy); + if (!OidIsValid(operoid)) + elog(ERROR, "missing operator %d(%u,%u) in opfamily %u", + strategy, key->partopcintype[col], key->partopcintype[col], + key->partopfamily[col]); + *need_relabel = true; + } + else + *need_relabel = false; + + return operoid; +} + + +/* + * Imported from src/backend/catalog/partition.c + * + * make_partition_op_expr + * Returns an Expr for the given partition key column with arg1 and + * arg2 as its leftop and rightop, respectively + */ +static Expr * +make_partition_op_expr(PartitionKey key, int keynum, + uint16 strategy, Expr *arg1, Expr *arg2) +{ + Oid operoid; + bool need_relabel = false; + Expr *result = NULL; + + /* Get the correct btree operator for this partitioning column */ + operoid = get_partition_operator(key, keynum, strategy, &need_relabel); + + /* + * Chosen operator may be such that the non-Const operand needs to be + * coerced, so apply the same; see the comment in + * get_partition_operator(). + */ + if (!IsA(arg1, Const) && + (need_relabel || + key->partcollation[keynum] != key->parttypcoll[keynum])) + arg1 = (Expr *) makeRelabelType(arg1, + key->partopcintype[keynum], + -1, + key->partcollation[keynum], + COERCE_EXPLICIT_CAST); + + /* Generate the actual expression */ + switch (key->strategy) + { + case PARTITION_STRATEGY_LIST: + { + List *elems = (List *) arg2; + int nelems = list_length(elems); + + Assert(nelems >= 1); + Assert(keynum == 0); + + if (nelems > 1 && + !type_is_array(key->parttypid[keynum])) + { + ArrayExpr *arrexpr; + ScalarArrayOpExpr *saopexpr; + + /* Construct an ArrayExpr for the right-hand inputs */ + arrexpr = makeNode(ArrayExpr); + arrexpr->array_typeid = + get_array_type(key->parttypid[keynum]); + arrexpr->array_collid = key->parttypcoll[keynum]; + arrexpr->element_typeid = key->parttypid[keynum]; + arrexpr->elements = elems; + arrexpr->multidims = false; + arrexpr->location = -1; + + /* Build leftop = ANY (rightop) */ + saopexpr = makeNode(ScalarArrayOpExpr); + saopexpr->opno = operoid; + saopexpr->opfuncid = get_opcode(operoid); + saopexpr->useOr = true; + saopexpr->inputcollid = key->partcollation[keynum]; + saopexpr->args = list_make2(arg1, arrexpr); + saopexpr->location = -1; + + result = (Expr *) saopexpr; + } + else + { + List *elemops = NIL; + ListCell *lc; + + foreach (lc, elems) + { + Expr *elem = lfirst(lc), + *elemop; + + elemop = make_opclause(operoid, + BOOLOID, + false, + arg1, elem, + InvalidOid, + key->partcollation[keynum]); + elemops = lappend(elemops, elemop); + } + + result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops); + } + break; + } + + case PARTITION_STRATEGY_RANGE: + result = make_opclause(operoid, + BOOLOID, + false, + arg1, arg2, + InvalidOid, + key->partcollation[keynum]); + break; + + default: + elog(ERROR, "invalid partitioning strategy"); + break; + } + + return result; +} + + +/* + * Returns a CHECK constraint expression to use as a hash partition's + * constraint, given the parent entry and partition bound structure. + * + * Heavily inspired on get_qual_for_hash + */ +static List * +hypo_get_qual_for_hash(hypoTable *parent, PartitionBoundSpec *spec) +{ + PartitionKey key = parent->partkey; + FuncExpr *fexpr; + Node *relidConst; + Node *modulusConst; + Node *remainderConst; + List *args; + ListCell *partexprs_item; + int i; + + /* Fixed arguments. */ + relidConst = (Node *) makeConst(OIDOID, + -1, + InvalidOid, + sizeof(Oid), + ObjectIdGetDatum(parent->oid), //parentid? + false, + true); + + modulusConst = (Node *) makeConst(INT4OID, + -1, + InvalidOid, + sizeof(int32), + Int32GetDatum(spec->modulus), + false, + true); + + remainderConst = (Node *) makeConst(INT4OID, + -1, + InvalidOid, + sizeof(int32), + Int32GetDatum(spec->remainder), + false, + true); + + args = list_make3(relidConst, modulusConst, remainderConst); + partexprs_item = list_head(key->partexprs); + + /* Add an argument for each key column. */ + for (i = 0; i < key->partnatts; i++) + { + Node *keyCol; + + /* Left operand */ + if (key->partattrs[i] != 0) + { + keyCol = (Node *) makeVar(1, + key->partattrs[i], + key->parttypid[i], + key->parttypmod[i], + key->parttypcoll[i], + 0); + } + else + { + keyCol = (Node *) copyObject(lfirst(partexprs_item)); + partexprs_item = lnext(partexprs_item); + } + + args = lappend(args, keyCol); + } + + fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION, + BOOLOID, + args, + InvalidOid, + InvalidOid, + COERCE_EXPLICIT_CALL); + + return list_make1(fexpr); +} + + + +/* + * Returns an implicit-AND list of expressions to use as a list partition's + * constraint, given the parent entry and partition bound structure. + * + * Heavily inspired on get_qual_for_list + */ +static List * +hypo_get_qual_for_list(hypoTable *parent, PartitionBoundSpec *spec) +{ + PartitionKey key = parent->partkey; + List *result; + Expr *keyCol; + Expr *opexpr; + NullTest *nulltest; + ListCell *cell; + List *elems = NIL; + bool list_has_null = false; + + /* + * Only single-column list partitioning is supported, so we are worried + * only about the partition key with index 0. + */ + Assert(key->partnatts == 1); + + /* Construct Var or expression representing the partition column */ + if (key->partattrs[0] != 0) + keyCol = (Expr *) makeVar(1, + key->partattrs[0], + key->parttypid[0], + key->parttypmod[0], + key->parttypcoll[0], + 0); + else + keyCol = (Expr *) copyObject(linitial(key->partexprs)); + + /* + * For default list partition, collect datums for all the partitions. The + * default partition constraint should check that the partition key is + * equal to none of those. + */ + if (spec->is_default) + { + int i; + int ndatums = 0; + PartitionDesc pdesc = hypo_generate_partitiondesc(parent); + PartitionBoundInfo boundinfo = pdesc->boundinfo; + + if (boundinfo) + { + ndatums = boundinfo->ndatums; + + if (partition_bound_accepts_nulls(boundinfo)) + list_has_null = true; + } + + /* + * If default is the only partition, there need not be any partition + * constraint on it. + */ + if (ndatums == 0 && !list_has_null) + return NIL; + + for (i = 0; i < ndatums; i++) + { + Const *val; + + /* + * Construct Const from known-not-null datum. We must be careful + * to copy the value, because our result has to be able to outlive + * the relcache entry we're copying from. + */ + val = makeConst(key->parttypid[0], + key->parttypmod[0], + key->parttypcoll[0], + key->parttyplen[0], + datumCopy(*boundinfo->datums[i], + key->parttypbyval[0], + key->parttyplen[0]), + false, /* isnull */ + key->parttypbyval[0]); + + elems = lappend(elems, val); + } + } + else + { + /* + * Create list of Consts for the allowed values, excluding any nulls. + */ + foreach(cell, spec->listdatums) + { + Const *val = castNode(Const, lfirst(cell)); + + if (val->constisnull) + list_has_null = true; + else + elems = lappend(elems, copyObject(val)); + } + } + + if (elems) + { + /* + * Generate the operator expression from the non-null partition + * values. + */ + opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber, + keyCol, (Expr *) elems); + } + else + { + /* + * If there are no partition values, we don't need an operator + * expression. + */ + opexpr = NULL; + } + + if (!list_has_null) + { + /* + * Gin up a "col IS NOT NULL" test that will be AND'd with the main + * expression. This might seem redundant, but the partition routing + * machinery needs it. + */ + nulltest = makeNode(NullTest); + nulltest->arg = keyCol; + nulltest->nulltesttype = IS_NOT_NULL; + nulltest->argisrow = false; + nulltest->location = -1; + + result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest); + } + else + { + /* + * Gin up a "col IS NULL" test that will be OR'd with the main + * expression. + */ + nulltest = makeNode(NullTest); + nulltest->arg = keyCol; + nulltest->nulltesttype = IS_NULL; + nulltest->argisrow = false; + nulltest->location = -1; + + if (opexpr) + { + Expr *or; + + or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1); + result = list_make1(or); + } + else + result = list_make1(nulltest); + } + + /* + * Note that, in general, applying NOT to a constraint expression doesn't + * necessarily invert the set of rows it accepts, because NOT (NULL) is + * NULL. However, the partition constraints we construct here never + * evaluate to NULL, so applying NOT works as intended. + */ + if (spec->is_default) + { + result = list_make1(make_ands_explicit(result)); + result = list_make1(makeBoolExpr(NOT_EXPR, result, -1)); + } + + return result; +} + + +/* + * Imported from src/backend/catalog/partition.c + * + * get_range_key_properties + * Returns range partition key information for a given column + * + * This is a subroutine for get_qual_for_range, and its API is pretty + * specialized to that caller. + * + * Constructs an Expr for the key column (returned in *keyCol) and Consts + * for the lower and upper range limits (returned in *lower_val and + * *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of + * a Const. All of these structures are freshly palloc'd. + * + * *partexprs_item points to the cell containing the next expression in + * the key->partexprs list, or NULL. It may be advanced upon return. + */ +static void +get_range_key_properties(PartitionKey key, int keynum, + PartitionRangeDatum *ldatum, + PartitionRangeDatum *udatum, + ListCell **partexprs_item, + Expr **keyCol, + Const **lower_val, Const **upper_val) +{ + /* Get partition key expression for this column */ + if (key->partattrs[keynum] != 0) + { + *keyCol = (Expr *) makeVar(1, + key->partattrs[keynum], + key->parttypid[keynum], + key->parttypmod[keynum], + key->parttypcoll[keynum], + 0); + } + else + { + if (*partexprs_item == NULL) + elog(ERROR, "wrong number of partition key expressions"); + *keyCol = copyObject(lfirst(*partexprs_item)); + *partexprs_item = lnext(*partexprs_item); + } + + /* Get appropriate Const nodes for the bounds */ + if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE) + *lower_val = castNode(Const, copyObject(ldatum->value)); + else + *lower_val = NULL; + + if (udatum->kind == PARTITION_RANGE_DATUM_VALUE) + *upper_val = castNode(Const, copyObject(udatum->value)); + else + *upper_val = NULL; +} + + + /* + * Imported from src/backend/catalog/partition.c + * + * get_range_nulltest + * + * A non-default range partition table does not currently allow partition + * keys to be null, so emit an IS NOT NULL expression for each key column. + */ +static List * +get_range_nulltest(PartitionKey key) +{ + List *result = NIL; + NullTest *nulltest; + ListCell *partexprs_item; + int i; + + partexprs_item = list_head(key->partexprs); + for (i = 0; i < key->partnatts; i++) + { + Expr *keyCol; + + if (key->partattrs[i] != 0) + { + keyCol = (Expr *) makeVar(1, + key->partattrs[i], + key->parttypid[i], + key->parttypmod[i], + key->parttypcoll[i], + 0); + } + else + { + if (partexprs_item == NULL) + elog(ERROR, "wrong number of partition key expressions"); + keyCol = copyObject(lfirst(partexprs_item)); + partexprs_item = lnext(partexprs_item); + } + + nulltest = makeNode(NullTest); + nulltest->arg = keyCol; + nulltest->nulltesttype = IS_NOT_NULL; + nulltest->argisrow = false; + nulltest->location = -1; + result = lappend(result, nulltest); + } + + return result; +} + + +/* + * Returns an implicit-AND list of expressions to use as a range partition's + * constraint, given the parent entry and partition bound structure. + * + * Heavily inspired on get_qual_for_range + */ +static List * +hypo_get_qual_for_range(hypoTable *parent, PartitionBoundSpec *spec, bool for_default) +{ + + List *result = NIL; + ListCell *cell1, + *cell2, + *partexprs_item, + *partexprs_item_saved; + int i, + j; + PartitionRangeDatum *ldatum, + *udatum; + PartitionKey key = parent->partkey; + Expr *keyCol; + Const *lower_val, + *upper_val; + List *lower_or_arms, + *upper_or_arms; + int num_or_arms, + current_or_arm; + ListCell *lower_or_start_datum, + *upper_or_start_datum; + bool need_next_lower_arm, + need_next_upper_arm; + + if (spec->is_default) + { + List *or_expr_args = NIL; + PartitionDesc pdesc = hypo_generate_partitiondesc(parent); + Oid *inhoids = pdesc->oids; + int nparts = pdesc->nparts, + i; + + for (i = 0; i < nparts; i++) + { + Oid inhrelid = inhoids[i]; //is this a parent oid or dummy child oid? + HeapTuple tuple; + Datum datum; + bool isnull; + PartitionBoundSpec *bspec; + + elog(NOTICE,"inhrelid : %u",inhrelid); + + tuple = SearchSysCache1(RELOID, inhrelid); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "cache lookup failed for relation %u", inhrelid); + + datum = SysCacheGetAttr(RELOID, tuple, + Anum_pg_class_relpartbound, + &isnull); + + Assert(!isnull); + bspec = (PartitionBoundSpec *) + stringToNode(TextDatumGetCString(datum)); + if (!IsA(bspec, PartitionBoundSpec)) + elog(ERROR, "expected PartitionBoundSpec"); + + if (!bspec->is_default) + { + List *part_qual; + + part_qual = hypo_get_qual_for_range(parent, bspec, true); + + /* + * AND the constraints of the partition and add to + * or_expr_args + */ + or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1 + ? makeBoolExpr(AND_EXPR, part_qual, -1) + : linitial(part_qual)); + } + ReleaseSysCache(tuple); + } + + if (or_expr_args != NIL) + { + + Expr *other_parts_constr; + + /* + * Combine the constraints obtained for non-default partitions + * using OR. As requested, each of the OR's args doesn't include + * the NOT NULL test for partition keys (which is to avoid its + * useless repetition). Add the same now. + */ + other_parts_constr = + makeBoolExpr(AND_EXPR, + lappend(get_range_nulltest(key), + list_length(or_expr_args) > 1 + ? makeBoolExpr(OR_EXPR, or_expr_args, + -1) + : linitial(or_expr_args)), + -1); + + /* + * Finally, the default partition contains everything *NOT* + * contained in the non-default partitions. + */ + result = list_make1(makeBoolExpr(NOT_EXPR, + list_make1(other_parts_constr), -1)); + } + + return result; + } + + lower_or_start_datum = list_head(spec->lowerdatums); + upper_or_start_datum = list_head(spec->upperdatums); + num_or_arms = key->partnatts; + + /* + * If it is the recursive call for default, we skip the get_range_nulltest + * to avoid accumulating the NullTest on the same keys for each partition. + */ + if (!for_default) + result = get_range_nulltest(key); + + /* + * Iterate over the key columns and check if the corresponding lower and + * upper datums are equal using the btree equality operator for the + * column's type. If equal, we emit single keyCol = common_value + * expression. Starting from the first column for which the corresponding + * lower and upper bound datums are not equal, we generate OR expressions + * as shown in the function's header comment. + */ + i = 0; + partexprs_item = list_head(key->partexprs); + partexprs_item_saved = partexprs_item; /* placate compiler */ + forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums) + { + EState *estate; + MemoryContext oldcxt; + Expr *test_expr; + ExprState *test_exprstate; + Datum test_result; + bool isNull; + + ldatum = castNode(PartitionRangeDatum, lfirst(cell1)); + udatum = castNode(PartitionRangeDatum, lfirst(cell2)); + + /* + * Since get_range_key_properties() modifies partexprs_item, and we + * might need to start over from the previous expression in the later + * part of this function, save away the current value. + */ + partexprs_item_saved = partexprs_item; + + get_range_key_properties(key, i, ldatum, udatum, + &partexprs_item, + &keyCol, + &lower_val, &upper_val); + + /* + * If either value is NULL, the corresponding partition bound is + * either MINVALUE or MAXVALUE, and we treat them as unequal, because + * even if they're the same, there is no common value to equate the + * key column with. + */ + if (!lower_val || !upper_val) + break; + + /* Create the test expression */ + estate = CreateExecutorState(); + oldcxt = MemoryContextSwitchTo(estate->es_query_cxt); + test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber, + (Expr *) lower_val, + (Expr *) upper_val); + fix_opfuncids((Node *) test_expr); + test_exprstate = ExecInitExpr(test_expr, NULL); + test_result = ExecEvalExprSwitchContext(test_exprstate, + GetPerTupleExprContext(estate), + &isNull); + MemoryContextSwitchTo(oldcxt); + FreeExecutorState(estate); + + /* If not equal, go generate the OR expressions */ + if (!DatumGetBool(test_result)) + break; + + /* + * The bounds for the last key column can't be equal, because such a + * range partition would never be allowed to be defined (it would have + * an empty range otherwise). + */ + if (i == key->partnatts - 1) + elog(ERROR, "invalid range bound specification"); + + /* Equal, so generate keyCol = lower_val expression */ + result = lappend(result, + make_partition_op_expr(key, i, BTEqualStrategyNumber, + keyCol, (Expr *) lower_val)); + + i++; + } + + /* First pair of lower_val and upper_val that are not equal. */ + lower_or_start_datum = cell1; + upper_or_start_datum = cell2; + + /* OR will have as many arms as there are key columns left. */ + num_or_arms = key->partnatts - i; + current_or_arm = 0; + lower_or_arms = upper_or_arms = NIL; + need_next_lower_arm = need_next_upper_arm = true; + while (current_or_arm < num_or_arms) + { + List *lower_or_arm_args = NIL, + *upper_or_arm_args = NIL; + + /* Restart scan of columns from the i'th one */ + j = i; + partexprs_item = partexprs_item_saved; + + for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum) + { + PartitionRangeDatum *ldatum_next = NULL, + *udatum_next = NULL; + + ldatum = castNode(PartitionRangeDatum, lfirst(cell1)); + if (lnext(cell1)) + ldatum_next = castNode(PartitionRangeDatum, + lfirst(lnext(cell1))); + udatum = castNode(PartitionRangeDatum, lfirst(cell2)); + if (lnext(cell2)) + udatum_next = castNode(PartitionRangeDatum, + lfirst(lnext(cell2))); + get_range_key_properties(key, j, ldatum, udatum, + &partexprs_item, + &keyCol, + &lower_val, &upper_val); + + if (need_next_lower_arm && lower_val) + { + uint16 strategy; + + /* + * For the non-last columns of this arm, use the EQ operator. + * For the last column of this arm, use GT, unless this is the + * last column of the whole bound check, or the next bound + * datum is MINVALUE, in which case use GE. + */ + if (j - i < current_or_arm) + strategy = BTEqualStrategyNumber; + else if (j == key->partnatts - 1 || + (ldatum_next && + ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE)) + strategy = BTGreaterEqualStrategyNumber; + else + strategy = BTGreaterStrategyNumber; + + lower_or_arm_args = lappend(lower_or_arm_args, + make_partition_op_expr(key, j, + strategy, + keyCol, + (Expr *) lower_val)); + } + + if (need_next_upper_arm && upper_val) + { + uint16 strategy; + + /* + * For the non-last columns of this arm, use the EQ operator. + * For the last column of this arm, use LT, unless the next + * bound datum is MAXVALUE, in which case use LE. + */ + if (j - i < current_or_arm) + strategy = BTEqualStrategyNumber; + else if (udatum_next && + udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE) + strategy = BTLessEqualStrategyNumber; + else + strategy = BTLessStrategyNumber; + + upper_or_arm_args = lappend(upper_or_arm_args, + make_partition_op_expr(key, j, + strategy, + keyCol, + (Expr *) upper_val)); + } + + /* + * Did we generate enough of OR's arguments? First arm considers + * the first of the remaining columns, second arm considers first + * two of the remaining columns, and so on. + */ + ++j; + if (j - i > current_or_arm) + { + /* + * We must not emit any more arms if the new column that will + * be considered is unbounded, or this one was. + */ + if (!lower_val || !ldatum_next || + ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE) + need_next_lower_arm = false; + if (!upper_val || !udatum_next || + udatum_next->kind != PARTITION_RANGE_DATUM_VALUE) + need_next_upper_arm = false; + break; + } + } + + if (lower_or_arm_args != NIL) + lower_or_arms = lappend(lower_or_arms, + list_length(lower_or_arm_args) > 1 + ? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1) + : linitial(lower_or_arm_args)); + + if (upper_or_arm_args != NIL) + upper_or_arms = lappend(upper_or_arms, + list_length(upper_or_arm_args) > 1 + ? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1) + : linitial(upper_or_arm_args)); + + /* If no work to do in the next iteration, break away. */ + if (!need_next_lower_arm && !need_next_upper_arm) + break; + + ++current_or_arm; + } + + /* + * Generate the OR expressions for each of lower and upper bounds (if + * required), and append to the list of implicitly ANDed list of + * expressions. + */ + if (lower_or_arms != NIL) + result = lappend(result, + list_length(lower_or_arms) > 1 + ? makeBoolExpr(OR_EXPR, lower_or_arms, -1) + : linitial(lower_or_arms)); + if (upper_or_arms != NIL) + result = lappend(result, + list_length(upper_or_arms) > 1 + ? makeBoolExpr(OR_EXPR, upper_or_arms, -1) + : linitial(upper_or_arms)); + + /* + * As noted above, for non-default, we return list with constant TRUE. If + * the result is NIL during the recursive call for default, it implies + * this is the only other partition which can hold every value of the key + * except NULL. Hence we return the NullTest result skipped earlier. + */ + if (result == NIL) + result = for_default + ? get_range_nulltest(key) + : list_make1(makeBoolConst(true, false)); + + return result; +} + + + + +/* + * Imported from src/backend/optimizer/prep/prepunion.c + * + * make_inh_translation_list + * Build the list of translations from parent Vars to child Vars for + * an inheritance child. + * + * For paranoia's sake, we match type/collation as well as attribute name. + */ +static void +make_inh_translation_list(Relation oldrelation, Relation newrelation, + Index newvarno, + List **translated_vars) +{ + List *vars = NIL; + TupleDesc old_tupdesc = RelationGetDescr(oldrelation); + TupleDesc new_tupdesc = RelationGetDescr(newrelation); + int oldnatts = old_tupdesc->natts; + int newnatts = new_tupdesc->natts; + int old_attno; + + for (old_attno = 0; old_attno < oldnatts; old_attno++) + { + Form_pg_attribute att; + char *attname; + Oid atttypid; + int32 atttypmod; + Oid attcollation; + int new_attno; + + att = TupleDescAttr(old_tupdesc, old_attno); + if (att->attisdropped) + { + /* Just put NULL into this list entry */ + vars = lappend(vars, NULL); + continue; + } + attname = NameStr(att->attname); + atttypid = att->atttypid; + atttypmod = att->atttypmod; + attcollation = att->attcollation; + + /* + * When we are generating the "translation list" for the parent table + * of an inheritance set, no need to search for matches. + */ + if (oldrelation == newrelation) + { + vars = lappend(vars, makeVar(newvarno, + (AttrNumber) (old_attno + 1), + atttypid, + atttypmod, + attcollation, + 0)); + continue; + } + + /* + * Otherwise we have to search for the matching column by name. + * There's no guarantee it'll have the same column position, because + * of cases like ALTER TABLE ADD COLUMN and multiple inheritance. + * However, in simple cases it will be the same column number, so try + * that before we go groveling through all the columns. + * + * Note: the test for (att = ...) != NULL cannot fail, it's just a + * notational device to include the assignment into the if-clause. + */ + if (old_attno < newnatts && + (att = TupleDescAttr(new_tupdesc, old_attno)) != NULL && + !att->attisdropped && att->attinhcount != 0 && + strcmp(attname, NameStr(att->attname)) == 0) + new_attno = old_attno; + else + { + for (new_attno = 0; new_attno < newnatts; new_attno++) + { + att = TupleDescAttr(new_tupdesc, new_attno); + if (!att->attisdropped && att->attinhcount != 0 && + strcmp(attname, NameStr(att->attname)) == 0) + break; + } + if (new_attno >= newnatts) + elog(ERROR, "could not find inherited attribute \"%s\" of relation \"%s\"", + attname, RelationGetRelationName(newrelation)); + } + + /* Found it, check type and collation match */ + if (atttypid != att->atttypid || atttypmod != att->atttypmod) + elog(ERROR, "attribute \"%s\" of relation \"%s\" does not match parent's type", + attname, RelationGetRelationName(newrelation)); + if (attcollation != att->attcollation) + elog(ERROR, "attribute \"%s\" of relation \"%s\" does not match parent's collation", + attname, RelationGetRelationName(newrelation)); + + vars = lappend(vars, makeVar(newvarno, + (AttrNumber) (new_attno + 1), + atttypid, + atttypmod, + attcollation, + 0)); + } + + *translated_vars = vars; +} #endif /* @@ -1976,3 +3329,6 @@ HYPO_PARTITION_NOT_SUPPORTED(); return (Datum) 0; #endif } + + + diff --git a/include/hypopg_table.h b/include/hypopg_table.h index 50ef13c..1d7dff0 100644 --- a/include/hypopg_table.h +++ b/include/hypopg_table.h @@ -21,6 +21,7 @@ #define HYPO_TABLE_NB_COLS 5 /* # of column hypopg_table() returns */ #define HYPO_ADD_PART_COLS 2 /* # of column hypopg_add_partition() returns */ +#include "optimizer/paths.h" /*--- Structs --- */ @@ -66,4 +67,10 @@ Datum hypopg_drop_table(PG_FUNCTION_ARGS); Datum hypopg_partition_table(PG_FUNCTION_ARGS); Datum hypopg_reset_table(PG_FUNCTION_ARGS); +void hypo_createHypotheticalTable(PlannerInfo *root, + Oid relationObjectId, + RelOptInfo *rel); +void hypo_setHypotheticalDummyrel(PlannerInfo *root, RelOptInfo *rel, + Index rti, RangeTblEntry *rte); + #endif