diff --git a/expected/hypo_table.out b/expected/hypo_table.out index 0bbcc05..ad08b71 100644 --- a/expected/hypo_table.out +++ b/expected/hypo_table.out @@ -1263,3 +1263,54 @@ ERROR: hypopg: partition "hypo_part_hash_0_dup" would overlap partition "hypo_p -- Overlapping range bounds, subpartition SELECT tablename FROM hypopg_add_partition('hypo_part_multi_1_q1_a_dup', 'PARTITION OF hypo_part_multi_1_q1 FOR VALUES FROM ($$2015-01-01$$) TO ($$2015-02-01$$)'); ERROR: hypopg: partition "hypo_part_multi_1_q1_a_dup" would overlap partition "hypo_part_multi_1_q1_a" +-- relcache callback test +-- ====================== +SELECT tablename FROM hypopg_table() WHERE tablename LIKE 'hypo_part_range%' ORDER BY tablename COLLATE "C"; + tablename +----------------------------- + hypo_part_range + hypo_part_range_10000_20000 + hypo_part_range_1_10000 + hypo_part_range_20000_30000 +(4 rows) + +DROP TABLE hypo_part_range; +SELECT tablename FROM hypopg_table() WHERE tablename LIKE 'hypo_part_range%' ORDER BY tablename COLLATE "C"; + tablename +----------- +(0 rows) + +CREATE TABLE hypo_part_range (id integer, val text); +INSERT INTO hypo_part_range SELECT i, 'line ' || i FROM generate_series(1, 29999) i; +SELECT * FROM hypopg_partition_table('hypo_part_range', 'PARTITION BY RANGE (id)'); + hypopg_partition_table +------------------------ + t +(1 row) + +SELECT tablename FROM hypopg_add_partition('hypo_part_range_10000_20000', 'PARTITION OF hypo_part_range FOR VALUES FROM (10000) TO (20000)'); + tablename +----------------------------- + hypo_part_range_10000_20000 +(1 row) + +SELECT tablename FROM hypopg_add_partition('hypo_part_range_20000_30000', 'PARTITION OF hypo_part_range FOR VALUES FROM (20000) TO (30000)'); + tablename +----------------------------- + hypo_part_range_20000_30000 +(1 row) + +SELECT tablename FROM hypopg_add_partition('hypo_part_range_1_10000', 'PARTITION OF hypo_part_range FOR VALUES FROM (1) TO (10000)'); + tablename +------------------------- + hypo_part_range_1_10000 +(1 row) + +EXPLAIN (COSTS OFF) SELECT * FROM hypo_part_range WHERE id = 42; + QUERY PLAN +----------------------------------------------------------- + Append + -> Seq Scan on hypo_part_range hypo_part_range_1_10000 + Filter: (id = 42) +(3 rows) + diff --git a/hypopg.c b/hypopg.c index ed68f13..bfe2430 100644 --- a/hypopg.c +++ b/hypopg.c @@ -21,6 +21,11 @@ #if PG_VERSION_NUM >= 90300 #include "access/htup_details.h" #endif +#if PG_VERSION_NUM >= 100000 +#include "access/xact.h" +#endif +#include "utils/inval.h" +#include "utils/lsyscache.h" #include "utils/selfuncs.h" #include "utils/syscache.h" @@ -41,6 +46,12 @@ bool isExplain; bool hypo_is_enabled; MemoryContext HypoMemoryContext; +/*--- Variables not exported ---*/ + +static List *pending_invals = NIL; /* List of interesting OID for which we + received inval messages that need to be + processed. */ + /*--- Functions --- */ void _PG_init(void); @@ -89,6 +100,7 @@ static bool hypo_get_relation_stats_hook(PlannerInfo *root, static get_relation_stats_hook_type prev_get_relation_stats_hook = NULL; static bool hypo_query_walker(Node *node); +static void hypo_CacheRelCallback(Datum arg, Oid relid); void _PG_init(void) @@ -134,6 +146,7 @@ _PG_init(void) NULL, NULL); + CacheRegisterRelcacheCallback(hypo_CacheRelCallback, (Datum) 0); } void @@ -214,6 +227,13 @@ hypo_utility_hook( hypo_query_walker, NULL, 0); + /* + * Process pending invalidation. For now, just do it if the current query + * might try to acess stored hypothetical objects + */ + if (isExplain && list_length(pending_invals) != 0) + hypo_process_inval(); + if (prev_utility_hook) prev_utility_hook( #if PG_VERSION_NUM >= 100000 @@ -291,6 +311,76 @@ hypo_query_walker(Node *parsetree) return false; } +/* + * Callback for relcache inval message. Detect if the given relid correspond + * to something we should take care of. For now, we only care of table being + * dropped for which we have hypothetical partitioning information, thus + * needing to remove relevant hypoTable entries. At this point, we can't + * detect if the inval message is due to table dropping or not, because any + * cache access require a valid transaction, and we don't have a guarantee that + * it's the case at this point. Instead, maintain a deduplicated list of + * interesting OID that will be processed before usage of hypothetical + * partitioned object. + */ +static void +hypo_CacheRelCallback(Datum arg, Oid relid) +{ +#if PG_VERSION_NUM >= 100000 + hypoTable *entry; + + entry = hypo_find_table(relid, true); + if (entry) + { + MemoryContext oldcontext; + + oldcontext = MemoryContextSwitchTo(HypoMemoryContext); + pending_invals = list_append_unique_oid(pending_invals, relid); + MemoryContextSwitchTo(oldcontext); + } +#endif +} + +/* Process any RelCache invalidation we previously received. We have to + * process them asynchronously, because we have to process it only if the + * invalidation message was due to the original table being dropped. We try to + * detect this case by comparing the relid'd relname if it exists, and this + * require a valid snapshot if may not be the case at the moment we receive the + * inval message. + */ +void +hypo_process_inval(void) +{ +#if PG_VERSION_NUM >= 100000 + ListCell *lc; + + Assert(IsTransactionState()); + + if (pending_invals == NIL) + return; + + foreach(lc, pending_invals) + { + Oid relid = lfirst_oid(lc); + hypoTable *entry = hypo_find_table(relid, false); + char *relname = get_rel_name(relid); + + /* + * The pending invalidations should be filtered and recorded after + * removing an entry, and should always be processed before any attempt + * to remove a hypothetical object, so we shoudl always find a + * hypoTable at this point. + */ + Assert(entry); + + if (!relname || (strcmp(relname, entry->tablename) != 0)) + hypo_table_remove(relid, true); + } + + list_free(pending_invals); + pending_invals = NIL; +#endif +} + /* Reset the isExplain flag after each query */ static void hypo_executorEnd_hook(QueryDesc *queryDesc) diff --git a/hypopg_analyze.c b/hypopg_analyze.c index 2782921..c8fef16 100644 --- a/hypopg_analyze.c +++ b/hypopg_analyze.c @@ -551,11 +551,16 @@ HYPO_PARTITION_NOT_SUPPORTED(); #else Oid root_tableid = PG_GETARG_OID(0); float4 fraction = PG_GETARG_FLOAT4(1); - hypoTable *root_entry = hypo_find_table(root_tableid, true); + hypoTable *root_entry; Relation onerel; Relation pgstats; int ret; + /* Process any pending invalidation */ + hypo_process_inval(); + + root_entry = hypo_find_table(root_tableid, true); + if (!root_entry) elog(ERROR, "hypopg: this table is not hypothetically partitioned"); @@ -617,6 +622,9 @@ HYPO_PARTITION_NOT_SUPPORTED(); hypoStatsEntry *entry; Relation pgstats; + /* Process any pending invalidation */ + hypo_process_inval(); + /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) ereport(ERROR, diff --git a/hypopg_table.c b/hypopg_table.c index 4bd4cce..a70a2fc 100644 --- a/hypopg_table.c +++ b/hypopg_table.c @@ -100,7 +100,6 @@ static hypoTable *hypo_newTable(Oid parentid); static hypoTable *hypo_table_find_parent_oid(Oid parentid); static hypoTable *hypo_table_name_get_entry(const char *name); static void hypo_table_pfree(hypoTable *entry); -static bool hypo_table_remove(Oid tableid); static const hypoTable *hypo_table_store_parsetree(CreateStmt *node, const char *queryString, Oid parentid, Oid rootid); static PartitionBoundSpec *hypo_transformPartitionBound(ParseState *pstate, @@ -1537,7 +1536,7 @@ hypo_find_table(Oid tableid, bool missing_ok) } if (!missing_ok) - elog(ERROR, "hypopg: could not find parent for %d", tableid); + elog(ERROR, "hypopg: could not find entry for oid %d", tableid); return NULL; } @@ -1618,18 +1617,35 @@ hypo_table_pfree(hypoTable *entry) * partitioned table) from the list of hypothetical tables. pfree (by calling * hypo_table_pfree) all memory that has been allocated. Also free all stored * hypothetical statistics belonging to it if any. + * + * The deep parameter specify whether the function should try to perform the + * same cleanup for all entries whose rootid is the given tableid. All + * function should pass true for this parameter, except hypo_table_reset() + * which will sequentially iterate over all entries. */ -static bool -hypo_table_remove(Oid tableid) +bool +hypo_table_remove(Oid tableid, bool deep) { ListCell *lc; + bool found = false; - foreach(lc, hypoTables) + /* + * The cells can be removed during the loop, so we can't iterate using + * standard foreach / lnext macros. + */ + lc = list_head(hypoTables); + while (lc != NULL) { hypoTable *entry = (hypoTable *) lfirst(lc); - if (entry->oid == tableid) + /* get the next cell right now, before we might remove the entry */ + lc = lnext(lc); + + if (entry->oid == tableid || entry->rootid == tableid) { + if (!deep && entry->oid != tableid) + continue; + #if PG_VERSION_NUM >= 100000 /* Remove any stored statistics */ hypo_stat_remove(tableid); @@ -1637,10 +1653,20 @@ hypo_table_remove(Oid tableid) hypoTables = list_delete_ptr(hypoTables, entry); hypo_table_pfree(entry); - return true; + + /* if ws're not doing a deep remove, the only match we can get is + * entry which oid is the passed tableid. In this case, no need to + * continue looping, return true to indicate that we found and + * removed and entry. + */ + if (!deep) + return true; + + found = true; } } - return false; + + return found; } /* @@ -1660,7 +1686,7 @@ hypo_table_reset(void) { hypoTable *entry = (hypoTable *) lfirst(lc); - hypo_table_remove(entry->oid); + hypo_table_remove(entry->oid, false); } list_free(hypoTables); @@ -3118,6 +3144,9 @@ HYPO_PARTITION_NOT_SUPPORTED(); bool nulls[HYPO_ADD_PART_COLS]; int i = 0; + /* Process any pending invalidation */ + hypo_process_inval(); + if (!PG_ARGISNULL(2)) partition_by = TextDatumGetCString(PG_GETARG_TEXT_PP(2)); @@ -3218,7 +3247,10 @@ HYPO_PARTITION_NOT_SUPPORTED(); #else Oid tableid = PG_GETARG_OID(0); - PG_RETURN_BOOL(hypo_table_remove(tableid)); + /* Process any pending invalidation */ + hypo_process_inval(); + + PG_RETURN_BOOL(hypo_table_remove(tableid, true)); #endif } @@ -3245,6 +3277,9 @@ HYPO_PARTITION_NOT_SUPPORTED(); StringInfoData sql; RawStmt *raw_stmt; + /* Process any pending invalidation */ + hypo_process_inval(); + tp = SearchSysCache1(RELOID, ObjectIdGetDatum(tableid)); if (HeapTupleIsValid(tp)) { @@ -3314,6 +3349,9 @@ hypopg_reset_table(PG_FUNCTION_ARGS) #if PG_VERSION_NUM < 100000 HYPO_PARTITION_NOT_SUPPORTED(); #else + /* Process any pending invalidation */ + hypo_process_inval(); + hypo_table_reset(); PG_RETURN_VOID(); #endif @@ -3335,6 +3373,9 @@ HYPO_PARTITION_NOT_SUPPORTED(); Tuplestorestate *tupstore; ListCell *lc; + /* Process any pending invalidation */ + hypo_process_inval(); + /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) ereport(ERROR, diff --git a/include/hypopg.h b/include/hypopg.h index 1dcbc1c..df5f77b 100644 --- a/include/hypopg.h +++ b/include/hypopg.h @@ -25,5 +25,6 @@ extern bool hypo_is_enabled; extern MemoryContext HypoMemoryContext; Oid hypo_getNewOid(Oid relid); +void hypo_process_inval(void); #endif diff --git a/include/hypopg_table.h b/include/hypopg_table.h index 5a3564a..87701b7 100644 --- a/include/hypopg_table.h +++ b/include/hypopg_table.h @@ -72,6 +72,7 @@ List *hypo_get_partition_constraints(PlannerInfo *root, RelOptInfo *rel, hypoTable *parent); List *hypo_get_partition_quals_inh(hypoTable *part, hypoTable *parent); bool hypo_table_oid_is_hypothetical(Oid relid); +bool hypo_table_remove(Oid tableid, bool deep); void hypo_injectHypotheticalPartitioning(PlannerInfo *root, Oid relationObjectId, RelOptInfo *rel); diff --git a/test/sql/hypo_table.sql b/test/sql/hypo_table.sql index 304ed7f..ca178d3 100644 --- a/test/sql/hypo_table.sql +++ b/test/sql/hypo_table.sql @@ -226,3 +226,17 @@ SELECT tablename FROM hypopg_add_partition('hypo_part_list_1_2_3_dup', 'PARTITIO SELECT tablename FROM hypopg_add_partition('hypo_part_hash_0_dup', 'PARTITION OF hypo_part_hash FOR VALUES WITH (MODULUS 10, REMAINDER 0)'); -- Overlapping range bounds, subpartition SELECT tablename FROM hypopg_add_partition('hypo_part_multi_1_q1_a_dup', 'PARTITION OF hypo_part_multi_1_q1 FOR VALUES FROM ($$2015-01-01$$) TO ($$2015-02-01$$)'); + +-- relcache callback test +-- ====================== + +SELECT tablename FROM hypopg_table() WHERE tablename LIKE 'hypo_part_range%' ORDER BY tablename COLLATE "C"; +DROP TABLE hypo_part_range; +SELECT tablename FROM hypopg_table() WHERE tablename LIKE 'hypo_part_range%' ORDER BY tablename COLLATE "C"; +CREATE TABLE hypo_part_range (id integer, val text); +INSERT INTO hypo_part_range SELECT i, 'line ' || i FROM generate_series(1, 29999) i; +SELECT * FROM hypopg_partition_table('hypo_part_range', 'PARTITION BY RANGE (id)'); +SELECT tablename FROM hypopg_add_partition('hypo_part_range_10000_20000', 'PARTITION OF hypo_part_range FOR VALUES FROM (10000) TO (20000)'); +SELECT tablename FROM hypopg_add_partition('hypo_part_range_20000_30000', 'PARTITION OF hypo_part_range FOR VALUES FROM (20000) TO (30000)'); +SELECT tablename FROM hypopg_add_partition('hypo_part_range_1_10000', 'PARTITION OF hypo_part_range FOR VALUES FROM (1) TO (10000)'); +EXPLAIN (COSTS OFF) SELECT * FROM hypo_part_range WHERE id = 42;