/*
* process any libraries that should be preloaded at postmaster start
*/
process_shared_preload_libraries();
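This call, made from the postmaster's startup sequence, is where every module listed in shared_preload_libraries gets loaded, which in turn reaches PG-Strom's _PG_init() below. A minimal postgresql.conf entry (assuming the module is installed as pg_strom under $libdir, as PG-Strom's documentation describes) looks like:

shared_preload_libraries = '$libdir/pg_strom'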
/*
* _PG_init
*
 * Main entrypoint of PG-Strom. It is invoked exactly once while the
 * postmaster process is starting up, and it calls the initialization
 * routines of each sub-system in turn.
*/
void
_PG_init(void)
{
/*
* PG-Strom has to be loaded using shared_preload_libraries option
*/
if (!process_shared_preload_libraries_in_progress)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("PG-Strom must be loaded via shared_preload_libraries")));
/* link nvrtc library according to the current CUDA version */
pgstrom_init_nvrtc();
/* dump version number */
#ifdef PGSTROM_VERSION
elog(LOG, "PG-Strom version %s built for PostgreSQL %s",
PGSTROM_VERSION, PG_MAJORVERSION);
#else
elog(LOG, "PG-Strom built for PostgreSQL %s", PG_MAJORVERSION);
#endif
/* init misc variables */
PAGE_SIZE = sysconf(_SC_PAGESIZE);
PAGE_MASK = PAGE_SIZE - 1;
PAGE_SHIFT = get_next_log2(PAGE_SIZE);
PHYS_PAGES = sysconf(_SC_PHYS_PAGES);
    /* init GPU/CUDA infrastructure */
pgstrom_init_common_guc();
pgstrom_init_shmbuf();
pgstrom_init_gpu_device();
pgstrom_init_gpu_mmgr();
pgstrom_init_gpu_context();
pgstrom_init_cuda_program();
pgstrom_init_nvme_strom();
pgstrom_init_codegen();
/* init custom-scan providers/FDWs */
pgstrom_init_gputasks();
pgstrom_init_gpuscan();
pgstrom_init_gpujoin();
pgstrom_init_inners();
pgstrom_init_gpupreagg();
pgstrom_init_relscan();
pgstrom_init_arrow_fdw();
/* check commercial license, if any */
check_heterodb_license();
/* dummy custom-scan node */
memset(&pgstrom_dummy_path_methods, 0, sizeof(CustomPathMethods));
pgstrom_dummy_path_methods.CustomName = "Dummy";
pgstrom_dummy_path_methods.PlanCustomPath
= pgstrom_dummy_create_plan;
memset(&pgstrom_dummy_plan_methods, 0, sizeof(CustomScanMethods));
pgstrom_dummy_plan_methods.CustomName = "Dummy";
pgstrom_dummy_plan_methods.CreateCustomScanState
= pgstrom_dummy_create_scan_state;
/* planner hook registration */
planner_hook_next = planner_hook;
planner_hook = pgstrom_post_planner;
}
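get_next_log2() is a PG-Strom internal helper whose definition is not shown here. Assuming it returns the smallest k such that (1UL << k) >= x, a self-contained sketch (hypothetical name my_next_log2, relying on the GCC/Clang __builtin_clzl builtin) could look like this:

#include <limits.h>

/* hypothetical stand-in for get_next_log2(): smallest k with (1UL << k) >= x */
static inline int
my_next_log2(unsigned long x)
{
    if (x <= 1)
        return 0;
    return (int)(sizeof(unsigned long) * CHAR_BIT) - __builtin_clzl(x - 1);
}

For a typical 4 KiB page this yields PAGE_SHIFT = 12, consistent with PAGE_MASK = PAGE_SIZE - 1 = 0xfff. The two functions that follow are PostgreSQL core code (optimizer/path/allpaths.c), shown here to illustrate where set_rel_pathlist_hook is invoked.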
/*
* set_base_rel_pathlists
* Finds all paths available for scanning each base-relation entry.
* Sequential scan and any available indices are considered.
* Each useful path is attached to its relation's 'pathlist' field.
*/
static void
set_base_rel_pathlists(PlannerInfo *root)
{
Index rti;
for (rti = 1; rti < root->simple_rel_array_size; rti++)
{
RelOptInfo *rel = root->simple_rel_array[rti];
/* there may be empty slots corresponding to non-baserel RTEs */
if (rel == NULL)
continue;
Assert(rel->relid == rti); /* sanity check on array */
/* ignore RTEs that are "other rels" */
if (rel->reloptkind != RELOPT_BASEREL)
continue;
set_rel_pathlist(root, rel, rti, root->simple_rte_array[rti]);
}
}
/*
* set_rel_pathlist
* Build access paths for a base relation
*/
static void
set_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
if (IS_DUMMY_REL(rel))
{
/* We already proved the relation empty, so nothing more to do */
}
else if (rte->inh)
{
/* It's an "append relation", process accordingly */
set_append_rel_pathlist(root, rel, rti, rte);
}
else
{
switch (rel->rtekind)
{
case RTE_RELATION:
if (rte->relkind == RELKIND_FOREIGN_TABLE)
{
/* Foreign table */
set_foreign_pathlist(root, rel, rte);
}
else if (rte->tablesample != NULL)
{
/* Sampled relation */
set_tablesample_rel_pathlist(root, rel, rte);
}
else
{
/* Plain relation */
set_plain_rel_pathlist(root, rel, rte);
}
break;
case RTE_SUBQUERY:
/* Subquery --- fully handled during set_rel_size */
break;
case RTE_FUNCTION:
/* RangeFunction */
set_function_pathlist(root, rel, rte);
break;
case RTE_TABLEFUNC:
/* Table Function */
set_tablefunc_pathlist(root, rel, rte);
break;
case RTE_VALUES:
/* Values list */
set_values_pathlist(root, rel, rte);
break;
case RTE_CTE:
/* CTE reference --- fully handled during set_rel_size */
break;
case RTE_NAMEDTUPLESTORE:
/* tuplestore reference --- fully handled during set_rel_size */
break;
default:
elog(ERROR, "unexpected rtekind: %d", (int) rel->rtekind);
break;
}
}
/*
* Allow a plugin to editorialize on the set of Paths for this base
* relation. It could add new paths (such as CustomPaths) by calling
* add_path(), or add_partial_path() if parallel aware. It could also
* delete or modify paths added by the core code.
*/
if (set_rel_pathlist_hook)
(*set_rel_pathlist_hook) (root, rel, rti, rte);
/*
* If this is a baserel, we should normally consider gathering any partial
* paths we may have created for it. We have to do this after calling the
* set_rel_pathlist_hook, else it cannot add partial paths to be included
* here.
*
* However, if this is an inheritance child, skip it. Otherwise, we could
* end up with a very large number of gather nodes, each trying to grab
* its own pool of workers. Instead, we'll consider gathering partial
* paths for the parent appendrel.
*
* Also, if this is the topmost scan/join rel (that is, the only baserel),
* we postpone gathering until the final scan/join targetlist is available
* (see grouping_planner).
*/
if (rel->reloptkind == RELOPT_BASEREL &&
bms_membership(root->all_baserels) != BMS_SINGLETON)
generate_gather_paths(root, rel, false);
/* Now find the cheapest of the paths for this rel */
set_cheapest(rel);
#ifdef OPTIMIZER_DEBUG
debug_print_rel(root, rel);
#endif
}
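For reference, the heart of generate_gather_paths() called above is simply to wrap the cheapest partial path in a Gather path; a simplified sketch against the PostgreSQL 11/12 planner API (ignoring Gather Merge and the row-count override used by the real function) is:

if (rel->partial_pathlist != NIL)
{
    /* add_partial_path() keeps this list sorted, cheapest first */
    Path       *cheapest_partial = (Path *) linitial(rel->partial_pathlist);
    GatherPath *gpath;

    gpath = create_gather_path(root, rel, cheapest_partial,
                               rel->reltarget, NULL, NULL);
    add_path(rel, (Path *) gpath);
}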
/*
* pgstrom_init_gpuscan
*/
void
pgstrom_init_gpuscan(void)
{
/* pg_strom.enable_gpuscan */
DefineCustomBoolVariable("pg_strom.enable_gpuscan",
"Enables the use of GPU accelerated full-scan",
NULL,
&enable_gpuscan,
true,
PGC_USERSET,
GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
/* pg_strom.pullup_outer_scan */
DefineCustomBoolVariable("pg_strom.pullup_outer_scan",
"Enables to pull up simple outer scan",
NULL,
&enable_pullup_outer_scan,
true,
PGC_USERSET,
GUC_NOT_IN_SAMPLE,
NULL, NULL, NULL);
/* setup path methods */
memset(&gpuscan_path_methods, 0, sizeof(gpuscan_path_methods));
gpuscan_path_methods.CustomName = "GpuScan";
gpuscan_path_methods.PlanCustomPath = PlanGpuScanPath;
/* setup plan methods */
memset(&gpuscan_plan_methods, 0, sizeof(gpuscan_plan_methods));
gpuscan_plan_methods.CustomName = "GpuScan";
gpuscan_plan_methods.CreateCustomScanState = gpuscan_create_scan_state;
RegisterCustomScanMethods(&gpuscan_plan_methods);
/* setup exec methods */
memset(&gpuscan_exec_methods, 0, sizeof(gpuscan_exec_methods));
gpuscan_exec_methods.CustomName = "GpuScan";
gpuscan_exec_methods.BeginCustomScan = ExecInitGpuScan;
gpuscan_exec_methods.ExecCustomScan = ExecGpuScan;
gpuscan_exec_methods.EndCustomScan = ExecEndGpuScan;
gpuscan_exec_methods.ReScanCustomScan = ExecReScanGpuScan;
gpuscan_exec_methods.EstimateDSMCustomScan = ExecGpuScanEstimateDSM;
gpuscan_exec_methods.InitializeDSMCustomScan = ExecGpuScanInitDSM;
gpuscan_exec_methods.InitializeWorkerCustomScan = ExecGpuScanInitWorker;
gpuscan_exec_methods.ReInitializeDSMCustomScan = ExecGpuScanReInitializeDSM;
gpuscan_exec_methods.ShutdownCustomScan = ExecShutdownGpuScan;
gpuscan_exec_methods.ExplainCustomScan = ExplainGpuScan;
/* hook registration */
set_rel_pathlist_next = set_rel_pathlist_hook;
set_rel_pathlist_hook = gpuscan_add_scan_path;
}
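Since both GUCs above are declared PGC_USERSET, they can be flipped per session without a restart, e.g. SET pg_strom.enable_gpuscan = off. Note also that RegisterCustomScanMethods() publishes gpuscan_plan_methods under its CustomName; this by-name lookup is what lets a serialized CustomScan node be re-associated with its callbacks when plan trees are copied out to parallel workers.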
/*
* gpuscan_add_scan_path - entrypoint of the set_rel_pathlist_hook
*/
static void
gpuscan_add_scan_path(PlannerInfo *root,
RelOptInfo *baserel,
Index rtindex,
RangeTblEntry *rte)
{
Path *pathnode;
List *dev_quals = NIL;
List *host_quals = NIL;
IndexOptInfo *indexOpt;
List *indexConds;
List *indexQuals;
cl_long indexNBlocks;
ListCell *lc;
/* call the secondary hook */
if (set_rel_pathlist_next)
set_rel_pathlist_next(root, baserel, rtindex, rte);
    /* nothing to do if either PG-Strom or GpuScan is disabled */
if (!pgstrom_enabled || !enable_gpuscan)
return;
/* We already proved the relation empty, so nothing more to do */
if (is_dummy_rel(baserel))
return;
    /* scanning an inheritance parent is the role of the built-in Append node */
if (rte->inh)
return;
    /*
     * GpuScan can run only on plain relations, materialized views, or
     * foreign tables managed by arrow_fdw.
     */
if (rte->relkind == RELKIND_FOREIGN_TABLE)
{
if (!baseRelIsArrowFdw(baserel))
return;
}
else if (rte->relkind != RELKIND_RELATION &&
rte->relkind != RELKIND_MATVIEW)
return;
/* Check whether the qualifier can run on GPU device */
foreach (lc, baserel->baserestrictinfo)
{
RestrictInfo *rinfo = lfirst(lc);
if (pgstrom_device_expression(root, baserel, rinfo->clause))
dev_quals = lappend(dev_quals, rinfo);
else
host_quals = lappend(host_quals, rinfo);
}
if (dev_quals == NIL)
return;
/* Check availability of GpuScan+BRIN Index */
indexOpt = pgstrom_tryfind_brinindex(root, baserel,
&indexConds,
&indexQuals,
&indexNBlocks);
    /* add a GpuScan path for single-process execution */
pathnode = create_gpuscan_path(root, baserel,
dev_quals,
host_quals,
0,
indexOpt,
indexConds,
indexQuals,
indexNBlocks);
add_path(baserel, pathnode);
/* If appropriate, consider parallel GpuScan */
if (baserel->consider_parallel && baserel->lateral_relids == NULL)
{
int parallel_nworkers
= compute_parallel_worker(baserel,
baserel->pages, -1.0,
max_parallel_workers_per_gather);
    /*
     * XXX - Do we need any GpuScan-specific logic to adjust
     * parallel_workers?
     */
if (parallel_nworkers <= 0)
return;
    /* add a GpuScan path that runs on parallel workers */
pathnode = create_gpuscan_path(root, baserel,
dev_quals,
host_quals,
parallel_nworkers,
indexOpt,
indexConds,
indexQuals,
indexNBlocks);
add_partial_path(baserel, pathnode);
}
}
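For a sense of scale: with default settings, compute_parallel_worker() grants the first worker once the heap reaches min_parallel_table_scan_size (8MB, i.e. 1024 blocks) and one more each time that threshold triples (3072 blocks -> 2 workers, 9216 -> 3, and so on), bounded by max_parallel_workers_per_gather; passing -1.0 for index_pages, as done here, excludes index size from the computation.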
static PlannedStmt *
pgstrom_post_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
{
PlannedStmt *pstmt;
ListCell *lc;
if (planner_hook_next)
pstmt = planner_hook_next(parse, cursorOptions, boundParams);
else
pstmt = standard_planner(parse, cursorOptions, boundParams);
pgstrom_post_planner_recurse(pstmt, &pstmt->planTree);
foreach (lc, pstmt->subplans)
pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
return pstmt;
}
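Note that pstmt->subplans holds the plan trees of SubPlan expressions, which are not reachable from planTree; they therefore need the separate walk above.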
/*
 * pgstrom_post_planner_recurse
 *
 * Removes the 'dummy' custom scan nodes from the plan tree.
 */
static void
pgstrom_post_planner_recurse(PlannedStmt *pstmt, Plan **p_plan)
{
Plan *plan = *p_plan;
ListCell *lc;
Assert(plan != NULL);
switch (nodeTag(plan))
{
case T_ModifyTable:
{
ModifyTable *splan = (ModifyTable *) plan;
foreach (lc, splan->plans)
pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
}
break;
case T_Append:
{
Append *splan = (Append *) plan;
foreach (lc, splan->appendplans)
pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
}
break;
case T_MergeAppend:
{
MergeAppend *splan = (MergeAppend *) plan;
foreach (lc, splan->mergeplans)
pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
}
break;
case T_BitmapAnd:
{
BitmapAnd *splan = (BitmapAnd *) plan;
foreach (lc, splan->bitmapplans)
pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
}
break;
case T_BitmapOr:
{
BitmapOr *splan = (BitmapOr *) plan;
foreach (lc, splan->bitmapplans)
pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
}
break;
case T_SubqueryScan:
{
SubqueryScan *sscan = (SubqueryScan *) plan;
pgstrom_post_planner_recurse(pstmt, &sscan->subplan);
}
break;
case T_CustomScan:
{
CustomScan *cscan = (CustomScan *) plan;
if (cscan->methods == &pgstrom_dummy_plan_methods)
{
*p_plan = pgstrom_dummy_remove_plan(pstmt, cscan);
pgstrom_post_planner_recurse(pstmt, p_plan);
return;
}
else if (pgstrom_plan_is_gpupreagg(&cscan->scan.plan))
gpupreagg_post_planner(pstmt, cscan);
foreach (lc, cscan->custom_plans)
pgstrom_post_planner_recurse(pstmt, (Plan **)&lfirst(lc));
}
break;
default:
break;
}
if (plan->lefttree)
pgstrom_post_planner_recurse(pstmt, &plan->lefttree);
if (plan->righttree)
pgstrom_post_planner_recurse(pstmt, &plan->righttree);
}
As an example, the query below joins the large table t0 with eight 100K-row tables and aggregates per category; the EXPLAIN output that follows shows a GpuPreAgg node stacked on top of an eight-depth GpuJoin:

SELECT cat, count(*), avg(ax)
  FROM t0 NATURAL JOIN t1 NATURAL JOIN t2 NATURAL JOIN t3
          NATURAL JOIN t4 NATURAL JOIN t5 NATURAL JOIN t6
          NATURAL JOIN t7 NATURAL JOIN t8
 GROUP BY cat;
HashAggregate (cost=3529132.25..3529132.57 rows=26 width=12)
Output: t0.cat, pgstrom.count((pgstrom.nrows())), pgstrom.avg((pgstrom.nrows((t1.ax IS NOT NULL))), (pgstrom.psum(t1.ax)))
Group Key: t0.cat
-> Custom Scan (GpuPreAgg) (cost=29099.24..2859511.64 rows=234 width=48)
Output: t0.cat, pgstrom.nrows(), pgstrom.nrows((t1.ax IS NOT NULL)), pgstrom.psum(t1.ax)
Reduction: Local + Global
GPU Projection: t0.cat, t1.ax
Logic Parameter: SafetyLimit: 2147483647, KeyDistSalt: 9
Extra: outer-bulk-exec
-> Custom Scan (GpuJoin) on public.t0 (cost=25099.24..2826221.34 rows=93721454 width=12)
Output: t0.cat, t1.ax
GPU Projection: t0.cat::text, t1.ax::double precision
Depth 1: GpuHashJoin, HashKeys: (t0.fid)
JoinQuals: (t0.fid = t6.fid)
Nrows (in/out: 97.75%), KDS-Hash (size: 13.47MB, nbatches: 1)
Depth 2: GpuHashJoin, HashKeys: (t0.cid)
JoinQuals: (t0.cid = t3.cid)
Nrows (in/out: 98.34%), KDS-Hash (size: 13.47MB, nbatches: 1)
Depth 3: GpuHashJoin, HashKeys: (t0.bid)
JoinQuals: (t0.bid = t2.bid)
Nrows (in/out: 98.76%), KDS-Hash (size: 13.47MB, nbatches: 1)
Depth 4: GpuHashJoin, HashKeys: (t0.eid)
JoinQuals: (t0.eid = t5.eid)
Nrows (in/out: 98.97%), KDS-Hash (size: 13.47MB, nbatches: 1)
Depth 5: GpuHashJoin, HashKeys: (t0.aid)
JoinQuals: (t0.aid = t1.aid)
Nrows (in/out: 100.00%), KDS-Hash (size: 13.47MB, nbatches: 1)
Depth 6: GpuHashJoin, HashKeys: (t0.did)
JoinQuals: (t0.did = t4.did)
Nrows (in/out: 100.00%), KDS-Hash (size: 13.47MB, nbatches: 1)
Depth 7: GpuHashJoin, HashKeys: (t0.gid)
JoinQuals: (t0.gid = t7.gid)
Nrows (in/out: 100.00%), KDS-Hash (size: 13.47MB, nbatches: 1)
Depth 8: GpuHashJoin, HashKeys: (t0.hid)
JoinQuals: (t0.hid = t8.hid)
Nrows (in/out: 99.74%), KDS-Hash (size: 13.47MB, nbatches: 1)
Extra: bulk-exec-support, row-format
-> Seq Scan on public.t6 (cost=0.00..1935.00 rows=100000 width=4)
Output: t6.fid, t6.ftext, t6.fx
-> Seq Scan on public.t3 (cost=0.00..1935.00 rows=100000 width=4)
Output: t3.cid, t3.ctext, t3.cx
-> Seq Scan on public.t2 (cost=0.00..1935.00 rows=100000 width=4)
Output: t2.bid, t2.btext, t2.bx
-> Seq Scan on public.t5 (cost=0.00..1935.00 rows=100000 width=4)
Output: t5.eid, t5.etext, t5.ex
-> Seq Scan on public.t1 (cost=0.00..1935.00 rows=100000 width=12)
Output: t1.aid, t1.atext, t1.ax
-> Seq Scan on public.t4 (cost=0.00..1935.00 rows=100000 width=4)
Output: t4.did, t4.dtext, t4.dx
-> Seq Scan on public.t7 (cost=0.00..1935.00 rows=100000 width=4)
Output: t7.gid, t7.gtext, t7.gx
-> Seq Scan on public.t8 (cost=0.00..1935.00 rows=100000 width=4)
Output: t8.hid, t8.htext, t8.hx