GreenPlum聚合结构解析

聚合的执行需要状态描述信息,由AggState结构体管理。该结构体如下:

/*
 * AggState - run-time state describing the execution of an aggregate node.
 *
 * The FIELDNO_* macros interleaved with the members record the zero-based
 * position of the member that immediately follows each of them.  They must
 * be updated whenever members are added, removed or reordered ahead of
 * those points.
 * NOTE(review): presumably consumed by code that addresses members by index
 * (e.g. generated/JIT code) -- confirm against the users of these macros.
 */
typedef struct AggState
{
	ScanState	ss;				/* its first field is NodeTag */
	List	   *aggs;			/* all Aggref nodes in targetlist & quals */
	int			numaggs;		/* length of list (could be zero!) */
	int			numtrans;		/* number of pertrans items */
	AggStrategy aggstrategy;	/* strategy mode */
	AggSplit	aggsplit;		/* agg-splitting mode, see nodes.h */
	AggStatePerPhase phase;		/* pointer to current phase data */
	int			numphases;		/* number of phases (including phase 0) */
	int			current_phase;	/* current phase number */
	AggStatePerAgg peragg;		/* per-Aggref information */
	AggStatePerTrans pertrans;	/* per-Trans state information */
	ExprContext *hashcontext;	/* econtexts for long-lived data (hashtable) */
	ExprContext **aggcontexts;	/* econtexts for long-lived data (per GS) */
	ExprContext *tmpcontext;	/* econtext for input expressions */
	/* zero-based index of curaggcontext within this struct */
#define FIELDNO_AGGSTATE_CURAGGCONTEXT 14
	ExprContext *curaggcontext; /* currently active aggcontext */
	AggStatePerAgg curperagg;	/* currently active aggregate, if any */
	/* zero-based index of curpertrans within this struct */
#define FIELDNO_AGGSTATE_CURPERTRANS 16
	AggStatePerTrans curpertrans;	/* currently active trans state, if any */
	bool		input_done;		/* indicates end of input */
	bool		agg_done;		/* indicates completion of Agg scan */
	int			projected_set;	/* The last projected grouping set */
	/* zero-based index of current_set within this struct */
#define FIELDNO_AGGSTATE_CURRENT_SET 20
	int			current_set;	/* The current grouping set being evaluated */
	Bitmapset  *grouped_cols;	/* grouped cols in current projection */
	List	   *all_grouped_cols;	/* list of all grouped cols in DESC order */
	/* These fields are for grouping set phase data */
	int			maxsets;		/* The max number of sets in any phase */
	AggStatePerPhase phases;	/* array of all phases */
	Tuplesortstate *sort_in;	/* sorted input to phases > 1 */
	Tuplesortstate *sort_out;	/* input is copied here for next phase */
	TupleTableSlot *sort_slot;	/* slot for sort results */
	/* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
	AggStatePerGroup *pergroups;	/* grouping set indexed array of per-group
									 * pointers */
	HeapTuple	grp_firstTuple; /* copy of first tuple of current group */
	/* these fields are used in AGG_HASHED and AGG_MIXED modes: */
	bool		table_filled;	/* hash table filled yet? */
	int			num_hashes;		/* NOTE(review): undocumented; presumably the
								 * number of hash tables (entries in perhash)
								 * -- confirm */
	MemoryContext	hash_metacxt;	/* memory for hash table itself */
	struct HashTapeInfo *hash_tapeinfo; /* metadata for spill tapes */
	struct HashAggSpill *hash_spills; /* HashAggSpill for each grouping set,
										 exists only during first pass */
	TupleTableSlot *hash_spill_slot; /* slot for reading from spill files */
	List	   *hash_batches;	/* hash batches remaining to be processed */
	bool		hash_ever_spilled;	/* ever spilled during this execution? */
	bool		hash_spill_mode;	/* we hit a limit during the current batch
									   and we must not create new groups */
	Size		hash_mem_limit;	/* limit before spilling hash table */
	uint64		hash_ngroups_limit;	/* limit before spilling hash table */
	int			hash_planned_partitions; /* number of partitions planned
											for first pass */
	double		hashentrysize;	/* estimate revised during execution */
	Size		hash_mem_peak;	/* peak hash table memory usage */
	uint64		hash_ngroups_current;	/* number of groups currently in
										   memory in all hash tables */
	uint64		hash_disk_used; /* kB of disk space used */
	int			hash_batches_used;	/* batches used during entire execution */

	AggStatePerHash perhash;	/* array of per-hashtable data */
	AggStatePerGroup *hash_pergroup;	/* grouping set indexed array of
										 * per-group pointers */

	/* support for evaluation of agg input expressions: */
	/* zero-based index of all_pergroups within this struct */
#define FIELDNO_AGGSTATE_ALL_PERGROUPS 49
	AggStatePerGroup *all_pergroups;	/* array of first ->pergroups, then
										 * ->hash_pergroup */
	ProjectionInfo *combinedproj;	/* projection machinery */

	int			group_id;		/* GROUP_ID in current projection. This is passed
								 * to GroupingSetId expressions, similar to the
								 * 'grouped_cols' value. */
	int			gset_id;		/* NOTE(review): undocumented; presumably the
								 * grouping set id for the current projection
								 * -- confirm */

	/* if input tuple has an AggExprId, save the Attribute Number */
	Index       AggExprId_AttrNum;
} AggState;

它们之间的关系如下图所示(以投影中有聚合操作为例):

GreenPlum聚合结构解析

下面分别对AggState中的成员进行介绍。

ScanState中存储有聚合算子操作的计划节点描述信息PlanState。PlanState中有投影信息和执行计划树节点。计划节点Plan里的targetlist链表为聚合操作的一些相关信息。比如Aggref,aggref.args链表有针对哪一列进行聚合操作的信息。

AggState中的aggs链表存储有所有聚合操作函数的描述信息,最终aggref指向Plan的targetlist中。

aggstrategy指定聚合模式,共有4种:

/*
 * AggStrategy - the processing mode selected for an aggregate node.
 *
 * The numeric values are written out explicitly; they are the same values
 * the enumerators receive from implicit numbering.
 */
typedef enum AggStrategy
{
	/* simple agg across all input rows */
	AGG_PLAIN = 0,
	/* grouped agg, input must be sorted */
	AGG_SORTED = 1,
	/* grouped agg, use internal hashtable */
	AGG_HASHED = 2,
	/* grouped agg, hash and sort both used */
	AGG_MIXED = 3
} AggStrategy;

phase:聚合操作中间函数(比如avg的求和函数)的计算步骤。对于最终函数,并未为其生成表达式计算步骤,而是在finalize_aggregate函数中直接调用该函数进行计算。

peragg:聚合操作最终计算函数的元数据信息。这是一个数组,描述所有聚合操作的最终计算函数

pertrans:聚合操作中间函数的元数据信息。这也是一个数组。

pergroups:每个中间操作函数的返回值

发表评论
留言与评论(共有 0 条评论) “”
   
验证码:

相关文章

推荐文章