PostgreSQL 源碼解讀（88）- 查詢語句#73（SeqNext函數#1）

發布時間：2020-08-12 13:20:01 來源：ITPUB博客閱讀：451 作者：husthxd 欄目：關系型數據庫

本節介紹了SeqNext函數的主要實現邏輯以及該函數中初始化相關數據結構的實現邏輯。SeqNext函數作為參數傳遞到函數ExecScan中，執行實際的掃描操作。

一、數據結構

TupleTableSlot
Tuple Table Slot,用于存儲元組相關信息

/*
 * Base tuple table slot type.
 *
 * Each FIELDNO_* macro below carries the zero-based position of the field
 * that follows it (tts_ops sits at position 3 and has no macro), so the
 * macros must be kept in sync if fields are ever reordered.
 * NOTE(review): presumably consumed by code that addresses fields by index
 * -- confirm against the users of these macros.
 */
typedef struct TupleTableSlot
{
    NodeTag     type;// node tag
#define FIELDNO_TUPLETABLESLOT_FLAGS 1
    uint16      tts_flags;      /* Boolean states */
#define FIELDNO_TUPLETABLESLOT_NVALID 2
    AttrNumber  tts_nvalid;     /* # of valid values in tts_values */
    const TupleTableSlotOps *const tts_ops; /* implementation of slot */
#define FIELDNO_TUPLETABLESLOT_TUPLEDESCRIPTOR 4
    TupleDesc   tts_tupleDescriptor;    /* slot's tuple descriptor */
#define FIELDNO_TUPLETABLESLOT_VALUES 5
    Datum      *tts_values;     /* current per-attribute values */
#define FIELDNO_TUPLETABLESLOT_ISNULL 6
    bool       *tts_isnull;     /* current per-attribute isnull flags */
    MemoryContext tts_mcxt;     /* slot itself is in this context */
} TupleTableSlot;


/*
 * Tuple descriptor: attribute count, composite type identity, reference
 * count and constraints, followed by one pg_attribute entry per attribute.
 * TupleDesc is declared here as a pointer to struct tupleDesc.
 */
typedef struct tupleDesc
{
    int         natts;          /* number of attributes in the tuple */
    Oid         tdtypeid;       /* composite type ID for tuple type */
    int32       tdtypmod;       /* typmod for tuple type */
    int         tdrefcount;     /* reference count, or -1 if not counting */
    TupleConstr *constr;        /* constraints, or NULL if none */
    /* attrs[N] is the description of Attribute Number N+1 */
    // (array indexes are zero-based; attribute numbers start at 1)
    FormData_pg_attribute attrs[FLEXIBLE_ARRAY_MEMBER];
}  *TupleDesc;

HeapTuple
HeapTupleData是一個指向元組的內存數據結構
HeapTuple是指向HeapTupleData的指針

/*
 * HeapTupleData is an in-memory data structure that points to a tuple.
 *
 * There are several ways in which this data structure is used:
 *
 * * Pointer to a tuple in a disk buffer: t_data points directly into the
 *   buffer (which the code had better be holding a pin on, but this is not
 *   reflected in HeapTupleData itself).
 *
 * * Pointer to nothing: t_data is NULL.  This is used as a failure indication
 *   in some functions.
 *
 * * Part of a palloc'd tuple: the HeapTupleData itself and the tuple
 *   form a single palloc'd chunk.  t_data points to the memory location
 *   immediately following the HeapTupleData struct (at offset HEAPTUPLESIZE).
 *   This is the output format of heap_form_tuple and related routines.
 *
 * * Separately allocated tuple: t_data points to a palloc'd chunk that
 *   is not adjacent to the HeapTupleData.  (This case is deprecated since
 *   it's difficult to tell apart from case #1.  It should be used only in
 *   limited contexts where the code knows that case #1 will never apply.)
 *
 * * Separately allocated minimal tuple: t_data points MINIMAL_TUPLE_OFFSET
 *   bytes before the start of a MinimalTuple.  As with the previous case,
 *   this can't be told apart from case #1 by inspection; code setting up
 *   or destroying this representation has to know what it's doing.
 *
 * t_len should always be valid, except in the pointer-to-nothing case.
 * t_self and t_tableOid should be valid if the HeapTupleData points to
 * a disk buffer, or if it represents a copy of a tuple on disk.  They
 * should be explicitly set invalid in manufactured tuples.
 */
typedef struct HeapTupleData
{
    uint32      t_len;          /* length of *t_data */
    ItemPointerData t_self;     /* SelfItemPointer */
    Oid         t_tableOid;     /* table the tuple came from */
#define FIELDNO_HEAPTUPLEDATA_DATA 3
    HeapTupleHeader t_data;     /* -> tuple header and data */
} HeapTupleData;

/* HeapTuple is a pointer to a HeapTupleData */
typedef HeapTupleData *HeapTuple;

/* size of the HeapTupleData struct itself, rounded up by MAXALIGN */
#define HEAPTUPLESIZE   MAXALIGN(sizeof(HeapTupleData))

HeapScanDesc
HeapScanDesc是指向HeapScanDescData結構體的指針

/*
 * Descriptor for a heap scan, grouped into: scan parameters, state set up
 * at initscan time, the scan's current position, and fields used only in
 * page-at-a-time mode and for bitmap scans.
 */
typedef struct HeapScanDescData
{
    /* scan parameters */
    Relation    rs_rd;          /* heap relation descriptor */
    Snapshot    rs_snapshot;    /* snapshot to see */
    int         rs_nkeys;       /* number of scan keys */
    ScanKey     rs_key;         /* array of scan key descriptors */
    bool        rs_bitmapscan;  /* true if this is really a bitmap scan */
    bool        rs_samplescan;  /* true if this is really a sample scan */
    bool        rs_pageatatime; /* verify visibility page-at-a-time? */
    bool        rs_allow_strat; /* allow or disallow use of access strategy */
    bool        rs_allow_sync;  /* allow or disallow use of syncscan */
    bool        rs_temp_snap;   /* unregister snapshot at scan end? */

    /* state set up at initscan time */
    BlockNumber rs_nblocks;     /* total number of blocks in rel */
    BlockNumber rs_startblock;  /* block # to start at */
    BlockNumber rs_numblocks;   /* max number of blocks to scan */
    /* rs_numblocks is usually InvalidBlockNumber, meaning "scan whole rel" */
    
    BufferAccessStrategy rs_strategy;   /* access strategy for reads */
    bool        rs_syncscan;    /* report location to syncscan logic? */

    /* scan current state */
    bool        rs_inited;      /* false = scan not init'd yet */
    HeapTupleData rs_ctup;      /* current tuple in scan, if any */
    BlockNumber rs_cblock;      /* current block # in scan, if any */
    Buffer      rs_cbuf;        /* current buffer in scan, if any */
    /* NB: if rs_cbuf is not InvalidBuffer, we hold a pin on that buffer */

    ParallelHeapScanDesc rs_parallel;   /* parallel scan information */

    /* these fields only used in page-at-a-time mode and for bitmap scans */
    int         rs_cindex;      /* current tuple's index in vistuples */
    int         rs_ntuples;     /* number of visible tuples on page */
    OffsetNumber rs_vistuples[MaxHeapTuplesPerPage];    /* their offsets */
} HeapScanDescData;

/* struct definitions appear in relscan.h */
typedef struct HeapScanDescData *HeapScanDesc;

ScanState
ScanState在PlanState的基礎上進行擴展,用于表示對底層關系(relation)進行掃描的節點類型。

/* ----------------
 *   ScanState information
 *
 *      ScanState extends PlanState for node types that represent
 *      scans of an underlying relation.  It can also be used for nodes
 *      that scan the output of an underlying plan node --- in that case,
 *      only ScanTupleSlot is actually useful, and it refers to the tuple
 *      retrieved from the subplan.
 *
 *      currentRelation    relation being scanned (NULL if none)
 *      currentScanDesc    current scan descriptor for scan (NULL if none)
 *      ScanTupleSlot      pointer to slot in tuple table holding scan tuple
 * ----------------
 */
typedef struct ScanState
{
    PlanState   ps;             /* its first field is NodeTag */
    Relation    ss_currentRelation;
    HeapScanDesc ss_currentScanDesc;
    TupleTableSlot *ss_ScanTupleSlot;
} ScanState;

/* ----------------
 *   SeqScanState information
 *
 *      Wraps a ScanState (as the first member) and adds the size of the
 *      parallel heap scan descriptor.
 * ----------------
 */
typedef struct SeqScanState
{
    ScanState   ss;             /* its first field is NodeTag */
    Size        pscan_len;      /* size of parallel heap scan descriptor */
} SeqScanState;

二、源碼解讀

SeqNext函數是ExecSeqScan的元組的實際訪問方法(ExecScanAccessMtd).這里簡單介紹了初始化過程,實際的元組獲取過程下節再行介紹.

/* ----------------------------------------------------------------
 *      SeqNext
 *
 *      Workhorse for ExecSeqScan: fetches the next tuple of the
 *      sequential scan into the node's scan tuple slot.
 *
 *      On the first call (no scan descriptor yet) a heap scan is
 *      started on the node's current relation using the executor
 *      state's snapshot and no scan keys.
 *
 *      Returns the node's scan tuple slot: holding the fetched tuple,
 *      or cleared when heap_getnext() yields no tuple.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
SeqNext(SeqScanState *node)
{
    /* pull what we need out of the scan state and the executor state */
    EState     *estate = node->ss.ps.state;
    ScanDirection direction = estate->es_direction;
    TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
    HeapScanDesc scandesc = node->ss.ss_currentScanDesc;
    HeapTuple   tuple;

    if (scandesc == NULL)
    {
        /*
         * No descriptor yet: we get here if the scan is not parallel, or if
         * we're serially executing a scan that was planned to be parallel.
         * Start the scan and remember the descriptor for later calls.
         */
        scandesc = heap_beginscan(node->ss.ss_currentRelation,
                                  estate->es_snapshot,
                                  0, NULL);
        node->ss.ss_currentScanDesc = scandesc;
    }

    /* get the next tuple from the table */
    tuple = heap_getnext(scandesc, direction);

    if (tuple == NULL)
    {
        /* nothing fetched: hand back an empty slot */
        ExecClearTuple(slot);
        return slot;
    }

    /*
     * Save the tuple, together with the buffer the access method returned
     * it in, in our scan tuple slot.  Tuples returned by heap_getnext() are
     * pointers onto disk pages and were not created with palloc(), so they
     * should not be pfree()'d.  Storing the tuple also increments the
     * refcount of the buffer; the refcount will not be dropped until the
     * tuple table slot is cleared.
     */
    ExecStoreBufferHeapTuple(tuple,                 /* tuple to store */
                             slot,                  /* slot to store in */
                             scandesc->rs_cbuf);    /* buffer associated
                                                     * with this tuple */

    return slot;
}

/*
 * SeqRecheck -- access method routine to recheck a tuple in EvalPlanQual
 *
 * Always reports success; neither argument is examined.
 */
static bool
SeqRecheck(SeqScanState *node, TupleTableSlot *slot)
{
    /*
     * Note that unlike IndexScan, SeqScan never use keys in heap_beginscan
     * (and this is very bad) - so, here we do not check are keys ok or not.
     */
    (void) node;
    (void) slot;

    return true;
}


/* ----------------
 *      heap_beginscan  - begin relation scan
 *
 * heap_beginscan is the "standard" case.
 *
 * heap_beginscan_catalog differs in setting up its own temporary snapshot.
 *
 * heap_beginscan_strat offers an extended API that lets the caller control
 * whether a nondefault buffer access strategy can be used, and whether
 * syncscan can be chosen (possibly resulting in the scan not starting from
 * block zero).  Both of these default to true with plain heap_beginscan.
 *
 * heap_beginscan_bm is an alternative entry point for setting up a
 * HeapScanDesc for a bitmap heap scan.  Although that scan technology is
 * really quite unlike a standard seqscan, there is just enough commonality
 * to make it worth using the same data structure.
 *
 * heap_beginscan_sampling is an alternative entry point for setting up a
 * HeapScanDesc for a TABLESAMPLE scan.  As with bitmap scans, it's worth
 * using the same data structure although the behavior is rather different.
 * In addition to the options offered by heap_beginscan_strat, this call
 * also allows control of whether page-mode visibility checking is used.
 * ----------------
 */
HeapScanDesc
heap_beginscan(Relation relation, Snapshot snapshot,
               int nkeys, ScanKey key)
{
    /* standard case: non-parallel, every optimization allowed */
    HeapScanDesc result;

    result = heap_beginscan_internal(relation, snapshot, nkeys, key,
                                     NULL,      /* parallel_scan */
                                     true,      /* allow_strat */
                                     true,      /* allow_sync */
                                     true,      /* allow_pagemode */
                                     false,     /* is_bitmapscan */
                                     false,     /* is_samplescan */
                                     false);    /* temp_snap */

    return result;
}


/*
 * heap_beginscan_internal - common worker that builds a HeapScanDesc
 *
 *  relation, snapshot  relation to scan and the snapshot to scan it with
 *  nkeys, key          number of scan keys and the scan key array
 *  parallel_scan       parallel scan descriptor (NULL is passed by
 *                      heap_beginscan)
 *  allow_strat         allow use of a buffer access strategy?
 *  allow_sync          allow use of syncscan?
 *  allow_pagemode      allow page-at-a-time visibility checking?
 *  is_bitmapscan       is this really a bitmap scan?
 *  is_samplescan       is this really a sample scan?
 *  temp_snap           stored in rs_temp_snap (unregister snapshot at scan
 *                      end?)
 *
 * Returns a freshly palloc'd scan descriptor that has been run through
 * initscan().
 */
static HeapScanDesc
heap_beginscan_internal(Relation relation, Snapshot snapshot,
                        int nkeys, ScanKey key,
                        ParallelHeapScanDesc parallel_scan,
                        bool allow_strat,
                        bool allow_sync,
                        bool allow_pagemode,
                        bool is_bitmapscan,
                        bool is_samplescan,
                        bool temp_snap)
{
    HeapScanDesc desc;

    /*
     * increment relation ref count while scanning relation
     *
     * This is just to make really sure the relcache entry won't go away while
     * the scan has a pointer to it.  Caller should be holding the rel open
     * anyway, so this is redundant in all normal scenarios...
     */
    RelationIncrementReferenceCount(relation);

    /* allocate the scan descriptor */
    desc = (HeapScanDesc) palloc(sizeof(HeapScanDescData));

    /* what to scan, and how to see it */
    desc->rs_rd = relation;
    desc->rs_snapshot = snapshot;
    desc->rs_nkeys = nkeys;
    desc->rs_parallel = parallel_scan;
    desc->rs_temp_snap = temp_snap;

    /* kind of scan */
    desc->rs_bitmapscan = is_bitmapscan;
    desc->rs_samplescan = is_samplescan;

    /* what initscan is permitted to enable */
    desc->rs_allow_strat = allow_strat;
    desc->rs_allow_sync = allow_sync;
    desc->rs_strategy = NULL;   /* set in initscan */

    /* we can use page-at-a-time mode if it's an MVCC-safe snapshot */
    desc->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot);

    /*
     * For a seqscan in a serializable transaction, acquire a predicate lock
     * on the entire relation. This is required not only to lock all the
     * matching tuples, but also to conflict with new insertions into the
     * table. In an indexscan, we take page locks on the index pages covering
     * the range specified in the scan qual, but in a heap scan there is
     * nothing more fine-grained to lock. A bitmap scan is a different story,
     * there we have already scanned the index and locked the index pages
     * covering the predicate. But in that case we still have to lock any
     * matching heap tuples.
     */
    if (!is_bitmapscan)
        PredicateLockRelation(relation, snapshot);

    /* we only need to set this up once */
    desc->rs_ctup.t_tableOid = RelationGetRelid(relation);

    /*
     * The key array is allocated here instead of in initscan() because
     * heap_rescan also calls initscan() and we don't want to allocate memory
     * again.
     */
    desc->rs_key = (nkeys > 0)
        ? (ScanKey) palloc(sizeof(ScanKeyData) * nkeys)
        : NULL;

    /* fill in the remaining state; false = do not keep a previous startblock */
    initscan(desc, key, false);

    return desc;
}

/*
 * Get the LockTupleMode for a given MultiXactStatus.
 *
 * Expands to a plain array lookup: `status` is used directly as the index
 * into MultiXactStatusLock, and the macro itself performs no range check.
 */
#define TUPLOCK_from_mxstatus(status) \
            (MultiXactStatusLock[(status)])

/* ----------------------------------------------------------------
 *                       heap support routines
 * ----------------------------------------------------------------
 */

/* ----------------
 *      initscan - scan code common to heap_beginscan and heap_rescan
 *
 *  scan            descriptor to (re)initialize
 *  key             scan keys to copy into scan->rs_key, or NULL for none
 *  keep_startblock true when rescanning: leave rs_startblock as it was
 * ----------------
 */
static void
initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
{
    bool        big_rel;
    bool        allow_strat;
    bool        allow_sync;

    /*
     * Determine the number of blocks we have to scan.
     *
     * It is sufficient to do this once at scan start, since any tuples added
     * while the scan is in progress will be invisible to my snapshot anyway.
     * (That is not true when using a non-MVCC snapshot.  However, we couldn't
     * guarantee to return tuples added after scan start anyway, since they
     * might go into pages we already scanned.  To guarantee consistent
     * results for a non-MVCC snapshot, the caller must hold some higher-level
     * lock that ensures the interesting tuple(s) won't change.)
     */
    scan->rs_nblocks = (scan->rs_parallel != NULL)
        ? scan->rs_parallel->phs_nblocks
        : RelationGetNumberOfBlocks(scan->rs_rd);

    /*
     * If the table is large relative to NBuffers, use a bulk-read access
     * strategy and enable synchronized scanning (see syncscan.c).  Although
     * the thresholds for these features could be different, we make them the
     * same so that there are only two behaviors to tune rather than four.
     * (However, some callers need to be able to disable one or both of these
     * behaviors, independently of the size of the table; also there is a GUC
     * variable that can disable synchronized scanning.)
     *
     * Note that heap_parallelscan_initialize has a very similar test; if you
     * change this, consider changing that one, too.
     */
    big_rel = !RelationUsesLocalBuffers(scan->rs_rd) &&
        scan->rs_nblocks > NBuffers / 4;
    allow_strat = big_rel && scan->rs_allow_strat;
    allow_sync = big_rel && scan->rs_allow_sync;

    if (!allow_strat)
    {
        /* strategy not wanted: release any leftover one */
        if (scan->rs_strategy != NULL)
            FreeAccessStrategy(scan->rs_strategy);
        scan->rs_strategy = NULL;
    }
    else if (scan->rs_strategy == NULL)
    {
        /* During a rescan, keep the previous strategy object. */
        scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
    }

    if (scan->rs_parallel != NULL)
    {
        /* For parallel scan, believe whatever ParallelHeapScanDesc says. */
        scan->rs_syncscan = scan->rs_parallel->phs_syncscan;
    }
    else
    {
        bool        use_sync = allow_sync && synchronize_seqscans;

        scan->rs_syncscan = use_sync;

        /*
         * When rescanning, we want to keep the previous startblock setting,
         * so that rewinding a cursor doesn't generate surprising results
         * (the active syncscan setting above is reset regardless).
         * Otherwise start where syncscan tells us to, or at block zero.
         */
        if (!keep_startblock)
            scan->rs_startblock = use_sync
                ? ss_get_location(scan->rs_rd, scan->rs_nblocks)
                : 0;
    }

    /* reset the scan position; nothing has been read yet */
    scan->rs_inited = false;
    scan->rs_numblocks = InvalidBlockNumber;
    scan->rs_cblock = InvalidBlockNumber;
    scan->rs_cbuf = InvalidBuffer;
    scan->rs_ctup.t_data = NULL;
    ItemPointerSetInvalid(&scan->rs_ctup.t_self);

    /* page-at-a-time fields are always invalid when not rs_inited */

    /* copy the scan key, if appropriate */
    if (key != NULL)
        memcpy(scan->rs_key, key, scan->rs_nkeys * sizeof(ScanKeyData));

    /*
     * Currently, we don't have a stats counter for bitmap heap scans (but the
     * underlying bitmap index scans will be counted) or sample scans (we only
     * update stats for tuple fetches there)
     */
    if (!(scan->rs_bitmapscan || scan->rs_samplescan))
        pgstat_count_heap_scan(scan->rs_rd);
}

三、跟蹤分析

測試腳本如下

testdb=# explain select dw.*,grjf.grbh,grjf.xm,grjf.ny,grjf.je 
testdb-# from t_dwxx dw,lateral (select gr.grbh,gr.xm,jf.ny,jf.je 
testdb(#                         from t_grxx gr inner join t_jfxx jf 
testdb(#                                        on gr.dwbh = dw.dwbh 
testdb(#                                           and gr.grbh = jf.grbh) grjf
testdb-# order by dw.dwbh;
                                        QUERY PLAN                                        
------------------------------------------------------------------------------------------
 Sort  (cost=20070.93..20320.93 rows=100000 width=47)
   Sort Key: dw.dwbh
   ->  Hash Join  (cost=3754.00..8689.61 rows=100000 width=47)
         Hash Cond: ((gr.dwbh)::text = (dw.dwbh)::text)
         ->  Hash Join  (cost=3465.00..8138.00 rows=100000 width=31)
               Hash Cond: ((jf.grbh)::text = (gr.grbh)::text)
               ->  Seq Scan on t_jfxx jf  (cost=0.00..1637.00 rows=100000 width=20)
               ->  Hash  (cost=1726.00..1726.00 rows=100000 width=16)
                     ->  Seq Scan on t_grxx gr  (cost=0.00..1726.00 rows=100000 width=16)
         ->  Hash  (cost=164.00..164.00 rows=10000 width=20)
               ->  Seq Scan on t_dwxx dw  (cost=0.00..164.00 rows=10000 width=20)
(11 rows)

啟動gdb,設置斷點,進入SeqNext

(gdb) b SeqNext
Breakpoint 1 at 0x7156b2: file nodeSeqscan.c, line 60.
(gdb) c
Continuing.

Breakpoint 1, SeqNext (node=0x2ed1588) at nodeSeqscan.c:60
60      scandesc = node->ss.ss_currentScanDesc;

變量賦值

60      scandesc = node->ss.ss_currentScanDesc;
(gdb) n
61      estate = node->ss.ps.state;
(gdb) 
62      direction = estate->es_direction;
(gdb) 
63      slot = node->ss.ss_ScanTupleSlot;
(gdb) 
65      if (scandesc == NULL)

scandesc為NULL,進入初始化,調用heap_beginscan

(gdb) p scandesc
$1 = (HeapScanDesc) 0x0

進入heap_beginscan/heap_beginscan_internal函數

(gdb) n
71          scandesc = heap_beginscan(node->ss.ss_currentRelation,
(gdb) step
heap_beginscan (relation=0x7fb27c488a90, snapshot=0x2e0b8f0, nkeys=0, key=0x0) at heapam.c:1407
1407        return heap_beginscan_internal(relation, snapshot, nkeys, key, NULL,
(gdb) step
heap_beginscan_internal (relation=0x7fb27c488a90, snapshot=0x2e0b8f0, nkeys=0, key=0x0, parallel_scan=0x0, 
    allow_strat=true, allow_sync=true, allow_pagemode=true, is_bitmapscan=false, is_samplescan=false, temp_snap=false)
    at heapam.c:1469
1469        RelationIncrementReferenceCount(relation);

heap_beginscan_internal->增加relation引用計數

1469        RelationIncrementReferenceCount(relation);
(gdb) n

heap_beginscan_internal->初始化HeapScanDesc結構體

1474        scan = (HeapScanDesc) palloc(sizeof(HeapScanDescData));
(gdb) 
1476        scan->rs_rd = relation;
(gdb) 
1477        scan->rs_snapshot = snapshot;
(gdb) 
1478        scan->rs_nkeys = nkeys;
(gdb) 
1479        scan->rs_bitmapscan = is_bitmapscan;
(gdb) 
1480        scan->rs_samplescan = is_samplescan;
(gdb) 
1481        scan->rs_strategy = NULL;   /* set in initscan */
(gdb) 
1482        scan->rs_allow_strat = allow_strat;
(gdb) 
1483        scan->rs_allow_sync = allow_sync;
(gdb) 
1484        scan->rs_temp_snap = temp_snap;
(gdb) 
1485        scan->rs_parallel = parallel_scan;
(gdb) 
1490        scan->rs_pageatatime = allow_pagemode && IsMVCCSnapshot(snapshot);
(gdb) 
1503        if (!is_bitmapscan)

heap_beginscan_internal->非位圖掃描,謂詞鎖定

1503        if (!is_bitmapscan)
(gdb) p is_bitmapscan
$2 = false
(gdb) n
1504            PredicateLockRelation(relation, snapshot);
(gdb) 
1507        scan->rs_ctup.t_tableOid = RelationGetRelid(relation);

heap_beginscan_internal->進入initscan函數

(gdb) n
1513        if (nkeys > 0)
(gdb) 
1516            scan->rs_key = NULL;
(gdb) 
1518        initscan(scan, key, false);
(gdb) step
initscan (scan=0x2ee4568, key=0x0, keep_startblock=false) at heapam.c:236
236     if (scan->rs_parallel != NULL)

initscan->relation的大小相對于buffer并不大(block數未超過NBuffers的1/4),不使用訪問策略(批量讀取)&同步掃描

(gdb) n
239         scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);
(gdb) 
253     if (!RelationUsesLocalBuffers(scan->rs_rd) &&
(gdb) 
254         scan->rs_nblocks > NBuffers / 4)
(gdb) 
253     if (!RelationUsesLocalBuffers(scan->rs_rd) &&
(gdb) 
260         allow_strat = allow_sync = false;

initscan->設置其他變量

312     if (key != NULL)
(gdb) 
320     if (!scan->rs_bitmapscan && !scan->rs_samplescan)
(gdb) 
321         pgstat_count_heap_scan(scan->rs_rd);
(gdb) 
322 }
(gdb)

initscan->返回heap_beginscan_internal,完成初始化

(gdb) n
heap_beginscan_internal (relation=0x7fb27c488a90, snapshot=0x2e0b8f0, nkeys=0, key=0x0, parallel_scan=0x0, 
    allow_strat=true, allow_sync=true, allow_pagemode=true, is_bitmapscan=false, is_samplescan=false, temp_snap=false)
    at heapam.c:1520
1520        return scan;
(gdb) p *scan
$4 = {rs_rd = 0x7fb27c488a90, rs_snapshot = 0x2e0b8f0, rs_nkeys = 0, rs_key = 0x0, rs_bitmapscan = false, 
  rs_samplescan = false, rs_pageatatime = true, rs_allow_strat = true, rs_allow_sync = true, rs_temp_snap = false, 
  rs_nblocks = 726, rs_startblock = 0, rs_numblocks = 4294967295, rs_strategy = 0x0, rs_syncscan = false, 
  rs_inited = false, rs_ctup = {t_len = 2139062143, t_self = {ip_blkid = {bi_hi = 65535, bi_lo = 65535}, ip_posid = 0}, 
    t_tableOid = 16742, t_data = 0x0}, rs_cblock = 4294967295, rs_cbuf = 0, rs_parallel = 0x0, rs_cindex = 2139062143, 
  rs_ntuples = 2139062143, rs_vistuples = {32639 <repeats 291 times>}}
(gdb)

DONE!

四、參考資料

PG Document:Query Planning

向AI問一下細節

91超碰碰碰碰久久久久久综合_超碰av人澡人澡人澡人澡人掠_国产黄大片在线观看画质优化_txt小说免费全本

PostgreSQL 源碼解讀（88）- 查詢語句#73（SeqNext函數#1）

一、數據結構

二、源碼解讀

三、跟蹤分析

四、參考資料

猜你喜歡

91超碰碰碰碰久久久久久综合_超碰av人澡人澡人澡人澡人掠_国产黄大片在线观看画质优化_txt小说免费全本

PostgreSQL 源碼解讀（88）- 查詢語句#73（SeqNext函數#1）

一、數據結構

二、源碼解讀

三、跟蹤分析

四、參考資料

猜你喜歡

最新資訊

相關推薦

相關標簽