PostgreSQL 是基于多进程架构实现的开源数据库,其进程间通信(IPC)主要通过共享内存和信号量实现。接下来将从源码角度学习 PostgreSQL 中的 IPC 原理,借鉴其优秀的设计思路。内容较多,会分成多个小节进行讲解;若有理解不正确的地方,欢迎指正与探讨。
1 全局变量
/*
 * Shared memory global variables.
 *
 * ShmemSegHdr, ShmemBase, ShmemEnd and ShmemIndex are declared static (file
 * scope only); ShmemLock is the only one declared without static.
 */
static PGShmemHeader *ShmemSegHdr; /* shared mem segment header */
static void *ShmemBase; /* start address of shared memory */
static void *ShmemEnd; /* end+1 address of shared memory (one past the last byte) */
slock_t *ShmemLock; /* spinlock for shared memory and LWLock
* allocation */
static HTAB *ShmemIndex = NULL; /* primary index hashtable for shmem; NULL until set up */
2 PGShmemHeader
该结构体为共享内存的标准头信息,包含校验魔数、创建进程的PID、内存段总大小、空闲空间的偏移量、动态共享内存控制段的ID以及指向ShmemIndex表的指针等。
/*
 * PGShmemHeader: header placed at the start of every Postgres shared memory
 * segment.
 *
 * It records a magic number identifying the segment, the PID of the creating
 * process, the total segment size, the offset of the first free space, the
 * ID of the dynamic shared memory control segment, and a pointer to the
 * ShmemIndex table.  On non-Windows builds it also records the device and
 * inode number of the data directory.
 */
typedef struct PGShmemHeader /* standard header for all Postgres shmem */
{
int32 magic; /* magic # to identify Postgres segments */
/* expected value of the magic field above */
#define PGShmemMagic 679834894
pid_t creatorPID; /* PID of creating process (set but unread) */
Size totalsize; /* total size of segment */
Size freeoffset; /* offset to first free space */
dsm_handle dsm_control; /* ID of dynamic shared memory control seg */
void *index; /* pointer to ShmemIndex table */
#ifndef WIN32 /* Windows doesn't have useful inode#s */
dev_t device; /* device data directory is on */
ino_t inode; /* inode number of data directory */
#endif
} PGShmemHeader;
3 ShmemIndexEnt
ShmemIndexEnt是共享内存索引表中的哈希桶,该结构体包含索引键、位置、请求字节数和实际分配字节数等信息。
/* size constants for the shmem index table */
/*
 * max size of data structure string name; this is the length of the key[]
 * array in ShmemIndexEnt
 */
#define SHMEM_INDEX_KEYSIZE (48)
/* estimated size of the shmem index table (not a hard limit) */
#define SHMEM_INDEX_SIZE (64)
/*
 * Queue link node (the article notes this type comes from shmqueue.c).
 * Each node holds pointers to the previous and next SHM_QUEUE nodes.
 */
/* shmqueue.c */
typedef struct SHM_QUEUE
{
struct SHM_QUEUE *prev; /* previous node in the queue */
struct SHM_QUEUE *next; /* next node in the queue */
} SHM_QUEUE;
/*
 * this is a hash bucket in the shmem index table
 *
 * Each entry maps a string name to the location and size of one structure
 * in shared memory.
 */
typedef struct
{
char key[SHMEM_INDEX_KEYSIZE]; /* string name */
void *location; /* location in shared mem, i.e. where the named structure lives */
Size size; /* # bytes requested for the structure */
Size allocated_size; /* # bytes actually allocated */
} ShmemIndexEnt;
4 HTAB
该结构体为哈希表的顶层控制结构(ShmemIndex即为HTAB类型),用于加速各个模块共享内存位置的查找,包含控制信息、哈希函数、内存上下文以及哈希键大小等信息。
/*
 * Top control structure for a hashtable --- in a shared table, each backend
 * has its own copy (OK since no fields change at runtime)
 *
 * The shared state itself is reached through hctl; the remaining fields are
 * function pointers, bookkeeping flags, and locally cached fixed values.
 */
struct HTAB
{
HASHHDR *hctl; /* => shared control information */
HASHSEGMENT *dir; /* directory of segment starts */
HashValueFunc hash; /* hash function */
HashCompareFunc match; /* key comparison function */
HashCopyFunc keycopy; /* key copying function */
HashAllocFunc alloc; /* memory allocator */
MemoryContext hcxt; /* memory context if default allocator used */
char *tabname; /* table name (for error messages) */
bool isshared; /* true if table is in shared memory */
bool isfixed; /* if true, don't enlarge */
/* freezing a shared table isn't allowed, so we can keep state here */
bool frozen; /* true = no more inserts allowed */
/* We keep local copies of these fixed values to reduce contention */
Size keysize; /* hash key length in bytes */
long ssize; /* segment size --- must be power of 2 */
int sshift; /* segment shift = log2(ssize) */
};
5 不同的操作系统其共享内存的分配方式也有所不同
/*
 * Possible values for huge_pages
 *
 * NOTE(review): the exact behavior of each value is not shown here; the
 * per-value notes below are inferred from the names only --- confirm against
 * the code that consumes this setting.
 */
typedef enum
{
HUGE_PAGES_OFF, /* presumably: huge pages disabled */
HUGE_PAGES_ON, /* presumably: huge pages required */
HUGE_PAGES_TRY /* presumably: huge pages attempted --- TODO confirm */
}HugePagesType;
/*
 * Possible values for shared_memory_type
 *
 * NOTE(review): the per-value notes below are inferred from the enumerator
 * names only --- confirm against the platform-specific allocation code.
 */
typedef enum
{
SHMEM_TYPE_WINDOWS, /* presumably: Windows shared memory */
SHMEM_TYPE_SYSV, /* presumably: System V shared memory */
SHMEM_TYPE_MMAP /* presumably: mmap-based shared memory */
}PGShmemType;
6 标识共享内存区块的相关字段
/*
 * Identifiers for a shared memory segment: the key is what gets passed to
 * shmget(2), the ID is what shmget(2) returns.
 */
typedef key_t IpcMemoryKey; /* shared memory key passed to shmget(2) */
typedef int IpcMemoryId; /* shared memory ID returned by shmget(2) */
/*
 * How does a given IpcMemoryId relate to this PostgreSQL process?
 *
 * Each value classifies a segment ID: whether the segment exists, whether it
 * is pertinent to this process's data directory (DataDir), and whether any
 * PIDs are attached to it.
 *
 * One could recycle unattached segments of different data directories if we
 * distinguished that case from other SHMSTATE_FOREIGN cases. Doing so would
 * cause us to visit less of the key space, making us less likely to detect a
 * SHMSTATE_ATTACHED key. It would also complicate the concurrency analysis,
 * in that postmasters of different data directories could simultaneously
 * attempt to recycle a given key. We'll waste keys longer in some cases, but
 * avoiding the problems of the alternative justifies that loss.
 */
typedef enum
{
SHMSTATE_ANALYSIS_FAILURE, /* unexpected failure to analyze the ID */
SHMSTATE_ATTACHED, /* pertinent to DataDir, has attached PIDs */
SHMSTATE_ENOENT, /* no segment of that ID */
SHMSTATE_FOREIGN, /* exists, but not pertinent to DataDir */
SHMSTATE_UNATTACHED /* pertinent to DataDir, no attached PIDs */
} IpcMemoryState;
PostmasterMain
|- - reset_shared 创建共享内存和信号量
|- - CreateSharedMemoryAndSemaphores
|- - PGSharedMemoryCreate
|- - PGReserveSemaphores
|- - SpinlockSemaInit
|- - CreateLWLocks
|- - InitShmemIndex
|- - dsm_shmem_init
|- - XLOGShmemInit
|- - CLOGShmemInit
|- - CommitTsShmemInit
|- - SUBTRANSShmemInit
|- - MultiXactShmemInit
|- - InitBufferPool
|- - InitLocks
|- - InitPredicateLocks
|- - InitProcGlobal
|- - CreateSharedProcArray
|- - CreateSharedBackendStatus
|- - TwoPhaseShmemInit
|- - BackgroundWorkerShmemInit
|- - CreateSharedInvalidationState
|- - PMSignalShmemInit
|- - ProcSignalShmemInit
|- - CheckpointerShmemInit
|- - AutoVacuumShmemInit
|- - ReplicationSlotsShmemInit
|- - ReplicationOriginShmemInit
|- - WalSndShmemInit
|- - WalRcvShmemInit
|- - PgArchShmemInit
|- - ApplyLauncherShmemInit
|- - SnapMgrInit
|- - BTreeShmemInit
|- - SyncScanShmemInit
|- - AsyncShmemInit