diff --git a/src/backend/Makefile b/src/backend/Makefile index 7344c8c7f5c..685d7a0a77e 100644 --- a/src/backend/Makefile +++ b/src/backend/Makefile @@ -114,7 +114,7 @@ parser/gram.h: parser/gram.y storage/lmgr/lwlocknames.h: storage/lmgr/generate-lwlocknames.pl ../include/storage/lwlocklist.h utils/activity/wait_event_names.txt $(MAKE) -C storage/lmgr lwlocknames.h -utils/activity/wait_event_types.h: utils/activity/generate-wait_event_types.pl utils/activity/wait_event_names.txt +utils/activity/wait_event_types.h: utils/activity/generate-wait_event_types.pl utils/activity/wait_event_names.txt ../include/storage/lwlocklist.h ../include/utils/wait_classes.h $(MAKE) -C utils/activity wait_event_types.h pgstat_wait_event.c wait_event_funcs_data.c # run this unconditionally to avoid needing to know its dependencies here: diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile index 9c2443e1ecd..e7fc1354c1f 100644 --- a/src/backend/utils/activity/Makefile +++ b/src/backend/utils/activity/Makefile @@ -31,6 +31,7 @@ OBJS = \ pgstat_shmem.o \ pgstat_slru.o \ pgstat_subscription.o \ + pgstat_waitevent.o \ pgstat_wal.o \ pgstat_xact.o \ wait_event.o \ @@ -45,8 +46,8 @@ wait_event.o: pgstat_wait_event.c pgstat_wait_event.c: wait_event_types.h touch $@ -wait_event_types.h: $(top_srcdir)/src/backend/utils/activity/wait_event_names.txt generate-wait_event_types.pl - $(PERL) $(srcdir)/generate-wait_event_types.pl --code $< +wait_event_types.h: $(top_srcdir)/src/backend/utils/activity/wait_event_names.txt $(top_srcdir)/src/include/storage/lwlocklist.h $(top_srcdir)/src/include/utils/wait_classes.h generate-wait_event_types.pl + $(PERL) $(srcdir)/generate-wait_event_types.pl --code $(wordlist 1,3,$^) clean: rm -f wait_event_types.h pgstat_wait_event.c wait_event_funcs_data.c diff --git a/src/backend/utils/activity/generate-wait_event_types.pl b/src/backend/utils/activity/generate-wait_event_types.pl index 424ad9f115d..623a7aa5e85 100644 --- 
a/src/backend/utils/activity/generate-wait_event_types.pl +++ b/src/backend/utils/activity/generate-wait_event_types.pl @@ -21,6 +21,12 @@ my $output_path = '.'; my $gen_docs = 0; my $gen_code = 0; +my $nb_waitclass_table_entries = 0; +my $nb_wait_events_with_null = 0; +my $nb_wait_events_per_class = 0; +my %waitclass_values; +my $wait_event_class_mask = 0xFF000000; +my $wait_event_id_mask = 0x0000FFFF; my $continue = "\n"; my %hashwe; @@ -38,11 +44,50 @@ open my $wait_event_names, '<', $ARGV[0] or die; +# When generating code, lwlocklist.h is the second argument (undef otherwise; ternary because "my ... if" is undefined behaviour) +my $lwlocklist_file = $gen_code ? $ARGV[1] : undef; + +# When generating code, wait_classes.h is the third argument (undef otherwise) +my $wait_classes_file = $gen_code ? $ARGV[2] : undef; + my @abi_compatibility_lines; my @lines; my $abi_compatibility = 0; my $section_name; +# Parse wait_classes.h and store each PG_WAIT_* macro's hex literal (as a string) in %waitclass_values; dies if the file cannot be opened +sub parse_wait_classes_header +{ + + open my $wait_classes_header, '<', $wait_classes_file + or die "Could not open $wait_classes_file: $!"; + + while (<$wait_classes_header>) + { + chomp; + if (/^\s*#define\s+(PG_WAIT_\w+)\s+(0x[0-9A-Fa-f]+)U?\s*$/) + { + my ($macro_name, $value) = ($1, $2); + + $waitclass_values{$macro_name} = $value; + } + } + + close $wait_classes_header; +} + +# Map a wait class name to its macro name: strip the leading "WaitEvent", upper-case the rest and prefix "PG_WAIT_" +sub waitclass_to_macro +{ + + my $waitclass = shift; + my $last = $waitclass; + $last =~ s/^WaitEvent//; + my $lastuc = uc $last; + + return "PG_WAIT_" . $lastuc; +} + # Remove comments and empty lines and add waitclassname based on the section while (<$wait_event_names>) { @@ -84,8 +129,39 @@ # Sort the lines based on the second column. # uc() is being used to force the comparison to be case-insensitive. 
-my @lines_sorted = - sort { uc((split(/\t/, $a))[1]) cmp uc((split(/\t/, $b))[1]) } @lines; + +my @lines_sorted; +if ($gen_code) +{ + my @lwlock_lines; + + # Separate LWLock lines from others + foreach my $line (@lines) + { + if ($line =~ /^WaitEventLWLock\t/) + { + push(@lwlock_lines, $line); + } + else + { + push(@lines_sorted, $line); + } + } + + # Sort only non-LWLock lines + @lines_sorted = + sort { uc((split(/\t/, $a))[1]) cmp uc((split(/\t/, $b))[1]) } + @lines_sorted; + + # Add LWLock lines back in their original order + push(@lines_sorted, @lwlock_lines); +} +else +{ + # For docs, use original alphabetical sorting for all + @lines_sorted = + sort { uc((split(/\t/, $a))[1]) cmp uc((split(/\t/, $b))[1]) } @lines; +} # If we are generating code, concat @lines_sorted and then # @abi_compatibility_lines. @@ -165,9 +241,22 @@ '; + my $wait_event_class_shift = 0; + my $temp_mask = $wait_event_class_mask; + while (($temp_mask & 1) == 0 && $temp_mask != 0) + { + $wait_event_class_shift++; + $temp_mask >>= 1; + } + printf $h $header_comment, 'wait_event_types.h'; printf $h "#ifndef WAIT_EVENT_TYPES_H\n"; printf $h "#define WAIT_EVENT_TYPES_H\n\n"; + printf $h "#define WAIT_EVENT_CLASS_MASK 0x%08X\n", + $wait_event_class_mask; + printf $h "#define WAIT_EVENT_ID_MASK 0x%08X\n", $wait_event_id_mask; + printf $h "#define WAIT_EVENT_CLASS_SHIFT %d\n\n", + $wait_event_class_shift; printf $h "#include \"utils/wait_classes.h\"\n\n"; printf $c $header_comment, 'pgstat_wait_event.c'; @@ -269,6 +358,170 @@ } } + printf $h " + +/* To represent wait_event_info as integers */ +typedef struct DecodedWaitInfo +{ + int classId; + int eventId; +} DecodedWaitInfo; + +/* To extract classId and eventId as integers from wait_event_info */ +#define WAIT_EVENT_INFO_DECODE(d, i) \\ + d.classId = ((i) & WAIT_EVENT_CLASS_MASK) / (WAIT_EVENT_CLASS_MASK & (-WAIT_EVENT_CLASS_MASK)), \\ + d.eventId = (i) & WAIT_EVENT_ID_MASK + +/* To encode wait_event_info from classId and eventId as integers */ 
+#define ENCODE_WAIT_EVENT_INFO(classId, eventId) \\ + (((classId) << WAIT_EVENT_CLASS_SHIFT) | ((eventId) & WAIT_EVENT_ID_MASK)) + +/* To map wait event classes into the WaitClassTable */ +typedef struct +{ + const int classId; + const int numberOfEvents; + const int offSet; + const char *className; + const char *const *eventNames; +} WaitClassTableEntry; + +extern WaitClassTableEntry WaitClassTable[];\n\n"; + + printf $c " +/* + * Lookup table that is used by the wait events statistics. + * Indexed by classId (derived from the PG_WAIT_* constants), handle gaps + * in the class ID numbering and provide metadata for wait events. + */ +WaitClassTableEntry WaitClassTable[] = {\n"; + + parse_wait_classes_header(); + my $next_index = 0; + my $class_divisor = $wait_event_class_mask & (-$wait_event_class_mask); + + foreach my $waitclass ( + sort { + my $macro_a = waitclass_to_macro($a); + my $macro_b = waitclass_to_macro($b); + hex($waitclass_values{$macro_a}) <=> + hex($waitclass_values{$macro_b}) + } keys %hashwe) + { + my $event_names_array; + my $array_size; + my $last = $waitclass; + $last =~ s/^WaitEvent//; + + $nb_waitclass_table_entries++; + + # The LWLocks need to be handled differently than the other classes when + # building the WaitClassTable. We need to take care of the predefined + # LWLocks as well as the additional ones. 
+ if ($waitclass eq 'WaitEventLWLock') + { + # Parse lwlocklist.h to get LWLock definitions + open my $lwlocklist, '<', $lwlocklist_file + or die "Could not open $lwlocklist_file: $!"; + + my %predefined_lwlock_indices; + my $max_lwlock_index = -1; + + while (<$lwlocklist>) + { + if (/^PG_LWLOCK\((\d+),\s+(\w+)\)$/) + { + my ($lockidx, $lockname) = ($1, $2); + $predefined_lwlock_indices{$lockname} = $lockidx; + $max_lwlock_index = $lockidx + if $lockidx > $max_lwlock_index; + } + } + + close $lwlocklist; + + # Iterates through wait_event_names.txt order + my @event_names_sparse; + my $next_additional_index = $max_lwlock_index + 1; + + foreach my $wev (@{ $hashwe{$waitclass} }) + { + my $lockname = $wev->[1]; + + if (exists $predefined_lwlock_indices{$lockname}) + { + # This is a predefined one, place it at its specific index + my $index = $predefined_lwlock_indices{$lockname}; + $event_names_sparse[$index] = "\"$lockname\""; + } + else + { + # This is an additional one, append it after predefined ones + $event_names_sparse[$next_additional_index] = + "\"$lockname\""; + $next_additional_index++; + } + } + + # Fill gaps with NULL for missing predefined locks + for my $i (0 .. $max_lwlock_index) + { + $event_names_sparse[$i] = "NULL" + unless defined $event_names_sparse[$i]; + } + + # Build the array literal + $event_names_array = "(const char *const []){" + . join(", ", @event_names_sparse) . "}"; + $array_size = scalar(@event_names_sparse); + } + else + { + # Construct a simple string array literal for this class + $event_names_array = "(const char *const []){"; + + # For each wait event in this class, add its name to the array + foreach my $wev (@{ $hashwe{$waitclass} }) + { + $event_names_array .= "\"$wev->[1]\", "; + } + + $event_names_array .= "}"; + $array_size = scalar(@{ $hashwe{$waitclass} }); + } + + my $lastuc = uc $last; + my $pg_wait_class = "PG_WAIT_" . 
$lastuc; + + my $index = hex($waitclass_values{$pg_wait_class}) / $class_divisor; + + # Fill any holes with {0, 0, 0, NULL, NULL} + while ($next_index < $index) + { + printf $c "{0, 0, 0, NULL, NULL},\n"; + $next_index++; + $nb_waitclass_table_entries++; + } + + my $offset = $nb_wait_events_with_null; + $nb_wait_events_with_null += $array_size; + + # Generate the entry + printf $c "{$pg_wait_class, $array_size, $offset, \"%s\", %s},\n", + $last, $event_names_array; + + $next_index = $index + 1; + } + + printf $c "};\n\n"; + + printf $h "#define NB_WAITCLASSTABLE_SIZE $nb_wait_events_with_null\n"; + printf $h + "#define NB_WAITCLASSTABLE_ENTRIES $nb_waitclass_table_entries\n\n"; + printf $h + "StaticAssertDecl(NB_WAITCLASSTABLE_SIZE > 0, \"Wait class table must have entries\");\n"; + printf $h + "StaticAssertDecl(NB_WAITCLASSTABLE_ENTRIES > 0, \"Must have at least one wait class\");\n"; printf $h "#endif /* WAIT_EVENT_TYPES_H */\n"; close $h; close $c; diff --git a/src/backend/utils/activity/meson.build b/src/backend/utils/activity/meson.build index d8e56b49c24..8b9b4b4bdb2 100644 --- a/src/backend/utils/activity/meson.build +++ b/src/backend/utils/activity/meson.build @@ -16,6 +16,7 @@ backend_sources += files( 'pgstat_shmem.c', 'pgstat_slru.c', 'pgstat_subscription.c', + 'pgstat_waitevent.c', 'pgstat_wal.c', 'pgstat_xact.c', ) diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index c6783d11411..b9788685b23 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -379,6 +379,22 @@ static const PgStat_KindInfo pgstat_kind_builtin_infos[PGSTAT_KIND_BUILTIN_SIZE] .reset_timestamp_cb = pgstat_backend_reset_timestamp_cb, }, + [PGSTAT_KIND_WAIT_EVENT] = { + .name = "wait_event", + + .fixed_amount = false, + .write_to_file = true, + + .accessed_across_databases = true, + + .shared_size = sizeof(PgStatShared_WaitEvent), + .shared_data_off = offsetof(PgStatShared_WaitEvent, stats), + .shared_data_len 
= sizeof(((PgStatShared_WaitEvent *) 0)->stats), + + .flush_static_cb = pgstat_wait_event_flush_cb, + .reset_timestamp_cb = pgstat_wait_event_reset_timestamp_cb, + }, + /* stats for fixed-numbered (mostly 1) objects */ [PGSTAT_KIND_ARCHIVER] = { diff --git a/src/backend/utils/activity/pgstat_waitevent.c b/src/backend/utils/activity/pgstat_waitevent.c new file mode 100644 index 00000000000..bc064948077 --- /dev/null +++ b/src/backend/utils/activity/pgstat_waitevent.c @@ -0,0 +1,244 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_waitevent.c + * Implementation of wait event statistics. + * + * This file contains the implementation of wait event statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. + * + * Copyright (c) 2001-2025, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_waitevent.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" + +bool have_wait_event_stats = false; +instr_time pgstat_wait_start_time; +bool track_wait_event_timing = true; +int track_wait_event_pid = -1; + +static PgStat_PendingWaitevent PendingWaitEventStats; + +/* + * Support function for the SQL-callable pgstat* functions. Returns + * a pointer to the wait events statistics struct. + */ +PgStat_WaitEvent * +pgstat_fetch_stat_wait_event(uint32 wait_event_info) +{ + PgStat_WaitEvent *wait_event_entry; + + wait_event_entry = (PgStat_WaitEvent *) pgstat_fetch_entry(PGSTAT_KIND_WAIT_EVENT, + InvalidOid, (uint64) wait_event_info); + + return wait_event_entry; +} + +/* + * Returns a pointer to the first counter for a specific class. 
+ */ +static PgStat_Counter * +waitEventGetClassCounters(int64 *waitEventStats, int classId) +{ + int offset = WaitClassTable[classId].offSet; + + return &waitEventStats[offset]; +} + +/* + * Returns a pointer to the counter for a specific wait event (bounds are only checked by Assert). + */ +static PgStat_Counter * +waitEventGetCounter(int64 *waitEventStats, int classId, int eventId) +{ + int64 *classCounters; + + Assert(classId >= 0 && classId < NB_WAITCLASSTABLE_ENTRIES); + Assert(eventId >= 0 && eventId < WaitClassTable[classId].numberOfEvents); + + classCounters = waitEventGetClassCounters(waitEventStats, classId); + + return &classCounters[eventId]; +} + +/* + * Increment the pending count for wait_event_info and, if track_wait_event_timing is set, add the time elapsed since start_time. + */ +inline void +waitEventIncrementCounter(uint32 wait_event_info, instr_time start_time) +{ + instr_time wait_time; + DecodedWaitInfo waitInfo; + PgStat_Counter *counter; + PgStat_Counter *total_time; + uint32 classId; + uint16 eventId; + + classId = wait_event_info & WAIT_EVENT_CLASS_MASK; + eventId = wait_event_info & WAIT_EVENT_ID_MASK; + + if (classId == 0 && eventId == 0) + return; + + /* Don't take into account user defined LWLock in the stats */ + if (classId == PG_WAIT_LWLOCK && eventId >= LWTRANCHE_FIRST_USER_DEFINED) + return; + + /* Don't take into account custom wait event extension in the stats */ + if (classId == PG_WAIT_EXTENSION && eventId >= WAIT_EVENT_CUSTOM_INITIAL_ID) + return; + + /* Don't take into account PG_WAIT_INJECTIONPOINT */ + if (classId == PG_WAIT_INJECTIONPOINT) + return; + + WAIT_EVENT_INFO_DECODE(waitInfo, wait_event_info); + + counter = waitEventGetCounter(PendingWaitEventStats.counts, waitInfo.classId, + waitInfo.eventId); + + (*counter)++; + + + if (unlikely(track_wait_event_timing)) + { + total_time = waitEventGetCounter(PendingWaitEventStats.total_time, waitInfo.classId, + waitInfo.eventId); + INSTR_TIME_SET_CURRENT(wait_time); + INSTR_TIME_SUBTRACT(wait_time, start_time); + (*total_time) += INSTR_TIME_GET_MICROSEC(wait_time); + } + + have_wait_event_stats 
= true; +} + +const char * +get_wait_event_name_from_index(int index) +{ + /* Iterate through the WaitClassTable */ + for (int classIdx = 0; classIdx < NB_WAITCLASSTABLE_ENTRIES; classIdx++) + { + int classOffset = WaitClassTable[classIdx].offSet; + int classSize = WaitClassTable[classIdx].numberOfEvents; + + /* Skip empty entries */ + if (WaitClassTable[classIdx].numberOfEvents == 0) + continue; + + /* Check if the index falls within this class section */ + if (index >= classOffset && index < classOffset + classSize) + { + /* Calculate the event ID within this class */ + int eventId = index - classOffset; + + return WaitClassTable[classIdx].eventNames[eventId]; + } + } + + Assert(false); + return "unknown"; +} + +/* + * Flush out locally pending wait event statistics + * + * Returns true if some statistics could not be flushed due to lock contention. + */ + +bool +pgstat_wait_event_flush_cb(bool nowait) +{ + PgStat_EntryRef *entry_ref; + bool could_not_be_flushed = false; + + if (!have_wait_event_stats) + return false; + + for (int classIdx = 0; classIdx < NB_WAITCLASSTABLE_ENTRIES; classIdx++) + { + WaitClassTableEntry *class; + int classOffset; + int classSize; + + /* Skip empty entries */ + if (WaitClassTable[classIdx].numberOfEvents == 0) + continue; + + class = &WaitClassTable[classIdx]; + + classOffset = class->offSet; + classSize = class->numberOfEvents; + + for (int eventId = 0; eventId < classSize; eventId++) + { + const char *name; + PgStatShared_WaitEvent *shwaiteventent; + PgStat_Counter *shstat; + PgStat_Counter pending_counter; + + PgStat_Counter *shstat_time; + PgStat_Counter pending_time; + uint32 wait_event_info; + + name = get_wait_event_name_from_index(classOffset + eventId); + + if (!name) + continue; + + /* Build the wait_event_info */ + wait_event_info = ENCODE_WAIT_EVENT_INFO(classIdx, eventId); + + entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_WAIT_EVENT, + InvalidOid, (uint64) wait_event_info, nowait); + + if (!entry_ref) + { + 
could_not_be_flushed = true; + continue; + } + + shwaiteventent = (PgStatShared_WaitEvent *) entry_ref->shared_stats; + shstat = &shwaiteventent->stats.counts; + pending_counter = PendingWaitEventStats.counts[classOffset + eventId]; + + *shstat += pending_counter; + + shstat_time = &shwaiteventent->stats.total_time; + pending_time = PendingWaitEventStats.total_time[classOffset + eventId]; + *shstat_time += pending_time; + + /* flushed: clear only this entry, so entries skipped above keep their pending values for the next flush */ + PendingWaitEventStats.counts[classOffset + eventId] = 0; + PendingWaitEventStats.total_time[classOffset + eventId] = 0; + + pgstat_unlock_entry(entry_ref); + } + } + + if (!could_not_be_flushed) + have_wait_event_stats = false; + + return could_not_be_flushed; +} + +void +pgstat_wait_event_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts) +{ + ((PgStatShared_WaitEvent *) header)->stats.stat_reset_timestamp = ts; +} + +/* + * Check if there are any wait event stats waiting for flush. + */ +bool +pgstat_wait_event_have_pending_cb(void) +{ + return have_wait_event_stats; +} diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c index d9b8f34a355..613935f22a2 100644 --- a/src/backend/utils/activity/wait_event.c +++ b/src/backend/utils/activity/wait_event.c @@ -39,9 +39,6 @@ static const char *pgstat_get_wait_io(WaitEventIO w); static uint32 local_my_wait_event_info; uint32 *my_wait_event_info = &local_my_wait_event_info; -#define WAIT_EVENT_CLASS_MASK 0xFF000000 -#define WAIT_EVENT_ID_MASK 0x0000FFFF - /* * Hash tables for storing custom wait event ids and their names in * shared memory. 
@@ -90,9 +87,6 @@ typedef struct WaitEventCustomCounterData /* pointer to the shared memory */ static WaitEventCustomCounterData *WaitEventCustomCounter; -/* first event ID of custom wait events */ -#define WAIT_EVENT_CUSTOM_INITIAL_ID 1 - static uint32 WaitEventCustomNew(uint32 classId, const char *wait_event_name); static const char *GetWaitEventCustomIdentifier(uint32 wait_event_info); diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt index 017deee70d4..e8e52f50cfe 100644 --- a/src/backend/utils/activity/wait_event_names.txt +++ b/src/backend/utils/activity/wait_event_names.txt @@ -430,3 +430,24 @@ advisory "Waiting to acquire an advisory user lock." applytransaction "Waiting to acquire a lock on a remote transaction being applied by a logical replication subscriber." # No "ABI_compatibility" region here as WaitEventLock has its own C code. + +# +# Wait Events - Neon +# +# Use this category when a process is waiting on neon extension code. +# + +Section: ClassName - WaitEventNeon + +NEON_LFC_MAINTENANCE "LFC maintenance." +NEON_LFC_READ "LFC read." +NEON_LFC_TRUNCATE "LFC truncate." +NEON_LFC_WRITE "LFC write." +NEON_LFC_CV_WAIT "LFC CV wait." +NEON_PS_STARTING "PS starting." +NEON_PS_CONFIGURING "PS configuring." +NEON_PS_SEND "PS SEND." +NEON_PS_READ "PS READ." +NEON_WAL_DL "WAL DL." 
+ +ABI_compatibility: diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 1c12ddbae49..e7670f5424d 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -1882,6 +1882,7 @@ pg_stat_reset_shared(PG_FUNCTION_ARGS) pgstat_reset_of_kind(PGSTAT_KIND_IO); XLogPrefetchResetStats(); pgstat_reset_of_kind(PGSTAT_KIND_SLRU); + pgstat_reset_of_kind(PGSTAT_KIND_WAIT_EVENT); pgstat_reset_of_kind(PGSTAT_KIND_WAL); PG_RETURN_VOID(); @@ -1901,6 +1902,8 @@ pg_stat_reset_shared(PG_FUNCTION_ARGS) XLogPrefetchResetStats(); else if (strcmp(target, "slru") == 0) pgstat_reset_of_kind(PGSTAT_KIND_SLRU); + else if (strcmp(target, "wait_event") == 0) + pgstat_reset_of_kind(PGSTAT_KIND_WAIT_EVENT); else if (strcmp(target, "wal") == 0) pgstat_reset_of_kind(PGSTAT_KIND_WAL); else diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index dd11cd1c223..3071ba03168 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -1538,7 +1538,15 @@ struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, - + { + {"track_wait_event_timing", PGC_SUSET, STATS_CUMULATIVE, + gettext_noop("Collects timing statistics for wait events."), + NULL + }, + &track_wait_event_timing, + true, + NULL, NULL, NULL + }, { {"update_process_title", PGC_SUSET, PROCESS_TITLE, gettext_noop("Updates the process title to show the active SQL command."), @@ -2357,6 +2365,15 @@ struct config_int ConfigureNamesInt[] = 100, 1, MAX_BACKENDS, NULL, NULL, NULL }, + { + {"track_wait_event_pid", PGC_SIGHUP, STATS_CUMULATIVE, + gettext_noop("Collects timing statistics for wait events for the specified process only."), + NULL + }, + &track_wait_event_pid, + -1, -1, INT_MAX, + NULL, NULL, NULL + }, { /* see max_connections */ diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 0a62fb39fe7..89db289b6b1 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -504,6 
+504,27 @@ typedef struct PgStat_BackendPending PgStat_PendingIO pending_io; } PgStat_BackendPending; +/* ------- + * PgStat_WaitEvent Wait events statistics + * ------- + */ +typedef struct PgStat_WaitEvent +{ + TimestampTz stat_reset_timestamp; + PgStat_Counter counts; + PgStat_Counter total_time; +} PgStat_WaitEvent; + +/* --------- + * PgStat_PendingWaitEvent Non-flushed wait events stats. + * --------- + */ +typedef struct PgStat_PendingWaitEvent +{ + PgStat_Counter counts[NB_WAITCLASSTABLE_SIZE]; + PgStat_Counter total_time[NB_WAITCLASSTABLE_SIZE]; +} PgStat_PendingWaitevent; + /* * Functions in pgstat.c */ @@ -783,6 +804,10 @@ struct xl_xact_stats_item; extern int pgstat_get_transactional_drops(bool isCommit, struct xl_xact_stats_item **items); extern void pgstat_execute_transactional_drops(int ndrops, struct xl_xact_stats_item *items, bool is_redo); +/* + * Functions in pgstat_waitevent.c + */ +extern PgStat_WaitEvent *pgstat_fetch_stat_wait_event(uint32 wait_event_info); /* * Functions in pgstat_wal.c @@ -791,6 +816,11 @@ extern void pgstat_execute_transactional_drops(int ndrops, struct xl_xact_stats_ extern void pgstat_report_wal(bool force); extern PgStat_WalStats *pgstat_fetch_stat_wal(void); +/* + * Functions in pgstatfuncs.c + */ + +// extern Datum pg_stat_get_wait_event(PG_FUNCTION_ARGS); /* * Variables in pgstat.c diff --git a/src/include/utils/meson.build b/src/include/utils/meson.build index 78c6b9b0a23..ff519242de7 100644 --- a/src/include/utils/meson.build +++ b/src/include/utils/meson.build @@ -2,7 +2,9 @@ wait_event_output = ['wait_event_types.h', 'pgstat_wait_event.c', 'wait_event_funcs_data.c'] wait_event_target = custom_target('wait_event_names', - input: files('../../backend/utils/activity/wait_event_names.txt'), + input: files('../../backend/utils/activity/wait_event_names.txt', + '../../include/storage/lwlocklist.h', + '../../include/utils/wait_classes.h'), output: wait_event_output, command: [ perl, 
files('../../backend/utils/activity/generate-wait_event_types.pl'), diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 6cf00008f63..8c5ae59ee77 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -456,6 +456,12 @@ typedef struct PgStatShared_Backend PgStat_Backend stats; } PgStatShared_Backend; +typedef struct PgStatShared_WaitEvent +{ + PgStatShared_Common header; + PgStat_WaitEvent stats; +} PgStatShared_WaitEvent; + /* * Central shared memory entry for the cumulative stats system. * @@ -785,6 +791,15 @@ extern PGDLLIMPORT bool pgstat_report_fixed; /* Backend-local stats state */ extern PGDLLIMPORT PgStat_LocalState pgStatLocal; +/* + * Functions in pgstat_waitevent.c + */ + +extern bool pgstat_wait_event_flush_cb(bool nowait); +extern void pgstat_wait_event_reset_timestamp_cb(PgStatShared_Common *header, + TimestampTz ts); +extern bool pgstat_wait_event_have_pending_cb(void); + /* * Implementation of inline functions declared above. 
*/ diff --git a/src/include/utils/pgstat_kind.h b/src/include/utils/pgstat_kind.h index eb5f0b3ae6d..ec8cd756d0c 100644 --- a/src/include/utils/pgstat_kind.h +++ b/src/include/utils/pgstat_kind.h @@ -30,14 +30,15 @@ #define PGSTAT_KIND_REPLSLOT 4 /* per-slot statistics */ #define PGSTAT_KIND_SUBSCRIPTION 5 /* per-subscription statistics */ #define PGSTAT_KIND_BACKEND 6 /* per-backend statistics */ +#define PGSTAT_KIND_WAIT_EVENT 7 /* wait events statistics */ /* stats for fixed-numbered objects */ -#define PGSTAT_KIND_ARCHIVER 7 -#define PGSTAT_KIND_BGWRITER 8 -#define PGSTAT_KIND_CHECKPOINTER 9 -#define PGSTAT_KIND_IO 10 -#define PGSTAT_KIND_SLRU 11 -#define PGSTAT_KIND_WAL 12 +#define PGSTAT_KIND_ARCHIVER 8 +#define PGSTAT_KIND_BGWRITER 9 +#define PGSTAT_KIND_CHECKPOINTER 10 +#define PGSTAT_KIND_IO 11 +#define PGSTAT_KIND_SLRU 12 +#define PGSTAT_KIND_WAL 13 #define PGSTAT_KIND_BUILTIN_MIN PGSTAT_KIND_DATABASE #define PGSTAT_KIND_BUILTIN_MAX PGSTAT_KIND_WAL diff --git a/src/include/utils/wait_classes.h b/src/include/utils/wait_classes.h index 51ee68397d5..c1d2cdda9ad 100644 --- a/src/include/utils/wait_classes.h +++ b/src/include/utils/wait_classes.h @@ -25,5 +25,6 @@ #define PG_WAIT_TIMEOUT 0x09000000U #define PG_WAIT_IO 0x0A000000U #define PG_WAIT_INJECTIONPOINT 0x0B000000U +#define PG_WAIT_NEON 0x0C000000U #endif /* WAIT_CLASSES_H */ diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h index f5815b4994a..8e8371f35d3 100644 --- a/src/include/utils/wait_event.h +++ b/src/include/utils/wait_event.h @@ -10,8 +10,12 @@ #ifndef WAIT_EVENT_H #define WAIT_EVENT_H +#include "storage/lwlock.h" + /* enums for wait events */ #include "utils/wait_event_types.h" +#include "portability/instr_time.h" +#include "miscadmin.h" extern const char *pgstat_get_wait_event(uint32 wait_event_info); extern const char *pgstat_get_wait_event_type(uint32 wait_event_info); @@ -19,9 +23,17 @@ static inline void pgstat_report_wait_start(uint32 wait_event_info); static 
inline void pgstat_report_wait_end(void); extern void pgstat_set_wait_event_storage(uint32 *wait_event_info); extern void pgstat_reset_wait_event_storage(void); +extern void waitEventIncrementCounter(uint32 wait_event_info, instr_time start_time); +extern const char *get_wait_event_name_from_index(int index); extern PGDLLIMPORT uint32 *my_wait_event_info; +extern PGDLLIMPORT bool have_wait_event_stats; +extern PGDLLIMPORT instr_time pgstat_wait_start_time; +extern PGDLLIMPORT bool track_wait_event_timing; +extern PGDLLIMPORT int track_wait_event_pid; +/* first event ID of custom wait events */ +#define WAIT_EVENT_CUSTOM_INITIAL_ID 1 /* * Wait Events - Extension, InjectionPoint @@ -73,6 +85,14 @@ pgstat_report_wait_start(uint32 wait_event_info) * four-bytes, updates are atomic. */ *(volatile uint32 *) my_wait_event_info = wait_event_info; + + if (unlikely(track_wait_event_timing)) + { + if (MyProcPid == track_wait_event_pid || track_wait_event_pid == -1) + { + INSTR_TIME_SET_CURRENT(pgstat_wait_start_time); + } + } } /* ---------- @@ -84,6 +104,12 @@ pgstat_report_wait_start(uint32 wait_event_info) static inline void pgstat_report_wait_end(void) { + if (MyProcPid == track_wait_event_pid || track_wait_event_pid == -1) + { + /* Increment the wait event counter */ + waitEventIncrementCounter(*(volatile uint32 *) my_wait_event_info, pgstat_wait_start_time); + } + /* see pgstat_report_wait_start() */ *(volatile uint32 *) my_wait_event_info = 0; } diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out index 6cf828ca8d0..3aa93a2fcbb 100644 --- a/src/test/regress/expected/rules.out +++ b/src/test/regress/expected/rules.out @@ -2277,6 +2277,12 @@ pg_stat_user_tables| SELECT relid, total_autoanalyze_time FROM pg_stat_all_tables WHERE ((schemaname <> ALL (ARRAY['pg_catalog'::name, 'information_schema'::name])) AND (schemaname !~ '^pg_toast'::text)); +pg_stat_wait_event| SELECT type, + name, + counts, + total_time, + stats_reset + FROM 
pg_stat_get_wait_event() s(type, name, counts, total_time, stats_reset); pg_stat_wal| SELECT wal_records, wal_fpi, wal_bytes, diff --git a/src/test/regress/expected/stats.out b/src/test/regress/expected/stats.out index 776f1ad0e53..26d72dda3f1 100644 --- a/src/test/regress/expected/stats.out +++ b/src/test/regress/expected/stats.out @@ -236,6 +236,54 @@ FROM prevstats AS pr; (1 row) COMMIT; +---- +-- Basic tests for wait events statistics +--- +-- ensure there is no wait events missing in pg_stat_wait_event +select count(1) > 0 from pg_stat_wait_event + where name not in (select name from pg_wait_events + where type <> 'InjectionPoint' or type <> 'Extension') + and type <> 'Extension'; + ?column? +---------- + f +(1 row) + +-- Test that reset_shared with wait_event specified as the stats type works +SELECT count(1) AS counts_wait_event FROM pg_stat_wait_event WHERE counts > 0 \gset +SELECT :counts_wait_event > 0; + ?column? +---------- + t +(1 row) + +SELECT pg_stat_reset_shared('wait_event'); + pg_stat_reset_shared +---------------------- + +(1 row) + +SELECT count(1) < :counts_wait_event FROM pg_stat_wait_event WHERE counts > 0; + ?column? +---------- + t +(1 row) + +-- Test wait event counters are incremented +CREATE TABLE wait_event_stats_test(id serial); +INSERT INTO wait_event_stats_test DEFAULT VALUES; +SELECT pg_stat_force_next_flush(); + pg_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT counts > 0 FROM pg_stat_wait_event WHERE name = 'WalWrite' and type = 'IO'; + ?column? 
+---------- + t +(1 row) + ---- -- Basic tests for track_functions --- diff --git a/src/test/regress/sql/stats.sql b/src/test/regress/sql/stats.sql index 232ab8db8fa..0ea887a45fd 100644 --- a/src/test/regress/sql/stats.sql +++ b/src/test/regress/sql/stats.sql @@ -132,6 +132,28 @@ FROM prevstats AS pr; COMMIT; +---- +-- Basic tests for wait events statistics +--- + +-- ensure there is no wait events missing in pg_stat_wait_event +select count(1) > 0 from pg_stat_wait_event + where name not in (select name from pg_wait_events + where type <> 'InjectionPoint' or type <> 'Extension') + and type <> 'Extension'; + +-- Test that reset_shared with wait_event specified as the stats type works +SELECT count(1) AS counts_wait_event FROM pg_stat_wait_event WHERE counts > 0 \gset +SELECT :counts_wait_event > 0; +SELECT pg_stat_reset_shared('wait_event'); +SELECT count(1) < :counts_wait_event FROM pg_stat_wait_event WHERE counts > 0; + +-- Test wait event counters are incremented +CREATE TABLE wait_event_stats_test(id serial); +INSERT INTO wait_event_stats_test DEFAULT VALUES; +SELECT pg_stat_force_next_flush(); +SELECT counts > 0 FROM pg_stat_wait_event WHERE name = 'WalWrite' and type = 'IO'; + ---- -- Basic tests for track_functions --- diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 58606898add..32c78319fe6 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -2216,6 +2216,7 @@ PgStatShared_Relation PgStatShared_ReplSlot PgStatShared_SLRU PgStatShared_Subscription +PgStatShared_WaitEvent PgStatShared_Wal PgStat_ArchiverStats PgStat_Backend @@ -2236,6 +2237,7 @@ PgStat_KindInfo PgStat_LocalState PgStat_PendingDroppedStatsItem PgStat_PendingIO +PgStat_PendingWaitevent PgStat_SLRUStats PgStat_ShmemControl PgStat_Snapshot @@ -2251,6 +2253,7 @@ PgStat_SubXactStatus PgStat_TableCounts PgStat_TableStatus PgStat_TableXactStatus +PgStat_WaitEvent PgStat_WalCounters PgStat_WalStats PgXmlErrorContext