Allow changing autovacuum_max_workers without restarting.
author Nathan Bossart <nathan@postgresql.org>
Mon, 6 Jan 2025 21:01:22 +0000 (15:01 -0600)
committer Nathan Bossart <nathan@postgresql.org>
Mon, 6 Jan 2025 21:01:22 +0000 (15:01 -0600)
This commit introduces a new parameter named
autovacuum_worker_slots that controls how many autovacuum worker
slots to reserve during server startup.  Modifying this new
parameter's value does require a server restart, but it should
typically be set to the upper bound of what you might realistically
need to set autovacuum_max_workers.  With that new parameter in
place, autovacuum_max_workers can now be changed with a SIGHUP
(e.g., pg_ctl reload).

If autovacuum_max_workers is set higher than
autovacuum_worker_slots, a WARNING is emitted, and the server will
only start up to autovacuum_worker_slots workers at a given time.
If autovacuum_max_workers is set to a value less than the number of
currently-running autovacuum workers, the existing workers will
continue running, but no new workers will be started until the
number of running autovacuum workers drops below
autovacuum_max_workers.

Reviewed-by: Sami Imseih, Justin Pryzby, Robert Haas, Andres Freund, Yogesh Sharma
Discussion: https://postgr.es/m/20240410212344.GA1824549%40nathanxps13

doc/src/sgml/config.sgml
doc/src/sgml/runtime.sgml
src/backend/access/transam/xlog.c
src/backend/postmaster/autovacuum.c
src/backend/postmaster/pmchild.c
src/backend/storage/lmgr/proc.c
src/backend/utils/init/postinit.c
src/backend/utils/misc/guc_tables.c
src/backend/utils/misc/postgresql.conf.sample
src/include/postmaster/autovacuum.h
src/test/perl/PostgreSQL/Test/Cluster.pm

index fbdd6ce57408333c65da116e33dbd20c69915822..740ff5d50443ce3d4ea46c0a302814de5e79c303 100644 (file)
@@ -8630,6 +8630,25 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-autovacuum-worker-slots" xreflabel="autovacuum_worker_slots">
+      <term><varname>autovacuum_worker_slots</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>autovacuum_worker_slots</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the number of backend slots to reserve for autovacuum worker
+        processes.  The default is 16.  This parameter can only be set at server
+        start.
+       </para>
+       <para>
+        When changing this value, consider also adjusting
+        <xref linkend="guc-autovacuum-max-workers"/>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-autovacuum-max-workers" xreflabel="autovacuum_max_workers">
       <term><varname>autovacuum_max_workers</varname> (<type>integer</type>)
       <indexterm>
@@ -8640,7 +8659,14 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
        <para>
         Specifies the maximum number of autovacuum processes (other than the
         autovacuum launcher) that may be running at any one time.  The default
-        is three.  This parameter can only be set at server start.
+        is three.  This parameter can only be set in the
+        <filename>postgresql.conf</filename> file or on the server command line.
+       </para>
+       <para>
+        Note that a setting for this value which is higher than
+        <xref linkend="guc-autovacuum-worker-slots"/> will have no effect,
+        since autovacuum workers are taken from the pool of slots established
+        by that setting.
        </para>
       </listitem>
      </varlistentry>
index 8750044852dac8c246c1249decd088f25a91f02e..59f39e8992470247c6ad533e12a9d3b8d7147af1 100644 (file)
@@ -839,7 +839,7 @@ psql: error: connection to server on socket "/tmp/.s.PGSQL.5432" failed: No such
     When using System V semaphores,
     <productname>PostgreSQL</productname> uses one semaphore per allowed connection
     (<xref linkend="guc-max-connections"/>), allowed autovacuum worker process
-    (<xref linkend="guc-autovacuum-max-workers"/>), allowed WAL sender process
+    (<xref linkend="guc-autovacuum-worker-slots"/>), allowed WAL sender process
     (<xref linkend="guc-max-wal-senders"/>), allowed background
     process (<xref linkend="guc-max-worker-processes"/>), etc., in sets of 19.
     The runtime-computed parameter <xref linkend="guc-num-os-semaphores"/>
@@ -892,7 +892,7 @@ $ <userinput>postgres -D $PGDATA -C num_os_semaphores</userinput>
     When using POSIX semaphores, the number of semaphores needed is the
     same as for System V, that is one semaphore per allowed connection
     (<xref linkend="guc-max-connections"/>), allowed autovacuum worker process
-    (<xref linkend="guc-autovacuum-max-workers"/>), allowed WAL sender process
+    (<xref linkend="guc-autovacuum-worker-slots"/>), allowed WAL sender process
     (<xref linkend="guc-max-wal-senders"/>), allowed background
     process (<xref linkend="guc-max-worker-processes"/>), etc.
     On the platforms where this option is preferred, there is no specific
index b9ea92a54276dae9e8176ce6da9ffcd49e85bcfb..bf3dbda901d52575b6f49be3f8761aa26ad65764 100644 (file)
@@ -5403,7 +5403,7 @@ CheckRequiredParameterValues(void)
     */
    if (ArchiveRecoveryRequested && EnableHotStandby)
    {
-       /* We ignore autovacuum_max_workers when we make this test. */
+       /* We ignore autovacuum_worker_slots when we make this test. */
        RecoveryRequiresIntParameter("max_connections",
                                     MaxConnections,
                                     ControlFile->MaxConnections);
index 3f826532b8807e3a96d1f645024f776b150868fd..0ab921a169b850f63ff5aa1ebb3f69e60060a01b 100644 (file)
  * GUC parameters
  */
 bool       autovacuum_start_daemon = false;
+int            autovacuum_worker_slots;
 int            autovacuum_max_workers;
 int            autovacuum_work_mem = -1;
 int            autovacuum_naptime;
@@ -210,7 +211,7 @@ typedef struct autovac_table
 /*-------------
  * This struct holds information about a single worker's whereabouts.  We keep
  * an array of these in shared memory, sized according to
- * autovacuum_max_workers.
+ * autovacuum_worker_slots.
  *
  * wi_links        entry into free list or running list
  * wi_dboid        OID of the database this worker is supposed to work on
@@ -291,7 +292,7 @@ typedef struct
 {
    sig_atomic_t av_signal[AutoVacNumSignals];
    pid_t       av_launcherpid;
-   dlist_head  av_freeWorkers;
+   dclist_head av_freeWorkers;
    dlist_head  av_runningWorkers;
    WorkerInfo  av_startingWorker;
    AutoVacuumWorkItem av_workItems[NUM_WORKITEMS];
@@ -349,6 +350,8 @@ static void autovac_report_activity(autovac_table *tab);
 static void autovac_report_workitem(AutoVacuumWorkItem *workitem,
                                    const char *nspname, const char *relname);
 static void avl_sigusr2_handler(SIGNAL_ARGS);
+static bool av_worker_available(void);
+static void check_av_worker_gucs(void);
 
 
 
@@ -577,8 +580,7 @@ AutoVacLauncherMain(char *startup_data, size_t startup_data_len)
         * wakening conditions.
         */
 
-       launcher_determine_sleep(!dlist_is_empty(&AutoVacuumShmem->av_freeWorkers),
-                                false, &nap);
+       launcher_determine_sleep(av_worker_available(), false, &nap);
 
        /*
         * Wait until naptime expires or we get some type of signal (all the
@@ -638,7 +640,7 @@ AutoVacLauncherMain(char *startup_data, size_t startup_data_len)
        current_time = GetCurrentTimestamp();
        LWLockAcquire(AutovacuumLock, LW_SHARED);
 
-       can_launch = !dlist_is_empty(&AutoVacuumShmem->av_freeWorkers);
+       can_launch = av_worker_available();
 
        if (AutoVacuumShmem->av_startingWorker != NULL)
        {
@@ -681,8 +683,8 @@ AutoVacLauncherMain(char *startup_data, size_t startup_data_len)
                    worker->wi_sharedrel = false;
                    worker->wi_proc = NULL;
                    worker->wi_launchtime = 0;
-                   dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
-                                   &worker->wi_links);
+                   dclist_push_head(&AutoVacuumShmem->av_freeWorkers,
+                                    &worker->wi_links);
                    AutoVacuumShmem->av_startingWorker = NULL;
                    ereport(WARNING,
                            errmsg("autovacuum worker took too long to start; canceled"));
@@ -747,6 +749,8 @@ HandleAutoVacLauncherInterrupts(void)
 
    if (ConfigReloadPending)
    {
+       int         autovacuum_max_workers_prev = autovacuum_max_workers;
+
        ConfigReloadPending = false;
        ProcessConfigFile(PGC_SIGHUP);
 
@@ -754,6 +758,14 @@ HandleAutoVacLauncherInterrupts(void)
        if (!AutoVacuumingActive())
            AutoVacLauncherShutdown();
 
+       /*
+        * If autovacuum_max_workers changed, emit a WARNING if
+        * autovacuum_worker_slots < autovacuum_max_workers.  If it didn't
+        * change, skip this to avoid too many repeated log messages.
+        */
+       if (autovacuum_max_workers_prev != autovacuum_max_workers)
+           check_av_worker_gucs();
+
        /* rebuild the list in case the naptime changed */
        rebuild_database_list(InvalidOid);
    }
@@ -1089,7 +1101,7 @@ do_start_worker(void)
 
    /* return quickly when there are no free workers */
    LWLockAcquire(AutovacuumLock, LW_SHARED);
-   if (dlist_is_empty(&AutoVacuumShmem->av_freeWorkers))
+   if (!av_worker_available())
    {
        LWLockRelease(AutovacuumLock);
        return InvalidOid;
@@ -1242,7 +1254,7 @@ do_start_worker(void)
         * Get a worker entry from the freelist.  We checked above, so there
         * really should be a free slot.
         */
-       wptr = dlist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
+       wptr = dclist_pop_head_node(&AutoVacuumShmem->av_freeWorkers);
 
        worker = dlist_container(WorkerInfoData, wi_links, wptr);
        worker->wi_dboid = avdb->adw_datid;
@@ -1615,8 +1627,8 @@ FreeWorkerInfo(int code, Datum arg)
        MyWorkerInfo->wi_proc = NULL;
        MyWorkerInfo->wi_launchtime = 0;
        pg_atomic_clear_flag(&MyWorkerInfo->wi_dobalance);
-       dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
-                       &MyWorkerInfo->wi_links);
+       dclist_push_head(&AutoVacuumShmem->av_freeWorkers,
+                        &MyWorkerInfo->wi_links);
        /* not mine anymore */
        MyWorkerInfo = NULL;
 
@@ -3248,10 +3260,14 @@ AutoVacuumRequestWork(AutoVacuumWorkItemType type, Oid relationId,
 void
 autovac_init(void)
 {
-   if (autovacuum_start_daemon && !pgstat_track_counts)
+   if (!autovacuum_start_daemon)
+       return;
+   else if (!pgstat_track_counts)
        ereport(WARNING,
                (errmsg("autovacuum not started because of misconfiguration"),
                 errhint("Enable the \"track_counts\" option.")));
+   else
+       check_av_worker_gucs();
 }
 
 /*
@@ -3268,7 +3284,7 @@ AutoVacuumShmemSize(void)
     */
    size = sizeof(AutoVacuumShmemStruct);
    size = MAXALIGN(size);
-   size = add_size(size, mul_size(autovacuum_max_workers,
+   size = add_size(size, mul_size(autovacuum_worker_slots,
                                   sizeof(WorkerInfoData)));
    return size;
 }
@@ -3295,7 +3311,7 @@ AutoVacuumShmemInit(void)
        Assert(!found);
 
        AutoVacuumShmem->av_launcherpid = 0;
-       dlist_init(&AutoVacuumShmem->av_freeWorkers);
+       dclist_init(&AutoVacuumShmem->av_freeWorkers);
        dlist_init(&AutoVacuumShmem->av_runningWorkers);
        AutoVacuumShmem->av_startingWorker = NULL;
        memset(AutoVacuumShmem->av_workItems, 0,
@@ -3305,10 +3321,10 @@ AutoVacuumShmemInit(void)
                               MAXALIGN(sizeof(AutoVacuumShmemStruct)));
 
        /* initialize the WorkerInfo free list */
-       for (i = 0; i < autovacuum_max_workers; i++)
+       for (i = 0; i < autovacuum_worker_slots; i++)
        {
-           dlist_push_head(&AutoVacuumShmem->av_freeWorkers,
-                           &worker[i].wi_links);
+           dclist_push_head(&AutoVacuumShmem->av_freeWorkers,
+                            &worker[i].wi_links);
            pg_atomic_init_flag(&worker[i].wi_dobalance);
        }
 
@@ -3344,3 +3360,35 @@ check_autovacuum_work_mem(int *newval, void **extra, GucSource source)
 
    return true;
 }
+
+/*
+ * Returns whether there is a free autovacuum worker slot available.
+ */
+static bool
+av_worker_available(void)
+{
+   int         free_slots;
+   int         reserved_slots;
+
+   free_slots = dclist_count(&AutoVacuumShmem->av_freeWorkers);
+
+   reserved_slots = autovacuum_worker_slots - autovacuum_max_workers;
+   reserved_slots = Max(0, reserved_slots);
+
+   return free_slots > reserved_slots;
+}
+
+/*
+ * Emits a WARNING if autovacuum_worker_slots < autovacuum_max_workers.
+ */
+static void
+check_av_worker_gucs(void)
+{
+   if (autovacuum_worker_slots < autovacuum_max_workers)
+       ereport(WARNING,
+               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                errmsg("\"autovacuum_max_workers\" (%d) should be less than or equal to \"autovacuum_worker_slots\" (%d)",
+                       autovacuum_max_workers, autovacuum_worker_slots),
+                errdetail("The server will only start up to \"autovacuum_worker_slots\" (%d) autovacuum workers at a given time.",
+                          autovacuum_worker_slots)));
+}
index 0d53812406c5e0949b7a5cba53fa365942e718cb..0d473226c3a33090893dde0ef58228a343235fbc 100644 (file)
@@ -8,7 +8,7 @@
  * child process is allocated a PMChild struct from a fixed pool of structs.
  * The size of the pool is determined by various settings that configure how
  * many worker processes and backend connections are allowed, i.e.
- * autovacuum_max_workers, max_worker_processes, max_wal_senders, and
+ * autovacuum_worker_slots, max_worker_processes, max_wal_senders, and
  * max_connections.
  *
  * Dead-end backends are handled slightly differently.  There is no limit
@@ -99,7 +99,7 @@ InitPostmasterChildSlots(void)
     */
    pmchild_pools[B_BACKEND].size = 2 * (MaxConnections + max_wal_senders);
 
-   pmchild_pools[B_AUTOVAC_WORKER].size = autovacuum_max_workers;
+   pmchild_pools[B_AUTOVAC_WORKER].size = autovacuum_worker_slots;
    pmchild_pools[B_BG_WORKER].size = max_worker_processes;
 
    /*
index c7a972df7dd48234d5dce143547c10dacc013224..49204f91a20159248eff14e3e9a3d5e28465850e 100644 (file)
@@ -150,7 +150,7 @@ ProcGlobalSemas(void)
  *   So, now we grab enough semaphores to support the desired max number
  *   of backends immediately at initialization --- if the sysadmin has set
  *   MaxConnections, max_worker_processes, max_wal_senders, or
- *   autovacuum_max_workers higher than his kernel will support, he'll
+ *   autovacuum_worker_slots higher than his kernel will support, he'll
  *   find out sooner rather than later.
  *
  *   Another reason for creating semaphores here is that the semaphore
@@ -284,13 +284,13 @@ InitProcGlobal(void)
            dlist_push_tail(&ProcGlobal->freeProcs, &proc->links);
            proc->procgloballist = &ProcGlobal->freeProcs;
        }
-       else if (i < MaxConnections + autovacuum_max_workers + NUM_SPECIAL_WORKER_PROCS)
+       else if (i < MaxConnections + autovacuum_worker_slots + NUM_SPECIAL_WORKER_PROCS)
        {
            /* PGPROC for AV or special worker, add to autovacFreeProcs list */
            dlist_push_tail(&ProcGlobal->autovacFreeProcs, &proc->links);
            proc->procgloballist = &ProcGlobal->autovacFreeProcs;
        }
-       else if (i < MaxConnections + autovacuum_max_workers + NUM_SPECIAL_WORKER_PROCS + max_worker_processes)
+       else if (i < MaxConnections + autovacuum_worker_slots + NUM_SPECIAL_WORKER_PROCS + max_worker_processes)
        {
            /* PGPROC for bgworker, add to bgworkerFreeProcs list */
            dlist_push_tail(&ProcGlobal->bgworkerFreeProcs, &proc->links);
index 46dbc46a97e93c19408624a50e5f50d61001357d..01bb6a410cb83702e77be18191840f2da393ec82 100644 (file)
@@ -547,15 +547,15 @@ InitializeMaxBackends(void)
    Assert(MaxBackends == 0);
 
    /* Note that this does not include "auxiliary" processes */
-   MaxBackends = MaxConnections + autovacuum_max_workers +
+   MaxBackends = MaxConnections + autovacuum_worker_slots +
        max_worker_processes + max_wal_senders + NUM_SPECIAL_WORKER_PROCS;
 
    if (MaxBackends > MAX_BACKENDS)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("too many server processes configured"),
-                errdetail("\"max_connections\" (%d) plus \"autovacuum_max_workers\" (%d) plus \"max_worker_processes\" (%d) plus \"max_wal_senders\" (%d) must be less than %d.",
-                          MaxConnections, autovacuum_max_workers,
+                errdetail("\"max_connections\" (%d) plus \"autovacuum_worker_slots\" (%d) plus \"max_worker_processes\" (%d) plus \"max_wal_senders\" (%d) must be less than %d.",
+                          MaxConnections, autovacuum_worker_slots,
                           max_worker_processes, max_wal_senders,
                           MAX_BACKENDS - (NUM_SPECIAL_WORKER_PROCS - 1))));
 }
index 22f16a3b46273fe5cfc9af64a728fa8d93f9bbd3..c9d8cd796a8a2aa630843941298beed724810a2f 100644 (file)
@@ -3467,7 +3467,16 @@ struct config_int ConfigureNamesInt[] =
    },
    {
        /* see max_connections */
-       {"autovacuum_max_workers", PGC_POSTMASTER, AUTOVACUUM,
+       {"autovacuum_worker_slots", PGC_POSTMASTER, AUTOVACUUM,
+           gettext_noop("Sets the number of backend slots to allocate for autovacuum workers."),
+           NULL
+       },
+       &autovacuum_worker_slots,
+       16, 1, MAX_BACKENDS,
+       NULL, NULL, NULL
+   },
+   {
+       {"autovacuum_max_workers", PGC_SIGHUP, AUTOVACUUM,
            gettext_noop("Sets the maximum number of simultaneously running autovacuum worker processes."),
            NULL
        },
index a2ac7575ca7e3a96b0f8f07f55d65eebb7a14459..b2bc43383dbea251f6578da8d04d2086b25bbef2 100644 (file)
 
 #autovacuum = on           # Enable autovacuum subprocess?  'on'
                    # requires track_counts to also be on.
-#autovacuum_max_workers = 3        # max number of autovacuum subprocesses
+#autovacuum_worker_slots = 16  # autovacuum worker slots to allocate
                    # (change requires restart)
+#autovacuum_max_workers = 3        # max number of autovacuum subprocesses
 #autovacuum_naptime = 1min     # time between autovacuum runs
 #autovacuum_vacuum_threshold = 50  # min number of row updates before
                    # vacuum
index edac746f3cfafb723217663004e97ed682e8b1d6..54e01c81d68e534b3a1b5de4c4b6c5b98963001d 100644 (file)
@@ -28,6 +28,7 @@ typedef enum
 
 /* GUC variables */
 extern PGDLLIMPORT bool autovacuum_start_daemon;
+extern PGDLLIMPORT int autovacuum_worker_slots;
 extern PGDLLIMPORT int autovacuum_max_workers;
 extern PGDLLIMPORT int autovacuum_work_mem;
 extern PGDLLIMPORT int autovacuum_naptime;
index 74bf3408682e99bb4d4ba2d535ef392a038bda17..08b89a4cdffd52f1d5f2b25d1340152c9d5ed490 100644 (file)
@@ -707,6 +707,7 @@ sub init
        }
        print $conf "max_wal_senders = 10\n";
        print $conf "max_replication_slots = 10\n";
+       print $conf "autovacuum_worker_slots = 3\n";
        print $conf "wal_log_hints = on\n";
        print $conf "hot_standby = on\n";
        # conservative settings to ensure we can run multiple postmasters: