guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[shepherd] 03/03: service: 'start' blocks when a service is already bein


From: Ludovic Courtès
Subject: [shepherd] 03/03: service: 'start' blocks when a service is already being started.
Date: Sat, 12 Nov 2022 16:44:26 -0500 (EST)

civodul pushed a commit to branch wip-service-monitor
in repository shepherd.

commit a915989cb1770ac8ae45b3fabec0a21ef80f2a83
Author: Ludovic Courtès <ludo@gnu.org>
AuthorDate: Sat Jun 25 23:24:05 2022 +0200

    service: 'start' blocks when a service is already being started.
    
    This change allows the monitor to keep track of services that are being
    started, in addition to running/stopped.  The 'start' method now blocks
    when called on a service already being started, until the service has
    actually been started.  This was the case before 0.9.0; since 0.9.0
    though, invoking 'start' on a service already being started would
    lead to a second invocation of its 'start' method.
    
    * modules/shepherd/service.scm (start): Use the new 'start' protocol
    with the monitor and adjust accordingly.
    (service-monitor)[*service-started*, started-message?]: New variables.
    Add 'starting' variable to 'loop'.  Remove ('set-running ...) clause;
    add ('start ...) clause.
    * tests/starting-status.sh: New file.
    * Makefile.am (TESTS): Add it.
---
 Makefile.am                  |   1 +
 modules/shepherd/service.scm | 138 ++++++++++++++++++++++++++++++++-----------
 tests/starting-status.sh     | 110 ++++++++++++++++++++++++++++++++++
 3 files changed, 213 insertions(+), 36 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index 81fb04d..0d3a983 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -230,6 +230,7 @@ SUFFIXES = .go
 
 TESTS =                                                \
   tests/basic.sh                               \
+  tests/starting-status.sh                     \
   tests/replacement.sh                         \
   tests/respawn.sh                             \
   tests/respawn-throttling.sh                  \
diff --git a/modules/shepherd/service.scm b/modules/shepherd/service.scm
index 9b326ce..ea26c65 100644
--- a/modules/shepherd/service.scm
+++ b/modules/shepherd/service.scm
@@ -27,13 +27,14 @@
   #:use-module ((fibers)
                 #:hide (sleep))
   #:use-module (fibers channels)
+  #:use-module (fibers conditions)
   #:use-module (fibers scheduler)
   #:use-module (oop goops)
   #:use-module (srfi srfi-1)
   #:use-module (srfi srfi-9)
   #:use-module (srfi srfi-26)
   #:use-module (srfi srfi-34)
-  #:use-module (srfi srfi-35)
+  #:use-module ((srfi srfi-35) #:hide (make-condition))
   #:use-module (rnrs io ports)
   #:use-module ((ice-9 control) #:select (call/ec))
   #:use-module (ice-9 match)
@@ -392,30 +393,40 @@ wire."
         (let ((problem
                ;; Resolve all dependencies.
                (find (negate start) (required-by obj))))
-          (if problem
-              (local-output (l10n "Service ~a depends on ~a.")
-                            (canonical-name obj)
-                            problem)
-               ;; Start the service itself.
-               (let ((running (catch #t
-                                (lambda ()
-                                  (apply (slot-ref obj 'start) args))
-                                (lambda (key . args)
-                                  (report-exception 'start obj key args)))))
-                 (put-message (current-monitor-channel)
-                              `(set-running ,obj ,running))))
+           (define running
+            (if problem
+                (local-output (l10n "Service ~a depends on ~a.")
+                              (canonical-name obj)
+                              problem)
+                 ;; Start the service itself.
+                 (let ((reply (make-channel)))
+                   (put-message (current-monitor-channel) `(start ,obj ,reply))
+                   (match (get-message reply)
+                     (#f
+                      ;; We lost the race: OBJ is already running.
+                      (service-running-value obj))
+                     ((? channel? notification)
+                      ;; We won the race: we're responsible for starting OBJ
+                      ;; and sending its running value on NOTIFICATION.
+                      (let ((running (catch #t
+                                       (lambda ()
+                                         (apply (slot-ref obj 'start) args))
+                                       (lambda (key . args)
+                                         (report-exception 'start obj key 
args)))))
+                        (put-message notification running)
+                        running))))))
 
           ;; Status message.
-           (let ((running (service-running-value obj)))
-             (when (one-shot? obj)
-               (put-message (current-monitor-channel)
-                            `(notify-termination ,obj)))
-             (local-output (if running
-                              (l10n "Service ~a has been started.")
-                               (l10n "Service ~a could not be started."))
-                          (canonical-name obj))
+           (when (one-shot? obj)
+             (put-message (current-monitor-channel)
+                          `(notify-termination ,obj)))
+
+           (local-output (if running
+                            (l10n "Service ~a has been started.")
+                             (l10n "Service ~a could not be started."))
+                        (canonical-name obj))
 
-             running)))))
+           running))))
 
 (define (replace-service old-service new-service)
   "Replace OLD-SERVICE with NEW-SERVICE in the services registry.  This
@@ -648,8 +659,12 @@ clients."
 (define (service-monitor channel)
   "Encapsulate shepherd state (registered and running services) and serve
 requests arriving on @var{channel}."
+  (define *service-started* (list 'service 'started!))
+  (define (started-message? obj) (eq? *service-started* obj))
+
   (let loop ((registered vlist-null)
-             (running vlist-null))
+             (running vlist-null)
+             (starting vlist-null))
     (define (unregister services)
       ;; Return REGISTERED minus SERVICE.
       (vhash-fold (lambda (name service result)
@@ -672,35 +687,36 @@ requests arriving on @var{channel}."
        (let ((name (canonical-name service)))
          (match (vhash-assq name registered)
            (#f
-            (loop (register service) running))
+            (loop (register service) running starting))
            ((_ . old)
             (if (vhash-assq old running)
                 (begin
                   (slot-set! old 'replacement service)
-                  (loop registered running))
+                  (loop registered running starting))
                 (loop (register service (unregister (list old)))
-                      running))))))
+                      running starting))))))
       (('unregister services)                     ;no reply
        (match (filter (cut vhash-assq <> running) services)
          (()
-          (loop (unregister services) running))
+          (loop (unregister services) running starting))
          (lst                                     ;
           (local-output
            (l10n "Cannot unregister service ~a, which is still running"
                  "Cannot unregister services~{ ~a,~} which are still running"
                  (length lst))
            (map canonical-name lst))
-          (loop registered running))))
+          (loop registered running starting))))
       (('unregister-all)                          ;no reply
        (let ((root (cdr (vhash-assq 'root registered))))
          (loop (fold (cut vhash-consq <> root <>)
                      vlist-null
                      (provided-by root))
-               (vhash-consq root #t running))))
+               (vhash-consq root #t running)
+               starting)))
       (('lookup name reply)
        (put-message reply
                     (vhash-foldq* cons '() name registered))
-       (loop registered running))
+       (loop registered running starting))
       (('service-list reply)
        (let ((names (delete-duplicates
                      (vhash-fold (lambda (key _ result)
@@ -716,19 +732,69 @@ requests arriving on @var{channel}."
                                           result))
                             '()
                             names))
-         (loop registered running)))
+         (loop registered running starting)))
       (('running service reply)
        (put-message reply
                     (match (vhash-assq service running)
                       (#f #f)
                       ((_ . value) value)))
-       (loop registered running))
-      (('set-running service value)               ;no reply
-       (loop registered
-             (vhash-consq service value running)))
+       (loop registered running starting))
+      (('start service reply)
+       ;; Attempt to start SERVICE, blocking if it is already being started.
+       ;; Send #f on REPLY if SERVICE was already running or being started;
+       ;; otherwise send a channel on which to send SERVICE's value one it has
+       ;; been started.
+       (cond ((vhash-assq service running)
+              =>
+              ;; SERVICE is already running: send its value on REPLY.
+              (lambda (pair)
+                (match pair
+                  ((_ . value)
+                   (put-message reply #f)
+                   (loop registered running starting)))))
+             ((vhash-assq service starting)
+              =>
+              ;; SERVICE is being started: wait until it has started and then
+              ;; send #f on REPLY.
+              (lambda (pair)
+                (match pair
+                  ((_ . condition)
+                   (spawn-fiber
+                    (lambda ()
+                      (wait condition)
+                      (put-message reply #f)))
+                   (loop registered running starting)))))
+             (else
+              ;; Become the one who starts SERVICE.
+              (let ((condition (make-condition))
+                    (notification (make-channel)))
+                (spawn-fiber
+                 (lambda ()
+                   (let ((running (get-message notification)))
+                     (local-output (l10n "Service ~a started.")
+                                   (canonical-name service))
+                     (put-message channel
+                                  (list *service-started* service running)))))
+                (local-output (l10n "Starting service ~a...")
+                              (canonical-name service))
+                (put-message reply notification)
+                (loop registered running
+                      (vhash-consq service condition starting))))))
+      (((? started-message?) service value)       ;no reply
+       (local-output (l10n "Service ~a running with value ~s.")
+                     (canonical-name service) value)
+       (match (vhash-assq service starting)
+         ((_ . condition)
+          (signal-condition! condition)
+          (loop registered
+                (if (or (one-shot? service) (not value))
+                    running
+                    (vhash-consq service value running))
+                (vhash-delq service starting)))))
       (('notify-termination service)              ;no reply
        (loop registered
-             (vhash-delq service running))))))    ;XXX: complexity
+             (vhash-delq service running)         ;XXX: complexity
+             starting)))))
 
 (define (spawn-service-monitor)
   "Spawn a new service monitor fiber and return a channel to send it requests."
diff --git a/tests/starting-status.sh b/tests/starting-status.sh
new file mode 100644
index 0000000..e0a2c45
--- /dev/null
+++ b/tests/starting-status.sh
@@ -0,0 +1,110 @@
+# GNU Shepherd --- Test the "starting" status.
+# Copyright © 2022 Ludovic Courtès <ludo@gnu.org>
+#
+# This file is part of the GNU Shepherd.
+#
+# The GNU Shepherd is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at
+# your option) any later version.
+#
+# The GNU Shepherd is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with the GNU Shepherd.  If not, see <http://www.gnu.org/licenses/>.
+
+shepherd --version
+herd --version
+
+socket="t-socket-$$"
+conf="t-conf-$$"
+confdir="t-confdir-$$"
+datadir="t-datadir-$$"
+log="t-log-$$"
+stamp="t-stamp-$$"
+pid="t-pid-$$"
+
+herd="herd -s $socket"
+
+trap "cat $log || true; rm -f $socket $conf $stamp $log;
+      test -f $pid && kill \`cat $pid\` || true; rm -f $pid" EXIT
+
+cat > "$conf" <<EOF
+(register-services
+ (make <service>
+   #:provides '(test)
+   #:start (lambda _
+             (let loop ((n 30))
+               (if (or (file-exists? "$stamp") (zero? n))
+                   (> n 0)
+                   (begin
+                     ((@ (fibers) sleep) 1)
+                     (loop (- n 1))))))
+   #:stop  (lambda _
+             (delete-file "$stamp"))
+   #:respawn? #f))
+EOF
+
+rm -f "$pid" "$stamp"
+shepherd -I -s "$socket" -c "$conf" -l "$log" --pid="$pid" &
+
+# Wait till it's ready.
+while ! test -f "$pid" ; do sleep 0.3 ; done
+
+shepherd_pid="`cat $pid`"
+
+$herd start test &
+herd_pid=$!
+
+# Currently, 'test' is considered as "stopped" while starting.
+$herd status
+$herd status test
+$herd status test | grep stopped
+
+$herd start test &
+herd_pid2=$!
+sleep 1
+kill -0 "$herd_pid"
+kill -0 "$herd_pid2"
+
+# Trigger actual service start.
+touch "$stamp"
+
+# Make sure the service is marked as "started" soon shortly after.
+n=0
+while : ; do
+    if $herd status test | grep started
+    then break
+    else n=$(expr $n + 1)
+    fi
+
+    test $n -le 10
+    sleep 1
+done
+
+# Make sure the 'herd' processes terminated.
+n=0
+while : ; do
+    if kill -0 "$herd_pid" || kill -0 "$herd_pid2"
+    then
+       n=$(expr $n + 1)
+       test $n -le 10
+       sleep 1
+    else
+       break
+    fi
+done
+
+$herd stop test
+! test -f "$stamp"
+
+$herd stop root
+! kill -0 $shepherd_pid
+
+test $(grep "Starting service test" "$log" | wc -l) = 1
+
+rm -rf "$confdir"
+rm -rf "$datadir"



reply via email to

[Prev in Thread] Current Thread [Next in Thread]