[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[shepherd] 01/01: services: Gracefully handle PID files not created atom
From: |
Ludovic Courtès |
Subject: |
[shepherd] 01/01: services: Gracefully handle PID files not created atomically. |
Date: |
Sat, 4 May 2019 11:26:35 -0400 (EDT) |
civodul pushed a commit to branch master
in repository shepherd.
commit 72631752149d000c8c98ae0cc66e0b0c2eda94ef
Author: Ludovic Courtès <address@hidden>
Date: Sat May 4 17:21:36 2019 +0200
services: Gracefully handle PID files not created atomically.
Prompted by <https://bugs.gnu.org/35550>.
* modules/shepherd/service.scm (read-pid-file): Loop when
'string->number' returns #f.
* tests/pid-file.sh: Define 'test-works' service that checks for PID
files not created atomically.
---
modules/shepherd/service.scm | 17 ++++++++++++++---
tests/pid-file.sh | 28 +++++++++++++++++++++++++++-
2 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/modules/shepherd/service.scm b/modules/shepherd/service.scm
index 53437b6..3aea7b8 100644
--- a/modules/shepherd/service.scm
+++ b/modules/shepherd/service.scm
@@ -717,9 +717,20 @@ otherwise return the number that was read (a PID)."
(let loop ()
(catch 'system-error
(lambda ()
- (string->number
- (string-trim-both
- (call-with-input-file file get-string-all))))
+ (match (string->number
+ (string-trim-both
+ (call-with-input-file file get-string-all)))
+ (#f
+ ;; If we didn't get an integer, it may be because the daemon didn't
+ ;; create FILE atomically and isn't done writing to it. Try again.
+ (loop))
+ ((? integer? pid)
+ ;; It's possible, though unlikely, that PID is not a valid PID, for
+ ;; instance because writes to FILE did not complete. However, we
+ ;; don't do (kill pid 0) because if the process lives in a separate
+ ;; PID namespace, then PID is probably invalid in our own
+ ;; namespace.
+ pid)))
(lambda args
(let ((errno (system-error-errno args)))
(if (= ENOENT errno)
diff --git a/tests/pid-file.sh b/tests/pid-file.sh
index d76ec91..5db6bd8 100644
--- a/tests/pid-file.sh
+++ b/tests/pid-file.sh
@@ -34,7 +34,11 @@ cat > "$conf"<<EOF
(use-modules (ice-9 match))
(define %command
- '("$SHELL" "-c" "echo \$\$ > $PWD/$service_pid ; sleep 600"))
+ ;; Purposefully introduce a delay between the time the PID file
+ ;; is created and the time it actually contains a valid PID. This
+ ;; simulates PID files not created atomically, as is the case with
+ ;; wpa_supplicant 2.7 for instance.
+ '("$SHELL" "-c" "echo > $PWD/$service_pid ; sleep 1.5; echo \$\$ >
$PWD/$service_pid ; exec sleep 600"))
(register-services
(make <service>
@@ -50,6 +54,15 @@ cat > "$conf"<<EOF
;; up to 4 seconds or so.
#:pid-file-timeout 6)
#:stop (make-kill-destructor)
+ #:respawn? #f)
+
+ (make <service>
+ ;; Same one, but actually produces the PID file.
+ #:provides '(test-works)
+ #:start (make-forkexec-constructor %command
+ #:pid-file "$PWD/$service_pid"
+ #:pid-file-timeout 6)
+ #:stop (make-kill-destructor)
#:respawn? #f))
EOF
@@ -73,3 +86,16 @@ test -f "$service_pid"
# Make sure it did not leave a process behind it.
if kill -0 `cat "$service_pid"`
then false; else true; fi
+
+# Now start the service that works.
+$herd start test-works
+$herd status test-works | grep started
+test -f "$service_pid"
+kill -0 "`cat $service_pid`"
+known_pid="`$herd status test-works | grep Running \
+ | sed -es'/.*Running value.* \([0-9]\+\)\.$/\1/g'`"
+test `cat $service_pid` -eq $known_pid
+
+$herd stop test-works
+if kill -0 `cat "$service_pid"`
+then false; else true; fi