guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: remote-worker: Do not block on request-work response.


From: Mathieu Othacehe
Subject: branch master updated: remote-worker: Do not block on request-work response.
Date: Sat, 12 Nov 2022 05:51:06 -0500

This is an automated email from the git hooks/post-receive script.

mothacehe pushed a commit to branch master
in repository guix-cuirass.

The following commit(s) were added to refs/heads/master by this push:
     new 1fb4b0a  remote-worker: Do not block on request-work response.
1fb4b0a is described below

commit 1fb4b0ac1297e9bd680d0f4a356ce3050b27f913
Author: Mathieu Othacehe <othacehe@gnu.org>
AuthorDate: Sat Nov 12 11:45:16 2022 +0100

    remote-worker: Do not block on request-work response.
    
    When the worker sends a request-work message to the server, it then waits
    indefinitely for a response. If the server receives the request but dies
    before answering, the client can be blocked forever.
    
    * src/cuirass/remote.scm (EAGAIN-safe): New macro.
    (zmq-get-msg-parts-bytevector/no-wait): New procedure.
    * src/cuirass/scripts/remote-worker.scm (start-worker): Use the above
    procedure so as not to wait for the server response indefinitely.
---
 src/cuirass/remote.scm                | 29 +++++++++++++++++++++++++++++
 src/cuirass/scripts/remote-worker.scm |  5 ++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/src/cuirass/remote.scm b/src/cuirass/remote.scm
index 3513a81..a030da7 100644
--- a/src/cuirass/remote.scm
+++ b/src/cuirass/remote.scm
@@ -69,6 +69,7 @@
             receive-logs
             send-log
 
+            zmq-get-msg-parts-bytevector/no-wait
             zmq-poll*
             zmq-message-receive*
             zmq-socket-ready?
@@ -382,6 +383,34 @@ retries a call to PROC."
 
   safe)
 
+(define-syntax-rule (EAGAIN-safe proc ...)
+  "Return a variant of PROC that catches EAGAIN 'zmq-error' exceptions."
+  (catch 'zmq-error
+    (lambda ()
+      proc ...)
+    (lambda (key errno . rest)
+      (if (= errno EAGAIN)
+          'egain
+          (apply throw key errno rest)))))
+
+(define* (zmq-get-msg-parts-bytevector/no-wait socket parts
+                                               #:key
+                                               (retries 10)
+                                               (interval 1))
+  "Call ZMQ-GET-MSG-PARTS-BYTEVECTOR but pass it the ZMQ_DONTWAIT flag.  If
+there is nothing to be read on SOCKET and it returns EAGAIN, catch it and
+retry RETRIES times spaced by INTERVAL seconds.  Return #false if nothing was
+read after retrying."
+  (let loop ((retries retries))
+    (and (> retries 0)
+         (match (EAGAIN-safe
+                 (zmq-get-msg-parts-bytevector socket parts
+                                               #:flags ZMQ_DONTWAIT))
+           ('egain
+            (sleep interval)
+            (loop (- retries 1)))
+           (x x)))))
+
 (define zmq-poll*
   ;; Return a variant of ZMQ-POLL that catches EINTR errors.
   (EINTR-safe zmq-poll))
diff --git a/src/cuirass/scripts/remote-worker.scm b/src/cuirass/scripts/remote-worker.scm
index 465b6ce..f8f50d0 100644
--- a/src/cuirass/scripts/remote-worker.scm
+++ b/src/cuirass/scripts/remote-worker.scm
@@ -353,7 +353,10 @@ and executing them.  The worker can reply on the same socket."
          (let loop ()
            (log-info (G_ "~a: request work.") (worker-name wrk))
            (request-work socket worker)
-           (match (zmq-get-msg-parts-bytevector socket '())
+           ;; Use a no-wait variant because the server could die unexpectedly
+           ;; and we would be blocked on the receive call forever.
+           (match (zmq-get-msg-parts-bytevector/no-wait socket '())
+             (#f #f) ;no response, keep going.
              ((empty command)
               (run-command (bv->string command) server
                            #:reply (reply socket)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]