guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: hydra: berlin: Serve a restrictive robots.txt.


From: Tobias Geerinckx-Rice
Subject: branch master updated: hydra: berlin: Serve a restrictive robots.txt.
Date: Fri, 10 Dec 2021 12:20:13 -0500

This is an automated email from the git hooks/post-receive script.

nckx pushed a commit to branch master
in repository maintenance.

The following commit(s) were added to refs/heads/master by this push:
     new 2ec0a03  hydra: berlin: Serve a restrictive robots.txt.
2ec0a03 is described below

commit 2ec0a03774069f390a499fe93bdec6cec34819dc
Author: Tobias Geerinckx-Rice <me@tobias.gr>
AuthorDate: Fri Dec 10 18:01:01 2021 +0100

    hydra: berlin: Serve a restrictive robots.txt.
    
    * hydra/nginx/berlin.scm (publish-robots.txt): New variable.
    (publish-locations): Add a new nginx location to serve it to crawlers.
---
 hydra/nginx/berlin.scm | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/hydra/nginx/berlin.scm b/hydra/nginx/berlin.scm
index 1f4b0be..8350723 100644
--- a/hydra/nginx/berlin.scm
+++ b/hydra/nginx/berlin.scm
@@ -3,7 +3,7 @@
 ;; Copyright © 2017, 2018, 2019, 2020, 2021 Ricardo Wurmus <rekado@elephly.net>
 ;; Copyright © 2020 Christopher Baines <mail@cbaines.net>
 ;; Copyright © 2020, 2021 Florian Pelz <pelzflorian@pelzflorian.de>
-;; Copyright © 2020 Tobias Geerinckx-Rice <me@tobias.gr>
+;; Copyright © 2020, 2021 Tobias Geerinckx-Rice <me@tobias.gr>
 ;; Copyright © 2021 Mathieu Othacehe <othacehe@gnu.org>
 ;; Released under the GNU GPLv3 or any later version.
 
@@ -24,6 +24,17 @@
    (uri (string-append "= " old)) ;= means highest priority
    (body (list (string-append "return 301 " new ";\n")))))
 
+(define publish-robots.txt
+  ;; Try to prevent good-faith crawlers from downloading substitutes.  Allow
+  ;; indexing the root—which is expected to be static or cheap—to remain visible
+  ;; in search engine results for, e.g., ‘Guix CI’.
+  "\
+User-agent: *\r
+Disallow: /\r
+Allow: /$\r
+\r
+")
+
 (define (publish-locations url)
   "Return the nginx location blocks for 'guix publish' running on URL."
   (list (nginx-location-configuration
@@ -97,9 +108,9 @@
 
            ;; Do not tolerate slowness of hydra.gnu.org when fetching
            ;; narinfos: better return 504 quickly than wait forever.
-           "proxy_connect_timeout 2s;"
-           "proxy_read_timeout 2s;"
-           "proxy_send_timeout 2s;"
+           "proxy_connect_timeout 10s;"
+           "proxy_read_timeout 10s;"
+           "proxy_send_timeout 10s;"
 
            ;; 'guix publish --ttl' produces a 'Cache-Control' header for
            ;; use by 'guix substitute'.  Let it through rather than use
@@ -125,7 +136,17 @@
            "proxy_cache_valid 200 200d;"          ; cache hits
            "proxy_cache_valid any 5m;"            ; cache misses/others
 
-           "proxy_ignore_client_abort on;")))))
+           "proxy_ignore_client_abort on;")))
+
+       ;; Try to prevent good-faith crawlers from downloading substitutes.
+       (nginx-location-configuration
+        (uri "= /robots.txt")
+        (body
+         (list
+          #~(string-append "try_files "
+                           #$(plain-file "robots.txt" publish-robots.txt)
+                           " =404;")
+          "root /;")))))
 
 (define %tls-settings
   (list
@@ -938,7 +959,6 @@ PUBLISH-URL."
       %tls-settings
       (list
        "add_header Content-Security-Policy \"frame-ancestors 'none'\";"
-
        ;; TODO This works around NGinx using the epoch for the
        ;; Last-Modified date, as well as the etag.
        ;; See http://issues.guix.gnu.org/37207



reply via email to

[Prev in Thread] Current Thread [Next in Thread]