emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

feature/native-comp 5f5d664 3/5: Rework eln hash filename strategy


From: Andrea Corallo
Subject: feature/native-comp 5f5d664 3/5: Rework eln hash filename strategy
Date: Sun, 23 Aug 2020 08:02:03 -0400 (EDT)

branch: feature/native-comp
commit 5f5d664c734414597c1c7d9981b1ceb9ff69c5b1
Author: Andrea Corallo <akrl@sdf.org>
Commit: Andrea Corallo <akrl@sdf.org>

    Rework eln hash filename strategy
    
    Generate eln filename hashing also the source file content in the form:
    
    /absolute/path/filename.el + content ->
    eln-cache/filename-path_hash-content_hash.eln
    
        * src/lread.c (maybe_swap_for_eln): Always call
        Fcomp_el_to_eln_filename on an existing source file.
    
        * src/comp.c (md5.h, sysstdio.h, zlib.h): New include.
        (comp_hash_string): Use md5 instead of sha512.
        (MD5_BLOCKSIZE): New macro.
        (accumulate_and_process_md5, final_process_md5, md5_gz_stream)
        (comp_hash_source_file): New functions.
        (Fcomp_el_to_eln_filename): Rework for hasing using also source
        file content.
    
        * src/lread.c (maybe_swap_for_eln): Rename el_name -> src_name as
        this can be also a have .el.gz extention.
---
 configure.ac    |   9 ++--
 lib/Makefile.in |   6 +++
 src/comp.c      | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 src/lread.c     |  13 +++--
 4 files changed, 167 insertions(+), 22 deletions(-)

diff --git a/configure.ac b/configure.ac
index 0582b2f..cdc18ea 100644
--- a/configure.ac
+++ b/configure.ac
@@ -3787,6 +3787,12 @@ Here instructions on how to compile and install 
libgccjit from source:
 HAVE_NATIVE_COMP=no
 LIBGCCJIT_LIB=
 if test "${with_nativecomp}" != "no"; then
+    if test "${HAVE_PDUMPER}" = no; then
+       AC_MSG_ERROR(['--with-nativecomp' requires '--with-dumping=pdumper'])
+    fi
+    if test "${HAVE_ZLIB}" = no; then
+       AC_MSG_ERROR(['--with-nativecomp' requires zlib])
+    fi
     emacs_save_LIBS=$LIBS
     LIBS="-lgccjit"
     AC_RUN_IFELSE([libgccjit_smoke_test], [], [libgccjit_broken],
@@ -3800,9 +3806,6 @@ if test "${with_nativecomp}" != "no"; then
     NEED_DYNLIB=yes
     AC_DEFINE(HAVE_NATIVE_COMP, 1, [Define to 1 if you have the libgccjit 
library (-lgccjit).])
 fi
-if test "${HAVE_NATIVE_COMP}" = yes && test "${HAVE_PDUMPER}" = no; then
-   AC_MSG_ERROR(['--with-nativecomp' requires '--with-dumping=pdumper'])
-fi
 AC_DEFINE_UNQUOTED(NATIVE_ELISP_SUFFIX, ".eln",
   [System extension for native compiled elisp])
 AC_SUBST(HAVE_NATIVE_COMP)
diff --git a/lib/Makefile.in b/lib/Makefile.in
index 06d8e56..8d97d3b 100644
--- a/lib/Makefile.in
+++ b/lib/Makefile.in
@@ -50,12 +50,18 @@ am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
 am__v_at_0 = @
 am__v_at_1 =
 
+HAVE_NATIVE_COMP = @HAVE_NATIVE_COMP@
+
 ALL_CFLAGS= \
   $(C_SWITCH_SYSTEM) $(C_SWITCH_MACHINE) $(DEPFLAGS) \
   $(GNULIB_WARN_CFLAGS) $(WERROR_CFLAGS) $(PROFILING_CFLAGS) $(CFLAGS) \
   -I. -I../src -I$(srcdir) -I$(srcdir)/../src \
   $(if $(patsubst e-%,,$(notdir $<)),,-Demacs)
 
+ifeq ($(HAVE_NATIVE_COMP),yes)
+ALL_CFLAGS += -DGL_COMPILE_CRYPTO_STREAM
+endif
+
 SYSTEM_TYPE = @SYSTEM_TYPE@
 ifeq ($(SYSTEM_TYPE),windows-nt)
   include $(srcdir)/../nt/gnulib-cfg.mk
diff --git a/src/comp.c b/src/comp.c
index ff73245..5f1257f 100644
--- a/src/comp.c
+++ b/src/comp.c
@@ -36,7 +36,9 @@ along with GNU Emacs.  If not, see 
<https://www.gnu.org/licenses/>.  */
 #include "dynlib.h"
 #include "buffer.h"
 #include "blockinput.h"
-#include "sha512.h"
+#include "md5.h"
+#include "sysstdio.h"
+#include "zlib.h"
 
 
 /********************************/
@@ -394,8 +396,6 @@ load_gccjit_if_necessary (bool mandatory)
 }
 
 
-#define ELN_FILENAME_HASH_LEN 64
-
 /* C symbols emitted for the load relocation mechanism.  */
 #define CURRENT_THREAD_RELOC_SYM "current_thread_reloc"
 #define PURE_RELOC_SYM "pure_reloc"
@@ -640,9 +640,123 @@ format_string (const char *format, ...)
 static Lisp_Object
 comp_hash_string (Lisp_Object string)
 {
-  Lisp_Object digest = make_uninit_string (SHA512_DIGEST_SIZE * 2);
-  sha512_buffer (SSDATA (string), SCHARS (string), SSDATA (digest));
-  hexbuf_digest (SSDATA (digest), SDATA (digest), SHA512_DIGEST_SIZE);
+  Lisp_Object digest = make_uninit_string (MD5_DIGEST_SIZE * 2);
+  md5_buffer (SSDATA (string), SCHARS (string), SSDATA (digest));
+  hexbuf_digest (SSDATA (digest), SDATA (digest), MD5_DIGEST_SIZE);
+
+  return digest;
+}
+
+#define MD5_BLOCKSIZE 32768 /* From md5.c  */
+
+static char acc_buff[2 * MD5_BLOCKSIZE];
+static size_t acc_size;
+
+static void
+accumulate_and_process_md5 (void *data, size_t len, struct md5_ctx *ctxt)
+{
+  eassert (len <= MD5_BLOCKSIZE);
+  /* We may optimize this saving some of these memcpy/move using
+     directly the outer buffers but so far I'll not bother.  */
+  memcpy (acc_buff + acc_size, data, len);
+  acc_size += len;
+  if (acc_size >= MD5_BLOCKSIZE)
+    {
+      acc_size -= MD5_BLOCKSIZE;
+      md5_process_block (acc_buff, MD5_BLOCKSIZE, ctxt);
+      memmove (acc_buff, acc_buff + MD5_BLOCKSIZE, acc_size);
+    }
+}
+
+static void
+final_process_md5 (struct md5_ctx *ctxt)
+{
+  if (acc_size)
+    {
+      md5_process_bytes (acc_buff, acc_size, ctxt);
+      acc_size = 0;
+    }
+}
+
+static int
+md5_gz_stream (FILE *source, void *resblock)
+{
+  z_stream stream;
+  unsigned char in[MD5_BLOCKSIZE];
+  unsigned char out[MD5_BLOCKSIZE];
+
+  eassert (!acc_size);
+
+  struct md5_ctx ctx;
+  md5_init_ctx (&ctx);
+
+  /* allocate inflate state */
+  stream.zalloc = Z_NULL;
+  stream.zfree = Z_NULL;
+  stream.opaque = Z_NULL;
+  stream.avail_in = 0;
+  stream.next_in = Z_NULL;
+  int res = inflateInit2 (&stream, MAX_WBITS + 32);
+  if (res != Z_OK)
+    return -1;
+
+  do {
+    stream.avail_in = fread (in, 1, MD5_BLOCKSIZE, source);
+    if (ferror (source)) {
+      inflateEnd (&stream);
+      return -1;
+    }
+    if (stream.avail_in == 0)
+      break;
+    stream.next_in = in;
+
+    do {
+      stream.avail_out = MD5_BLOCKSIZE;
+      stream.next_out = out;
+      res = inflate (&stream, Z_NO_FLUSH);
+
+      if (res != Z_OK && res != Z_STREAM_END)
+       return -1;
+
+      accumulate_and_process_md5 (out, MD5_BLOCKSIZE - stream.avail_out, &ctx);
+    } while (!stream.avail_out);
+
+  } while (res != Z_STREAM_END);
+
+  final_process_md5 (&ctx);
+  inflateEnd (&stream);
+
+  if (res != Z_STREAM_END)
+    return -1;
+
+  md5_finish_ctx (&ctx, resblock);
+
+  return 0;
+}
+#undef MD5_BLOCKSIZE
+
+static Lisp_Object
+comp_hash_source_file (Lisp_Object filename)
+{
+  /* Can't use Finsert_file_contents + Fbuffer_hash as this is called
+     by Fcomp_el_to_eln_filename too early during bootstrap.  */
+  bool is_gz = suffix_p (filename, ".gz");
+  FILE *f = emacs_fopen (SSDATA (filename), is_gz ? "rb" : "r");
+
+  if (!f)
+    report_file_error ("Opening source file", filename);
+
+  Lisp_Object digest = make_uninit_string (MD5_DIGEST_SIZE * 2);
+
+  int res = is_gz
+    ? md5_gz_stream (f, SSDATA (digest))
+    : md5_stream (f, SSDATA (digest));
+  fclose (f);
+
+  if (res)
+    xsignal2 (Qfile_notify_error, build_string ("hashing failed"), filename);
+
+  hexbuf_digest (SSDATA (digest), SSDATA (digest), MD5_DIGEST_SIZE);
 
   return digest;
 }
@@ -3872,21 +3986,36 @@ If BASE-DIR is nil use the first entry in 
`comp-eln-load-path'.  */)
 {
   CHECK_STRING (filename);
 
+  filename = Fexpand_file_name (filename, Qnil);
+
+  if (NILP (Ffile_exists_p (filename)))
+    xsignal1 (Qfile_missing, filename);
+
+  Lisp_Object content_hash = comp_hash_source_file (filename);
+
   if (suffix_p (filename, ".gz"))
     filename = Fsubstring (filename, Qnil, make_fixnum (-3));
-  filename = Fexpand_file_name (filename, Qnil);
 
   /* We create eln filenames with an hash in order to look-up these
      starting from the source filename, IOW have a relation
-     /absolute/path/filename.el -> eln-cache/filename-hash.eln.
+
+     /absolute/path/filename.el + content ->
+     eln-cache/filename-path_hash-content_hash.eln.
+
+     'dlopen' can return the same handle if two shared with the same
+     filename are loaded in two different times (even if the first was
+     deleted!).  To prevent this scenario the source file content is
+     included in the hashing algorithm.
+
+     As at any point in time no more then one file can exist with the
+     same filename, should be possibile to clean up all
+     filename-path_hash-* except the most recent one (or the new one
+     being recompiled).
 
      As installing .eln files compiled during the build changes their
      absolute path we need an hashing mechanism that is not sensitive
      to that.  For this we replace if match PATH_DUMPLOADSEARCH or
-     PATH_LOADSEARCH with '//' before generating the hash.
-
-     Another approach would be to hash using the source file content
-     but this may have a measurable performance impact.  */
+     PATH_LOADSEARCH with '//' before generating the hash.  */
 
   if (NILP (loadsearch_re_list))
     {
@@ -3909,12 +4038,12 @@ If BASE-DIR is nil use the first entry in 
`comp-eln-load-path'.  */)
          break;
        }
     }
-
-  Lisp_Object hash = Fsubstring (comp_hash_string (filename), Qnil,
-                                make_fixnum (ELN_FILENAME_HASH_LEN));
+  Lisp_Object separator = build_string ("-");
+  Lisp_Object path_hash = comp_hash_string (filename);
   filename = concat2 (Ffile_name_nondirectory (Fsubstring (filename, Qnil,
                                                           make_fixnum (-3))),
-                      build_string ("-"));
+                     separator);
+  Lisp_Object hash = concat3 (path_hash, separator, content_hash);
   filename = concat3 (filename, hash, build_string (NATIVE_ELISP_SUFFIX));
   if (NILP (base_dir))
     base_dir = XCAR (Vcomp_eln_load_path);
diff --git a/src/lread.c b/src/lread.c
index 521da4e..3d0de49 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -1623,10 +1623,17 @@ maybe_swap_for_eln (Lisp_Object *filename, int *fd, 
struct timespec mtime)
   Lisp_Object eln_path_tail = Vcomp_eln_load_path;
   FOR_EACH_TAIL_SAFE (eln_path_tail)
     {
-      Lisp_Object el_name =
+      Lisp_Object src_name =
        Fsubstring (*filename, Qnil, make_fixnum (-1));
+      if (NILP (Ffile_exists_p (src_name)))
+       {
+         src_name = concat2 (src_name, build_string (".gz"));
+         if (NILP (Ffile_exists_p (src_name)))
+           /* Can't find the corresponding source file.  */
+           return;
+       }
       Lisp_Object eln_name =
-       Fcomp_el_to_eln_filename (el_name, XCAR (eln_path_tail));
+       Fcomp_el_to_eln_filename (src_name, XCAR (eln_path_tail));
       int eln_fd = emacs_open (SSDATA (ENCODE_FILE (eln_name)), O_RDONLY, 0);
 
       if (eln_fd > 0)
@@ -1643,7 +1650,7 @@ maybe_swap_for_eln (Lisp_Object *filename, int *fd, 
struct timespec mtime)
                  *fd = eln_fd;
                  /* Store the eln -> el relation.  */
                  Fputhash (Ffile_name_nondirectory (eln_name),
-                           el_name, Vcomp_eln_to_el_h);
+                           src_name, Vcomp_eln_to_el_h);
                  return;
                }
              else



reply via email to

[Prev in Thread] Current Thread [Next in Thread]