emacs-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Not using DOC for ELisp files


From: Stefan Monnier
Subject: Not using DOC for ELisp files
Date: Mon, 27 Dec 2021 20:48:22 -0500
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/29.0.50 (gnu/linux)

The patch below removes from the DOC file the docstrings coming from
ELisp files.

In numbers (in my tests), this reduces the size of DOC from about 3.2MB
to about 850kB, and increases the pdmp size by about 53kB or about 0.7%.

The benefits aren't great, obviously, but it does remove some warts in
the `lread.c` code (into which I bumped in the `scratch/fcr` branch,
which is why this comes up now).

I have a vague recollection that such a change was mentioned in some
bug-report discussion some months ago, but I can't remember where.

Comments?


        Stefan


diff --git a/lib-src/make-docfile.c b/lib-src/make-docfile.c
index d17c28be90..4f4b135589 100644
--- a/lib-src/make-docfile.c
+++ b/lib-src/make-docfile.c
@@ -20,7 +20,7 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
 
 
 /* The arguments given to this program are all the C and Lisp source files
- of GNU Emacs.  .elc and .el and .c files are allowed.
+ of GNU Emacs.  .c files are allowed.
  A .o file can also be specified; the .c file it was made from is used.
  This helps the makefile pass the correct list of files.
  Option -d DIR means change to DIR before looking for files.
@@ -62,13 +62,9 @@ along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    Similarly, msdos defines this as sys_chdir, but we're not linking with the
    file where that function is defined.  */
 #undef chdir
-#define IS_SLASH(c)  ((c) == '/' || (c) == '\\' || (c) == ':')
-#else  /* not DOS_NT */
-#define IS_SLASH(c)  ((c) == '/')
 #endif /* not DOS_NT */
 
 static void scan_file (char *filename);
-static void scan_lisp_file (const char *filename, const char *mode);
 static void scan_c_file (char *filename, const char *mode);
 static void scan_c_stream (FILE *infile);
 static void start_globals (void);
@@ -238,16 +234,9 @@ put_filename (char *filename)
 static void
 scan_file (char *filename)
 {
-  ptrdiff_t len = strlen (filename);
-
   if (!generate_globals)
     put_filename (filename);
-  if (len > 4 && !strcmp (filename + len - 4, ".elc"))
-    scan_lisp_file (filename, "rb");
-  else if (len > 3 && !strcmp (filename + len - 3, ".el"))
-    scan_lisp_file (filename, "r");
-  else
-    scan_c_file (filename, "r");
+  scan_c_file (filename, "r");
 }
 
 static void
@@ -1225,453 +1214,4 @@ scan_c_stream (FILE *infile)
     fatal ("read error");
 }
 
-/* Read a file of Lisp code, compiled or interpreted.
- Looks for
-  (defun NAME ARGS DOCSTRING ...)
-  (defmacro NAME ARGS DOCSTRING ...)
-  (defsubst NAME ARGS DOCSTRING ...)
-  (autoload (quote NAME) FILE DOCSTRING ...)
-  (defvar NAME VALUE DOCSTRING)
-  (defconst NAME VALUE DOCSTRING)
-  (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
-  (fset (quote NAME) #[... DOCSTRING ...])
-  (defalias (quote NAME) #[... DOCSTRING ...])
-  (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
- starting in column zero.
- (quote NAME) may appear as 'NAME as well.
-
- We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
- When we find that, we save it for the following defining-form,
- and we use that instead of reading a doc string within that defining-form.
-
- For defvar, defconst, and fset we skip to the docstring with a kludgy
- formatting convention: all docstrings must appear on the same line as the
- initial open-paren (the one in column zero) and must contain a backslash
- and a newline immediately after the initial double-quote.  No newlines
- must appear between the beginning of the form and the first double-quote.
- For defun, defmacro, and autoload, we know how to skip over the
- arglist, but the doc string must still have a backslash and newline
- immediately after the double quote.
- The only source files that must follow this convention are preloaded
- uncompiled ones like loaddefs.el; aside from that, it is always the .elc
- file that we should look at, and they are no problem because byte-compiler
- output follows this convention.
- The NAME and DOCSTRING are output.
- NAME is preceded by `F' for a function or `V' for a variable.
- An entry is output only if DOCSTRING has \ newline just after the opening ".
- */
-
-static void
-skip_white (FILE *infile)
-{
-  int c;
-  do
-    c = getc (infile);
-  while (c_isspace (c));
-
-  ungetc (c, infile);
-}
-
-static void
-read_lisp_symbol (FILE *infile, char *buffer)
-{
-  int c;
-  char *fillp = buffer;
-
-  skip_white (infile);
-  while (true)
-    {
-      c = getc (infile);
-      if (c == '\\')
-       {
-         c = getc (infile);
-         if (c < 0)
-           return;
-         *fillp++ = c;
-       }
-      else if (c_isspace (c) || c == '(' || c == ')' || c < 0)
-       {
-         ungetc (c, infile);
-         *fillp = 0;
-         break;
-       }
-      else
-       *fillp++ = c;
-    }
-
-  if (! buffer[0])
-    fprintf (stderr, "## expected a symbol, got '%c'\n", c);
-
-  skip_white (infile);
-}
-
-static bool
-search_lisp_doc_at_eol (FILE *infile)
-{
-  int c = 0, c1 = 0, c2 = 0;
-
-  /* Skip until the end of line; remember two previous chars.  */
-  while (c != '\n' && c != '\r' && c != EOF)
-    {
-      c2 = c1;
-      c1 = c;
-      c = getc (infile);
-    }
-
-  /* If two previous characters were " and \,
-     this is a doc string.  Otherwise, there is none.  */
-  if (c2 != '"' || c1 != '\\')
-    {
-#ifdef DEBUG
-      fprintf (stderr, "## non-docstring found\n");
-#endif
-      ungetc (c, infile);
-      return false;
-    }
-  return true;
-}
-
-#define DEF_ELISP_FILE(fn)  { #fn, sizeof(#fn) - 1 }
-
-static void
-scan_lisp_file (const char *filename, const char *mode)
-{
-  FILE *infile;
-  int c;
-  char *saved_string = 0;
-  /* These are the only files that are loaded uncompiled, and must
-     follow the conventions of the doc strings expected by this
-     function.  These conventions are automatically followed by the
-     byte compiler when it produces the .elc files.  */
-  static struct {
-    const char *fn;
-    int fl;
-  } const uncompiled[] = {
-    DEF_ELISP_FILE (loaddefs.el),
-    DEF_ELISP_FILE (loadup.el),
-    DEF_ELISP_FILE (charprop.el),
-    DEF_ELISP_FILE (cp51932.el),
-    DEF_ELISP_FILE (eucjp-ms.el)
-  };
-  int i;
-  int flen = strlen (filename);
-
-  if (generate_globals)
-    fatal ("scanning lisp file when -g specified");
-  if (flen > 3 && !strcmp (filename + flen - 3, ".el"))
-    {
-      bool match = false;
-      for (i = 0; i < sizeof (uncompiled) / sizeof (uncompiled[0]); i++)
-       {
-         if (uncompiled[i].fl <= flen
-             && !strcmp (filename + flen - uncompiled[i].fl, uncompiled[i].fn)
-             && (flen == uncompiled[i].fl
-                 || IS_SLASH (filename[flen - uncompiled[i].fl - 1])))
-           {
-             match = true;
-             break;
-           }
-       }
-      if (!match)
-       fatal ("uncompiled lisp file %s is not supported", filename);
-    }
-
-  infile = fopen (filename, mode);
-  if (infile == NULL)
-    {
-      perror (filename);
-      exit (EXIT_FAILURE);
-    }
-
-  c = '\n';
-  while (!feof (infile))
-    {
-      char buffer[BUFSIZ];
-      char type;
-
-      /* If not at end of line, skip till we get to one.  */
-      if (c != '\n' && c != '\r')
-       {
-         c = getc (infile);
-         continue;
-       }
-      /* Skip the line break.  */
-      while (c == '\n' || c == '\r')
-       c = getc (infile);
-      /* Detect a dynamic doc string and save it for the next expression.  */
-      if (c == '#')
-       {
-         c = getc (infile);
-         if (c == '@')
-           {
-             ptrdiff_t length = 0;
-             ptrdiff_t i;
-
-             /* Read the length.  */
-             while ((c = getc (infile),
-                     c_isdigit (c)))
-               {
-                 if (INT_MULTIPLY_WRAPV (length, 10, &length)
-                     || INT_ADD_WRAPV (length, c - '0', &length)
-                     || SIZE_MAX < length)
-                   memory_exhausted ();
-               }
-
-             if (length <= 1)
-               fatal ("invalid dynamic doc string length");
-
-             if (c != ' ')
-               fatal ("space not found after dynamic doc string length");
-
-             /* The next character is a space that is counted in the length
-                but not part of the doc string.
-                We already read it, so just ignore it.  */
-             length--;
-
-             /* Read in the contents.  */
-             free (saved_string);
-             saved_string = xmalloc (length);
-             for (i = 0; i < length; i++)
-               saved_string[i] = getc (infile);
-             /* The last character is a ^_.
-                That is needed in the .elc file
-                but it is redundant in DOC.  So get rid of it here.  */
-             saved_string[length - 1] = 0;
-             /* Skip the line break.  */
-             while (c == '\n' || c == '\r')
-               c = getc (infile);
-             /* Skip the following line.  */
-             while (! (c == '\n' || c == '\r' || c < 0))
-               c = getc (infile);
-           }
-         continue;
-       }
-
-      if (c != '(')
-       continue;
-
-      read_lisp_symbol (infile, buffer);
-
-      if (! strcmp (buffer, "defun")
-         || ! strcmp (buffer, "defmacro")
-         || ! strcmp (buffer, "defsubst"))
-       {
-         type = 'F';
-         read_lisp_symbol (infile, buffer);
-
-         /* Skip the arguments: either "nil" or a list in parens.  */
-
-         c = getc (infile);
-         if (c == 'n') /* nil */
-           {
-             if ((c = getc (infile)) != 'i'
-                 || (c = getc (infile)) != 'l')
-               {
-                 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
-                          buffer, filename);
-                 continue;
-               }
-           }
-         else if (c != '(')
-           {
-             fprintf (stderr, "## unparsable arglist in %s (%s)\n",
-                      buffer, filename);
-             continue;
-           }
-         else
-           while (! (c == ')' || c < 0))
-             c = getc (infile);
-         skip_white (infile);
-
-         /* If the next three characters aren't `dquote bslash newline'
-            then we're not reading a docstring.
-          */
-         if ((c = getc (infile)) != '"'
-             || (c = getc (infile)) != '\\'
-             || ((c = getc (infile)) != '\n' && c != '\r'))
-           {
-#ifdef DEBUG
-             fprintf (stderr, "## non-docstring in %s (%s)\n",
-                      buffer, filename);
-#endif
-             continue;
-           }
-       }
-
-      /* defcustom can only occur in uncompiled Lisp files.  */
-      else if (! strcmp (buffer, "defvar")
-              || ! strcmp (buffer, "defconst")
-              || ! strcmp (buffer, "defcustom"))
-       {
-         type = 'V';
-         read_lisp_symbol (infile, buffer);
-
-         if (saved_string == 0)
-           if (!search_lisp_doc_at_eol (infile))
-             continue;
-       }
-
-      else if (! strcmp (buffer, "custom-declare-variable")
-              || ! strcmp (buffer, "defvaralias")
-              )
-       {
-         type = 'V';
-
-         c = getc (infile);
-         if (c == '\'')
-           read_lisp_symbol (infile, buffer);
-         else
-           {
-             if (c != '(')
-               {
-                 fprintf (stderr,
-                          "## unparsable name in custom-declare-variable in %s\n",
-                          filename);
-                 continue;
-               }
-             read_lisp_symbol (infile, buffer);
-             if (strcmp (buffer, "quote"))
-               {
-                 fprintf (stderr,
-                          "## unparsable name in custom-declare-variable in %s\n",
-                          filename);
-                 continue;
-               }
-             read_lisp_symbol (infile, buffer);
-             c = getc (infile);
-             if (c != ')')
-               {
-                 fprintf (stderr,
-                          "## unparsable quoted name in custom-declare-variable in %s\n",
-                          filename);
-                 continue;
-               }
-           }
-
-         if (saved_string == 0)
-           if (!search_lisp_doc_at_eol (infile))
-             continue;
-       }
-
-      else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
-       {
-         type = 'F';
-
-         c = getc (infile);
-         if (c == '\'')
-           read_lisp_symbol (infile, buffer);
-         else
-           {
-             if (c != '(')
-               {
-                 fprintf (stderr, "## unparsable name in fset in %s\n",
-                          filename);
-                 continue;
-               }
-             read_lisp_symbol (infile, buffer);
-             if (strcmp (buffer, "quote"))
-               {
-                 fprintf (stderr, "## unparsable name in fset in %s\n",
-                          filename);
-                 continue;
-               }
-             read_lisp_symbol (infile, buffer);
-             c = getc (infile);
-             if (c != ')')
-               {
-                 fprintf (stderr,
-                          "## unparsable quoted name in fset in %s\n",
-                          filename);
-                 continue;
-               }
-           }
-
-         if (saved_string == 0)
-           if (!search_lisp_doc_at_eol (infile))
-             continue;
-       }
-
-      else if (! strcmp (buffer, "autoload"))
-       {
-         type = 'F';
-         c = getc (infile);
-         if (c == '\'')
-           read_lisp_symbol (infile, buffer);
-         else
-           {
-             if (c != '(')
-               {
-                 fprintf (stderr, "## unparsable name in autoload in %s\n",
-                          filename);
-                 continue;
-               }
-             read_lisp_symbol (infile, buffer);
-             if (strcmp (buffer, "quote"))
-               {
-                 fprintf (stderr, "## unparsable name in autoload in %s\n",
-                          filename);
-                 continue;
-               }
-             read_lisp_symbol (infile, buffer);
-             c = getc (infile);
-             if (c != ')')
-               {
-                 fprintf (stderr,
-                          "## unparsable quoted name in autoload in %s\n",
-                          filename);
-                 continue;
-               }
-           }
-         skip_white (infile);
-         c = getc (infile);
-         if (c != '\"')
-           {
-             fprintf (stderr, "## autoload of %s unparsable (%s)\n",
-                      buffer, filename);
-             continue;
-           }
-         read_c_string_or_comment (infile, 0, false, 0);
-
-         if (saved_string == 0)
-           if (!search_lisp_doc_at_eol (infile))
-             continue;
-       }
-
-#ifdef DEBUG
-      else if (! strcmp (buffer, "if")
-              || ! strcmp (buffer, "byte-code"))
-       continue;
-#endif
-
-      else
-       {
-#ifdef DEBUG
-         fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
-                  buffer, filename);
-#endif
-         continue;
-       }
-
-      /* At this point, we should either use the previous dynamic doc string in
-        saved_string or gobble a doc string from the input file.
-        In the latter case, the opening quote (and leading backslash-newline)
-        have already been read.  */
-
-      printf ("\037%c%s\n", type, buffer);
-      if (saved_string)
-       {
-         fputs (saved_string, stdout);
-         /* Don't use one dynamic doc string twice.  */
-         free (saved_string);
-         saved_string = 0;
-       }
-      else
-       read_c_string_or_comment (infile, 1, false, 0);
-    }
-  free (saved_string);
-  if (ferror (infile) || fclose (infile) != 0)
-    fatal ("%s: read error", filename);
-}
-
-
 /* make-docfile.c ends here */
diff --git a/lisp/startup.el b/lisp/startup.el
index b79467339b..727432a4cb 100644
--- a/lisp/startup.el
+++ b/lisp/startup.el
@@ -1056,6 +1056,9 @@ startup--load-user-init-file
     (when debug-on-error-should-be-set
       (setq debug-on-error debug-on-error-from-init-file))))
 
+(defvar lisp-directory nil
+  "Directory containing the Lisp files that come with GNU Emacs.")
+
 (defun command-line ()
   "A subroutine of `normal-top-level'.
 Amongst another things, it parses the command-line arguments."
@@ -1087,8 +1090,7 @@ command-line
   (let ((simple-file-name
         ;; Look for simple.el or simple.elc and use their directory
         ;; as the place where all Lisp files live.
-        (locate-file "simple" load-path (get-load-suffixes)))
-       lisp-dir)
+        (locate-file "simple" load-path (get-load-suffixes))))
     ;; Don't abort if simple.el cannot be found, but print a warning.
     ;; Although in most usage we are going to cryptically abort a moment
     ;; later anyway, due to missing required bidi data files (eg bug#13430).
@@ -1104,12 +1106,13 @@ command-line
          (unless (file-readable-p lispdir)
            (princ (format "Lisp directory %s not readable?" lispdir))
            (terpri)))
-      (setq lisp-dir (file-truename (file-name-directory simple-file-name)))
+      (setq lisp-directory
+            (file-truename (file-name-directory simple-file-name)))
       (setq load-history
            (mapcar (lambda (elt)
                      (if (and (stringp (car elt))
                               (not (file-name-absolute-p (car elt))))
-                         (cons (concat lisp-dir
+                         (cons (concat lisp-directory
                                        (car elt))
                                (cdr elt))
                        elt))
diff --git a/src/Makefile.in b/src/Makefile.in
index ea4a7207ff..76e4675c2a 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -642,13 +642,11 @@ $(pdmp):
 ## for the first time, this prevents any variation between configurations
 ## in the contents of the DOC file.
 ##
-$(etc)/DOC: lisp.mk $(libsrc)/make-docfile$(EXEEXT) $(doc_obj) $(lisp)
+$(etc)/DOC: $(libsrc)/make-docfile$(EXEEXT) $(doc_obj)
        $(AM_V_GEN)$(MKDIR_P) $(etc)
        $(AM_V_at)rm -f $(etc)/DOC
        $(AM_V_at)$(libsrc)/make-docfile -d $(srcdir) \
          $(SOME_MACHINE_OBJECTS) $(doc_obj) > $(etc)/DOC
-       $(AM_V_at)$(libsrc)/make-docfile -a $(etc)/DOC -d $(lispsource) \
-         $(shortlisp)
 
 $(libsrc)/make-docfile$(EXEEXT) $(libsrc)/make-fingerprint$(EXEEXT): \
   $(lib)/libgnu.a
diff --git a/src/doc.c b/src/doc.c
index 6be023bb93..17601c700e 100644
--- a/src/doc.c
+++ b/src/doc.c
@@ -84,16 +84,19 @@ get_doc_string (Lisp_Object filepos, bool unibyte, bool definition)
   char *from, *to, *name, *p, *p1;
   Lisp_Object file, pos;
   ptrdiff_t count = SPECPDL_INDEX ();
+  Lisp_Object dir;
   USE_SAFE_ALLOCA;
 
   if (FIXNUMP (filepos))
     {
       file = Vdoc_file_name;
+      dir = Vdoc_directory;
       pos = filepos;
     }
   else if (CONSP (filepos))
     {
       file = XCAR (filepos);
+      dir = Fsymbol_value (intern ("lisp-directory"));
       pos = XCDR (filepos);
     }
   else
@@ -101,7 +104,7 @@ get_doc_string (Lisp_Object filepos, bool unibyte, bool definition)
 
   EMACS_INT position = eabs (XFIXNUM (pos));
 
-  if (!STRINGP (Vdoc_directory))
+  if (!STRINGP (dir))
     return Qnil;
 
   if (!STRINGP (file))
@@ -113,7 +116,7 @@ get_doc_string (Lisp_Object filepos, bool unibyte, bool definition)
   Lisp_Object tem = Ffile_name_absolute_p (file);
   file = ENCODE_FILE (file);
   Lisp_Object docdir
-    = NILP (tem) ? ENCODE_FILE (Vdoc_directory) : empty_unibyte_string;
+    = NILP (tem) ? ENCODE_FILE (dir) : empty_unibyte_string;
   ptrdiff_t docdir_sizemax = SBYTES (docdir) + 1;
   if (will_dump_p ())
     docdir_sizemax = max (docdir_sizemax, sizeof sibling_etc);
diff --git a/src/lread.c b/src/lread.c
index 4992576414..55b3d473dc 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -1545,7 +1545,7 @@ Return t if the file exists and loads successfully.  */)
        message_with_string ("Loading %s...", file, 1);
     }
 
-  specbind (Qload_file_name, found_eff);
+  specbind (Qload_file_name, hist_file_name);
   specbind (Qload_true_file_name, found);
   specbind (Qinhibit_file_name_operation, Qnil);
   specbind (Qload_in_progress, Qt);
@@ -3224,23 +3224,6 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
                    Fstring_as_unibyte (AREF (tmp, COMPILED_BYTECODE)));
            }
 
-         if (COMPILED_DOC_STRING < ASIZE (tmp)
-             && EQ (AREF (tmp, COMPILED_DOC_STRING), make_fixnum (0)))
-           {
-             /* read_list found a docstring like '(#$ . 5521)' and treated it
-                as 0.  This placeholder 0 would lead to accidental sharing in
-                purecopy's hash-consing, so replace it with a (hopefully)
-                unique integer placeholder, which is negative so that it is
-                not confused with a DOC file offset (the USE_LSB_TAG shift
-                relies on the fact that VALMASK is one bit narrower than
-                INTMASK).  Eventually Snarf-documentation should replace the
-                placeholder with the actual docstring.  */
-             verify (INTMASK & ~VALMASK);
-             EMACS_UINT hash = ((XHASH (tmp) >> USE_LSB_TAG)
-                                | (INTMASK - INTMASK / 2));
-             ASET (tmp, COMPILED_DOC_STRING, make_ufixnum (hash));
-           }
-
          XSETPVECTYPE (vec, PVEC_COMPILED);
          return tmp;
        }
@@ -4208,31 +4191,13 @@ read_list (bool flag, Lisp_Object readcharfun)
 
       /* While building, if the list starts with #$, treat it specially.  */
       if (EQ (elt, Vload_file_name)
-         && ! NILP (elt)
-         && !NILP (Vpurify_flag))
+         && ! NILP (elt))
        {
-         if (NILP (Vdoc_file_name))
-           /* We have not yet called Snarf-documentation, so assume
-              this file is described in the DOC file
-              and Snarf-documentation will fill in the right value later.
-              For now, replace the whole list with 0.  */
-           doc_reference = 1;
-         else
-           /* We have already called Snarf-documentation, so make a relative
-              file name for this file, so it can be found properly
-              in the installed Lisp directory.
-              We don't use Fexpand_file_name because that would make
-              the directory absolute now.  */
-           {
-             AUTO_STRING (dot_dot_lisp, "../lisp/");
-             elt = concat2 (dot_dot_lisp, Ffile_name_nondirectory (elt));
-           }
+         if (!NILP (Vpurify_flag))
+           doc_reference = 0;
+         else if (load_force_doc_strings)
+           doc_reference = 2;
        }
-      else if (EQ (elt, Vload_file_name)
-              && ! NILP (elt)
-              && load_force_doc_strings)
-       doc_reference = 2;
-
       if (ch)
        {
          if (flag > 0)
@@ -4253,8 +4218,6 @@ read_list (bool flag, Lisp_Object readcharfun)
 
              if (ch == ')')
                {
-                 if (doc_reference == 1)
-                   return make_fixnum (0);
                  if (doc_reference == 2 && FIXNUMP (XCDR (val)))
                    {
                      char *saved = NULL;




reply via email to

[Prev in Thread] Current Thread [Next in Thread]