bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PROPOSED 1/4] memset_explicit: new module


From: Paul Eggert
Subject: [PROPOSED 1/4] memset_explicit: new module
Date: Sun, 27 Nov 2022 20:55:40 -0800

* doc/posix-functions/memset_explicit.texi, lib/memset_explicit.c:
* m4/memset_explicit.m4, modules/memset_explicit:
* modules/memset_explicit-tests, tests/test-memset_explicit.c:
New files.
* lib/string.in.h (memset_explict): New decl.
* m4/string_h.m4 (gl_STRING_H, gl_STRING_H_REQUIRE_DEFAULTS)
(gl_STRING_H_DEFAULTS):
* modules/string (string.h):
Support memset_explicit.
---
 ChangeLog                                |  11 ++
 MODULES.html.sh                          |   8 +
 doc/gnulib.texi                          |   2 +
 doc/posix-functions/memset_explicit.texi |  43 +++++
 lib/memset_explicit.c                    |  62 +++++++
 lib/string.in.h                          |  17 ++
 m4/memset_explicit.m4                    |  20 +++
 m4/string_h.m4                           |   6 +-
 modules/memset_explicit                  |  31 ++++
 modules/memset_explicit-tests            |  14 ++
 modules/string                           |   2 +
 tests/test-memset_explicit.c             | 203 +++++++++++++++++++++++
 12 files changed, 417 insertions(+), 2 deletions(-)
 create mode 100644 doc/posix-functions/memset_explicit.texi
 create mode 100644 lib/memset_explicit.c
 create mode 100644 m4/memset_explicit.m4
 create mode 100644 modules/memset_explicit
 create mode 100644 modules/memset_explicit-tests
 create mode 100644 tests/test-memset_explicit.c

diff --git a/ChangeLog b/ChangeLog
index 14283da87e..80f8d2c9cd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,16 @@
 2022-11-27  Paul Eggert  <eggert@cs.ucla.edu>
 
+       memset_explicit: new module
+       * doc/posix-functions/memset_explicit.texi, lib/memset_explicit.c:
+       * m4/memset_explicit.m4, modules/memset_explicit:
+       * modules/memset_explicit-tests, tests/test-memset_explicit.c:
+       New files.
+       * lib/string.in.h (memset_explict): New decl.
+       * m4/string_h.m4 (gl_STRING_H, gl_STRING_H_REQUIRE_DEFAULTS)
+       (gl_STRING_H_DEFAULTS):
+       * modules/string (string.h):
+       Support memset_explicit.
+
        explicit_bzero: add poison
        * m4/string_h.m4 (gl_STRING_H): Poison explicit_bzero.
        This was inadvertently omitted when explicit_bzero was added.
diff --git a/MODULES.html.sh b/MODULES.html.sh
index 02cfe32f06..7222459c69 100755
--- a/MODULES.html.sh
+++ b/MODULES.html.sh
@@ -2386,6 +2386,14 @@ func_all_modules ()
   func_module stdckdint
   func_end_table
 
+  element="String handling <string.h>"
+  element=`printf "%s" "$element" | sed -e "$sed_lt" -e "$sed_gt"`
+  func_section_wrap c23_ext_string
+  func_wrap H3
+  func_echo "$element"
+
+  func_module memset_explicit
+
   element="Support for GNU multiple precision arithmetic"
   func_section_wrap gmp
   func_wrap H2
diff --git a/doc/gnulib.texi b/doc/gnulib.texi
index e1debc9742..eeb9b81f8a 100644
--- a/doc/gnulib.texi
+++ b/doc/gnulib.texi
@@ -1744,6 +1744,7 @@ problems are not worked around by Gnulib.
 * memcpy::
 * memmove::
 * memset::
+* memset_explicit::
 * mkdir::
 * mkdirat::
 * mkdtemp::
@@ -3036,6 +3037,7 @@ problems are not worked around by Gnulib.
 @include posix-functions/memcpy.texi
 @include posix-functions/memmove.texi
 @include posix-functions/memset.texi
+@include posix-functions/memset_explicit.texi
 @include posix-functions/mkdir.texi
 @include posix-functions/mkdirat.texi
 @include posix-functions/mkdtemp.texi
diff --git a/doc/posix-functions/memset_explicit.texi 
b/doc/posix-functions/memset_explicit.texi
new file mode 100644
index 0000000000..528cee4e2b
--- /dev/null
+++ b/doc/posix-functions/memset_explicit.texi
@@ -0,0 +1,43 @@
+@node memset_explicit
+@subsection @code{memset_explicit}
+@findex memset_explicit
+
+Documentation:
+@itemize
+@item
+@ifinfo
+@ref{Erasing Sensitive Data,,Erasing Sensitive Data,libc},
+@end ifinfo
+@ifnotinfo
+@url{https://www.gnu.org/software/libc/manual/html_node/Erasing-Sensitive-Data.html},
+@end ifnotinfo
+@c Not yet present:
+@c @item
+@c 
@uref{https://www.kernel.org/doc/man-pages/online/pages/man3/memset_explicit.3.html,,man
 memset_explicit}.
+@end itemize
+
+Gnulib module: memset_explicit
+
+The @code{memset_explicit} function is an approximation to what is
+needed, and does not suffice in general to erase information.
+Although calling @code{memset_explicit} should clear the memory in
+question, the information that was in memory may still be available
+elsewhere on the machine.  Proper implementation of information
+erasure requires support from levels below C code.
+
+Portability problems fixed by Gnulib:
+@itemize
+@item
+This function is missing on some platforms:
+glibc 2.36, FreeBSD 13.1, NetBSD 9.3, OpenBSD 7.2, macOS 13, Solaris 11.4, 
Android 13,
+and many other systems.
+@end itemize
+
+Portability problems not fixed by Gnulib:
+@itemize
+@item
+Although the module's implementation should set the memory on
+platforms compatible with GCC and on platforms using traditional
+linkers, it may not set the memory on non-GCC platforms that use
+whole-program optimization.
+@end itemize
diff --git a/lib/memset_explicit.c b/lib/memset_explicit.c
new file mode 100644
index 0000000000..27fb16a9ba
--- /dev/null
+++ b/lib/memset_explicit.c
@@ -0,0 +1,62 @@
+/* Erase sensitive data from memory.
+   Copyright 2022 Free Software Foundation, Inc.
+
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   This file is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+/* memset_s need this define */
+#if HAVE_MEMSET_S
+# define __STDC_WANT_LIB_EXT1__ 1
+#endif
+
+#include <string.h>
+
+#if defined _WIN32 && !defined __CYGWIN__
+# define WIN32_LEAN_AND_MEAN
+# include <windows.h>
+#endif
+
+/* Set S's bytes to C, where S has LEN bytes.  The compiler will not
+   optimize effects away, even if S is dead after the call.  */
+void *
+memset_explicit (void *s, int c, size_t len)
+{
+#if defined _WIN32 && !defined __CYGWIN__
+  if (!c)
+    return SecureZeroMemory (s, len);
+#endif
+#if HAVE_EXPLICIT_MEMSET
+  return explicit_memset (s, '\0', len);
+#elif HAVE_MEMSET_S
+  (void) memset_s (s, len, '\0', len);
+  return s;
+#elif defined __GNUC__ && !defined __clang__
+  return memset (s, c, len);
+  /* Compiler barrier.  */
+  __asm__ volatile ("" ::: "memory");
+#elif defined __clang__
+  return memset (s, c, len);
+  /* Compiler barrier.  */
+  /* With asm ("" ::: "memory") LLVM analyzes uses of 's' and finds that the
+     whole thing is dead and eliminates it.  Use 'g' to work around this
+     problem.  See <https://bugs.llvm.org/show_bug.cgi?id=15495#c11>.  */
+  __asm__ volatile ("" : : "g"(s) : "memory");
+#else
+  /* Invoke memset through a volatile function pointer.  This defeats compiler
+     optimizations.  */
+  void * (* const volatile volatile_memset) (void *, int, size_t) = memset;
+  return volatile_memset (s, c, len);
+#endif
+}
diff --git a/lib/string.in.h b/lib/string.in.h
index e56f6db0c9..21356914e2 100644
--- a/lib/string.in.h
+++ b/lib/string.in.h
@@ -347,6 +347,23 @@ _GL_WARN_ON_USE (memrchr, "memrchr is unportable - "
 # endif
 #endif
 
+/* Overwrite a block of memory.  The compiler will not optimize
+   effects away, even if the block is dead after the call.  */
+#if @GNULIB_MEMSET_EXPLICIT@
+# if ! @HAVE_MEMSET_EXPLICIT@
+_GL_FUNCDECL_SYS (memset_explicit, void *,
+                  (void *__dest, int __c, size_t __n) _GL_ARG_NONNULL ((1)));
+# endif
+_GL_CXXALIAS_SYS (memset_explicit, void *, (void *__dest, int __c, size_t 
__n));
+_GL_CXXALIASWARN (memset_explicit);
+#elif defined GNULIB_POSIXCHECK
+# undef memset_explicit
+# if HAVE_RAW_DECL_MEMSET_EXPLICIT
+_GL_WARN_ON_USE (memset_explicit, "memset_explicit is unportable - "
+                 "use gnulib module memset_explicit for portability");
+# endif
+#endif
+
 /* Find the first occurrence of C in S.  More efficient than
    memchr(S,C,N), at the expense of undefined behavior if C does not
    occur within N bytes.  */
diff --git a/m4/memset_explicit.m4 b/m4/memset_explicit.m4
new file mode 100644
index 0000000000..3d4dcb3095
--- /dev/null
+++ b/m4/memset_explicit.m4
@@ -0,0 +1,20 @@
+dnl Copyright 2022 Free Software Foundation, Inc.
+dnl This file is free software; the Free Software Foundation
+dnl gives unlimited permission to copy and/or distribute it,
+dnl with or without modifications, as long as this notice is preserved.
+
+AC_DEFUN([gl_FUNC_MEMSET_EXPLICIT],
+[
+  AC_REQUIRE([gl_STRING_H_DEFAULTS])
+
+  AC_CHECK_FUNCS_ONCE([memset_explicit])
+  if test $ac_cv_func_memset_explicit = no; then
+    HAVE_MEMSET_EXPLICIT=0
+  fi
+])
+
+AC_DEFUN([gl_PREREQ_MEMSET_EXPLICIT],
+[
+  AC_CHECK_FUNCS([explicit_memset])
+  AC_CHECK_FUNCS_ONCE([memset_s])
+])
diff --git a/m4/string_h.m4 b/m4/string_h.m4
index df44e85e59..6069d4a752 100644
--- a/m4/string_h.m4
+++ b/m4/string_h.m4
@@ -5,7 +5,7 @@
 # gives unlimited permission to copy and/or distribute it,
 # with or without modifications, as long as this notice is preserved.
 
-# serial 34
+# serial 35
 
 # Written by Paul Eggert.
 
@@ -21,7 +21,7 @@ AC_DEFUN_ONCE([gl_STRING_H],
   dnl guaranteed by C89.
   gl_WARN_ON_USE_PREPARE([[#include <string.h>
     ]],
-    [explicit_bzero ffsl ffsll memmem mempcpy memrchr
+    [explicit_bzero ffsl ffsll memmem mempcpy memrchr memset_explicit
      rawmemchr stpcpy stpncpy strchrnul
      strdup strncat strndup strnlen strpbrk strsep strcasestr strtok_r
      strerror_r strerrorname_np sigabbrev_np sigdescr_np strsignal strverscmp])
@@ -55,6 +55,7 @@ AC_DEFUN([gl_STRING_H_REQUIRE_DEFAULTS],
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MEMMEM])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MEMPCPY])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MEMRCHR])
+    gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_MEMSET_EXPLICIT])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_RAWMEMCHR])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_STPCPY])
     gl_MODULE_INDICATOR_INIT_VARIABLE([GNULIB_STPNCPY])
@@ -108,6 +109,7 @@ AC_DEFUN([gl_STRING_H_DEFAULTS],
   HAVE_FFSLL=1;                 AC_SUBST([HAVE_FFSLL])
   HAVE_DECL_MEMMEM=1;           AC_SUBST([HAVE_DECL_MEMMEM])
   HAVE_MEMPCPY=1;               AC_SUBST([HAVE_MEMPCPY])
+  HAVE_MEMSET_EXPLICIT=1;       AC_SUBST([HAVE_MEMSET_EXPLICIT])
   HAVE_DECL_MEMRCHR=1;          AC_SUBST([HAVE_DECL_MEMRCHR])
   HAVE_RAWMEMCHR=1;             AC_SUBST([HAVE_RAWMEMCHR])
   HAVE_STPCPY=1;                AC_SUBST([HAVE_STPCPY])
diff --git a/modules/memset_explicit b/modules/memset_explicit
new file mode 100644
index 0000000000..3290bd3679
--- /dev/null
+++ b/modules/memset_explicit
@@ -0,0 +1,31 @@
+Description:
+Erase sensitive data from memory.
+
+Files:
+lib/memset_explicit.c
+m4/memset_explicit.m4
+
+Depends-on:
+string
+
+configure.ac:
+gl_FUNC_MEMSET_EXPLICIT
+gl_CONDITIONAL([GL_COND_OBJ_MEMSET_EXPLICIT], [test $HAVE_MEMSET_EXPLICIT = 0])
+AM_COND_IF([GL_COND_OBJ_MEMSET_EXPLICIT], [
+  gl_PREREQ_MEMSET_EXPLICIT
+])
+gl_STRING_MODULE_INDICATOR([memset_explicit])
+
+Makefile.am:
+if GL_COND_OBJ_MEMSET_EXPLICIT
+lib_SOURCES += memset_explicit.c
+endif
+
+Include:
+<string.h>
+
+License:
+LGPLv2+
+
+Maintainer:
+all
diff --git a/modules/memset_explicit-tests b/modules/memset_explicit-tests
new file mode 100644
index 0000000000..3bb133fc11
--- /dev/null
+++ b/modules/memset_explicit-tests
@@ -0,0 +1,14 @@
+Files:
+tests/test-memset_explicit.c
+tests/signature.h
+tests/macros.h
+
+Depends-on:
+stdint
+vma-iter
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-memset_explicit
+check_PROGRAMS += test-memset_explicit
diff --git a/modules/string b/modules/string
index 6711ed74ab..70bf2b869f 100644
--- a/modules/string
+++ b/modules/string
@@ -55,6 +55,7 @@ string.h: string.in.h $(top_builddir)/config.status 
$(CXXDEFS_H) $(ARG_NONNULL_H
              -e 's/@''GNULIB_MEMMEM''@/$(GNULIB_MEMMEM)/g' \
              -e 's/@''GNULIB_MEMPCPY''@/$(GNULIB_MEMPCPY)/g' \
              -e 's/@''GNULIB_MEMRCHR''@/$(GNULIB_MEMRCHR)/g' \
+             -e 's/@''GNULIB_MEMSET_EXPLICIT''@/$(GNULIB_MEMSET_EXPLICIT)/g' \
              -e 's/@''GNULIB_RAWMEMCHR''@/$(GNULIB_RAWMEMCHR)/g' \
              -e 's/@''GNULIB_STPCPY''@/$(GNULIB_STPCPY)/g' \
              -e 's/@''GNULIB_STPNCPY''@/$(GNULIB_STPNCPY)/g' \
@@ -86,6 +87,7 @@ string.h: string.in.h $(top_builddir)/config.status 
$(CXXDEFS_H) $(ARG_NONNULL_H
              -e 's|@''HAVE_DECL_MEMMEM''@|$(HAVE_DECL_MEMMEM)|g' \
              -e 's|@''HAVE_MEMPCPY''@|$(HAVE_MEMPCPY)|g' \
              -e 's|@''HAVE_DECL_MEMRCHR''@|$(HAVE_DECL_MEMRCHR)|g' \
+             -e 's|@''HAVE_MEMSET_EXPLICIT''@|$(HAVE_MEMSET_EXPLICIT)|g' \
              -e 's|@''HAVE_RAWMEMCHR''@|$(HAVE_RAWMEMCHR)|g' \
              -e 's|@''HAVE_STPCPY''@|$(HAVE_STPCPY)|g' \
              -e 's|@''HAVE_STPNCPY''@|$(HAVE_STPNCPY)|g' \
diff --git a/tests/test-memset_explicit.c b/tests/test-memset_explicit.c
new file mode 100644
index 0000000000..29d2730ac9
--- /dev/null
+++ b/tests/test-memset_explicit.c
@@ -0,0 +1,203 @@
+/* Test memset_explicit.
+   Copyright 2020-2022 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <bruno@clisp.org>, 2020.  */
+/* Adapted for memset_explicit by Paul Eggert <eggert@cs.ucla.edu>, 2022.  */
+
+#include <config.h>
+
+/* Specification.  */
+#include <string.h>
+
+#include "signature.h"
+SIGNATURE_CHECK (memset_explicit, void, (void *, int, size_t));
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "vma-iter.h"
+#include "macros.h"
+
+#define SECRET "xyzzy1729"
+#define SECRET_SIZE 9
+
+static char zero[SECRET_SIZE] = { 0 };
+
+/* Enable this to verify that the test is effective.  */
+#if 0
+# define memset_explicit(a, c, n)  memset (a, c, n)
+#endif
+
+/* =================== Verify operation on static memory =================== */
+
+static char stbuf[SECRET_SIZE];
+
+static void
+test_static (void)
+{
+  memcpy (stbuf, SECRET, SECRET_SIZE);
+  memset_explicit (stbuf, 0, SECRET_SIZE);
+  ASSERT (memcmp (zero, stbuf, SECRET_SIZE) == 0);
+  for (int i = 1; i <= UCHAR_MAX; i++)
+    {
+      char checkbuf[SECRET_SIZE];
+      memset (checkbuf, i, SECRET_SIZE);
+      memset_explicit (stbuf, i, SECRET_SIZE);
+      ASSERT (memcmp (checkbuf, stbuf, SECRET_SIZE) == 0);
+    }
+}
+
+/* =============== Verify operation on heap-allocated memory =============== */
+
+/* Test whether an address range is mapped in memory.  */
+#if VMA_ITERATE_SUPPORTED
+
+struct locals
+{
+  uintptr_t range_start;
+  uintptr_t range_end;
+};
+
+static int
+vma_iterate_callback (void *data, uintptr_t start, uintptr_t end,
+                      unsigned int flags)
+{
+  struct locals *lp = (struct locals *) data;
+
+  /* Remove from [range_start, range_end) the part at the beginning or at the
+     end that is covered by [start, end).  */
+  if (start <= lp->range_start && end > lp->range_start)
+    lp->range_start = (end < lp->range_end ? end : lp->range_end);
+  if (start < lp->range_end && end >= lp->range_end)
+    lp->range_end = (start > lp->range_start ? start : lp->range_start);
+
+  return 0;
+}
+
+static bool
+is_range_mapped (uintptr_t range_start, uintptr_t range_end)
+{
+  struct locals l;
+
+  l.range_start = range_start;
+  l.range_end = range_end;
+  vma_iterate (vma_iterate_callback, &l);
+  return l.range_start == l.range_end;
+}
+
+#else
+
+static bool
+is_range_mapped (uintptr_t range_start, uintptr_t range_end)
+{
+  return true;
+}
+
+#endif
+
+static void
+test_heap (void)
+{
+  char *heapbuf = (char *) malloc (SECRET_SIZE);
+  ASSERT (heapbuf);
+  uintptr_t volatile addr = (uintptr_t) heapbuf;
+  memcpy (heapbuf, SECRET, SECRET_SIZE);
+  memset_explicit (heapbuf, 0, SECRET_SIZE);
+  free (heapbuf);
+  heapbuf = (char *) addr;
+  if (is_range_mapped (addr, addr + SECRET_SIZE))
+    {
+      /* some implementation could override freed memory by canaries so
+         compare against secret */
+      ASSERT (memcmp (heapbuf, SECRET, SECRET_SIZE) != 0);
+      printf ("test_heap: address range is still mapped after free().\n");
+    }
+  else
+    printf ("test_heap: address range is unmapped after free().\n");
+}
+
+/* =============== Verify operation on stack-allocated memory =============== 
*/
+
+/* There are two passes:
+     1. Put a secret in memory and invoke memset_explicit on it.
+     2. Verify that the memory has been erased.
+   Implement them in the same function, so that they access the same memory
+   range on the stack.  Declare the local scalars to be volatile so they
+   are not optimized away.  That way, the test verifies that the compiler
+   does not eliminate a call to memset_explicit, even if data flow analysis
+   reveals that the stack area is dead at the end of the function.  */
+static bool _GL_ATTRIBUTE_NOINLINE
+do_secret_stuff (int volatile pass, char *volatile *volatile last_stackbuf)
+{
+  char stackbuf[SECRET_SIZE];
+  if (pass == 1)
+    {
+      memcpy (stackbuf, SECRET, SECRET_SIZE);
+      memset_explicit (stackbuf, 0, SECRET_SIZE);
+      *last_stackbuf = stackbuf;
+      return false;
+    }
+  else /* pass == 2 */
+    {
+      /* Use *last_stackbuf here, because stackbuf may be allocated at a
+         different address than *last_stackbuf.  This can happen
+         when the compiler splits this function into different functions,
+         one for pass == 1 and one for pass != 1.  */
+      return memcmp (zero, *last_stackbuf, SECRET_SIZE) != 0;
+    }
+}
+
+static void
+test_stack (void)
+{
+  int count = 0;
+  int repeat;
+  char *volatile last_stackbuf;
+
+  for (repeat = 2 * 1000; repeat > 0; repeat--)
+    {
+      /* This odd way of writing two consecutive statements
+           do_secret_stuff (1, &last_stackbuf);
+           count += do_secret_stuff (2, &last_stackbuf);
+         ensures that the two do_secret_stuff calls are performed with the same
+         stack pointer value, on m68k.  */
+      if ((repeat % 2) == 0)
+        do_secret_stuff (1, &last_stackbuf);
+      else
+        count += do_secret_stuff (2, &last_stackbuf);
+    }
+  /* If memset_explicit works, count is near 0.  (It may be > 0 if there were
+     some asynchronous signal invocations between the two calls of
+     do_secret_stuff.)
+     If memset_explicit is optimized away by the compiler, count comes out as
+     approximately 1000.  */
+  printf ("test_stack: count = %d\n", count);
+  ASSERT (count < 50);
+}
+
+/* ========================================================================== 
*/
+
+int
+main ()
+{
+  test_static ();
+  test_heap ();
+  test_stack ();
+
+  return 0;
+}
-- 
2.37.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]