From 4cb6b1493983e76eafa2da3aabff87510f2d63d5 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Sun, 13 Dec 2020 16:48:26 -0800 Subject: [PATCH 1/5] readlink, readlinkat: add ERANGE portability Fix some portability issues with Gnulib's readlink and readlinkat, notably mostly working around the ERANGE problem in AIX and HP-UX. * doc/posix-functions/readlink.texi: * doc/posix-functions/readlinkat.texi: ERANGE problem is mostly fixed now. Mention AIX problem with trailing / and EINVAL. Lessen differences between these two files. * lib/readlink.c (rpl_readlink): * lib/readlinkat.c (rpl_readlinkat): If stat ("FILE/", ...) reports EOVERFLOW, treat FILE/ as an existing directory. Mostly work around READLINK_TRUNCATE BUG. Lessen spurious differences between the readlink and readlinkat code. * lib/readlinkat.c (rpl_readlinkat): Fix bug where stat was used where fstatat was intended. * m4/readlink.m4 (gl_FUNC_READLINK): Rename gl_cv_func_readlink_works to gl_cv_func_readlink_trailing_slash to identify readlink problems more precisely. All uses changed. Guess no on AIX or HP-UX for this variable. Add check for whether readlink truncates results, and define new macro READLINK_TRUCATE_BUG accordingly. * m4/readlinkat.m4 (gl_FUNC_READLINKAT): Also check gl_cv_func_readlink_trailing_slash when deciding whether to replace readlinkat. * modules/readlinkat (Depends-on): Most dependencies are also needed if replacing readlinkat. fstatat is different, as it is needed only if replacing an existing readlinkat. --- ChangeLog | 29 ++++++++++++++ doc/posix-functions/readlink.texi | 41 ++++++++++++++----- doc/posix-functions/readlinkat.texi | 35 ++++++++++++----- lib/readlink.c | 48 ++++++++++++++++++----- lib/readlinkat.c | 47 ++++++++++++++++++---- m4/readlink.m4 | 61 ++++++++++++++++++++++++----- m4/readlinkat.m4 | 13 +++--- modules/readlinkat | 20 +++++----- 8 files changed, 230 insertions(+), 64 deletions(-) diff --git a/ChangeLog b/ChangeLog index b3f62960f..2ebf31995 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,32 @@ +2020-12-13 Paul Eggert + + readlink, readlinkat: add ERANGE portability + Fix some portability issues with Gnulib's readlink and readlinkat, + notably mostly working around the ERANGE problem in AIX and HP-UX. + * doc/posix-functions/readlink.texi: + * doc/posix-functions/readlinkat.texi: + ERANGE problem is mostly fixed now. Mention AIX problem with + trailing / and EINVAL. Lessen differences between these two files. + * lib/readlink.c (rpl_readlink): + * lib/readlinkat.c (rpl_readlinkat): + If stat ("FILE/", ...) reports EOVERFLOW, treat FILE/ as an + existing directory. Mostly work around READLINK_TRUNCATE BUG. + Lessen spurious differences between the readlink and readlinkat code. + * lib/readlinkat.c (rpl_readlinkat): + Fix bug where stat was used where fstatat was intended. + * m4/readlink.m4 (gl_FUNC_READLINK): + Rename gl_cv_func_readlink_works to gl_cv_func_readlink_trailing_slash + to identify readlink problems more precisely. All uses changed. + Guess no on AIX or HP-UX for this variable. + Add check for whether readlink truncates results, + and define new macro READLINK_TRUCATE_BUG accordingly. + * m4/readlinkat.m4 (gl_FUNC_READLINKAT): + Also check gl_cv_func_readlink_trailing_slash when deciding + whether to replace readlinkat. + * modules/readlinkat (Depends-on): Most dependencies are also + needed if replacing readlinkat. fstatat is different, as it + is needed only if replacing an existing readlinkat. + 2020-12-13 Bruno Haible spawn-pipe: Fix hanging processes on Windows (regression 2020-11-30). diff --git a/doc/posix-functions/readlink.texi b/doc/posix-functions/readlink.texi index b2ccceee7..d4421380a 100644 --- a/doc/posix-functions/readlink.texi +++ b/doc/posix-functions/readlink.texi @@ -9,34 +9,55 @@ Gnulib module: readlink Portability problems fixed by Gnulib: @itemize @item -Some platforms mistakenly succeed on @code{readlink("link/",buf,len)}: +This function is missing on some platforms: +mingw, MSVC 14. +@item +Some platforms mistakenly succeed on file names ending in @file{/}: FreeBSD 7.2, Solaris 9, Mac OS X 10.10. @item -On some platforms, @code{readlink} returns @code{int} instead of +On some platforms, this function returns @code{int} instead of @code{ssize_t}: glibc 2.4, FreeBSD 6.0, OpenBSD 6.7, Cygwin 1.5.x, AIX 7.1. +@end itemize + +Portability problems mostly fixed by Gnulib: +@itemize @item -This function is missing on some platforms: -mingw, MSVC 14. +On some platforms, this function fails and sets @code{errno} to +@code{ERANGE} rather than returning truncated contents: +AIX 7.2, HP-UX 11. +The Gnulib replacement normally works as POSIX requires by returning +the truncated contents. However, if the full link contents are +unreasonably large (more than 4000 bytes) the replacement clears the +entire buffer and returns the buffer size; although this is not a +complete fix, it suffices for typical callers, which ignore the buffer +contents anyway. @end itemize Portability problems not fixed by Gnulib: @itemize @item -When @code{readlink} is called on a directory: In the case of NFS mounted +This function always fails on platforms that don't support symlinks: +mingw, MSVC 14. +@item +When this function is called on a directory: In the case of NFS mounted directories, Cygwin sets @code{errno} to @code{ENOENT} or @code{EIO} instead of @code{EINVAL}. To avoid this problem, check for a directory before calling -@code{readlink}. +this function. @item -When @code{readlink} is called on a file that is not a symbolic link: +When this function is called on a file that is not a symbolic link: IRIX may set @code{errno} to @code{ENXIO} instead of @code{EINVAL}. Cygwin may set errno to @code{EACCES} instead of @code{EINVAL}. @item +When this function fails because it is called on an existing +non-directory's name concatenated to @file{/}, +it sets @code{errno} to @code{EINVAL}: +AIX 7.2. +@item Symlink contents do not always have a trailing null byte, and there is no indication if symlink contents were truncated if the return value -matches the length. Furthermore, AIX 5.1 and HP-UX 11 set -@code{errno} to @code{ERANGE} rather than returning truncated -contents, and Linux sets @code{errno} to @code{EINVAL} if the +matches the length. Furthermore, +Linux sets @code{errno} to @code{EINVAL} if the requested length is zero. Use the gnulib module @code{areadlink} for improved ability to read symlink contents. @end itemize diff --git a/doc/posix-functions/readlinkat.texi b/doc/posix-functions/readlinkat.texi index 246894605..2cd2e7be5 100644 --- a/doc/posix-functions/readlinkat.texi +++ b/doc/posix-functions/readlinkat.texi @@ -13,13 +13,26 @@ This function is missing on some platforms: glibc 2.3.6, Mac OS X 10.9, FreeBSD 6.0, NetBSD 5.0, OpenBSD 3.8, Minix 3.1.8, AIX 5.1, HP-UX 11, IRIX 6.5, Solaris 10, Cygwin 1.5.x, mingw, MSVC 14, Android 4.4. But the replacement function is not safe to be used in libraries and is not multithread-safe. @item -Some platforms mistakenly succeed on @code{readlink("link/",buf,len)}: -Mac OS X 10.10. +Some platforms mistakenly succeed on file names ending in @file{/}: +OS X 10.10. @item -On some platforms, @code{readlinkat} returns @code{int} instead of +On some platforms, this function returns @code{int} instead of @code{ssize_t}: AIX 7.1. +@end itemize + +Portability problems mostly fixed by Gnulib: +@itemize @item +On some platforms, this function fails and sets @code{errno} to +@code{ERANGE} rather than returning truncated contents: +AIX 7.2. +The Gnulib replacement normally works as POSIX requires by returning +the truncated contents. However, if the full link contents are +unreasonably large (more than 4000 bytes) the replacement clears the +entire buffer and returns the buffer size; although this is not a +complete fix, it suffices for typical callers, which ignore the buffer +contents anyway. @end itemize Portability problems not fixed by Gnulib: @@ -28,20 +41,24 @@ Portability problems not fixed by Gnulib: This function always fails on platforms that don't support symlinks: mingw, MSVC 14. @item -When @code{readlink} is called on a directory: In the case of NFS mounted +When this function is called on a directory: In the case of NFS mounted directories, Cygwin sets @code{errno} to @code{ENOENT} or @code{EIO} instead of @code{EINVAL}. To avoid this problem, check for a directory before calling -@code{readlink}. +this function. @item -When @code{readlink} is called on a file that is not a symbolic link: +When this function is called on a file that is not a symbolic link: IRIX may set @code{errno} to @code{ENXIO} instead of @code{EINVAL}. Cygwin may set errno to @code{EACCES} instead of @code{EINVAL}. @item +When this function fails because it is called on an existing +non-directory's name concatenated to @file{/}, +it sets @code{errno} to @code{EINVAL}: +AIX 7.2. +@item Symlink contents do not always have a trailing null byte, and there is no indication if symlink contents were truncated if the return value -matches the length. Furthermore, AIX 5.1 and HP-UX 11 set -@code{errno} to @code{ERANGE} rather than returning truncated -contents, and Linux sets @code{errno} to @code{EINVAL} if the +matches the length. Furthermore, +Linux sets @code{errno} to @code{EINVAL} if the requested length is zero. Use the gnulib module @code{areadlink} for improved ability to read symlink contents. @end itemize diff --git a/lib/readlink.c b/lib/readlink.c index 4d392ef69..531913eba 100644 --- a/lib/readlink.c +++ b/lib/readlink.c @@ -1,4 +1,4 @@ -/* Stub for readlink(). +/* Read the contents of a symbolic link. Copyright (C) 2003-2007, 2009-2020 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify @@ -29,7 +29,7 @@ such as DJGPP 2.03 and mingw32. */ ssize_t -readlink (const char *name, char *buf _GL_UNUSED, +readlink (char const *file, char *buf _GL_UNUSED, size_t bufsize _GL_UNUSED) { struct stat statbuf; @@ -37,7 +37,7 @@ readlink (const char *name, char *buf _GL_UNUSED, /* In general we should use lstat() here, not stat(). But on platforms without symbolic links, lstat() - if it exists - would be equivalent to stat(), therefore we can use stat(). This saves us a configure check. */ - if (stat (name, &statbuf) >= 0) + if (stat (file, &statbuf) >= 0) errno = EINVAL; return -1; } @@ -51,24 +51,54 @@ readlink (const char *name, char *buf _GL_UNUSED, for Solaris 9. */ ssize_t -rpl_readlink (const char *name, char *buf, size_t bufsize) +rpl_readlink (char const *file, char *buf, size_t bufsize) { # if READLINK_TRAILING_SLASH_BUG - size_t len = strlen (name); - if (len && name[len - 1] == '/') + size_t file_len = strlen (file); + if (file_len && file[file_len - 1] == '/') { - /* Even if name without the slash is a symlink to a directory, + /* Even if FILE without the slash is a symlink to a directory, both lstat() and stat() must resolve the trailing slash to the directory rather than the symlink. We can therefore safely use stat() to distinguish between EINVAL and ENOTDIR/ENOENT, avoiding extra overhead of rpl_lstat(). */ struct stat st; - if (stat (name, &st) == 0) + if (stat (file, &st) == 0 || errno == EOVERFLOW) errno = EINVAL; return -1; } # endif /* READLINK_TRAILING_SLASH_BUG */ - return readlink (name, buf, bufsize); + + ssize_t r = readlink (file, buf, bufsize); + +# if READLINK_TRUNCATE_BUG + if (r < 0 && errno == ERANGE) + { + /* Try again with a bigger buffer. This is just for test cases; + real code invariably discards short reads. */ + char stackbuf[4032]; + r = readlink (file, stackbuf, sizeof stackbuf); + if (r < 0) + { + if (errno == ERANGE) + { + /* Clear the buffer, which is good enough for real code. + Thankfully, no test cases try short reads of enormous + symlinks and what would be the point anyway? */ + r = bufsize; + memset (buf, 0, r); + } + } + else + { + if (bufsize < r) + r = bufsize; + memcpy (buf, stackbuf, r); + } + } +# endif + + return r; } #endif /* HAVE_READLINK */ diff --git a/lib/readlinkat.c b/lib/readlinkat.c index 68ec65ebf..7a41208eb 100644 --- a/lib/readlinkat.c +++ b/lib/readlinkat.c @@ -28,10 +28,11 @@ #if HAVE_READLINKAT +# undef fstatat # undef readlinkat ssize_t -rpl_readlinkat (int fd, char const *file, char *buf, size_t len) +rpl_readlinkat (int fd, char const *file, char *buf, size_t bufsize) { # if READLINK_TRAILING_SLASH_BUG size_t file_len = strlen (file); @@ -40,15 +41,45 @@ rpl_readlinkat (int fd, char const *file, char *buf, size_t len) /* Even if FILE without the slash is a symlink to a directory, both lstat() and stat() must resolve the trailing slash to the directory rather than the symlink. We can therefore - safely use stat() to distinguish between EINVAL and - ENOTDIR/ENOENT, avoiding extra overhead of rpl_lstat(). */ + safely use fstatat(..., 0) to distinguish between EINVAL and + ENOTDIR/ENOENT, avoiding extra overhead of rpl_fstatat(). */ struct stat st; - if (stat (file, &st) == 0) + if (fstatat (fd, file, &st, 0) == 0 || errno == EOVERFLOW) errno = EINVAL; return -1; } # endif /* READLINK_TRAILING_SLASH_BUG */ - return readlinkat (fd, file, buf, len); + + ssize_t r = readlinkat (fd, file, buf, bufsize); + +# if READLINK_TRUNCATE_BUG + if (r < 0 && errno == ERANGE) + { + /* Try again with a bigger buffer. This is just for test cases; + real code invariably discards short reads. */ + char stackbuf[4032]; + r = readlinkat (fd, file, stackbuf, sizeof stackbuf); + if (r < 0) + { + if (errno == ERANGE) + { + /* Clear the buffer, which is good enough for real code. + Thankfully, no test cases try short reads of enormous + symlinks and what would be the point anyway? */ + r = bufsize; + memset (buf, 0, r); + } + } + else + { + if (bufsize < r) + r = bufsize; + memcpy (buf, stackbuf, r); + } + } +# endif + + return r; } #else @@ -61,7 +92,7 @@ rpl_readlinkat (int fd, char const *file, char *buf, size_t len) readlinkat worthless since readlink does not guarantee a NUL-terminated buffer. Assume this was a bug in POSIX. */ -/* Read the contents of symlink FILE into buffer BUF of size LEN, in the +/* Read the contents of symlink FILE into buffer BUF of size BUFSIZE, in the directory open on descriptor FD. If possible, do it without changing the working directory. Otherwise, resort to using save_cwd/fchdir, then readlink/restore_cwd. If either the save_cwd or the restore_cwd @@ -69,8 +100,8 @@ rpl_readlinkat (int fd, char const *file, char *buf, size_t len) # define AT_FUNC_NAME readlinkat # define AT_FUNC_F1 readlink -# define AT_FUNC_POST_FILE_PARAM_DECLS , char *buf, size_t len -# define AT_FUNC_POST_FILE_ARGS , buf, len +# define AT_FUNC_POST_FILE_PARAM_DECLS , char *buf, size_t bufsize +# define AT_FUNC_POST_FILE_ARGS , buf, bufsize # define AT_FUNC_RESULT ssize_t # include "at-func.c" # undef AT_FUNC_NAME diff --git a/m4/readlink.m4 b/m4/readlink.m4 index 9aa9e46da..c7f436a31 100644 --- a/m4/readlink.m4 +++ b/m4/readlink.m4 @@ -1,4 +1,4 @@ -# readlink.m4 serial 15 +# readlink.m4 serial 16 dnl Copyright (C) 2003, 2007, 2009-2020 Free Software Foundation, Inc. dnl This file is free software; the Free Software Foundation dnl gives unlimited permission to copy and/or distribute it, @@ -23,7 +23,7 @@ AC_DEFUN([gl_FUNC_READLINK], dnl Solaris 9 ignores trailing slash. dnl FreeBSD 7.2 dereferences only one level of links with trailing slash. AC_CACHE_CHECK([whether readlink handles trailing slash correctly], - [gl_cv_func_readlink_works], + [gl_cv_func_readlink_trailing_slash], [# We have readlink, so assume ln -s works. ln -s conftest.no-such conftest.link ln -s conftest.link conftest.lnk2 @@ -32,18 +32,22 @@ AC_DEFUN([gl_FUNC_READLINK], [[#include ]], [[char buf[20]; return readlink ("conftest.lnk2/", buf, sizeof buf) != -1;]])], - [gl_cv_func_readlink_works=yes], [gl_cv_func_readlink_works=no], + [gl_cv_func_readlink_trailing_slash=yes], + [gl_cv_func_readlink_trailing_slash=no], [case "$host_os" in - # Guess yes on Linux systems. - linux-* | linux) gl_cv_func_readlink_works="guessing yes" ;; - # Guess yes on glibc systems. - *-gnu* | gnu*) gl_cv_func_readlink_works="guessing yes" ;; - # If we don't know, obey --enable-cross-guesses. - *) gl_cv_func_readlink_works="$gl_cross_guess_normal" ;; + # Guess yes on Linux or glibc systems. + linux-* | linux | *-gnu* | gnu*) + gl_cv_func_readlink_trailing_slash="guessing yes" ;; + # Guess no on AIX or HP-UX. + aix* | hpux*) + gl_cv_func_readlink_trailing_slash="guessing no" ;; + # If we don't know, obey --enable-cross-guesses. + *) + gl_cv_func_readlink_trailing_slash="$gl_cross_guess_normal" ;; esac ]) rm -f conftest.link conftest.lnk2]) - case "$gl_cv_func_readlink_works" in + case "$gl_cv_func_readlink_trailing_slash" in *yes) if test "$gl_cv_decl_readlink_works" != yes; then REPLACE_READLINK=1 @@ -55,6 +59,43 @@ AC_DEFUN([gl_FUNC_READLINK], REPLACE_READLINK=1 ;; esac + + AC_CACHE_CHECK([whether readlink truncates results correctly], + [gl_cv_func_readlink_truncate], + [# We have readlink, so assume ln -s works. + ln -s ab conftest.link + AC_RUN_IFELSE( + [AC_LANG_PROGRAM( + [[#include +]], [[char c; + return readlink ("conftest.link", &c, 1) != 1;]])], + [gl_cv_func_readlink_truncate=yes], + [gl_cv_func_readlink_truncate=no], + [case "$host_os" in + # Guess yes on Linux or glibc systems. + linux-* | linux | *-gnu* | gnu*) + gl_cv_func_readlink_truncate="guessing yes" ;; + # Guess no on AIX or HP-UX. + aix* | hpux*) + gl_cv_func_readlink_truncate="guessing no" ;; + # If we don't know, obey --enable-cross-guesses. + *) + gl_cv_func_readlink_truncate="$gl_cross_guess_normal" ;; + esac + ]) + rm -f conftest.link conftest.lnk2]) + case $gl_cv_func_readlink_truncate in + *yes) + if test "$gl_cv_decl_readlink_works" != yes; then + REPLACE_READLINK=1 + fi + ;; + *) + AC_DEFINE([READLINK_TRUNCATE_BUG], [1], [Define to 1 if readlink + sets errno instead of truncating a too-long link.]) + REPLACE_READLINK=1 + ;; + esac fi ]) diff --git a/m4/readlinkat.m4 b/m4/readlinkat.m4 index 6ef1f590c..73e343f74 100644 --- a/m4/readlinkat.m4 +++ b/m4/readlinkat.m4 @@ -1,4 +1,4 @@ -# serial 5 +# serial 6 # See if we need to provide readlinkat replacement. dnl Copyright (C) 2009-2020 Free Software Foundation, Inc. @@ -26,13 +26,10 @@ AC_DEFUN([gl_FUNC_READLINKAT], ssize_t readlinkat (int, char const *, char *, size_t);]])], [gl_cv_decl_readlinkat_works=yes], [gl_cv_decl_readlinkat_works=no])]) - # Assume readinkat has the same trailing slash bug as readlink, - # as is the case on Mac Os X 10.10 - case "$gl_cv_func_readlink_works" in - *yes) - if test "$gl_cv_decl_readlinkat_works" != yes; then - REPLACE_READLINKAT=1 - fi + # Assume readlinkat has the same bugs as readlink, + # as is the case on OS X 10.10 with trailing slashes. + case $gl_cv_decl_readlinkat_works,$gl_cv_func_readlink_trailing_slash,$gl_cv_func_readlink_truncate in + *yes,*yes,*yes) ;; *) REPLACE_READLINKAT=1 diff --git a/modules/readlinkat b/modules/readlinkat index 3bc723965..171ee31e0 100644 --- a/modules/readlinkat +++ b/modules/readlinkat @@ -9,16 +9,16 @@ m4/readlinkat.m4 Depends-on: unistd extensions -at-internal [test $HAVE_READLINKAT = 0] -errno [test $HAVE_READLINKAT = 0] -fchdir [test $HAVE_READLINKAT = 0] -fcntl-h [test $HAVE_READLINKAT = 0] -filename [test $HAVE_READLINKAT = 0] -openat-die [test $HAVE_READLINKAT = 0] -openat-h [test $HAVE_READLINKAT = 0] -save-cwd [test $HAVE_READLINKAT = 0] -stat [test $HAVE_READLINKAT = 0] -readlink [test $HAVE_READLINKAT = 0] +at-internal [test $HAVE_READLINKAT = 0 || test $REPLACE_READLINKAT = 1] +errno [test $HAVE_READLINKAT = 0 || test $REPLACE_READLINKAT = 1] +fchdir [test $HAVE_READLINKAT = 0 || test $REPLACE_READLINKAT = 1] +fcntl-h [test $HAVE_READLINKAT = 0 || test $REPLACE_READLINKAT = 1] +filename [test $HAVE_READLINKAT = 0 || test $REPLACE_READLINKAT = 1] +fstatat [test $HAVE_READLINKAT = 1 && test $REPLACE_READLINKAT = 1] +openat-die [test $HAVE_READLINKAT = 0 || test $REPLACE_READLINKAT = 1] +openat-h [test $HAVE_READLINKAT = 0 || test $REPLACE_READLINKAT = 1] +save-cwd [test $HAVE_READLINKAT = 0 || test $REPLACE_READLINKAT = 1] +readlink [test $HAVE_READLINKAT = 0 || test $REPLACE_READLINKAT = 1] configure.ac: gl_FUNC_READLINKAT -- 2.27.0