coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 10/12] cksum: support digest detection for tagged format


From: Pádraig Brady
Subject: [PATCH 10/12] cksum: support digest detection for tagged format
Date: Sun, 12 Sep 2021 19:14:04 +0100

Support `cksum --check FILE` without having to specify a digest
algorithm, allowing for more generic file check instructions.
This also supports mixed digest checksum files, supporting
more robust multi digest checks.

* src/digest.c (algorithm_from_tag): A new function to
identify the digest algorithm from a tagged format line.
(split3): Set the algorithm depending on tag, and update
the expected digest length accordingly.
* tests/misc/cksum-c.sh: Add a new test.
* tests/local.mk: Reference the new test.
* tests/misc/md5sum.pl: Adjust to more generic error.
* tests/misc/sha1sum.pl: Likewise.
* doc/coreutils.texi (md5sum invocation): Mention the new -c feature.
* NEWS: Mention the new feature.
---
 NEWS                  |  3 ++
 doc/coreutils.texi    |  6 ++++
 src/digest.c          | 69 +++++++++++++++++++++++++++++++++++++++++--
 tests/local.mk        |  1 +
 tests/misc/cksum-c.sh | 56 +++++++++++++++++++++++++++++++++++
 tests/misc/md5sum.pl  |  8 ++---
 tests/misc/sha1sum.pl |  6 ++--
 7 files changed, 139 insertions(+), 10 deletions(-)
 create mode 100755 tests/misc/cksum-c.sh

diff --git a/NEWS b/NEWS
index 8936ccbe0..3860abfed 100644
--- a/NEWS
+++ b/NEWS
@@ -95,6 +95,9 @@ GNU coreutils NEWS                                    -*- outline -*-
 
   cksum -a now supports the 'sm3' argument, to use the SM3 digest algorithm.
 
+  cksum --check now supports auto detecting the digest type to use,
+  when verifying tagged format checksums.
+
   expr and factor now support bignums on all platforms.
 
   ls --classify now supports the "always", "auto", or "never" flags,
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 578273746..cc690fefe 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -4079,10 +4079,16 @@ Read file names and checksum information (not data) from each
 whether the checksums match the contents of the named files.
 The input to this mode of @command{md5sum} is usually the output of
 a prior, checksum-generating run of @samp{md5sum}.
+
 Three input formats are supported.  Either the default output
 format described above, the @option{--tag} output format,
 or the BSD reversed mode format which is similar to the default mode,
 but doesn't use a character to distinguish binary and text modes.
+
+For the @command{cksum} command, the @option{--check} option
+supports auto-detecting the digest algorithm to use,
+when presented with checksum information in the @option{--tag} output format.
+
 Output with @option{--zero} enabled is not supported by @option{--check}.
 @sp 1
 For each such line, @command{md5sum} reads the named file and computes its
diff --git a/src/digest.c b/src/digest.c
index 633e62487..1a22e0a8d 100644
--- a/src/digest.c
+++ b/src/digest.c
@@ -311,6 +311,7 @@ static int const algorithm_bits[] =
 verify (ARRAY_CARDINALITY (algorithm_bits)
         == ARRAY_CARDINALITY (algorithm_args));
 
+static bool algorithm_specified = false;
 static enum Algorithm cksum_algorithm = crc;
 static sumfn cksumfns[]=
 {
@@ -629,6 +630,45 @@ bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest,
   return hex_digits (*hex_digest);
 }
 
+#if HASH_ALGO_CKSUM
+/* Return the Algorithm index for the tag at the start of string S (which
+   is modified only temporarily), or a negative value for no match.  */
+
+static ptrdiff_t
+algorithm_from_tag (char *s)
+{
+  /* Limit check size to this length for perf reasons.  */
+  static size_t max_tag_len;
+  if (! max_tag_len)
+    {
+      char const * const * tag = algorithm_tags;
+      while (*tag)
+        {
+          size_t tag_len = strlen (*tag++);
+          max_tag_len = MAX (tag_len, max_tag_len);
+        }
+    }
+
+  size_t i = 0;
+
+  /* Find end of tag; scan 1 past the max to detect overlong tags.  */
+  while (i <= max_tag_len && s[i] && ! ISWHITE (s[i])
+         && s[i] != '-' && s[i] != '(')
+    ++i;
+
+  if (i > max_tag_len)
+    return -1;
+
+  /* Terminate tag, and lookup.  */
+  char sep = s[i];
+  s[i] = '\0';
+  ptrdiff_t algo = argmatch (s, algorithm_tags, NULL, 0);
+  s[i] = sep;
+
+  return algo;
+}
+#endif
+
 /* Split the string S (of length S_LEN) into three parts:
    a hexadecimal digest, binary flag, and the file name.
    S is modified.  Return true if successful.  */
@@ -652,6 +692,21 @@ split_3 (char *s, size_t s_len,
 
   /* Check for BSD-style checksum line. */
 
+#if HASH_ALGO_CKSUM
+  if (! algorithm_specified)
+    {
+      ptrdiff_t algo_tag = algorithm_from_tag (s);
+      if (algo_tag >= 0)
+        {
+          if (algo_tag <= crc)
+            return false;  /* We don't support checking these older formats.  */
+          cksum_algorithm = algo_tag;
+        }
+      else
+        return false;  /* We only support tagged format without -a.  */
+    }
+#endif
+
   algo_name_len = strlen (DIGEST_TYPE_STRING);
   if (STREQ_LEN (s + i, DIGEST_TYPE_STRING, algo_name_len))
     {
@@ -686,7 +741,14 @@ split_3 (char *s, size_t s_len,
 # if HASH_ALGO_CKSUM
       }
 # endif
+# if HASH_ALGO_CKSUM
+      if (cksum_algorithm == blake2b)
+        digest_hex_bytes = b2_length / 4;
+      else
+        digest_hex_bytes = algorithm_bits[cksum_algorithm] / 4;
+# else
       digest_hex_bytes = b2_length / 4;
+# endif
 #endif
       if (s[i] == ' ')
         ++i;
@@ -1101,8 +1163,8 @@ digest_check (char const *checkfile_name)
   if (! properly_formatted_lines)
     {
       /* Warn if no tests are found.  */
-      error (0, 0, _("%s: no properly formatted %s checksum lines found"),
-             quotef (checkfile_name), DIGEST_TYPE_STRING);
+      error (0, 0, _("%s: no properly formatted checksum lines found"),
+             quotef (checkfile_name));
     }
   else
     {
@@ -1189,6 +1251,7 @@ main (int argc, char **argv)
       case 'a':
         cksum_algorithm = XARGMATCH ("--algorithm", optarg,
                                      algorithm_args, algorithm_types);
+        algorithm_specified = true;
         break;
 
       case DEBUG_PROGRAM_OPTION:
@@ -1302,7 +1365,7 @@ main (int argc, char **argv)
         if (prefix_tag)
           die (EXIT_FAILURE, 0,
               _("--tag is not supported with --algorithm={bsd,sysv,crc}"));
-        if (do_check)
+        if (do_check && algorithm_specified)
           die (EXIT_FAILURE, 0,
               _("--check is not supported with --algorithm={bsd,sysv,crc}"));
         break;
diff --git a/tests/local.mk b/tests/local.mk
index 192c0d31c..228d0e368 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -290,6 +290,7 @@ all_tests =                                 \
   tests/misc/chroot-fail.sh                    \
   tests/misc/cksum.sh                          \
   tests/misc/cksum-a.sh                                \
+  tests/misc/cksum-c.sh                                \
   tests/misc/comm.pl                           \
   tests/misc/csplit.sh                         \
   tests/misc/csplit-1000.sh                    \
diff --git a/tests/misc/cksum-c.sh b/tests/misc/cksum-c.sh
new file mode 100755
index 000000000..d9ea35672
--- /dev/null
+++ b/tests/misc/cksum-c.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# Validate cksum --check dynamic operation
+
+# Copyright (C) 2021 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ cksum shuf
+
+shuf -i 1-10 > input || framework_failure_
+
+for args in '-a sha384' '-a blake2b' '-a blake2b -l 384' '-a sm3'; do
+  cksum $args --tag 'input' >> CHECKSUMS || fail=1
+done
+cksum --strict --check CHECKSUMS || fail=1
+
+# Check the common signed checksums format works in non strict mode
+cat >> signed_CHECKSUMS <<\EOF
+-----BEGIN PGP SIGNED MESSAGE-----
+Hash: SHA384
+
+# ignored comment
+EOF
+cat CHECKSUMS >> signed_CHECKSUMS
+cat >> signed_CHECKSUMS <<\EOF
+-----BEGIN PGP SIGNATURE-----
+
+# Note base64 doesn't have ambiguous delimiters in its charset
+SHA384+BCAAdFiEEjFummQvbJuGfKhqAEWGuaUVxmjkFAmCDId0ACgkQEWGuaUVx
+BLAKE2b/00001EuON62pTEnqrJ5lav61QxRByiuDp/6VODrRL2JWM6Stxu1Myws=
+=AWU7
+-----END PGP SIGNATURE-----
+EOF
+cksum --check signed_CHECKSUMS || fail=1
+
+# Can check individual digests in a mixed file
+cksum --check -a sm3 CHECKSUMS || fail=1
+
+# Checking older (non hex) checksum formats is not supported
+returns_ 1 cksum -a crc -c CHECKSUMS || fail=1
+cksum -a crc 'input' > CHECKSUMS.crc || fail=1
+returns_ 1 cksum -c CHECKSUMS.crc || fail=1
+
+Exit $fail
diff --git a/tests/misc/md5sum.pl b/tests/misc/md5sum.pl
index 695acba65..c32dac0e1 100755
--- a/tests/misc/md5sum.pl
+++ b/tests/misc/md5sum.pl
@@ -105,7 +105,7 @@ my @Tests =
      ['check-bsd', '--check', {IN=> {'f.sha1' => "SHA1 (f) = $degenerate\n"}},
                                 {AUX=> {f=> ''}},
                                 {ERR=>"md5sum: f.sha1: no properly formatted "
-                                       . "MD5 checksum lines found\n"},
+                                       . "checksum lines found\n"},
                                 {EXIT=> 1}],
      ['check-bsd2', '--check', {IN=> {'f.md5' => "MD5 (f) = $degenerate\n"}},
                                 {AUX=> {f=> ''}}, {OUT=>"f: OK\n"}],
@@ -115,7 +115,7 @@ my @Tests =
     ['check-openssl', '--check', {IN=> {'f.sha1' => "SHA1(f)= $degenerate\n"}},
                                 {AUX=> {f=> ''}},
                                 {ERR=>"md5sum: f.sha1: no properly formatted "
-                                       . "MD5 checksum lines found\n"},
+                                       . "checksum lines found\n"},
                                 {EXIT=> 1}],
      ['check-openssl2', '--check', {IN=> {'f.md5' => "MD5(f)= $degenerate\n"}},
                                 {AUX=> {f=> ''}}, {OUT=>"f: OK\n"}],
@@ -157,7 +157,7 @@ my @Tests =
                                     "006999e6df389641adf1fa3a74801d9d  f\n"}},
                                 {OUT=>"f: OK\n"}],
      ['bsd-segv', '--check', {IN=> {'z' => "MD5 ("}}, {EXIT=> 1},
-      {ERR=> "$prog: z: no properly formatted MD5 checksum lines found\n"}],
+      {ERR=> "$prog: z: no properly formatted checksum lines found\n"}],
 
      # Ensure that when there's a NUL byte among the checksum hex digits
      # we detect the invalid formatting and don't even open the file.
@@ -165,7 +165,7 @@ my @Tests =
      #   h: FAILED
      #   md5sum: WARNING: 1 of 1 computed checksum did NOT match
      ['nul-in-cksum', '--check', {IN=> {'h'=>("\0"x32)."  h\n"}}, {EXIT=> 1},
-      {ERR=> "$prog: h: no properly formatted MD5 checksum lines found\n"}],
+      {ERR=> "$prog: h: no properly formatted checksum lines found\n"}],
     );
 
 # Insert the '--text' argument for each test.
diff --git a/tests/misc/sha1sum.pl b/tests/misc/sha1sum.pl
index 8e33aba6f..abbda1c49 100755
--- a/tests/misc/sha1sum.pl
+++ b/tests/misc/sha1sum.pl
@@ -54,7 +54,7 @@ my @Tests =
     ['check-bsd', '--check', {IN=> {'f.md5' => "MD5 (f) = $sha_degenerate\n"}},
                         {AUX=> {f=> ''}},
                         {ERR=>"sha1sum: f.md5: no properly formatted "
-                          . "SHA1 checksum lines found\n"},
+                          . "checksum lines found\n"},
                         {EXIT=> 1}],
      ['check-bsd2', '--check',
                         {IN=> {'f.sha1' => "SHA1 (f) = $sha_degenerate\n"}},
@@ -66,7 +66,7 @@ my @Tests =
                         {IN=> {'f.md5' => "MD5(f)= $sha_degenerate\n"}},
                         {AUX=> {f=> ''}},
                         {ERR=>"sha1sum: f.md5: no properly formatted "
-                          . "SHA1 checksum lines found\n"},
+                          . "checksum lines found\n"},
                         {EXIT=> 1}],
      ['check-openssl2', '--check',
                         {IN=> {'f.sha1' => "SHA1(f)= $sha_degenerate\n"}},
@@ -75,7 +75,7 @@ my @Tests =
                         {IN=> {'f.sha1' => "SHA1(f)= $sha_degenerate\n"}},
                         {AUX=> {f=> 'bar'}}, {EXIT=> 1}],
      ['bsd-segv', '--check', {IN=> {'z' => "SHA1 ("}}, {EXIT=> 1},
-      {ERR=> "$prog: z: no properly formatted SHA1 checksum lines found\n"}],
+      {ERR=> "$prog: z: no properly formatted checksum lines found\n"}],
     );
 
 # Insert the '--text' argument for each test.
-- 
2.26.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]