Actually, my previous patch had a bug in the case where there is one
character remaining to be converted in the input buffer, and the size
of the output buffer needed to convert it is greater than in_remain * 2
(2 bytes). This patch addresses that as well.
diff -ur wget-1.21.1.orig/src/iri.c wget-1.21.1/src/iri.c
--- wget-1.21.1.orig/src/iri.c 2021-01-08 17:51:43.000000000 -0500
+++ wget-1.21.1/src/iri.c 2021-04-21 10:05:28.557549874 -0400
@@ -130,9 +130,9 @@
{
iconv_t cd;
/* sXXXav : hummm hard to guess... */
- size_t len, done, outlen;
+ size_t outbuf_len = 0, out_remain = 0, converted = 0;
int invalid = 0, tooshort = 0;
- char *s, *in, *in_save;
+ char *outcur, *in, *in_save;
cd = iconv_open (tocode, fromcode);
if (cd == (iconv_t)(-1))
@@ -148,17 +148,16 @@
url_unescape_except_reserved (in);
inlen = strlen(in);
- len = outlen = inlen * 2;
- *out = s = xmalloc (outlen + 1);
- done = 0;
+ /* Leave 4 bytes for null for e.g. UTF-32 */
+ outbuf_len = out_remain = inlen * 2;
+ *out = outcur = xmalloc (outlen + 4);
for (;;)
{
- if (iconv (cd, (ICONV_CONST char **) &in, &inlen, out, &outlen) != (size_t)(-1)
&&
+ if (iconv (cd, (ICONV_CONST char **) &in, &inlen, &outcur, &outlen) !=
(size_t)(-1) &&
iconv (cd, NULL, NULL, out, &outlen) != (size_t)(-1))
{
- *out = s;
- *(s + len - outlen - done) = '\0';
+ for (int i = 0; i < 4; ++i) *(outcur + i) = '\0';
xfree(in_save);
iconv_close(cd);
IF_DEBUG
@@ -188,12 +187,23 @@
}
else if (errno == E2BIG) /* Output buffer full */
{
- tooshort++;
- done = len;
- len = done + inlen * 2;
- s = xrealloc (s, len + 1);
- *out = s + done - outlen;
- outlen += inlen * 2;
+ converted = outbuf_len - out_remain;
+ /* If we merely allocate converted + in_remain * 2 chars for the new
+ output buffer, then any case where the input buffer contains 1
char
+ and requires more than 2 bytes (in_remain * 2 == 1 * 2) to convert
+ it will result in an infinite loop: In this case no conversion
+ will be done so converted will not change, and the buffer will
+ remain too small to complete the conversion on subsequent passes,
+ resulting in an infinite loop. This happens e.g. when converting
+ ASCII to UTF-32. Thus, we need to instead add in_remain * 2
+ characters to the current outbuf_len. That way next time through
+ the loop the buffer size will actually increase, eventually being
+ large enough to hold the conversion. */
+ outbuf_len += in_remain * 2;
+ /* again, leave 4 bytes for null */
+ *out = xrealloc (*out, outbuf_len + 4);
+ outcur = *out + converted;
+ out_remain = outbuf_len - converted;
}
else /* Weird, we got an unspecified error */
{