bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

new module 'striconv'


From: Bruno Haible
Subject: new module 'striconv'
Date: Wed, 6 Sep 2006 14:26:32 +0200
User-agent: KMail/1.9.1

Hi,

I added this merge between Simon's iconvme module and gettext's iconvstring
module.

Migration path:

iconvme::iconv_string -> str_iconv
iconvme::iconv_alloc -> str_cd_iconv (with reversed arguments)
iconvstring::iconv_string -> xmem_cd_iconv (with modified arguments)


        * modules/striconv: New file.
        * lib/striconv.h: New file.
        * lib/striconv.c: New file.

============================= modules/striconv =============================
Description:
Character set conversion of strings made easy, uses iconv.

Files:
lib/striconv.h
lib/striconv.c

Depends-on:
iconv
strdup
c-strcase

configure.ac:

Makefile.am:
lib_SOURCES += striconv.h striconv.c
if GL_COND_LIBTOOL
lib_LDFLAGS += $(LTLIBICONV)
else
lib_LDFLAGS += $(LIBICONV)
endif

Include:
"striconv.h"

License:
LGPL

Maintainer:
Bruno Haible, Simon Josefsson

============================== lib/striconv.h ==============================
/* Charset conversion.
   Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
   Written by Bruno Haible and Simon Josefsson.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#ifndef _STRICONV_H
#define _STRICONV_H

#include <stddef.h>
#if HAVE_ICONV
#include <iconv.h>
#endif


#ifdef __cplusplus
extern "C" {
#endif


#if HAVE_ICONV

/* Convert an entire string from one encoding to another, using iconv.
   The original string is at [SRC,...,SRC+SRCLEN-1].
   The conversion descriptor is passed as CD.
   *RESULTP should initially contain NULL or a malloced memory block.
   May change the size of the allocated memory block in *RESULTP, storing
   its new address in *RESULTP and its new length in *LENGTHP.
   Return value: 0 if successful, otherwise -1 and errno set.
   If successful, the resulting string is stored in *RESULTP and its length
   in *LENGTHP.  */
extern int mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
                         char **resultp, size_t *lengthp);

/* Convert an entire string from one encoding to another, using iconv.
   The original string is the NUL-terminated string starting at SRC.
   The conversion descriptor is passed as CD.  Both the "from" and the "to"
   encoding must use a single NUL byte at the end of the string (i.e. not
   UCS-2, UCS-4, UTF-16, UTF-32).
   Allocate a malloced memory block for the result.
   Return value: the freshly allocated resulting NUL-terminated string if
   successful, otherwise NULL and errno set.  */
extern char * str_cd_iconv (const char *src, iconv_t cd);

#endif

/* Convert an entire string from one encoding to another, using iconv.
   The original string is the NUL-terminated string starting at SRC.
   Both the "from" and the "to" encoding must use a single NUL byte at the
   end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
   Allocate a malloced memory block for the result.
   Return value: the freshly allocated resulting NUL-terminated string if
   successful, otherwise NULL and errno set.  */
extern char * str_iconv (const char *src,
                         const char *from_codeset, const char *to_codeset);


#ifdef __cplusplus
}
#endif


#endif /* _STRICONV_H */
============================== lib/striconv.c ==============================
/* Charset conversion.
   Copyright (C) 2001-2006 Free Software Foundation, Inc.
   Written by Bruno Haible and Simon Josefsson.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

/* Specification.  */
#include "striconv.h"

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#if HAVE_ICONV
# include <iconv.h>
/* Get MB_LEN_MAX, CHAR_BIT.  */
# include <limits.h>
#endif

#include "strdup.h"
#include "c-strcase.h"

#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif


#if HAVE_ICONV

int
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
              char **resultp, size_t *lengthp)
{
# define tmpbufsize 4096
  size_t length;
  char *result;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Determine the length we need.  */
  {
    size_t count = 0;
    char tmpbuf[tmpbufsize];
    const char *inptr = src;
    size_t insize = srclen;

    while (insize > 0)
      {
        char *outptr = tmpbuf;
        size_t outsize = tmpbufsize;
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == E2BIG)
              ;
            else if (errno == EINVAL)
              break;
            else
              return -1;
          }
# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
        /* Irix iconv() inserts a NUL byte if it cannot convert.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
        count += outptr - tmpbuf;
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    {
      char *outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
      count += outptr - tmpbuf;
    }
# endif
    length = count;
  }

  if (length == 0)
    {
      *lengthp = 0;
      return 0;
    }
  result = (*resultp != NULL ? realloc (*resultp, length) : malloc (length));
  if (result == NULL)
    {
      errno = ENOMEM;
      return -1;
    }
  *resultp = result;
  *lengthp = length;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Return to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Do the conversion for real.  */
  {
    const char *inptr = src;
    size_t insize = srclen;
    char *outptr = result;
    size_t outsize = length;

    while (insize > 0)
      {
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == EINVAL)
              break;
            else
              return -1;
          }
# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
        /* Irix iconv() inserts a NUL byte if it cannot convert.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    {
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
    }
# endif
    if (outsize != 0)
      abort ();
  }

  return 0;
# undef tmpbufsize
}

char *
str_cd_iconv (const char *src, iconv_t cd)
{
  /* For most encodings, a trailing NUL byte in the input will be converted
     to a trailing NUL byte in the output.  But not for UTF-7.  So that this
     function is usable for UTF-7, we have to exclude the NUL byte from the
     conversion and add it by hand afterwards.  */
# if PROBABLY_SLOWER

  char *result = NULL;
  size_t length;
  int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
  char *final_result;

  if (retval < 0)
    {
      if (result != NULL)
        {
          int saved_errno = errno;
          free (result);
          errno = saved_errno;
        }
      return NULL;
    }

  /* Add the terminating NUL byte.  */
  final_result =
    (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
  if (final_result == NULL)
    {
      if (result != NULL)
        free (result);
      errno = ENOMEM;
      return NULL;
    }
  final_result[length] = '\0';

  return final_result;

# else

  char *result;
  size_t result_size;
  size_t length;
  const char *inptr = src;
  size_t inbytes_remaining = strlen (src);

  /* Make a guess for the worst-case output size, in order to avoid a
     realloc.  It's OK if the guess is wrong as long as it is not zero and
     doesn't lead to an integer overflow.  */
  result_size = inbytes_remaining;
  {
    size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
    if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
      result_size *= MB_LEN_MAX;
  }
  result_size += 1; /* for the terminating NUL */

  result = (char *) malloc (result_size);
  if (result == NULL)
    {
      errno = ENOMEM;
      return NULL;
    }

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Do the conversion.  */
  {
    char *outptr = result;
    size_t outbytes_remaining = result_size - 1;

    for (;;)
      {
        /* Here inptr + inbytes_remaining = src + strlen (src),
                outptr + outbytes_remaining = result + result_size - 1.  */
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &inbytes_remaining,
                            &outptr, &outbytes_remaining);

        if (res == (size_t)(-1))
          {
            if (errno == EINVAL)
              break;
            else if (errno == E2BIG)
              {
                size_t used = outptr - result;
                size_t newsize = result_size * 2;
                char *newresult;

                if (!(newsize > result_size))
                  {
                    errno = ENOMEM;
                    goto failed;
                  }
                newresult = (char *) realloc (result, newsize);
                if (newresult == NULL)
                  {
                    errno = ENOMEM;
                    goto failed;
                  }
                result = newresult;
                result_size = newsize;
                outptr = result + used;
                outbytes_remaining = result_size - 1 - used;
              }
            else
              goto failed;
          }
# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
        /* Irix iconv() inserts a NUL byte if it cannot convert.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            goto failed;
          }
# endif
        else
          break;
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
    || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    for (;;)
      {
        /* Here outptr + outbytes_remaining = result + result_size - 1.  */
        size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);

        if (res == (size_t)(-1))
          {
            if (errno == E2BIG)
              {
                size_t used = outptr - result;
                size_t newsize = result_size * 2;
                char *newresult;

                if (!(newsize > result_size))
                  {
                    errno = ENOMEM;
                    goto failed;
                  }
                newresult = (char *) realloc (result, newsize);
                if (newresult == NULL)
                  {
                    errno = ENOMEM;
                    goto failed;
                  }
                result = newresult;
                result_size = newsize;
                outptr = result + used;
                outbytes_remaining = result_size - 1 - used;
              }
            else
              goto failed;
          }
        else
          break;
      }
# endif

    /* Add the terminating NUL byte.  */
    *outptr++ = '\0';

    length = outptr - result;
  }

  /* Give away unused memory.  */
  if (length < result_size)
    {
      char *smaller_result = (char *) realloc (result, length);

      if (smaller_result != NULL)
        result = smaller_result;
    }

  return result;

 failed:
  {
    int saved_errno = errno;
    free (result);
    errno = saved_errno;
    return NULL;
  }

# endif
}

#endif

char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  if (c_strcasecmp (from_codeset, to_codeset) == 0)
    return strdup (src);
  else
    {
#if HAVE_ICONV
      iconv_t cd;
      char *result;

      /* Avoid glibc-2.1 bug with EUC-KR.  */
# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined 
_LIBICONV_VERSION
      if (c_strcasecmp (from_codeset, "EUC-KR") == 0
          || c_strcasecmp (to_codeset, "EUC-KR") == 0)
        {
          errno = EINVAL;
          return NULL;
        }
# endif
      cd = iconv_open (to_codeset, from_codeset);
      if (cd == (iconv_t) -1)
        return NULL;

      result = str_cd_iconv (src, cd);

      if (result == NULL)
        {
          /* Close cd, but preserve the errno from str_cd_iconv.  */
          int saved_errno = errno;
          iconv_close (cd);
          errno = saved_errno;
        }
      else
        {
          if (iconv_close (cd) < 0)
            {
              /* Return NULL, but free the allocated memory, and while doing
                 that, preserve the errno from iconv_close.  */
              int saved_errno = errno;
              free (result);
              errno = saved_errno;
              return NULL;
            }
        }
      return result;
#else
      /* This is a different error code than if iconv_open existed but didn't
         support from_codeset and to_codeset, so that the caller can emit
         an error message such as
           "iconv() is not supported. Installing GNU libiconv and
            then reinstalling this package would fix this."  */
      errno = ENOSYS;
      return NULL;
#endif
    }
}




reply via email to

[Prev in Thread] Current Thread [Next in Thread]