[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
new module 'striconv'
From: |
Bruno Haible |
Subject: |
new module 'striconv' |
Date: |
Wed, 6 Sep 2006 14:26:32 +0200 |
User-agent: |
KMail/1.9.1 |
Hi,
I added this new module, which merges Simon's iconvme module with gettext's
iconvstring module.
Migration path:
iconvme::iconv_string -> str_iconv
iconvme::iconv_alloc -> str_cd_iconv (with reversed arguments)
iconvstring::iconv_string -> xmem_cd_iconv (with modified arguments)
* modules/striconv: New file.
* lib/striconv.h: New file.
* lib/striconv.c: New file.
============================= modules/striconv =============================
Description:
Character set conversion of strings made easy, uses iconv.
Files:
lib/striconv.h
lib/striconv.c
Depends-on:
iconv
strdup
c-strcase
configure.ac:
Makefile.am:
lib_SOURCES += striconv.h striconv.c
if GL_COND_LIBTOOL
lib_LDFLAGS += $(LTLIBICONV)
else
lib_LDFLAGS += $(LIBICONV)
endif
Include:
"striconv.h"
License:
LGPL
Maintainer:
Bruno Haible, Simon Josefsson
============================== lib/striconv.h ==============================
/* Charset conversion.
Copyright (C) 2001-2004, 2006 Free Software Foundation, Inc.
Written by Bruno Haible and Simon Josefsson.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifndef _STRICONV_H
#define _STRICONV_H
#include <stddef.h>
#if HAVE_ICONV
#include <iconv.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
#if HAVE_ICONV
/* Convert an entire string from one encoding to another, using iconv.
   The original string is at [SRC,...,SRC+SRCLEN-1].
   The conversion descriptor is passed as CD.
   *RESULTP should initially contain NULL or a malloced memory block.
   May change the size of the allocated memory block in *RESULTP, storing
   its new address in *RESULTP and its new length in *LENGTHP.
   Return value: 0 if successful, otherwise -1 and errno set.
   If successful, the resulting string is stored in *RESULTP and its length
   in *LENGTHP.  The result is exactly *LENGTHP bytes long; no terminating
   NUL byte is appended.
   If the conversion produces no output at all, *LENGTHP is set to 0 and
   *RESULTP is left untouched.
   On failure, *RESULTP may already have been replaced by a resized block;
   in every case the block in *RESULTP remains owned by the caller.  */
extern int mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
char **resultp, size_t *lengthp);
/* Convert an entire string from one encoding to another, using iconv.
   The original string is the NUL-terminated string starting at SRC.
   The conversion descriptor is passed as CD.  Both the "from" and the "to"
   encoding must use a single NUL byte at the end of the string (i.e. not
   UCS-2, UCS-4, UTF-16, UTF-32).
   Allocate a malloced memory block for the result; the caller is
   responsible for freeing it with free().
   Return value: the freshly allocated resulting NUL-terminated string if
   successful, otherwise NULL and errno set.  */
extern char * str_cd_iconv (const char *src, iconv_t cd);
#endif
/* Convert an entire string from one encoding to another, using iconv.
   The original string is the NUL-terminated string starting at SRC.
   Both the "from" and the "to" encoding must use a single NUL byte at the
   end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
   If FROM_CODESET and TO_CODESET compare equal under c_strcasecmp, the
   result is simply a copy of SRC and iconv is not involved.
   Allocate a malloced memory block for the result; the caller is
   responsible for freeing it with free().
   Return value: the freshly allocated resulting NUL-terminated string if
   successful, otherwise NULL and errno set.
   This function is declared even when HAVE_ICONV is not set; in that case
   every request that needs a real conversion fails with errno = ENOSYS.  */
extern char * str_iconv (const char *src,
const char *from_codeset, const char *to_codeset);
#ifdef __cplusplus
}
#endif
#endif /* _STRICONV_H */
============================== lib/striconv.c ==============================
/* Charset conversion.
Copyright (C) 2001-2006 Free Software Foundation, Inc.
Written by Bruno Haible and Simon Josefsson.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
/* Specification. */
#include "striconv.h"
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#if HAVE_ICONV
# include <iconv.h>
/* Get MB_LEN_MAX, CHAR_BIT. */
# include <limits.h>
#endif
#include "strdup.h"
#include "c-strcase.h"
#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif
#if HAVE_ICONV
/* Convert the SRCLEN bytes starting at SRC through the conversion
   descriptor CD, storing the output in a malloced block.

   *RESULTP must initially be NULL or a malloced block; it is resized (or
   allocated) to hold exactly the converted bytes and *LENGTHP receives
   their count.  No terminating NUL byte is appended.  If the conversion
   produces no output, *LENGTHP is set to 0 and *RESULTP is left untouched.

   The conversion is done in two passes: the first one converts into a
   scratch buffer only to measure the output size, the second one converts
   into the final block.

   An incomplete multibyte sequence at the very end of the input (iconv
   failing with EINVAL) is not an error: the complete characters before it
   are converted and the incomplete tail is dropped.

   Return value: 0 if successful, otherwise -1 with errno set.  On failure
   *RESULTP may already point to the resized block, which remains owned by
   the caller.  */
int
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
              char **resultp, size_t *lengthp)
{
# define tmpbufsize 4096
  size_t length;
  /* Number of leading input bytes that the sizing pass could convert.  */
  size_t usable_srclen;
  char *result;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Determine the length we need.  */
  {
    size_t count = 0;
    char tmpbuf[tmpbufsize];
    const char *inptr = src;
    size_t insize = srclen;

    while (insize > 0)
      {
        char *outptr = tmpbuf;
        size_t outsize = tmpbufsize;
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == E2BIG)
              ;
            else if (errno == EINVAL)
              {
                /* Incomplete input at the end.  The bytes that this call
                   produced before hitting the incomplete sequence belong
                   to the result and must be counted too; otherwise the
                   second pass would be given a buffer that is too small.  */
                count += outptr - tmpbuf;
                break;
              }
            else
              return -1;
          }
# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
        /* Irix iconv() inserts a NUL byte if it cannot convert.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
        count += outptr - tmpbuf;
      }

    /* Everything before INPTR was converted; anything after it is an
       incomplete trailing sequence that the second pass must not see.  */
    usable_srclen = srclen - insize;

    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    {
      /* Account for the bytes needed to return to the initial shift
         state.  */
      char *outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
      count += outptr - tmpbuf;
    }
# endif
    length = count;
  }

  if (length == 0)
    {
      /* Nothing to store; leave *RESULTP alone.  */
      *lengthp = 0;
      return 0;
    }

  result = (*resultp != NULL ? realloc (*resultp, length) : malloc (length));
  if (result == NULL)
    {
      errno = ENOMEM;
      return -1;
    }
  /* Publish the block right away, so that the caller can free it even if
     the second pass fails.  */
  *resultp = result;
  *lengthp = length;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Return to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Do the conversion for real.  */
  {
    const char *inptr = src;
    size_t insize = usable_srclen;
    char *outptr = result;
    size_t outsize = length;

    while (insize > 0)
      {
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == EINVAL)
              break;
            else
              return -1;
          }
# if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
        /* Irix iconv() inserts a NUL byte if it cannot convert.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
      }

    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    {
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
    }
# endif

    /* Both passes must have produced the same number of bytes.  */
    if (outsize != 0)
      abort ();
  }

  return 0;
# undef tmpbufsize
}
# if !PROBABLY_SLOWER
/* Helper for str_cd_iconv: double the size of the malloced output buffer
   *BUFP (currently *BUFSIZEP bytes, written up to *OUTP) and recompute the
   write position *OUTP and the free space *OUTLEFTP, which always keeps one
   byte in reserve for the terminating NUL.
   Return 0 if successful.  Otherwise return -1 with errno = ENOMEM; the
   old buffer is then still valid and still owned by the caller.  */
static int
double_output_buffer (char **bufp, size_t *bufsizep,
                      char **outp, size_t *outleftp)
{
  size_t used = *outp - *bufp;
  size_t doubled = *bufsizep * 2;
  char *bigger;

  /* Doubling wrapped around: the request cannot be satisfied.  */
  if (doubled <= *bufsizep)
    {
      errno = ENOMEM;
      return -1;
    }

  bigger = (char *) realloc (*bufp, doubled);
  if (bigger == NULL)
    {
      errno = ENOMEM;
      return -1;
    }

  *bufp = bigger;
  *bufsizep = doubled;
  *outp = bigger + used;
  *outleftp = doubled - 1 - used;
  return 0;
}
# endif

/* Convert the NUL-terminated string SRC through the conversion descriptor
   CD and return the result as a freshly malloced NUL-terminated string.
   Return NULL with errno set upon failure.  */
char *
str_cd_iconv (const char *src, iconv_t cd)
{
  /* A trailing NUL byte in the input becomes a trailing NUL byte in the
     output for most encodings, but not for UTF-7.  To stay usable with
     UTF-7, the NUL byte is kept out of the conversion and appended by hand
     afterwards.  */
# if PROBABLY_SLOWER

  char *converted = NULL;
  size_t converted_len;
  char *terminated;

  if (mem_cd_iconv (src, strlen (src), cd, &converted, &converted_len) < 0)
    {
      /* Release whatever was allocated, keeping the errno of the failed
         conversion.  */
      if (converted != NULL)
        {
          int saved_errno = errno;
          free (converted);
          errno = saved_errno;
        }
      return NULL;
    }

  /* Make room for the terminating NUL byte and add it.  */
  if (converted != NULL)
    terminated = realloc (converted, converted_len + 1);
  else
    terminated = malloc (converted_len + 1);
  if (terminated == NULL)
    {
      if (converted != NULL)
        free (converted);
      errno = ENOMEM;
      return NULL;
    }
  terminated[converted_len] = '\0';
  return terminated;

# else

  const char *in = src;
  size_t inleft = strlen (src);
  char *buf;
  size_t bufsize;
  size_t total;

  /* Guess the worst-case output size so that a realloc is usually not
     needed.  A wrong guess is harmless, provided that it is not zero and
     that computing it does not overflow.  */
  bufsize = inleft;
  {
    size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);

    if (bufsize <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
      bufsize *= MB_LEN_MAX;
  }
  bufsize += 1;  /* room for the terminating NUL */

  buf = (char *) malloc (bufsize);
  if (buf == NULL)
    {
      errno = ENOMEM;
      return NULL;
    }

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
#  if defined _LIBICONV_VERSION \
      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
#  endif

  /* Do the conversion.  */
  {
    char *out = buf;
    size_t outleft = bufsize - 1;

    for (;;)
      {
        /* Invariants: in + inleft = src + strlen (src),
                       out + outleft = buf + bufsize - 1.  */
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &in, &inleft,
                            &out, &outleft);

        if (res != (size_t)(-1))
          {
#  if !defined _LIBICONV_VERSION && (defined sgi || defined __sgi)
            /* Irix iconv() inserts a NUL byte if it cannot convert.  */
            if (res > 0)
              {
                errno = EILSEQ;
                goto failed;
              }
#  endif
            /* All input has been consumed.  */
            break;
          }

        /* Incomplete input at the end: keep what was converted so far.  */
        if (errno == EINVAL)
          break;

        /* Anything other than "output buffer full" is fatal.  */
        if (errno != E2BIG)
          goto failed;

        if (double_output_buffer (&buf, &bufsize, &out, &outleft) < 0)
          goto failed;
      }

    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
#  if defined _LIBICONV_VERSION \
      || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
    /* Emit the bytes that bring the output back to the initial state.  */
    for (;;)
      {
        /* Invariant: out + outleft = buf + bufsize - 1.  */
        size_t res = iconv (cd, NULL, NULL, &out, &outleft);

        if (res != (size_t)(-1))
          break;

        if (errno != E2BIG)
          goto failed;

        if (double_output_buffer (&buf, &bufsize, &out, &outleft) < 0)
          goto failed;
      }
#  endif

    /* Add the terminating NUL byte; the reserved byte guarantees room.  */
    *out++ = '\0';
    total = out - buf;
  }

  /* Give away unused memory.  A failing shrink is not an error.  */
  if (total < bufsize)
    {
      char *trimmed = (char *) realloc (buf, total);

      if (trimmed != NULL)
        buf = trimmed;
    }

  return buf;

 failed:
  {
    /* Free the buffer without disturbing the errno that describes the
       failure.  */
    int saved_errno = errno;
    free (buf);
    errno = saved_errno;
    return NULL;
  }

# endif
}
#endif
/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET.

   If the two codeset names compare equal under c_strcasecmp, the result is
   a strdup'ed copy of SRC.  Otherwise a conversion descriptor is opened
   with iconv_open, the string is converted with str_cd_iconv, and the
   descriptor is closed again.

   Return value: the freshly malloced NUL-terminated result if successful,
   otherwise NULL with errno set.  When built without HAVE_ICONV, every
   request that needs a real conversion fails with errno = ENOSYS.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  if (c_strcasecmp (from_codeset, to_codeset) == 0)
    return strdup (src);
  else
    {
#if HAVE_ICONV
      iconv_t cd;
      char *result;

      /* Avoid glibc-2.1 bug with EUC-KR.  */
# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) \
     && !defined _LIBICONV_VERSION
      if (c_strcasecmp (from_codeset, "EUC-KR") == 0
          || c_strcasecmp (to_codeset, "EUC-KR") == 0)
        {
          errno = EINVAL;
          return NULL;
        }
# endif

      cd = iconv_open (to_codeset, from_codeset);
      if (cd == (iconv_t) -1)
        return NULL;

      result = str_cd_iconv (src, cd);

      if (result == NULL)
        {
          /* Close cd, but preserve the errno from str_cd_iconv.  */
          int saved_errno = errno;
          iconv_close (cd);
          errno = saved_errno;
        }
      else
        {
          if (iconv_close (cd) < 0)
            {
              /* Return NULL, but free the allocated memory, and while doing
                 that, preserve the errno from iconv_close.  */
              int saved_errno = errno;
              free (result);
              errno = saved_errno;
              return NULL;
            }
        }
      return result;
#else
      /* This is a different error code than if iconv_open existed but didn't
         support from_codeset and to_codeset, so that the caller can emit
         an error message such as
           "iconv() is not supported. Installing GNU libiconv and
            then reinstalling this package would fix this."  */
      errno = ENOSYS;
      return NULL;
#endif
    }
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- new module 'striconv',
Bruno Haible <=