[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[bug-gettext] [PATCH] msgfilter: Add 'quot' filter
From: |
Daiki Ueno |
Subject: |
[bug-gettext] [PATCH] msgfilter: Add 'quot' filter |
Date: |
Wed, 09 Apr 2014 19:53:28 +0900 |
User-agent: |
Gnus/5.13 (Gnus v5.13) Emacs/24.3.50 (gnu/linux) |
Hi,
It is known that po/Rules-quot does not work properly with BSD Sed:
[bug-gettext] msgfilter: Rules-quot implicity depends on GNU Sed.
https://lists.gnu.org/archive/html/bug-gettext/2013-04/msg00028.html
The file basically does conversion from ASCII quotations ("...", `...',
'...') to Unicode quotations (“...”, ‘...’), using msgfilter sed. So, I
wonder if this conversion might be worth an addition to the built-in
filters. What do people think? I'm attaching an initial patch for this.
If it makes sense, I'll prepare a filter for boldquot as well (and docs
and tests).
Regards,
--
Daiki Ueno
>From 6daf24b4c3c56915057796c7de2e518bc7d58dfb Mon Sep 17 00:00:00 2001
From: Daiki Ueno <address@hidden>
Date: Wed, 9 Apr 2014 19:25:58 +0900
Subject: [PATCH] msgfilter: Add 'quot' filter
---
gettext-tools/src/Makefile.am | 1 +
gettext-tools/src/filter-quote.c | 153 +++++++++++++++++++++++++++++++++++++++
gettext-tools/src/filters.h | 8 ++
gettext-tools/src/msgfilter.c | 7 ++
4 files changed, 169 insertions(+)
create mode 100755 gettext-tools/src/filter-quote.c
diff --git a/gettext-tools/src/Makefile.am b/gettext-tools/src/Makefile.am
index fe44293..3d50c71 100644
--- a/gettext-tools/src/Makefile.am
+++ b/gettext-tools/src/Makefile.am
@@ -214,6 +214,7 @@ else
msgfilter_SOURCES = ../woe32dll/c++msgfilter.cc
endif
msgfilter_SOURCES += filter-sr-latin.c
+msgfilter_SOURCES += filter-quote.c
if !WOE32DLL
msggrep_SOURCES = msggrep.c
else
diff --git a/gettext-tools/src/filter-quote.c b/gettext-tools/src/filter-quote.c
new file mode 100755
index 0000000..bdfb3c3
--- /dev/null
+++ b/gettext-tools/src/filter-quote.c
@@ -0,0 +1,153 @@
+/* Convert ASCII quotation marks to Unicode quotation marks.
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ Written by Daiki Ueno <address@hidden>, 2014.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+/* Specification. */
+#include "filters.h"
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include "xalloc.h"
+
+/* Copy INPUT (INPUT_LEN bytes) to a new buffer, converting ASCII quotation
+   marks to Unicode quotation marks (UTF-8 encoded):
+     "text"  ->  U+201C text U+201D   (an empty "" pair is left unchanged)
+     `text'  ->  U+2018 text U+2019
+     'text'  ->  U+2018 text U+2019   (only in the contexts tested below)
+   All other bytes are copied unchanged.
+   The buffer is obtained through XNMALLOC and is NUL-terminated.  It is
+   stored in *OUTPUT_P; its length in bytes, not counting the NUL, is stored
+   in *OUTPUT_LEN_P.  */
+void
+ascii_quote_to_unicode (const char *input, size_t input_len,
+                        char **output_p, size_t *output_len_p)
+{
+  size_t i;
+  /* START is the first input byte not yet copied to the output; P is the
+     byte currently being examined.  */
+  const char *start, *p;
+  /* OUTPUT is the result buffer; R is the current write position in it.  */
+  char *output, *r;
+  /* True while START points at an opening quote character for which no
+     closing quote has been processed yet.  */
+  bool state = false;
+
+  start = input;
+
+  /* Large enough: an input byte produces at most 3 output bytes (a one-byte
+     ASCII quote becomes a three-byte UTF-8 sequence), plus the final NUL.  */
+  r = output = XNMALLOC (3 * input_len + 1, char);
+
+  for (i = 0; i < input_len; i++)
+    {
+      p = &input[i];
+      switch (*p)
+        {
+        case '"':
+          if (state)
+            {
+              /* A '"' seen while a ` or ' run is open is ordinary text.  */
+              if (*start == '"')
+                {
+                  if (p > start + 1)
+                    {
+                      /* U+201C: LEFT DOUBLE QUOTATION MARK */
+                      memcpy (r, "\xe2\x80\x9c", 3);
+                      r += 3;
+                      memcpy (r, start + 1, p - start - 1);
+                      r += p - start - 1;
+                      /* U+201D: RIGHT DOUBLE QUOTATION MARK */
+                      memcpy (r, "\xe2\x80\x9d", 3);
+                      r += 3;
+                    }
+                  else
+                    {
+                      /* Leave an empty "" pair unchanged.  */
+                      memcpy (r, "\"\"", 2);
+                      r += 2;
+                    }
+                  start = p + 1;
+                  state = false;
+                }
+            }
+          else
+            {
+              /* Flush the pending plain text and open a "..." run.  */
+              memcpy (r, start, p - start);
+              r += p - start;
+              start = p;
+              state = true;
+            }
+          break;
+
+        case '`':
+          if (state)
+            {
+              /* A '`' seen while a " or ' run is open is ordinary text.  */
+              if (*start == '`')
+                {
+                  /* A second '`' before any closing '\'': emit the earlier
+                     run unchanged and restart the run at this '`'.  R must
+                     be advanced here, otherwise the bytes just copied are
+                     overwritten by the next write and lost.  */
+                  memcpy (r, start, p - start);
+                  r += p - start;
+                  start = p;
+                }
+            }
+          else
+            {
+              /* Flush the pending plain text and open a `...' run.  */
+              memcpy (r, start, p - start);
+              r += p - start;
+              start = p;
+              state = true;
+            }
+          break;
+
+        case '\'':
+          if (state)
+            {
+              /* Convert when the run was opened by '`', or when it was
+                 opened by '\'' and either
+                   - the opener follows a space and this '\'' is the last
+                     byte or is followed by a space, or
+                   - the opener is the first byte of INPUT and this '\''
+                     is followed by a space.  */
+              if (*start == '`'
+                  || (*start == '\''
+                      && (((start > input && *(start - 1) == ' ')
+                           && (i == input_len - 1 || *(p + 1) == ' '))
+                          || (start == input && i < input_len - 1
+                              && *(p + 1) == ' '))))
+                {
+                  /* U+2018: LEFT SINGLE QUOTATION MARK */
+                  memcpy (r, "\xe2\x80\x98", 3);
+                  r += 3;
+                  memcpy (r, start + 1, p - start - 1);
+                  r += p - start - 1;
+                  /* U+2019: RIGHT SINGLE QUOTATION MARK */
+                  memcpy (r, "\xe2\x80\x99", 3);
+                  r += 3;
+                  start = p + 1;
+                }
+              else
+                {
+                  /* Not a convertible pair: emit the run unchanged and keep
+                     this '\'' as the start of the pending text.  */
+                  memcpy (r, start, p - start);
+                  r += p - start;
+                  start = p;
+                }
+              state = false;
+            }
+          /* NOTE(review): this tests the byte before START (the beginning
+             of the pending text), not the byte before the apostrophe at P;
+             confirm that this is the intended condition.  */
+          else if (start == input || *(start - 1) == ' ')
+            {
+              /* Flush the pending plain text and open a '...' run.  */
+              memcpy (r, start, p - start);
+              r += p - start;
+              start = p;
+              state = true;
+            }
+          break;
+
+        default:
+          break;
+        }
+    }
+
+  /* Emit whatever is still pending, including an unterminated run.  */
+  p = &input[i];
+  if (p > start)
+    {
+      memcpy (r, start, p - start);
+      r += p - start;
+    }
+  *r = '\0';
+
+  *output_p = output;
+  *output_len_p = r - output;
+}
diff --git a/gettext-tools/src/filters.h b/gettext-tools/src/filters.h
index 93128b0..1d47fbe 100644
--- a/gettext-tools/src/filters.h
+++ b/gettext-tools/src/filters.h
@@ -29,6 +29,14 @@ extern "C" {
extern void serbian_to_latin (const char *input, size_t input_len,
char **output_p, size_t *output_len_p);
+/* Copy the string INPUT of INPUT_LEN bytes, converting ASCII quotation
+   marks to Unicode quotation marks: paired double quotes become
+   U+201C ... U+201D; a backquote closed by an apostrophe, and certain
+   apostrophe pairs, become U+2018 ... U+2019.  Other bytes are copied
+   unchanged.
+   Store the freshly allocated result in *OUTPUT_P and its length (in bytes)
+   in *OUTPUT_LEN_P.  The result is NUL-terminated; the terminating NUL is
+   not counted in *OUTPUT_LEN_P.
+   Input and output are in UTF-8 encoding. */
+extern void ascii_quote_to_unicode (const char *input, size_t input_len,
+ char **output_p, size_t *output_len_p);
+
#ifdef __cplusplus
}
#endif
diff --git a/gettext-tools/src/msgfilter.c b/gettext-tools/src/msgfilter.c
index b92eef0..0cf76b8 100644
--- a/gettext-tools/src/msgfilter.c
+++ b/gettext-tools/src/msgfilter.c
@@ -349,6 +349,13 @@ There is NO WARRANTY, to the extent permitted by law.\n\
/* Convert the input to UTF-8 first. */
result = iconv_msgdomain_list (result, po_charset_utf8, true,
input_file);
}
+ else if (strcmp (sub_name, "quot") == 0 && sub_argc == 1)
+ {
+ filter = ascii_quote_to_unicode;
+
+ /* Convert the input to UTF-8 first. */
+ result = iconv_msgdomain_list (result, po_charset_utf8, true,
input_file);
+ }
else
{
filter = generic_filter;
--
1.9.0
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [bug-gettext] [PATCH] msgfilter: Add 'quot' filter,
Daiki Ueno <=