[xiph-cvs] cvs commit: vorbis-tools/share utf8.c
Michael Smith
msmith at xiph.org
Thu Oct 18 23:30:17 PDT 2001
msmith 01/10/18 23:30:16
Modified: share utf8.c
Log:
win32 utf8 support from Peter Harris
Revision Changes Path
1.4 +142 -6 vorbis-tools/share/utf8.c
Index: utf8.c
===================================================================
RCS file: /usr/local/cvsroot/vorbis-tools/share/utf8.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- utf8.c 2001/10/02 03:03:42 1.3
+++ utf8.c 2001/10/19 06:30:16 1.4
@@ -28,16 +28,109 @@
#ifdef _WIN32
-#include <stdio.h>
-#include <windows.h>
-int utf8_encode(const char *from, char **to)
-{
/* Thanks to Peter Harris <peter.harris at hummingbird.com> for this win32
* code.
*/
+
+#include <stdio.h>
+#include <windows.h>
- unsigned short *unicode;
+static unsigned char *make_utf8_string(const wchar_t *unicode)
+{
+ int size = 0, index = 0, out_index = 0;
+ unsigned char *out;
+ unsigned short c;
+ /* Encode the 0-terminated wide string `unicode` as UTF-8.
+ * Returns a 0-terminated buffer obtained from malloc(), or NULL if
+ * malloc() fails.
+ * Every input unit is read into an unsigned short and encoded on its own
+ * as 1, 2 or 3 bytes; no branch combines two units into a 4-byte sequence.
+ * NOTE(review): if the input can contain UTF-16 surrogate pairs, each half
+ * is emitted as its own 3-byte sequence - confirm whether callers care.
+ */
+ /* first pass: calculate the size of the target string */
+ c = unicode[index++];
+ while(c) {
+ if(c < 0x0080) { /* up to 7 bits: one byte */
+ size += 1;
+ } else if(c < 0x0800) { /* up to 11 bits: two bytes */
+ size += 2;
+ } else { /* everything else that fits in 16 bits: three bytes */
+ size += 3;
+ }
+ c = unicode[index++];
+ }
+
+ out = malloc(size + 1); /* +1 for the terminating 0 written after the loop */
+ if (out == NULL)
+ return NULL;
+ index = 0;
+ /* second pass: emit the bytes, using the same thresholds as above */
+ c = unicode[index++];
+ while(c)
+ {
+ if(c < 0x080) {
+ out[out_index++] = (unsigned char)c;
+ } else if(c < 0x800) {
+ out[out_index++] = 0xc0 | (c >> 6); /* lead byte 110xxxxx */
+ out[out_index++] = 0x80 | (c & 0x3f); /* continuation 10xxxxxx */
+ } else {
+ out[out_index++] = 0xe0 | (c >> 12); /* lead byte 1110xxxx */
+ out[out_index++] = 0x80 | ((c >> 6) & 0x3f); /* middle six bits */
+ out[out_index++] = 0x80 | (c & 0x3f); /* low six bits */
+ }
+ c = unicode[index++];
+ }
+ out[out_index] = 0x00;
+
+ return out;
+}
+
+static wchar_t *make_unicode_string(const unsigned char *utf8)
+{
+ int size = 0, index = 0, out_index = 0;
+ wchar_t *out;
+ unsigned char c;
+ /* Decode the 0-terminated UTF-8 string `utf8` into wide characters.
+ * Returns a 0-terminated wchar_t buffer obtained from malloc(), or NULL
+ * if malloc() fails.
+ * Only the lead byte decides the sequence length: top bit clear means one
+ * byte, top three bits set means three bytes, anything else is taken as
+ * two bytes. Continuation bytes are masked with 0x3F but never checked.
+ * NOTE(review): a lead byte of 0xF0 or above (the 4-byte form) also
+ * passes the 0xe0 test and is decoded as a 3-byte sequence - confirm such
+ * input is not expected here.
+ * NOTE(review): a multi-byte sequence cut short by the terminating 0
+ * makes both loops step past the terminator - confirm callers only pass
+ * well-formed UTF-8.
+ */
+ /* first pass: calculate the size of the target string */
+ c = utf8[index++];
+ while(c) {
+ if((c & 0x80) == 0) {
+ index += 0; /* single byte: nothing further to skip */
+ } else if((c & 0xe0) == 0xe0) {
+ index += 2; /* skip two continuation bytes */
+ } else {
+ index += 1; /* skip one continuation byte */
+ }
+ size += 1; /* one output element per sequence */
+ c = utf8[index++];
+ }
+
+ out = malloc((size + 1) * sizeof(wchar_t)); /* +1 for the terminating 0 */
+ if (out == NULL)
+ return NULL;
+ index = 0;
+ /* second pass: decode, classifying lead bytes exactly as the first pass did */
+ c = utf8[index++];
+ while(c)
+ {
+ if((c & 0x80) == 0) {
+ out[out_index++] = c;
+ } else if((c & 0xe0) == 0xe0) {
+ out[out_index] = (c & 0x1F) << 12; /* lead byte supplies bits 12 and up */
+ c = utf8[index++];
+ out[out_index] |= (c & 0x3F) << 6; /* first continuation: bits 6..11 */
+ c = utf8[index++];
+ out[out_index++] |= (c & 0x3F); /* second continuation: bits 0..5 */
+ } else {
+ out[out_index] = (c & 0x3F) << 6; /* lead byte supplies bits 6 and up */
+ c = utf8[index++];
+ out[out_index++] |= (c & 0x3F); /* continuation: bits 0..5 */
+ }
+ c = utf8[index++];
+ }
+ out[out_index] = 0;
+
+ return out;
+}
+
+int utf8_encode(const char *from, char **to)
+{
+ wchar_t *unicode;
int wchars, err;
wchars = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
@@ -76,7 +169,50 @@
int utf8_decode(const char *from, char **to)
{
- return -1; /* Dummy stub */
+ wchar_t *unicode;
+ int chars, err;
+ /* Convert the UTF-8 string `from` to the code page reported by
+ * GetConsoleCP(), going through an intermediate wchar_t string.
+ * On success stores a buffer obtained from calloc() in *to and returns 0.
+ * On failure prints a message to stderr and returns -1. *to is left
+ * untouched by the first two failure paths, is NULL after a failed
+ * calloc(), and is freed and reset to NULL if the final conversion fails.
+ * NOTE(review): GetConsoleCP() is the console input code page; confirm
+ * whether the output code page was intended.
+ */
+ /* On NT-based windows systems, we could use MultiByteToWideChar(CP_UTF8), but
+ * MS doesn't actually have a consistent API across win32.
+ */
+ unicode = make_unicode_string(from); /* NOTE(review): `from` is const char *, the parameter is const unsigned char * - confirm the implicit conversion is acceptable */
+ if(unicode == NULL)
+ {
+ fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n");
+ return -1;
+ }
+ /* Sizing call: no output buffer is passed (NULL, 0), only the count is kept. */
+ chars = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,
+ -1, NULL, 0, NULL, NULL);
+
+ if(chars == 0)
+ {
+ fprintf(stderr, "Unicode translation error %d\n", GetLastError()); /* NOTE(review): GetLastError() returns a DWORD but is printed with %d - confirm */
+ free(unicode);
+ return -1;
+ }
+ /* calloc() zero-fills, and one extra byte is reserved beyond `chars`. */
+ *to = calloc(chars + 1, sizeof(unsigned char));
+ if(*to == NULL)
+ {
+ fprintf(stderr, "Out of memory processing string to local charset\n");
+ free(unicode);
+ return -1;
+ }
+ /* Real conversion into *to; the result must match the sizing call. */
+ err = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,
+ -1, *to, chars, NULL, NULL);
+ if(err != chars)
+ {
+ fprintf(stderr, "Unicode translation error %d\n", GetLastError()); /* NOTE(review): same DWORD-versus-%d mismatch as above */
+ free(unicode);
+ free(*to);
+ *to = NULL; /* do not hand the freed pointer back to the caller */
+ return -1;
+ }
+
+ free(unicode);
+ return 0;
}
#else /* End win32. Rest is for real operating systems */
--- >8 ----
List archives: http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body. No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.
More information about the commits mailing list