[xiph-cvs] cvs commit: vorbis-tools/share utf8.c

Michael Smith msmith at xiph.org
Thu Oct 18 23:30:17 PDT 2001



msmith      01/10/18 23:30:16

  Modified:    share    utf8.c
  Log:
  win32 utf8 support from Peter Harris

Revision  Changes    Path
1.4       +142 -6    vorbis-tools/share/utf8.c

Index: utf8.c
===================================================================
RCS file: /usr/local/cvsroot/vorbis-tools/share/utf8.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- utf8.c	2001/10/02 03:03:42	1.3
+++ utf8.c	2001/10/19 06:30:16	1.4
@@ -28,16 +28,109 @@
 
 
 #ifdef _WIN32
-#include <stdio.h>
-#include <windows.h>
 
-int utf8_encode(const char *from, char **to)
-{
         /* Thanks to Peter Harris <peter.harris at hummingbird.com> for this win32
          * code.
          */
+
+#include <stdio.h>
+#include <windows.h>
 
-	unsigned short *unicode;
+static unsigned char *make_utf8_string(const wchar_t *unicode)
+{   /* Encodes the 0-terminated wide string unicode as UTF-8 in a malloc'd, 0-terminated buffer; returns NULL if malloc fails. */
+    int size = 0, index = 0, out_index = 0;
+    unsigned char *out;
+    unsigned short c;   /* current input unit, held as an unsigned short */
+    /* NOTE(review): no surrogate-pair handling is visible here; every nonzero unit becomes at most 3 bytes -- confirm that is intended. */
+    /* first pass: count the output bytes needed (1, 2 or 3 per input unit) */
+    c = unicode[index++];
+    while(c) {
+        if(c < 0x0080) {
+            size += 1;   /* fits in 7 bits: one byte */
+        } else if(c < 0x0800) {
+            size += 2;   /* fits in 11 bits: two bytes */
+        } else {
+            size += 3;   /* everything else: three bytes */
+        }
+        c = unicode[index++];
+    }	
+    /* size does not include the terminator, hence the extra byte */
+    out = malloc(size + 1);
+    if (out == NULL)
+        return NULL;
+    index = 0;   /* rewind the input for the second pass */
+    /* second pass: emit the bytes, using the same thresholds as the count above */
+    c = unicode[index++];
+    while(c)
+    {
+        if(c < 0x080) {
+            out[out_index++] = (unsigned char)c;
+        } else if(c < 0x800) {
+            out[out_index++] = 0xc0 | (c >> 6);   /* 110xxxxx: bits 6 and up */
+            out[out_index++] = 0x80 | (c & 0x3f);   /* 10xxxxxx: low 6 bits */
+        } else {
+            out[out_index++] = 0xe0 | (c >> 12);   /* 1110xxxx: bits 12 and up */
+            out[out_index++] = 0x80 | ((c >> 6) & 0x3f);   /* 10xxxxxx: bits 6..11 */
+            out[out_index++] = 0x80 | (c & 0x3f);   /* 10xxxxxx: low 6 bits */
+        }
+        c = unicode[index++];
+    }
+    out[out_index] = 0x00;   /* terminator goes into the extra byte reserved above */
+    /* out is not freed in this function; it is handed back to the caller */
+    return out;
+}
+
+static wchar_t *make_unicode_string(const unsigned char *utf8)
+{   /* Decodes the 0-terminated byte string utf8 into a malloc'd, 0-terminated wchar_t array; returns NULL if malloc fails. */
+    int size = 0, index = 0, out_index = 0;
+    wchar_t *out;
+    unsigned char c;
+    /* NOTE(review): continuation bytes are skipped/read below without testing for 0, so a truncated sequence may read past the terminator -- confirm inputs are always well formed. */
+    /* first pass: count one output element per lead byte, stepping over its continuation bytes */
+    c = utf8[index++];
+    while(c) {
+        if((c & 0x80) == 0) {   /* high bit clear: single-byte character */
+            index += 0;
+        } else if((c & 0xe0) == 0xe0) {   /* top three bits set: treated as a 3-byte lead. NOTE(review): this also matches 0xF0-0xFF -- confirm */
+            index += 2;   /* step over two continuation bytes */
+        } else {   /* every other byte value is treated as a 2-byte lead */
+            index += 1;   /* step over one continuation byte */
+        }
+        size += 1;
+        c = utf8[index++];
+    }	
+    /* one extra element is allocated for the terminating 0 */
+    out = malloc((size + 1) * sizeof(wchar_t));
+    if (out == NULL)
+        return NULL;
+    index = 0;   /* rewind the input for the second pass */
+    /* second pass: decode, classifying lead bytes exactly as in the count above */
+    c = utf8[index++];
+    while(c)
+    {
+        if((c & 0x80) == 0) {
+            out[out_index++] = c;
+        } else if((c & 0xe0) == 0xe0) {
+            out[out_index] = (c & 0x1F) << 12;   /* NOTE(review): 0x1F keeps 5 bits; a 1110xxxx lead carries 4 payload bits -- confirm mask */
+	        c = utf8[index++];
+            out[out_index] |= (c & 0x3F) << 6;   /* middle 6 bits from the first continuation byte */
+	        c = utf8[index++];
+            out[out_index++] |= (c & 0x3F);   /* low 6 bits from the second continuation byte */
+        } else {
+            out[out_index] = (c & 0x3F) << 6;   /* NOTE(review): 0x3F keeps 6 bits; a 110xxxxx lead carries 5 payload bits -- confirm mask */
+	        c = utf8[index++];
+            out[out_index++] |= (c & 0x3F);   /* low 6 bits from the continuation byte */
+        }
+        c = utf8[index++];
+    }
+    out[out_index] = 0;   /* terminator goes into the extra element reserved above */
+    /* out is not freed in this function; it is handed back to the caller */
+    return out;
+}
+
+int utf8_encode(const char *from, char **to)
+{
+	wchar_t *unicode;
         int wchars, err;
 
         wchars = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, from,
@@ -76,7 +169,50 @@
 
 int utf8_decode(const char *from, char **to)
 {
-	return -1;  /* Dummy stub */
+    wchar_t *unicode;   /* intermediate wide-character copy of from */
+    int chars, err;
+    /* Converts the UTF-8 string from to the code page reported by GetConsoleCP(); on success stores a calloc'd string in *to and returns 0, on any failure prints to stderr and returns -1. */
+    /* On NT-based Windows systems we could use MultiByteToWideChar(CP_UTF8), but
+     * MS doesn't actually have a consistent API across win32, so the UTF-8
+     * parsing is done by make_unicode_string() instead. */
+    unicode = make_unicode_string(from);   /* NOTE(review): from is const char * but the parameter is const unsigned char * -- confirm no cast is needed */
+    if(unicode == NULL) 
+    {
+        fprintf(stderr, "Out of memory processing string from UTF8 to UNICODE16\n");
+        return -1;
+    }
+    /* sizing call: no output buffer is passed, and the result is used below as the buffer size */
+    chars = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode,
+            -1, NULL, 0, NULL, NULL);
+    /* NOTE(review): GetLastError() is printed with %d here and further down -- confirm that matches its return type. */
+    if(chars == 0)
+    {
+        fprintf(stderr, "Unicode translation error %d\n", GetLastError());
+        free(unicode);
+        return -1;
+    }
+    /* calloc zero-fills the buffer, and one element more than chars is allocated */
+    *to = calloc(chars + 1, sizeof(unsigned char));
+    if(*to == NULL) 
+    {
+        fprintf(stderr, "Out of memory processing string to local charset\n");
+        free(unicode);
+        return -1;
+    }
+    /* conversion call: a result different from the size obtained above is treated as failure */
+    err = WideCharToMultiByte(GetConsoleCP(), WC_COMPOSITECHECK, unicode, 
+            -1, *to, chars, NULL, NULL);
+    if(err != chars)
+    {
+        fprintf(stderr, "Unicode translation error %d\n", GetLastError());
+        free(unicode);
+        free(*to);
+        *to = NULL;   /* do not leave the caller holding a freed pointer */
+        return -1;
+    }
+
+    free(unicode);   /* the wide copy is only needed during conversion */
+    return 0;
 }
 
 #else /* End win32. Rest is for real operating systems */

--- >8 ----
List archives:  http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body.  No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.



More information about the commits mailing list