[xiph-commits] r16339 - in trunk/ffmpeg2theora: . frontend frontend/theoraenc src

j at svn.xiph.org j at svn.xiph.org
Sun Jul 26 10:46:00 PDT 2009


Author: j
Date: 2009-07-26 10:46:00 -0700 (Sun, 26 Jul 2009)
New Revision: 16339

Modified:
   trunk/ffmpeg2theora/SConstruct
   trunk/ffmpeg2theora/frontend/Simple Theora Encoder.py
   trunk/ffmpeg2theora/frontend/theoraenc/addSubtitlesDialog.py
   trunk/ffmpeg2theora/frontend/theoraenc/addVideoDialog.py
   trunk/ffmpeg2theora/frontend/theoraenc/theoraenc.py
   trunk/ffmpeg2theora/src/ffmpeg2theora.c
   trunk/ffmpeg2theora/src/ffmpeg2theora.h
   trunk/ffmpeg2theora/src/subtitles.c
   trunk/ffmpeg2theora/src/subtitles.h
   trunk/ffmpeg2theora/subtitles.txt
Log:
if available, use iconv for character conversion, patch from ogg.k

Modified: trunk/ffmpeg2theora/SConstruct
===================================================================
--- trunk/ffmpeg2theora/SConstruct	2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/SConstruct	2009-07-26 17:46:00 UTC (rev 16339)
@@ -149,6 +149,12 @@
         You can also run ./get_libkate.sh (for more information see INSTALL)
         or update PKG_CONFIG_PATH to point to libkate's source folder
     """
+
+if conf.CheckCHeader('iconv.h'):
+    env.Append(CCFLAGS=[
+      '-DHAVE_ICONV'
+    ])
+
 env = conf.Finish()
 
 # ffmpeg2theora 

Modified: trunk/ffmpeg2theora/frontend/Simple Theora Encoder.py
===================================================================
--- trunk/ffmpeg2theora/frontend/Simple Theora Encoder.py	2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/frontend/Simple Theora Encoder.py	2009-07-26 17:46:00 UTC (rev 16339)
@@ -216,7 +216,7 @@
     self.removeItem.Enable()
   
   def OnClickAdd(self, event):
-    result = addVideoDialog(self, theoraenc.hasKate)
+    result = addVideoDialog(self, theoraenc.hasKate, theoraenc.hasIconv)
     time.sleep(0.5)
     if result['ok']:
       self.addItemToQueue(result['videoFile'], result)

Modified: trunk/ffmpeg2theora/frontend/theoraenc/addSubtitlesDialog.py
===================================================================
--- trunk/ffmpeg2theora/frontend/theoraenc/addSubtitlesDialog.py	2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/frontend/theoraenc/addSubtitlesDialog.py	2009-07-26 17:46:00 UTC (rev 16339)
@@ -4,6 +4,7 @@
 import os
 from os.path import basename
 import time
+import subprocess
 
 import wx
 #import wx.lib.langlistctrl
@@ -23,7 +24,7 @@
 class SubtitlesProperties(wx.Dialog):
   def __init__(
           self, parent, ID, title,
-          language, category, encoding, file,
+          language, category, encoding, file, hasIconv,
           size=wx.DefaultSize, pos=wx.DefaultPosition, 
           style=wx.DEFAULT_DIALOG_STYLE,
           ):
@@ -31,6 +32,8 @@
     pre.Create(parent, ID, title, pos, size, style)
     self.PostCreate(pre)
 
+    self.hasIconv = hasIconv
+
     # defaults
     if language == '':
       language = 'en'
@@ -64,8 +67,10 @@
     self.addProperty(mainBox, 'Category', self.categoryWidget, self.OnCategoryHelp)
 
     # encoding
-    encodings = ['UTF-8', 'ISO-8859-1']
-    self.encodingWidget = wx.Choice(self, -1, (80,-1), choices=encodings, name=encoding)
+    if hasIconv:
+      self.encodingWidget = wx.ComboBox(self, -1, encoding, (80,-1), wx.DefaultSize, self.BuildEncodingsList(self.hasIconv), wx.CB_SIMPLE)
+    else:
+      self.encodingWidget = wx.Choice(self, -1, (80,-1), choices=self.BuildEncodingsList(self.hasIconv))
     self.addProperty(mainBox, 'Encoding', self.encodingWidget, self.OnEncodingHelp)
 
     #Buttons
@@ -134,11 +139,15 @@
       'If the language tag needed is not available in the list, a custom one may be entered.\n')
 
   def OnEncodingHelp(self, event):
+    iconv_blurb = ''
+    if self.hasIconv:
+      iconv_blurb = 'ffmpeg2theora was built with iconv support, so can also convert any encoding that is supported by iconv.\n'
     self.DisplayHelp(
       'Kate streams are encoded in UTF-8 (a Unicode character encoding that allows to represent '+
       'pretty much any existing script.\n'+
       'If the input file is not already encoded in UTF-8, it will need converting to UTF-8 first.\n'+
       'ffmpeg2theora can convert ISO-8859-1 (also known as latin1) encoding directly.\n'+
+      iconv_blurb+
       'Files in other encodings will have to be converted manually in order to be used. See the '+
       'subtitles.txt documentation for more information on how to manually convert files.\n')
 
@@ -175,16 +184,13 @@
     # add in whatever's known from 'locale -a' - this works fine if locale isn't found,
     # but i'm not sure what that'll do if we get another program named locale that spews
     # random stuff to stdout :)
-    f = os.popen('locale -a')
-    line = f.readline()
-    while line:
+    p = subprocess.Popen(['locale', '-a'], shell=False, stdout=subprocess.PIPE, close_fds=True)
+    data, err = p.communicate()
+
+    for line in data.strip().split('\n'):
       line = self.ExtractLanguage(line)
-      if line != '' and line != 'C' and line != 'POSIX':
+      if line != '' and line != 'C' and line != 'POSIX' and line not in languages:
         languages.append(line)
-      line = f.readline()
-    f.close()
-    #oneliner from german python forum => unique list
-    languages = [languages[i] for i in xrange(len(languages)) if languages[i] not in languages[:i]]
     languages.sort()
     return languages
 
@@ -197,8 +203,23 @@
     line = line.split('\r')[0] # Mac or Windows
     return line
 
-def addSubtitlesPropertiesDialog(parent, language, category, encoding, file):
-  dlg = SubtitlesProperties(parent, -1, "Add subtitles", language, category, encoding, file, size=(490, 560), style=wx.DEFAULT_DIALOG_STYLE)
+  def BuildEncodingsList(self, hasIconv):
+    # start with a known basic set, that ffmpeg2theora can handle without iconv
+    encodings = ['UTF-8', 'ISO-8859-1']
+
+    # this creates a *huge* spammy list with my version of iconv...
+    if hasIconv:
+      # add in whatever iconv knows about
+      p = subprocess.Popen(['iconv', '-l'], shell=False, stdout=subprocess.PIPE, close_fds=True)
+      data, stderr = p.communicate()
+      for line in data.strip().split('\n'):
+        line = line.split('/')[0] # stop at a /
+        if not line in encodings:
+          encodings.append(line)
+    return encodings
+
+def addSubtitlesPropertiesDialog(parent, language, category, encoding, file, hasIconv):
+  dlg = SubtitlesProperties(parent, -1, "Add subtitles", language, category, encoding, file, hasIconv, size=(490, 560), style=wx.DEFAULT_DIALOG_STYLE)
   dlg.CenterOnScreen()
   val = dlg.ShowModal()
   result = dict()
@@ -211,7 +232,10 @@
 #      result['subtitlesLanguage'] = dlg.languageWidget.GetValue()
     result['subtitlesLanguage'] = dlg.languageWidget.GetValue()
     result['subtitlesCategory'] = dlg.categoryWidget.GetValue()
-    result['subtitlesEncoding'] = dlg.encodingWidget.GetStringSelection()
+    if hasIconv:
+      result['subtitlesEncoding'] = dlg.encodingWidget.GetValue()
+    else:
+      result['subtitlesEncoding'] = dlg.encodingWidget.GetStringSelection()
     print result
   else:
     result['ok'] = False

Modified: trunk/ffmpeg2theora/frontend/theoraenc/addVideoDialog.py
===================================================================
--- trunk/ffmpeg2theora/frontend/theoraenc/addVideoDialog.py	2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/frontend/theoraenc/addVideoDialog.py	2009-07-26 17:46:00 UTC (rev 16339)
@@ -12,11 +12,14 @@
 
 class AddVideoDialog(wx.Dialog):
   def __init__(
-          self, parent, ID, title, hasKate, size=wx.DefaultSize, pos=wx.DefaultPosition, 
+          self, parent, ID, title, hasKate, hasIconv,
+          size=wx.DefaultSize, pos=wx.DefaultPosition, 
           style=wx.DEFAULT_DIALOG_STYLE,
           ):
     
     self.videoFile = ''
+    self.hasKate = hasKate
+    self.hasIconv = hasIconv
     
     pre = wx.PreDialog()
     #pre.SetExtraStyle(wx.DIALOG_EX_CONTEXTHELP)
@@ -360,7 +363,7 @@
     category = self.subtitles.GetItem(idx, 1).GetText()
     encoding = self.subtitles.GetItem(idx, 2).GetText()
     file = self.subtitles.GetItem(idx, 3).GetText()
-    result = addSubtitlesPropertiesDialog(self, language, category, encoding, file)
+    result = addSubtitlesPropertiesDialog(self, language, category, encoding, file, self.hasIconv)
     time.sleep(0.5) # why ? race condition ?
     if result['ok']:
       self.subtitles.SetStringItem(idx, 0, result['subtitlesLanguage'])
@@ -372,8 +375,8 @@
       return False
 
 
-def addVideoDialog(parent, hasKate):
-  dlg = AddVideoDialog(parent, -1, "Add Video", hasKate, size=(490, 560), style=wx.DEFAULT_DIALOG_STYLE)
+def addVideoDialog(parent, hasKate, hasIconv):
+  dlg = AddVideoDialog(parent, -1, "Add Video", hasKate, hasIconv, size=(490, 560), style=wx.DEFAULT_DIALOG_STYLE)
   dlg.CenterOnScreen()
   val = dlg.ShowModal()
   result = dict()

Modified: trunk/ffmpeg2theora/frontend/theoraenc/theoraenc.py
===================================================================
--- trunk/ffmpeg2theora/frontend/theoraenc/theoraenc.py	2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/frontend/theoraenc/theoraenc.py	2009-07-26 17:46:00 UTC (rev 16339)
@@ -31,16 +31,20 @@
 
 def probe_kate(ffmpeg2theora):
   hasKate = False
-  cmd = ffmpeg2theora + ' --help'
-  f = os.popen(cmd)
-  line = f.readline()
-  while line:
-    if line.find('Subtitles options:') >= 0:
-      hasKate = True
-    line = f.readline()
-  f.close()
+  p = subprocess.Popen([ffmpeg2theora, '--help'], shell=False, stdout=subprocess.PIPE, close_fds=True)
+  data, err = p.communicate()
+  if 'Subtitles options:' in data:
+    hasKate = True
   return hasKate
 
+def probe_iconv(ffmpeg2theora):
+  hasIconv = False
+  p = subprocess.Popen([ffmpeg2theora, '--help'], shell=False, stdout=subprocess.PIPE, close_fds=True)
+  data, err = p.communicate()
+  if 'supported are all encodings supported by iconv' in data:
+      hasIconv = True
+  return hasIconv
+
 def timestr(seconds):
   hours   = int(seconds/3600)
   minutes = int((seconds-( hours*3600 ))/60)
@@ -175,4 +179,5 @@
 
 ffmpeg2theora = probe_ffmpeg2theora()
 hasKate = probe_kate(ffmpeg2theora)
+hasIconv = probe_iconv(ffmpeg2theora)
 

Modified: trunk/ffmpeg2theora/src/ffmpeg2theora.c
===================================================================
--- trunk/ffmpeg2theora/src/ffmpeg2theora.c	2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/src/ffmpeg2theora.c	2009-07-26 17:46:00 UTC (rev 16339)
@@ -1637,7 +1637,11 @@
         "Subtitles options:\n"
         "      --subtitles file                 use subtitles from the given file (SubRip (.srt) format)\n"
         "      --subtitles-encoding encoding    set encoding of the subtitles file\n"
+#ifdef HAVE_ICONV
+        "             supported are all encodings supported by iconv (see iconv help for list)\n"
+#else
         "             supported are " SUPPORTED_ENCODINGS "\n"
+#endif
         "      --subtitles-language language    set subtitles language (de, en_GB, etc)\n"
         "      --subtitles-category category    set subtitles category (default \"subtitles\")\n"
         "      --subtitles-ignore-non-utf8      ignores any non UTF-8 sequence in UTF-8 text\n"
@@ -1900,11 +1904,12 @@
                             info.with_kate=1;
                             break;
                         case SUBTITLES_ENCODING_FLAG:
-                            if (!strcasecmp(optarg,"utf-8")) set_subtitles_encoding(convert,ENC_UTF8);
-                            else if (!strcasecmp(optarg,"utf8")) set_subtitles_encoding(convert,ENC_UTF8);
-                            else if (!strcasecmp(optarg,"iso-8859-1")) set_subtitles_encoding(convert,ENC_ISO_8859_1);
-                            else if (!strcasecmp(optarg,"latin1")) set_subtitles_encoding(convert,ENC_ISO_8859_1);
-                            else report_unknown_subtitle_encoding(optarg, info.frontend);
+                            if (is_valid_encoding(optarg)) {
+                              set_subtitles_encoding(convert,optarg);
+                            }
+                            else {
+                              report_unknown_subtitle_encoding(optarg, info.frontend);
+                            }
                             flag = -1;
                             break;
                         case SUBTITLES_IGNORE_NON_UTF8_FLAG:

Modified: trunk/ffmpeg2theora/src/ffmpeg2theora.h
===================================================================
--- trunk/ffmpeg2theora/src/ffmpeg2theora.h	2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/src/ffmpeg2theora.h	2009-07-26 17:46:00 UTC (rev 16339)
@@ -3,13 +3,6 @@
 
 #include "subtitles.h"
 
-typedef enum {
-    ENC_UNSET,
-    ENC_UTF8,
-    ENC_ISO_8859_1,
-} F2T_ENCODING;
-
-
 typedef struct ff2theora_subtitle{
     char *text;
     size_t len;
@@ -28,7 +21,7 @@
 
     /* this block valid for all subtitle sources */
     size_t subtitles_count; /* total subtitles output so far */
-    F2T_ENCODING subtitles_encoding;
+    char *subtitles_encoding;
     char subtitles_language[16];
     char subtitles_category[16];
 } ff2theora_kate_stream;

Modified: trunk/ffmpeg2theora/src/subtitles.c
===================================================================
--- trunk/ffmpeg2theora/src/subtitles.c	2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/src/subtitles.c	2009-07-26 17:46:00 UTC (rev 16339)
@@ -27,6 +27,9 @@
 #include <math.h>
 #include <errno.h>
 #include <stdarg.h>
+#ifdef HAVE_ICONV
+#include "iconv.h"
+#endif
 
 #include "libavformat/avformat.h"
 
@@ -59,6 +62,27 @@
 }
 
 /**
+  * checks whether we support the encoding
+  */
+int is_valid_encoding(const char *encoding)
+{
+#ifdef HAVE_ICONV
+  iconv_t cd = iconv_open("UTF-8", encoding);
+  if (cd != (iconv_t)-1) {
+    iconv_close(cd);
+    return 1;
+  }
+  return 0;
+#else
+  if (!strcasecmp(encoding, "UTF-8")) return 1;
+  if (!strcasecmp(encoding, "UTF8")) return 1;
+  if (!strcasecmp(encoding, "iso-8859-1")) return 1;
+  if (!strcasecmp(encoding, "latin1")) return 1;
+  return 0;
+#endif
+}
+
+/**
   * adds a new kate stream structure
   */
 void add_kate_stream(ff2theora this){
@@ -70,7 +94,7 @@
     ks->subtitles = 0;
     ks->stream_index = -1;
     ks->subtitles_count = 0; /* denotes not set yet */
-    ks->subtitles_encoding = ENC_UNSET;
+    ks->subtitles_encoding = NULL;
     strcpy(ks->subtitles_language, "");
     strcpy(ks->subtitles_category, "");
 }
@@ -136,13 +160,13 @@
 /**
   * sets the encoding of the next subtitles file
   */
-void set_subtitles_encoding(ff2theora this,F2T_ENCODING encoding){
+void set_subtitles_encoding(ff2theora this,const char *encoding){
   size_t n;
   for (n=0; n<this->n_kate_streams;++n) {
-    if (this->kate_streams[n].stream_index==-1 && this->kate_streams[n].subtitles_encoding==ENC_UNSET) break;
+    if (this->kate_streams[n].stream_index==-1 && !this->kate_streams[n].subtitles_encoding) break;
   }
   if (n==this->n_kate_streams) add_kate_stream(this);
-  this->kate_streams[n].subtitles_encoding = encoding;
+  this->kate_streams[n].subtitles_encoding = strdup(encoding);
 }
 
 
@@ -175,23 +199,27 @@
 }
 
 /* very simple implementation when no iconv */
-static char *convert_subtitle_to_utf8(F2T_ENCODING encoding,char *text,int ignore_non_utf8, FILE *frontend)
+static char *convert_subtitle_to_utf8(const char *encoding,char *text,int ignore_non_utf8, FILE *frontend)
 {
   size_t nbytes;
   char *ptr;
   char *newtext = NULL;
   int errors=0;
+#ifdef HAVE_ICONV
+  iconv_t cd;
+#endif
 
   if (!text) return NULL;
 
-  switch (encoding) {
-    case ENC_UNSET:
-      /* we don't know what encoding this is, assume UTF-8 and we'll yell if it ain't */
-      /* fall through */
-    case ENC_UTF8:
+  if (encoding == NULL) {
+     /* we don't know what encoding this is, assume UTF-8 and we'll yell if it ain't */
+     encoding = "UTF-8";
+  }
+
+  if (!strcasecmp(encoding, "UTF-8") || !strcasecmp(encoding, "UTF8")) {
       /* nothing to do, already in UTF-8 */
       if (ignore_non_utf8) {
-        /* actually, give the user the option of just ignoring non UTF8 characters */
+        /* actually, give the user the option of just ignoring non UTF-8 characters */
         char *wptr;
         size_t wlen0;
 
@@ -231,8 +259,40 @@
       else {
         newtext = strdup(text);
       }
-      break;
-    case ENC_ISO_8859_1:
+
+      return newtext;
+  }
+
+  /* now, we can either use iconv, or convert ISO-8859-1 by hand (so to speak) */
+#ifdef HAVE_ICONV
+  /* create a conversion for each string, it avoids having to pass around this descriptor,
+     and the speed hit will be irrelevant anyway compared to video decoding/encoding.
+     that's fine, because we don't need to keep state across subtitles. */
+  cd = iconv_open("UTF-8", encoding);
+  if (cd != (iconv_t)-1) {
+    /* iconv doesn't seem to have a mode to do a dummy convert to just return the number
+       of bytes needed, so we just allocate 6 times the number of bytes in the string,
+       which should be the max we need for UTF-8 */
+    size_t insz=strlen(text)+1;
+    size_t outsz = insz*6;
+    char *inptr = text, *outptr;
+    newtext = (char*)malloc(outsz);
+    if (!newtext) {
+      warn(frontend, NULL, 0, "Memory allocation failed - cannot convert text\n");
+      iconv_close(cd);
+      return NULL;
+    }
+    outptr=newtext;
+    if (iconv(cd, &inptr, &insz, &outptr, &outsz) < 0) {
+      warn(frontend, NULL, 0, "Failed to convert text to UTF-8\n");
+      free(newtext);
+      newtext = NULL;
+    }
+    iconv_close(cd);
+  }
+
+#else
+  if (!strcasecmp(encoding, "iso-8859-1") || !strcasecmp(encoding, "latin1")) {
       /* simple, characters above 0x7f are broken in two,
          and code points map to the iso-8859-1 8 bit codes */
       nbytes=0;
@@ -256,11 +316,11 @@
         }
       }
       newtext[nbytes++]=0;
-      break;
-    default:
+  }
+#endif
+  else {
       warn(frontend, NULL, 0, "encoding %d not handled in conversion!", encoding);
       newtext = strdup("");
-      break;
   }
   return newtext;
 }
@@ -357,7 +417,7 @@
             /* we have all the lines for that subtitle, remove the last \n */
             remove_last_newline(text);
 
-            /* we want all text to be UTF8 */
+            /* we want all text to be UTF-8 */
             utf8=convert_subtitle_to_utf8(this->subtitles_encoding,text,ignore_non_utf8, frontend);
             if (!utf8) {
               warn(frontend, this->filename, line, "Failed to get UTF-8 text");
@@ -479,6 +539,7 @@
         ff2theora_kate_stream *ks=this->kate_streams+i;
         for (n=0; n<ks->num_subtitles; ++n) free(ks->subtitles[n].text);
         free(ks->subtitles);
+        free(ks->subtitles_encoding);
     }
     free(this->kate_streams);
 }

Modified: trunk/ffmpeg2theora/src/subtitles.h
===================================================================
--- trunk/ffmpeg2theora/src/subtitles.h	2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/src/subtitles.h	2009-07-26 17:46:00 UTC (rev 16339)
@@ -15,6 +15,7 @@
 
 #define SUPPORTED_ENCODINGS "utf-8, utf8, iso-8859-1, latin1"
 
+extern int is_valid_encoding(const char *encoding);
 extern void add_kate_stream(ff2theora this);
 extern int load_subtitles(ff2theora_kate_stream *this, int ignore_non_utf8, FILE *frontend);
 extern void free_subtitles(ff2theora this);
@@ -24,7 +25,7 @@
 extern void set_subtitles_file(ff2theora this,const char *filename);
 extern void set_subtitles_language(ff2theora this,const char *language);
 extern void set_subtitles_category(ff2theora this,const char *category);
-extern void set_subtitles_encoding(ff2theora this,F2T_ENCODING encoding);
+extern void set_subtitles_encoding(ff2theora this,const char *encoding);
 extern void report_unknown_subtitle_encoding(const char *name, FILE *frontend);
 
 #endif

Modified: trunk/ffmpeg2theora/subtitles.txt
===================================================================
--- trunk/ffmpeg2theora/subtitles.txt	2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/subtitles.txt	2009-07-26 17:46:00 UTC (rev 16339)
@@ -14,6 +14,8 @@
 Kate streams. Those SubRip files must be encoded in UTF-8 (7 bit ASCII
 is a subset of UTF-8 so is valid input as well). See below for more
 information on converting SubRip files with other encodings to UTF-8.
+ffmpeg2theora can convert files to UTF-8 transparently if built with
+a C library that supports iconv.
 
 Subtitles support requires libkate, available from:
 http://code.google.com/p/libkate
@@ -79,6 +81,8 @@
 
  * Converting non-UTF-8 files to UTF-8
 
+If ffmpeg2theora wasn't built with iconv support, only UTF-8 and latin1
+input text is supported.
 If you have SubRip files in another format than UTF-8, you can use the
 iconv or recode programs to convert them to UTF-8 so ffmpeg2theora can
 read them.



More information about the commits mailing list