[xiph-commits] r16339 - in trunk/ffmpeg2theora: . frontend frontend/theoraenc src
j at svn.xiph.org
j at svn.xiph.org
Sun Jul 26 10:46:00 PDT 2009
Author: j
Date: 2009-07-26 10:46:00 -0700 (Sun, 26 Jul 2009)
New Revision: 16339
Modified:
trunk/ffmpeg2theora/SConstruct
trunk/ffmpeg2theora/frontend/Simple Theora Encoder.py
trunk/ffmpeg2theora/frontend/theoraenc/addSubtitlesDialog.py
trunk/ffmpeg2theora/frontend/theoraenc/addVideoDialog.py
trunk/ffmpeg2theora/frontend/theoraenc/theoraenc.py
trunk/ffmpeg2theora/src/ffmpeg2theora.c
trunk/ffmpeg2theora/src/ffmpeg2theora.h
trunk/ffmpeg2theora/src/subtitles.c
trunk/ffmpeg2theora/src/subtitles.h
trunk/ffmpeg2theora/subtitles.txt
Log:
if available, use iconv for character conversion, patch from ogg.k
Modified: trunk/ffmpeg2theora/SConstruct
===================================================================
--- trunk/ffmpeg2theora/SConstruct 2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/SConstruct 2009-07-26 17:46:00 UTC (rev 16339)
@@ -149,6 +149,12 @@
You can also run ./get_libkate.sh (for more information see INSTALL)
or update PKG_CONFIG_PATH to point to libkate's source folder
"""
+
+if conf.CheckCHeader('iconv.h'):
+ env.Append(CCFLAGS=[
+ '-DHAVE_ICONV'
+ ])
+
env = conf.Finish()
# ffmpeg2theora
Modified: trunk/ffmpeg2theora/frontend/Simple Theora Encoder.py
===================================================================
--- trunk/ffmpeg2theora/frontend/Simple Theora Encoder.py 2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/frontend/Simple Theora Encoder.py 2009-07-26 17:46:00 UTC (rev 16339)
@@ -216,7 +216,7 @@
self.removeItem.Enable()
def OnClickAdd(self, event):
- result = addVideoDialog(self, theoraenc.hasKate)
+ result = addVideoDialog(self, theoraenc.hasKate, theoraenc.hasIconv)
time.sleep(0.5)
if result['ok']:
self.addItemToQueue(result['videoFile'], result)
Modified: trunk/ffmpeg2theora/frontend/theoraenc/addSubtitlesDialog.py
===================================================================
--- trunk/ffmpeg2theora/frontend/theoraenc/addSubtitlesDialog.py 2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/frontend/theoraenc/addSubtitlesDialog.py 2009-07-26 17:46:00 UTC (rev 16339)
@@ -4,6 +4,7 @@
import os
from os.path import basename
import time
+import subprocess
import wx
#import wx.lib.langlistctrl
@@ -23,7 +24,7 @@
class SubtitlesProperties(wx.Dialog):
def __init__(
self, parent, ID, title,
- language, category, encoding, file,
+ language, category, encoding, file, hasIconv,
size=wx.DefaultSize, pos=wx.DefaultPosition,
style=wx.DEFAULT_DIALOG_STYLE,
):
@@ -31,6 +32,8 @@
pre.Create(parent, ID, title, pos, size, style)
self.PostCreate(pre)
+ self.hasIconv = hasIconv
+
# defaults
if language == '':
language = 'en'
@@ -64,8 +67,10 @@
self.addProperty(mainBox, 'Category', self.categoryWidget, self.OnCategoryHelp)
# encoding
- encodings = ['UTF-8', 'ISO-8859-1']
- self.encodingWidget = wx.Choice(self, -1, (80,-1), choices=encodings, name=encoding)
+ if hasIconv:
+ self.encodingWidget = wx.ComboBox(self, -1, encoding, (80,-1), wx.DefaultSize, self.BuildEncodingsList(self.hasIconv), wx.CB_SIMPLE)
+ else:
+ self.encodingWidget = wx.Choice(self, -1, (80,-1), choices=self.BuildEncodingsList(self.hasIconv))
self.addProperty(mainBox, 'Encoding', self.encodingWidget, self.OnEncodingHelp)
#Buttons
@@ -134,11 +139,15 @@
'If the language tag needed is not available in the list, a custom one may be entered.\n')
def OnEncodingHelp(self, event):
+ iconv_blurb = ''
+ if self.hasIconv:
+ iconv_blurb = 'ffmpeg2theora was built with iconv support, so can also convert any encoding that is supported by iconv.\n'
self.DisplayHelp(
'Kate streams are encoded in UTF-8 (a Unicode character encoding that allows to represent '+
'pretty much any existing script.\n'+
'If the input file is not already encoded in UTF-8, it will need converting to UTF-8 first.\n'+
'ffmpeg2theora can convert ISO-8859-1 (also known as latin1) encoding directly.\n'+
+ iconv_blurb+
'Files in other encodings will have to be converted manually in order to be used. See the '+
'subtitles.txt documentation for more information on how to manually convert files.\n')
@@ -175,16 +184,13 @@
# add in whatever's known from 'locale -a' - this works fine if locale isn't found,
# but i'm not sure what that'll do if we get another program named locale that spews
# random stuff to stdout :)
- f = os.popen('locale -a')
- line = f.readline()
- while line:
+ p = subprocess.Popen(['locale', '-a'], shell=False, stdout=subprocess.PIPE, close_fds=True)
+ data, err = p.communicate()
+
+ for line in data.strip().split('\n'):
line = self.ExtractLanguage(line)
- if line != '' and line != 'C' and line != 'POSIX':
+ if line != '' and line != 'C' and line != 'POSIX' and line not in languages:
languages.append(line)
- line = f.readline()
- f.close()
- #oneliner from german python forum => unique list
- languages = [languages[i] for i in xrange(len(languages)) if languages[i] not in languages[:i]]
languages.sort()
return languages
@@ -197,8 +203,23 @@
line = line.split('\r')[0] # Mac or Windows
return line
-def addSubtitlesPropertiesDialog(parent, language, category, encoding, file):
- dlg = SubtitlesProperties(parent, -1, "Add subtitles", language, category, encoding, file, size=(490, 560), style=wx.DEFAULT_DIALOG_STYLE)
+ def BuildEncodingsList(self, hasIconv):
+ # start with a known basic set, that ffmpeg2theora can handle without iconv
+ encodings = ['UTF-8', 'ISO-8859-1']
+
+ # this creates a *huge* spammy list with my version of iconv...
+ if hasIconv:
+ # add in whatever iconv knows about
+ p = subprocess.Popen(['iconv', '-l'], shell=False, stdout=subprocess.PIPE, close_fds=True)
+ data, stderr = p.communicate()
+ for line in data.strip().split('\n'):
+ line = line.split('/')[0] # stop at a /
+ if not line in encodings:
+ encodings.append(line)
+ return encodings
+
+def addSubtitlesPropertiesDialog(parent, language, category, encoding, file, hasIconv):
+ dlg = SubtitlesProperties(parent, -1, "Add subtitles", language, category, encoding, file, hasIconv, size=(490, 560), style=wx.DEFAULT_DIALOG_STYLE)
dlg.CenterOnScreen()
val = dlg.ShowModal()
result = dict()
@@ -211,7 +232,10 @@
# result['subtitlesLanguage'] = dlg.languageWidget.GetValue()
result['subtitlesLanguage'] = dlg.languageWidget.GetValue()
result['subtitlesCategory'] = dlg.categoryWidget.GetValue()
- result['subtitlesEncoding'] = dlg.encodingWidget.GetStringSelection()
+ if hasIconv:
+ result['subtitlesEncoding'] = dlg.encodingWidget.GetValue()
+ else:
+ result['subtitlesEncoding'] = dlg.encodingWidget.GetStringSelection()
print result
else:
result['ok'] = False
Modified: trunk/ffmpeg2theora/frontend/theoraenc/addVideoDialog.py
===================================================================
--- trunk/ffmpeg2theora/frontend/theoraenc/addVideoDialog.py 2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/frontend/theoraenc/addVideoDialog.py 2009-07-26 17:46:00 UTC (rev 16339)
@@ -12,11 +12,14 @@
class AddVideoDialog(wx.Dialog):
def __init__(
- self, parent, ID, title, hasKate, size=wx.DefaultSize, pos=wx.DefaultPosition,
+ self, parent, ID, title, hasKate, hasIconv,
+ size=wx.DefaultSize, pos=wx.DefaultPosition,
style=wx.DEFAULT_DIALOG_STYLE,
):
self.videoFile = ''
+ self.hasKate = hasKate
+ self.hasIconv = hasIconv
pre = wx.PreDialog()
#pre.SetExtraStyle(wx.DIALOG_EX_CONTEXTHELP)
@@ -360,7 +363,7 @@
category = self.subtitles.GetItem(idx, 1).GetText()
encoding = self.subtitles.GetItem(idx, 2).GetText()
file = self.subtitles.GetItem(idx, 3).GetText()
- result = addSubtitlesPropertiesDialog(self, language, category, encoding, file)
+ result = addSubtitlesPropertiesDialog(self, language, category, encoding, file, self.hasIconv)
time.sleep(0.5) # why ? race condition ?
if result['ok']:
self.subtitles.SetStringItem(idx, 0, result['subtitlesLanguage'])
@@ -372,8 +375,8 @@
return False
-def addVideoDialog(parent, hasKate):
- dlg = AddVideoDialog(parent, -1, "Add Video", hasKate, size=(490, 560), style=wx.DEFAULT_DIALOG_STYLE)
+def addVideoDialog(parent, hasKate, hasIconv):
+ dlg = AddVideoDialog(parent, -1, "Add Video", hasKate, hasIconv, size=(490, 560), style=wx.DEFAULT_DIALOG_STYLE)
dlg.CenterOnScreen()
val = dlg.ShowModal()
result = dict()
Modified: trunk/ffmpeg2theora/frontend/theoraenc/theoraenc.py
===================================================================
--- trunk/ffmpeg2theora/frontend/theoraenc/theoraenc.py 2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/frontend/theoraenc/theoraenc.py 2009-07-26 17:46:00 UTC (rev 16339)
@@ -31,16 +31,20 @@
def probe_kate(ffmpeg2theora):
hasKate = False
- cmd = ffmpeg2theora + ' --help'
- f = os.popen(cmd)
- line = f.readline()
- while line:
- if line.find('Subtitles options:') >= 0:
- hasKate = True
- line = f.readline()
- f.close()
+ p = subprocess.Popen([ffmpeg2theora, '--help'], shell=False, stdout=subprocess.PIPE, close_fds=True)
+ data, err = p.communicate()
+ if 'Subtitles options:' in data:
+ hasKate = True
return hasKate
+def probe_iconv(ffmpeg2theora):
+ hasIconv = False
+ p = subprocess.Popen([ffmpeg2theora, '--help'], shell=False, stdout=subprocess.PIPE, close_fds=True)
+ data, err = p.communicate()
+ if 'supported are all encodings supported by iconv' in data:
+ hasIconv = True
+ return hasIconv
+
def timestr(seconds):
hours = int(seconds/3600)
minutes = int((seconds-( hours*3600 ))/60)
@@ -175,4 +179,5 @@
ffmpeg2theora = probe_ffmpeg2theora()
hasKate = probe_kate(ffmpeg2theora)
+hasIconv = probe_iconv(ffmpeg2theora)
Modified: trunk/ffmpeg2theora/src/ffmpeg2theora.c
===================================================================
--- trunk/ffmpeg2theora/src/ffmpeg2theora.c 2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/src/ffmpeg2theora.c 2009-07-26 17:46:00 UTC (rev 16339)
@@ -1637,7 +1637,11 @@
"Subtitles options:\n"
" --subtitles file use subtitles from the given file (SubRip (.srt) format)\n"
" --subtitles-encoding encoding set encoding of the subtitles file\n"
+#ifdef HAVE_ICONV
+ " supported are all encodings supported by iconv (see iconv help for list)\n"
+#else
" supported are " SUPPORTED_ENCODINGS "\n"
+#endif
" --subtitles-language language set subtitles language (de, en_GB, etc)\n"
" --subtitles-category category set subtitles category (default \"subtitles\")\n"
" --subtitles-ignore-non-utf8 ignores any non UTF-8 sequence in UTF-8 text\n"
@@ -1900,11 +1904,12 @@
info.with_kate=1;
break;
case SUBTITLES_ENCODING_FLAG:
- if (!strcasecmp(optarg,"utf-8")) set_subtitles_encoding(convert,ENC_UTF8);
- else if (!strcasecmp(optarg,"utf8")) set_subtitles_encoding(convert,ENC_UTF8);
- else if (!strcasecmp(optarg,"iso-8859-1")) set_subtitles_encoding(convert,ENC_ISO_8859_1);
- else if (!strcasecmp(optarg,"latin1")) set_subtitles_encoding(convert,ENC_ISO_8859_1);
- else report_unknown_subtitle_encoding(optarg, info.frontend);
+ if (is_valid_encoding(optarg)) {
+ set_subtitles_encoding(convert,optarg);
+ }
+ else {
+ report_unknown_subtitle_encoding(optarg, info.frontend);
+ }
flag = -1;
break;
case SUBTITLES_IGNORE_NON_UTF8_FLAG:
Modified: trunk/ffmpeg2theora/src/ffmpeg2theora.h
===================================================================
--- trunk/ffmpeg2theora/src/ffmpeg2theora.h 2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/src/ffmpeg2theora.h 2009-07-26 17:46:00 UTC (rev 16339)
@@ -3,13 +3,6 @@
#include "subtitles.h"
-typedef enum {
- ENC_UNSET,
- ENC_UTF8,
- ENC_ISO_8859_1,
-} F2T_ENCODING;
-
-
typedef struct ff2theora_subtitle{
char *text;
size_t len;
@@ -28,7 +21,7 @@
/* this block valid for all subtitle sources */
size_t subtitles_count; /* total subtitles output so far */
- F2T_ENCODING subtitles_encoding;
+ char *subtitles_encoding;
char subtitles_language[16];
char subtitles_category[16];
} ff2theora_kate_stream;
Modified: trunk/ffmpeg2theora/src/subtitles.c
===================================================================
--- trunk/ffmpeg2theora/src/subtitles.c 2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/src/subtitles.c 2009-07-26 17:46:00 UTC (rev 16339)
@@ -27,6 +27,9 @@
#include <math.h>
#include <errno.h>
#include <stdarg.h>
+#ifdef HAVE_ICONV
+#include "iconv.h"
+#endif
#include "libavformat/avformat.h"
@@ -59,6 +62,27 @@
}
/**
+ * checks whether we support the encoding
+ */
+int is_valid_encoding(const char *encoding)
+{
+#ifdef HAVE_ICONV
+ iconv_t cd = iconv_open("UTF-8", encoding);
+ if (cd != (iconv_t)-1) {
+ iconv_close(cd);
+ return 1;
+ }
+ return 0;
+#else
+ if (!strcasecmp(encoding, "UTF-8")) return 1;
+ if (!strcasecmp(encoding, "UTF8")) return 1;
+ if (!strcasecmp(encoding, "iso-8859-1")) return 1;
+ if (!strcasecmp(encoding, "latin1")) return 1;
+ return 0;
+#endif
+}
+
+/**
* adds a new kate stream structure
*/
void add_kate_stream(ff2theora this){
@@ -70,7 +94,7 @@
ks->subtitles = 0;
ks->stream_index = -1;
ks->subtitles_count = 0; /* denotes not set yet */
- ks->subtitles_encoding = ENC_UNSET;
+ ks->subtitles_encoding = NULL;
strcpy(ks->subtitles_language, "");
strcpy(ks->subtitles_category, "");
}
@@ -136,13 +160,13 @@
/**
* sets the encoding of the next subtitles file
*/
-void set_subtitles_encoding(ff2theora this,F2T_ENCODING encoding){
+void set_subtitles_encoding(ff2theora this,const char *encoding){
size_t n;
for (n=0; n<this->n_kate_streams;++n) {
- if (this->kate_streams[n].stream_index==-1 && this->kate_streams[n].subtitles_encoding==ENC_UNSET) break;
+ if (this->kate_streams[n].stream_index==-1 && !this->kate_streams[n].subtitles_encoding) break;
}
if (n==this->n_kate_streams) add_kate_stream(this);
- this->kate_streams[n].subtitles_encoding = encoding;
+ this->kate_streams[n].subtitles_encoding = strdup(encoding);
}
@@ -175,23 +199,27 @@
}
/* very simple implementation when no iconv */
-static char *convert_subtitle_to_utf8(F2T_ENCODING encoding,char *text,int ignore_non_utf8, FILE *frontend)
+static char *convert_subtitle_to_utf8(const char *encoding,char *text,int ignore_non_utf8, FILE *frontend)
{
size_t nbytes;
char *ptr;
char *newtext = NULL;
int errors=0;
+#ifdef HAVE_ICONV
+ iconv_t cd;
+#endif
if (!text) return NULL;
- switch (encoding) {
- case ENC_UNSET:
- /* we don't know what encoding this is, assume UTF-8 and we'll yell if it ain't */
- /* fall through */
- case ENC_UTF8:
+ if (encoding == NULL) {
+ /* we don't know what encoding this is, assume UTF-8 and we'll yell if it ain't */
+ encoding = "UTF-8";
+ }
+
+ if (!strcasecmp(encoding, "UTF-8") || !strcasecmp(encoding, "UTF8")) {
/* nothing to do, already in UTF-8 */
if (ignore_non_utf8) {
- /* actually, give the user the option of just ignoring non UTF8 characters */
+ /* actually, give the user the option of just ignoring non UTF-8 characters */
char *wptr;
size_t wlen0;
@@ -231,8 +259,40 @@
else {
newtext = strdup(text);
}
- break;
- case ENC_ISO_8859_1:
+
+ return newtext;
+ }
+
+ /* now, we can either use iconv, or convert ISO-8859-1 by hand (so to speak) */
+#ifdef HAVE_ICONV
+ /* create a conversion for each string, it avoids having to pass around this descriptor,
+ and the speed hit will be irrelevant anyway compared to video decoding/encoding.
+ that's fine, because we don't need to keep state across subtitles. */
+ cd = iconv_open("UTF-8", encoding);
+ if (cd != (iconv_t)-1) {
+ /* iconv doesn't seem to have a mode to do a dummy convert to just return the number
+ of bytes needed, so we just allocate 6 times the number of bytes in the string,
+ which should be the max we need for UTF-8 */
+ size_t insz=strlen(text)+1;
+ size_t outsz = insz*6;
+ char *inptr = text, *outptr;
+ newtext = (char*)malloc(outsz);
+ if (!newtext) {
+ warn(frontend, NULL, 0, "Memory allocation failed - cannot convert text\n");
+ iconv_close(cd);
+ return NULL;
+ }
+ outptr=newtext;
+ if (iconv(cd, &inptr, &insz, &outptr, &outsz) < 0) {
+ warn(frontend, NULL, 0, "Failed to convert text to UTF-8\n");
+ free(newtext);
+ newtext = NULL;
+ }
+ iconv_close(cd);
+ }
+
+#else
+ if (!strcasecmp(encoding, "iso-8859-1") || !strcasecmp(encoding, "latin1")) {
/* simple, characters above 0x7f are broken in two,
and code points map to the iso-8859-1 8 bit codes */
nbytes=0;
@@ -256,11 +316,11 @@
}
}
newtext[nbytes++]=0;
- break;
- default:
+ }
+#endif
+ else {
warn(frontend, NULL, 0, "encoding %d not handled in conversion!", encoding);
newtext = strdup("");
- break;
}
return newtext;
}
@@ -357,7 +417,7 @@
/* we have all the lines for that subtitle, remove the last \n */
remove_last_newline(text);
- /* we want all text to be UTF8 */
+ /* we want all text to be UTF-8 */
utf8=convert_subtitle_to_utf8(this->subtitles_encoding,text,ignore_non_utf8, frontend);
if (!utf8) {
warn(frontend, this->filename, line, "Failed to get UTF-8 text");
@@ -479,6 +539,7 @@
ff2theora_kate_stream *ks=this->kate_streams+i;
for (n=0; n<ks->num_subtitles; ++n) free(ks->subtitles[n].text);
free(ks->subtitles);
+ free(ks->subtitles_encoding);
}
free(this->kate_streams);
}
Modified: trunk/ffmpeg2theora/src/subtitles.h
===================================================================
--- trunk/ffmpeg2theora/src/subtitles.h 2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/src/subtitles.h 2009-07-26 17:46:00 UTC (rev 16339)
@@ -15,6 +15,7 @@
#define SUPPORTED_ENCODINGS "utf-8, utf8, iso-8859-1, latin1"
+extern int is_valid_encoding(const char *encoding);
extern void add_kate_stream(ff2theora this);
extern int load_subtitles(ff2theora_kate_stream *this, int ignore_non_utf8, FILE *frontend);
extern void free_subtitles(ff2theora this);
@@ -24,7 +25,7 @@
extern void set_subtitles_file(ff2theora this,const char *filename);
extern void set_subtitles_language(ff2theora this,const char *language);
extern void set_subtitles_category(ff2theora this,const char *category);
-extern void set_subtitles_encoding(ff2theora this,F2T_ENCODING encoding);
+extern void set_subtitles_encoding(ff2theora this,const char *encoding);
extern void report_unknown_subtitle_encoding(const char *name, FILE *frontend);
#endif
Modified: trunk/ffmpeg2theora/subtitles.txt
===================================================================
--- trunk/ffmpeg2theora/subtitles.txt 2009-07-26 14:17:54 UTC (rev 16338)
+++ trunk/ffmpeg2theora/subtitles.txt 2009-07-26 17:46:00 UTC (rev 16339)
@@ -14,6 +14,8 @@
Kate streams. Those SubRip files must be encoded in UTF-8 (7 bit ASCII
is a subset of UTF-8 so is valid input as well). See below for more
information on converting SubRip files with other encodings to UTF-8.
+ffmpeg2theora can convert files to UTF-8 transparently if built with
+a C library that supports iconv.
Subtitles support requires libkate, available from:
http://code.google.com/p/libkate
@@ -79,6 +81,8 @@
* Converting non-UTF-8 files to UTF-8
+If ffmpeg2theora wasn't built with iconv support, only UTF-8 and latin1
+input text is supported.
If you have SubRip files in another format than UTF-8, you can use the
iconv or recode programs to convert them to UTF-8 so ffmpeg2theora can
read them.
More information about the commits
mailing list