[xiph-commits] r15046 - in trunk/ffmpeg2theora: . src
j at svn.xiph.org
j at svn.xiph.org
Tue Jun 17 04:08:24 PDT 2008
Author: j
Date: 2008-06-17 04:08:24 -0700 (Tue, 17 Jun 2008)
New Revision: 15046
Modified:
trunk/ffmpeg2theora/
trunk/ffmpeg2theora/ffmpeg2theora.1
trunk/ffmpeg2theora/src/ffmpeg2theora.c
trunk/ffmpeg2theora/src/ffmpeg2theora.h
trunk/ffmpeg2theora/src/subtitles.c
trunk/ffmpeg2theora/src/subtitles.h
trunk/ffmpeg2theora/subtitles.txt
Log:
ignoring non utf8 sequence in what is claimed to be utf8. (thanks ogg.k.ogg.k)
Property changes on: trunk/ffmpeg2theora
___________________________________________________________________
Name: bzr:revision-info
- timestamp: 2008-06-17 13:03:55.016999960 +0200
committer: j
properties:
branch-nick: ffmpeg2theora
+ timestamp: 2008-06-17 13:05:18.197000027 +0200
committer: j
properties:
branch-nick: ffmpeg2theora
Name: bzr:revision-id:v3-single1-dHJ1bmsvZmZtcGVnMnRoZW9yYQ..
- 191 j-20080517230830-he5x8v2m8yrfiw35
192 j-20080518224037-pkmoctzf4qce7tog
193 j-20080518224409-6hbfp3k2ssn6egqa
194 j-20080520111939-dhi52qwbqe7a47cu
195 j-20080523092252-gj9k9db0s67vl7dw
196 j-20080523092420-l0850yrq1qkgz9t0
197 j-20080523093057-l5g0ezzy5geu0pey
198 j-20080523094343-kcno1dm2e1lr38q4
199 j-20080523163006-kjl6ewea5sxawmq2
200 j-20080523165904-l2vm52qae0hlqkhp
201 j-20080523175432-2ed953iktnl8c7cr
202 j-20080525100939-7oja8pk08v9fquiw
203 j-20080526111321-nhzaqh6ivzn0vs7b
204 j-20080527100851-2v5eyxxrq1riqi50
205 j-20080527101341-9ynbgth2b15jw792
206 j-20080527205556-19tffvfrxgt3khld
207 j-20080527205840-zeestdde3v1zks9k
208 j-20080527210129-e73y56uwmzbcid00
209 j-20080527211813-5ll680ed1q4byp16
210 j-20080528102006-aeippim0tn70mz3f
211 j-20080528104907-40kiidjojvta8j61
212 j-20080528111329-vkqbt7xkat2o9h4z
213 j-20080529102940-q9xdwm5v9espzomv
214 j-20080529111405-nmh99aon1kmh22qm
215 j-20080530094948-ncq064s4uggd9z95
216 j-20080530095056-hko2vjfwipikwjyu
217 j-20080530171822-bab8sy8lpotf8081
218 j-20080603170442-v0pxspvfcucvsaex
219 j-20080617110355-xwbeg1xidmv8fubp
+ 191 j-20080517230830-he5x8v2m8yrfiw35
192 j-20080518224037-pkmoctzf4qce7tog
193 j-20080518224409-6hbfp3k2ssn6egqa
194 j-20080520111939-dhi52qwbqe7a47cu
195 j-20080523092252-gj9k9db0s67vl7dw
196 j-20080523092420-l0850yrq1qkgz9t0
197 j-20080523093057-l5g0ezzy5geu0pey
198 j-20080523094343-kcno1dm2e1lr38q4
199 j-20080523163006-kjl6ewea5sxawmq2
200 j-20080523165904-l2vm52qae0hlqkhp
201 j-20080523175432-2ed953iktnl8c7cr
202 j-20080525100939-7oja8pk08v9fquiw
203 j-20080526111321-nhzaqh6ivzn0vs7b
204 j-20080527100851-2v5eyxxrq1riqi50
205 j-20080527101341-9ynbgth2b15jw792
206 j-20080527205556-19tffvfrxgt3khld
207 j-20080527205840-zeestdde3v1zks9k
208 j-20080527210129-e73y56uwmzbcid00
209 j-20080527211813-5ll680ed1q4byp16
210 j-20080528102006-aeippim0tn70mz3f
211 j-20080528104907-40kiidjojvta8j61
212 j-20080528111329-vkqbt7xkat2o9h4z
213 j-20080529102940-q9xdwm5v9espzomv
214 j-20080529111405-nmh99aon1kmh22qm
215 j-20080530094948-ncq064s4uggd9z95
216 j-20080530095056-hko2vjfwipikwjyu
217 j-20080530171822-bab8sy8lpotf8081
218 j-20080603170442-v0pxspvfcucvsaex
219 j-20080617110355-xwbeg1xidmv8fubp
220 j-20080617110518-khqlhaan52kz3lii
Modified: trunk/ffmpeg2theora/ffmpeg2theora.1
===================================================================
--- trunk/ffmpeg2theora/ffmpeg2theora.1 2008-06-17 11:05:54 UTC (rev 15045)
+++ trunk/ffmpeg2theora/ffmpeg2theora.1 2008-06-17 11:08:24 UTC (rev 15046)
@@ -166,6 +166,11 @@
this available to the user for selection. The default category is
"subtitles". Suggested other categories may include "transcript",
"commentary", "lyrics", etc.
+.TP
+.B \-\-subtitles-ignore-non-utf8
+When reading a utf-8 subtitles text file, any invalid utf-8 sequence
+will be ignored (dropped from the output). This may be useful if there
+are stray invalid sequences in an otherwise utf-8 file.
.SS Metadata options:
.TP
.B \-\-artist
Modified: trunk/ffmpeg2theora/src/ffmpeg2theora.c
===================================================================
--- trunk/ffmpeg2theora/src/ffmpeg2theora.c 2008-06-17 11:05:54 UTC (rev 15045)
+++ trunk/ffmpeg2theora/src/ffmpeg2theora.c 2008-06-17 11:08:24 UTC (rev 15046)
@@ -62,6 +62,7 @@
SUBTITLES_ENCODING_FLAG,
SUBTITLES_LANGUAGE_FLAG,
SUBTITLES_CATEGORY_FLAG,
+ SUBTITLES_IGNORE_NON_UTF8_FLAG,
VHOOK_FLAG,
FRONTEND_FLAG,
SPEEDLEVEL_FLAG,
@@ -174,6 +175,7 @@
this->n_kate_streams=0;
this->kate_streams=NULL;
+ this->ignore_non_utf8 = 0;
this->pix_fmt = PIX_FMT_YUV420P;
@@ -1182,6 +1184,7 @@
" supported are " SUPPORTED_ENCODINGS "\n"
" --subtitles-language language set subtitles language (de, en_GB, etc)\n"
" --subtitles-category category set subtitles category (default \"subtitles\")\n"
+ " --subtitles-ignore-non-utf8 ignores any non utf-8 sequence in utf-8 text\n"
"\n"
#endif
"Metadata options:\n"
@@ -1278,6 +1281,7 @@
{"audiostream",required_argument,&flag,AUDIOSTREAM_FLAG},
{"subtitles",required_argument,&flag,SUBTITLES_FLAG},
{"subtitles-encoding",required_argument,&flag,SUBTITLES_ENCODING_FLAG},
+ {"subtitles-ignore-non-utf8",0,&flag,SUBTITLES_IGNORE_NON_UTF8_FLAG},
{"subtitles-language",required_argument,&flag,SUBTITLES_LANGUAGE_FLAG},
{"subtitles-category",required_argument,&flag,SUBTITLES_CATEGORY_FLAG},
{"starttime",required_argument,NULL,'s'},
@@ -1407,6 +1411,10 @@
else report_unknown_subtitle_encoding(optarg);
flag = -1;
break;
+ case SUBTITLES_IGNORE_NON_UTF8_FLAG:
+ convert->ignore_non_utf8 = 1;
+ flag = -1;
+ break;
case SUBTITLES_LANGUAGE_FLAG:
if (strlen(optarg)>15) {
fprintf(stderr, "WARNING - language is limited to 15 characters, and will be truncated\n");
@@ -1424,6 +1432,7 @@
#else
case SUBTITLES_FLAG:
case SUBTITLES_ENCODING_FLAG:
+ case SUBTITLES_IGNORE_NON_UTF8_FLAG:
case SUBTITLES_LANGUAGE_FLAG:
case SUBTITLES_CATEGORY_FLAG:
fprintf(stderr, "WARNING - Kate support not compiled in, subtitles will not be output\n"
@@ -1683,7 +1692,7 @@
for (n=0; n<convert->n_kate_streams; ++n) {
ff2theora_kate_stream *ks=convert->kate_streams+n;
- if (load_subtitles(ks)>=0) {
+ if (load_subtitles(ks,convert->ignore_non_utf8)>=0) {
printf("Muxing Kate stream %d from %s as %s %s\n",
n,ks->filename,
ks->subtitles_language[0]?ks->subtitles_language:"<unknown language>",
Modified: trunk/ffmpeg2theora/src/ffmpeg2theora.h
===================================================================
--- trunk/ffmpeg2theora/src/ffmpeg2theora.h 2008-06-17 11:05:54 UTC (rev 15045)
+++ trunk/ffmpeg2theora/src/ffmpeg2theora.h 2008-06-17 11:08:24 UTC (rev 15046)
@@ -85,7 +85,8 @@
size_t n_kate_streams;
ff2theora_kate_stream *kate_streams;
-
+
+ int ignore_non_utf8;
// ffmpeg2theora --nosound -f dv -H 32000 -S 0 -v 8 -x 384 -y 288 -G 1.5 input.dv
double video_gamma;
double video_bright;
Modified: trunk/ffmpeg2theora/src/subtitles.c
===================================================================
--- trunk/ffmpeg2theora/src/subtitles.c 2008-06-17 11:05:54 UTC (rev 15045)
+++ trunk/ffmpeg2theora/src/subtitles.c 2008-06-17 11:08:24 UTC (rev 15046)
@@ -111,34 +111,81 @@
fprintf(stderr, " " SUPPORTED_ENCODINGS "\n");
}
-char *fgets2(char *s,size_t sz,FILE *f)
+#ifdef HAVE_KATE
+
+static char *fgets2(char *s,size_t sz,FILE *f)
{
char *ret = fgets(s, sz, f);
/* fixup DOS newline character */
char *ptr=strchr(s, '\r');
- if (ptr) *ptr='\n';
+ if (ptr) {
+ *ptr='\n';
+ *(ptr+1)=0;
+ }
return ret;
}
-double hmsms2s(int h,int m,int s,int ms)
+static double hmsms2s(int h,int m,int s,int ms)
{
return h*3600+m*60+s+ms/1000.0;
}
/* very simple implementation when no iconv */
-void convert_subtitle_to_utf8(F2T_ENCODING encoding,unsigned char *text)
+static void convert_subtitle_to_utf8(F2T_ENCODING encoding,unsigned char *text,int ignore_non_utf8)
{
size_t nbytes;
- unsigned char *ptr,*newtext;
+ char *ptr,*newtext;
+ int errors=0;
if (!text || !*text) return;
switch (encoding) {
case ENC_UNSET:
/* we don't know what encoding this is, assume utf-8 and we'll yell if it ain't */
- break;
+ /* fall through */
case ENC_UTF8:
/* nothing to do, already in utf-8 */
+ if (ignore_non_utf8) {
+ /* actually, give the user the option of just ignoring non UTF8 characters */
+ char *wptr;
+ size_t wlen0;
+
+ nbytes = strlen(text)+1;
+ newtext=(unsigned char*)malloc(nbytes);
+ if (!newtext) {
+ fprintf(stderr, "WARNING - Memory allocation failed - cannot convert text\n");
+ return;
+ }
+ ptr = text;
+ wptr = newtext;
+ wlen0 = nbytes;
+ while (nbytes>0) {
+ int ret=kate_text_get_character(kate_utf8, (const char ** const)&ptr, &nbytes);
+ if (ret>=0) {
+ /* valid character */
+ ret=kate_text_set_character(kate_utf8, ret, &wptr, &wlen0);
+ if (ret<0) {
+ fprintf(stderr, "WARNING - failed to filter utf8 text: %s\n", text);
+ free(newtext);
+ return;
+ }
+ if (ret==0) break;
+ }
+ else {
+ /* skip offending byte - we can't skip the terminating zero as we do byte by byte */
+ ++errors;
+ ++ptr;
+ --nbytes;
+ }
+ }
+
+ if (errors) {
+ fprintf(stderr, "WARNING - Found non utf8 character(s) in string %s, scrubbed out\n", text);
+ }
+
+ strcpy(text,newtext);
+ free(newtext);
+ }
break;
case ENC_ISO_8859_1:
/* simple, characters above 0x7f are broken in two,
@@ -150,7 +197,7 @@
}
newtext=(unsigned char*)malloc(1+nbytes);
if (!newtext) {
- fprintf(stderr, "Memory allocation failed - cannot convert text\n");
+ fprintf(stderr, "WARNING - Memory allocation failed - cannot convert text\n");
return;
}
nbytes=0;
@@ -173,8 +220,16 @@
}
}
-int load_subtitles(ff2theora_kate_stream *this)
+static void remove_last_newline(char *text)
{
+ char *ptr = text+strlen(text)-1;
+ if (*ptr=='\n') *ptr=0;
+}
+
+#endif
+
+int load_subtitles(ff2theora_kate_stream *this, int ignore_non_utf8)
+{
#ifdef HAVE_KATE
enum { need_id, need_timing, need_text };
int need = need_id;
@@ -236,7 +291,11 @@
break;
case need_text:
if (*str=='\n') {
- convert_subtitle_to_utf8(this->subtitles_encoding,(unsigned char*)text);
+ /* we have all the lines for that subtitle, remove the last \n */
+ remove_last_newline(text);
+
+ /* we want all text to be UTF8 */
+ convert_subtitle_to_utf8(this->subtitles_encoding,(unsigned char*)text,ignore_non_utf8);
size_t len = strlen(text);
this->subtitles = (ff2theora_subtitle*)realloc(this->subtitles, (this->num_subtitles+1)*sizeof(ff2theora_subtitle));
if (!this->subtitles) {
Modified: trunk/ffmpeg2theora/src/subtitles.h
===================================================================
--- trunk/ffmpeg2theora/src/subtitles.h 2008-06-17 11:05:54 UTC (rev 15045)
+++ trunk/ffmpeg2theora/src/subtitles.h 2008-06-17 11:08:24 UTC (rev 15046)
@@ -16,7 +16,7 @@
#define SUPPORTED_ENCODINGS "utf-8, utf8, iso-8859-1, latin1"
extern void add_kate_stream(ff2theora this);
-extern int load_subtitles(ff2theora_kate_stream *this);
+extern int load_subtitles(ff2theora_kate_stream *this, int ignore_non_utf8);
extern void free_subtitles(ff2theora this);
extern void set_subtitles_file(ff2theora this,const char *filename);
@@ -25,8 +25,5 @@
extern void set_subtitles_encoding(ff2theora this,F2T_ENCODING encoding);
extern void report_unknown_subtitle_encoding(const char *name);
-extern char *fgets2(char *s,size_t sz,FILE *f);
-extern double hmsms2s(int h,int m,int s,int ms);
-extern void convert_subtitle_to_utf8(F2T_ENCODING encoding,unsigned char *text);
#endif
Modified: trunk/ffmpeg2theora/subtitles.txt
===================================================================
--- trunk/ffmpeg2theora/subtitles.txt 2008-06-17 11:05:54 UTC (rev 15045)
+++ trunk/ffmpeg2theora/subtitles.txt 2008-06-17 11:08:24 UTC (rev 15046)
@@ -1,9 +1,10 @@
-Subtitles can be embedded in an Ogg stream alongside a Theora video.
+Text subtitles can be embedded in an Ogg stream alongside a Theora video.
* Overview
* Subtitles related options
* Converting non-utf-8 files to utf-8
* Examples
+ * Playing subtitles
@@ -60,8 +61,14 @@
converting other encoding to utf-8.
If unspecified, the default is utf-8.
+--subtitles-ignore-non-utf8
+ Any invalid sequence in utf-8 text will be ignored. This may be useful
+ when using a utf-8 file with stray non-utf-8 characters. This is not
+ a substitute for converting a non-utf-8 file to utf-8, however, as the
+ invalid sequences will be missing from the output stream.
+
* Converting non-utf-8 files to utf-8
If you have SubRip files in another format than utf-8, you can use the
@@ -129,3 +136,9 @@
input.avi
+ * Playing subtitles
+
+At the moment, only VLC has playback support for Kate streams. However, the
+libkate distribution includes patches for other players and media frameworks
+(MPlayer, GStreamer).
+
More information about the commits
mailing list