[xiph-cvs] cvs commit: vorbis-tools/ogginfo ogginfo2.c
Michael Smith
msmith at xiph.org
Sun Jun 30 08:43:05 PDT 2002
msmith 02/06/30 08:43:04
Modified: ogginfo ogginfo2.c
Log:
Checking for vorbis comment header validity. UTF-8 code is yucky.
Revision Changes Path
1.4 +179 -10 vorbis-tools/ogginfo/ogginfo2.c
Index: ogginfo2.c
===================================================================
RCS file: /usr/local/cvsroot/vorbis-tools/ogginfo/ogginfo2.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- ogginfo2.c 2002/06/23 06:28:57 1.3
+++ ogginfo2.c 2002/06/30 15:43:04 1.4
@@ -30,7 +30,6 @@
#define INT64FORMAT "%Ld"
#endif
-
/* TODO:
*
* - detect decreasing granulepos
@@ -47,6 +46,8 @@
int isillegal;
int shownillegal;
int isnew;
+ long seqno;
+ int lostseq;
int start;
int end;
@@ -142,6 +143,11 @@
header = 1;
inf->doneheaders++;
if(inf->doneheaders == 3) {
+ if(ogg_page_granulepos(page) != 0 || ogg_page_continued(page))
+ warn(_("Warning: Vorbis stream %d does not have headers "
+ "correctly framed. Terminal header page contains "
+ "additional packets or has non-zero granulepos\n"),
+ stream->num);
info(_("Vorbis headers parsed for stream %d, "
"information follows...\n"), stream->num);
@@ -171,8 +177,147 @@
if(inf->vc.comments > 0)
info(_("User comments section follows...\n"));
- for(i=0; i < inf->vc.comments; i++)
- info("\t%s\n", inf->vc.user_comments[i]);
+ for(i=0; i < inf->vc.comments; i++) {
+ char *sep = strchr(inf->vc.user_comments[i], '=');
+ char *decoded;
+ int j;
+ int broken = 0;
+ unsigned char *val;
+ int bytes;
+ int remaining;
+
+ if(sep == NULL) {
+ warn(_("Warning: Comment %d in stream %d is invalidly "
+ "formatted, does not contain '=': \"%s\"\n"),
+ i, stream->num, inf->vc.user_comments[i]);
+ continue;
+ }
+
+ for(j=0; j < sep-inf->vc.user_comments[i]; j++) {
+ if(inf->vc.user_comments[i][j] < 0x20 ||
+ inf->vc.user_comments[i][j] > 0x7D) {
+ warn(_("Warning: Invalid comment fieldname in "
+ "comment %d (stream %d): \"%s\"\n"),
+ i, stream->num, inf->vc.user_comments[i]);
+ broken = 1;
+ break;
+ }
+ }
+
+ if(broken)
+ continue;
+
+ val = inf->vc.user_comments[i];
+
+ j = sep-inf->vc.user_comments[i]+1;
+ while(j < inf->vc.comment_lengths[i])
+ {
+ remaining = inf->vc.comment_lengths[i] - j;
+ if((val[j] & 0x80) == 0)
+ bytes = 1;
+ else if((val[j] & 0x40) == 0)
+ bytes = 2;
+ else if((val[j] & 0x20) == 0)
+ bytes = 3;
+ else if((val[j] & 0x10) == 0)
+ bytes = 4;
+ else if((val[j] & 0x08) == 0)
+ bytes = 5;
+ else if((val[j] & 0x04) == 0)
+ bytes = 6;
+ else {
+ warn(_("Warning: Illegal UTF-8 sequence in comment "
+ "%d (stream %d): length marker wrong\n"),
+ i, stream->num);
+ broken = 1;
+ break;
+ }
+
+ if(bytes > remaining) {
+ warn(_("Warning: Illegal UTF-8 sequence in comment "
+ "%d (stream %d): too few bytes\n"),
+ i, stream->num);
+ broken = 1;
+ break;
+ }
+
+ switch(bytes) {
+ case 1:
+ /* No more checks needed */
+ break;
+ case 2:
+ if((val[j+1] & 0xC0) != 0x80)
+ broken = 1;
+ if((val[j] & 0xFE) == 0xC0)
+ broken = 1;
+ break;
+ case 3:
+ if(!((val[j] == 0xE0 && val[j+1] >= 0xA0 &&
+ val[j+1] <= 0xBF &&
+ (val[j+2] & 0xC0) == 0x80) ||
+ (val[j] >= 0xE1 && val[j] <= 0xEC &&
+ (val[j+1] & 0xC0) == 0x80 &&
+ (val[j+2] & 0xC0) == 0x80) ||
+ (val[j] == 0xED && val[j+1] >= 0x80 &&
+ val[j+1] <= 0x9F &&
+ (val[j+2] & 0xC0) == 0x80) ||
+ (val[j] >= 0xEE && val[j] <= 0xEF &&
+ (val[j+1] & 0xC0) == 0x80 &&
+ (val[j+2] & 0xC0) == 0x80)))
+ broken = 1;
+ if(val[j] == 0xE0 && (val[j+1] & 0xE0) == 0x80)
+ broken = 1;
+ break;
+ case 4:
+ if(!((val[j] == 0xF0 && val[j+1] >= 0x90 &&
+ val[j+1] <= 0xBF &&
+ (val[j+2] & 0xC0) == 0x80 &&
+ (val[j+3] & 0xC0) == 0x80) ||
+ (val[j] >= 0xF1 && val[j] <= 0xF3 &&
+ (val[j+1] & 0xC0) == 0x80 &&
+ (val[j+2] & 0xC0) == 0x80 &&
+ (val[j+3] & 0xC0) == 0x80) ||
+ (val[j] == 0xF4 && val[j+1] >= 0x80 &&
+ val[j+1] <= 0x8F &&
+ (val[j+2] & 0xC0) == 0x80 &&
+ (val[j+3] & 0xC0) == 0x80)))
+ broken = 1;
+ if(val[j] == 0xF0 && (val[j+1] & 0xF0) == 0x80)
+ broken = 1;
+ break;
+ /* 5 and 6 aren't actually allowed at this point*/
+ case 5:
+ broken = 1;
+ break;
+ case 6:
+ broken = 1;
+ break;
+ }
+
+ if(broken) {
+ warn(_("Warning: Illegal UTF-8 sequence in comment "
+ "%d (stream %d): invalid sequence\n"),
+ i, stream->num);
+ broken = 1;
+ break;
+ }
+
+ j += bytes;
+ }
+
+ if(!broken) {
+ /* A hack around brokenness in the utf8 decoder */
+ if(strlen(sep+1) == 0)
+ decoded = sep+1;
+ else if(utf8_decode(sep+1, &decoded) < 0) {
+ warn(_("Warning: Failure in utf8 decoder. This "
+ "should be impossible\n"));
+ continue;
+ }
+ *sep = 0;
+ info("\t%s=%s\n", inf->vc.user_comments[i], decoded);
+ }
+ }
}
}
}
@@ -370,7 +515,8 @@
return stream;
}
-static int get_next_page(FILE *f, ogg_sync_state *sync, ogg_page *page)
+static int get_next_page(FILE *f, ogg_sync_state *sync, ogg_page *page,
+ ogg_int64_t *written)
{
int ret;
char *buffer;
@@ -378,13 +524,17 @@
while((ret = ogg_sync_pageout(sync, page)) <= 0) {
if(ret < 0)
- warn(_("Warning: Hole in data found. Corrupted ogg\n"));
+ warn(_("Warning: Hole in data found at approximate offset "
+ INT64FORMAT " bytes. Corrupted ogg.\n"), *written);
buffer = ogg_sync_buffer(sync, CHUNK);
bytes = fread(buffer, 1, CHUNK, f);
- ogg_sync_wrote(sync, bytes);
- if(bytes == 0)
+ if(bytes <= 0) {
+ ogg_sync_wrote(sync, 0);
return 0;
+ }
+ ogg_sync_wrote(sync, bytes);
+ *written += bytes;
}
return 1;
@@ -395,6 +545,8 @@
ogg_sync_state sync;
ogg_page page;
stream_set *processors = create_stream_set();
+ int gotpage = 0;
+ ogg_int64_t written = 0;
if(!file) {
error(_("Error opening input file \"%s\": %s\n"), filename,
@@ -402,11 +554,12 @@
return;
}
- info(_("Processing file \"%s\"...\n\n"), filename);
+ printf(_("Processing file \"%s\"...\n\n"), filename);
ogg_sync_init(&sync);
- while(get_next_page(file, &sync, &page)) {
+ while(get_next_page(file, &sync, &page, &written)) {
+ gotpage = 1;
stream_processor *p = find_stream_processor(processors, &page);
if(!p) {
@@ -432,6 +585,17 @@
warn(_("Warning: stream start flag found in mid-stream "
"on stream %d\n"), p->num);
+ if(p->seqno++ != ogg_page_pageno(&page)) {
+ if(!p->lostseq)
+ warn(_("Warning: sequence number gap in stream %d. Got page "
+ "%ld when expecting page %ld. Indicates missing data.\n"
+ ), p->num, ogg_page_pageno(&page), p->seqno - 1);
+ p->seqno = ogg_page_pageno(&page);
+ p->lostseq = 1;
+ }
+ else
+ p->lostseq = 0;
+
if(!p->isillegal) {
p->process_page(p, &page);
@@ -444,6 +608,10 @@
}
}
+ if(!gotpage)
+ error(_("Error: No ogg data found in file \"%s\".\n"
+ "Input probably not ogg.\n"), filename);
+
free_stream_set(processors);
ogg_sync_clear(&sync);
@@ -501,7 +669,8 @@
printwarn = 0;
if(optind >= argc) {
- fprintf(stderr, _("No input files specified. \"ogginfo -h\" for help\n"));
+ fprintf(stderr,
+ _("No input files specified. \"ogginfo -h\" for help\n"));
return 1;
}
--- >8 ----
List archives: http://www.xiph.org/archives/
Ogg project homepage: http://www.xiph.org/ogg/
To unsubscribe from this list, send a message to 'cvs-request at xiph.org'
containing only the word 'unsubscribe' in the body. No subject is needed.
Unsubscribe messages sent to the list will be ignored/filtered.
More information about the commits mailing list