[xiph-commits] r16465 - branches/theora-monty-post-1-1/examples

xiphmont at svn.xiph.org
Tue Aug 11 20:35:33 PDT 2009


Author: xiphmont
Date: 2009-08-11 20:35:33 -0700 (Tue, 11 Aug 2009)
New Revision: 16465

Modified:
   branches/theora-monty-post-1-1/examples/Makefile.am
   branches/theora-monty-post-1-1/examples/encoder_example.c
Log:
Add y4o support to encoder_example

Clean up the encoder_example decode flow slightly (mostly in the actual
encode loop)
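
The usage text below notes that a YUV4OGG (y4o) input may supply audio
and/or video; a hypothetical invocation (file names are placeholders
only) looks like:

    encoder_example -o output.ogv input.y4o

A single y4o file carrying both streams can replace the separate WAV
plus YUV4MPEG2 input pair.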



Modified: branches/theora-monty-post-1-1/examples/Makefile.am
===================================================================
--- branches/theora-monty-post-1-1/examples/Makefile.am	2009-08-12 03:10:31 UTC (rev 16464)
+++ branches/theora-monty-post-1-1/examples/Makefile.am	2009-08-12 03:35:33 UTC (rev 16465)
@@ -24,7 +24,7 @@
 player_example_CFLAGS = $(SDL_CFLAGS) $(OGG_CFLAGS) $(VORBIS_CFLAGS)
 player_example_LDADD = $(LDADDDEC) $(SDL_LIBS) $(VORBIS_LIBS) $(OSS_LIBS)
 
-encoder_example_SOURCES = encoder_example.c
+encoder_example_SOURCES = encoder_example.c y4o.c y4o.h
 EXTRA_encoder_example_SOURCES = getopt.c getopt1.c getopt.h
 encoder_example_CFLAGS = $(OGG_CFLAGS) $(VORBIS_CFLAGS)
 encoder_example_LDADD = $(GETOPT_OBJS) $(LDADDENC) $(VORBIS_LIBS) $(VORBISENC_LIBS)

Modified: branches/theora-monty-post-1-1/examples/encoder_example.c
===================================================================
--- branches/theora-monty-post-1-1/examples/encoder_example.c	2009-08-12 03:10:31 UTC (rev 16464)
+++ branches/theora-monty-post-1-1/examples/encoder_example.c	2009-08-12 03:35:33 UTC (rev 16465)
@@ -46,7 +46,15 @@
 #include "theora/theoraenc.h"
 #include "vorbis/codec.h"
 #include "vorbis/vorbisenc.h"
+#include "y4o.h"
 
+/* Don't allow the audio/video buffers to get deeper than N seconds
+   when buffering audio and video from the same interleaved input
+   stream (e.g., y4o).  This prevents eating arbitrary amounts of memory
+   futilely trying to buffer a stream that has wandered way out of
+   sync */
+#define MAX_BUFFER_SKEW 15.0
+
 #ifdef _WIN32
 /*supply missing headers and functions to Win32. going to hell, I know*/
 #include <fcntl.h>
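
For reference, MAX_BUFFER_SKEW feeds guards of the following shape in the
audio and video push paths later in this patch (a sketch; each real check
compares a stream's newest packet time against that stream's last output
page time):

    if(packet_time > last_output_page_time + MAX_BUFFER_SKEW){
      fprintf(stderr,"ERROR: Audio / Video buffer skew has exceeded maximum limit.\n");
      exit(1);
    }
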
@@ -89,9 +97,14 @@
 
 FILE *audio=NULL;
 FILE *video=NULL;
+y4o_in_t *y4o_audio=NULL;
+y4o_in_t *y4o_video=NULL;
+int y4o_audio_stream=-1;
+int y4o_video_stream=-1;
 
 int audio_ch=0;
 int audio_hz=0;
+int audio_b=0;
 
 float audio_q=.1f;
 int audio_r=-1;
@@ -112,21 +125,21 @@
 int src_c_dec_v=2;
 int dst_c_dec_h=2;
 int dst_c_dec_v=2;
-char chroma_type[16];
+y4o_chromafmt chroma_fmt;
 
 /*The size of each converted frame buffer.*/
-size_t y4m_dst_buf_sz;
+size_t y4x_dst_buf_sz;
 /*The amount to read directly into the converted frame buffer.*/
-size_t y4m_dst_buf_read_sz;
+size_t y4x_dst_buf_read_sz;
 /*The size of the auxilliary buffer.*/
-size_t y4m_aux_buf_sz;
+size_t y4x_aux_buf_sz;
 /*The amount to read into the auxilliary buffer.*/
-size_t y4m_aux_buf_read_sz;
+size_t y4x_aux_buf_read_sz;
 
 /*The function used to perform chroma conversion.*/
-typedef void (*y4m_convert_func)(unsigned char *_dst,unsigned char *_aux);
+typedef void (*y4x_convert_func)(unsigned char *_dst,unsigned char *_aux);
 
-y4m_convert_func y4m_convert=NULL;
+y4x_convert_func y4x_convert=NULL;
 
 int video_r=-1;
 int video_q=-1;
@@ -211,11 +224,13 @@
           "   -b --begin-time <h:m:s.d>      Begin encoding at offset into input\n"
           "   -e --end-time <h:m:s.d>        End encoding at offset into input\n"
           "encoder_example accepts only uncompressed RIFF WAV format audio and\n"
-          "YUV4MPEG2 uncompressed video.\n\n");
+          "YUV4MPEG2 uncompressed video. A YUV4OGG input may be used as an audio\n"
+          "and/or video source.\n\n");
   exit(1);
 }
 
 static int y4m_parse_tags(char *_tags){
+  char  tmp_chroma_type[16];
   int   got_w;
   int   got_h;
   int   got_fps;
@@ -260,8 +275,8 @@
       }break;
       case 'C':{
         if(q-p>16)return -1;
-        memcpy(chroma_type,p+1,q-p-1);
-        chroma_type[q-p-1]='\0';
+        memcpy(tmp_chroma_type,p+1,q-p-1);
+        tmp_chroma_type[q-p-1]='\0';
         got_chroma=1;
       }break;
       /*Ignore unknown tags.*/
@@ -270,12 +285,35 @@
   if(!got_w||!got_h||!got_fps||!got_interlace||!got_par)return -1;
   /*Chroma-type is not specified in older files, e.g., those generated by
      mplayer.*/
-  if(!got_chroma)strcpy(chroma_type,"420");
+  if(!got_chroma)strcpy(tmp_chroma_type,"420");
   /*Update fps and aspect ratio globals if not specified in the command line.*/
   if(video_fps_n==-1)video_fps_n=tmp_video_fps_n;
   if(video_fps_d==-1)video_fps_d=tmp_video_fps_d;
   if(video_par_n==-1)video_par_n=tmp_video_par_n;
   if(video_par_d==-1)video_par_d=tmp_video_par_d;
+
+  /* reuse Y4O chroma format enum for Y4M */
+  if(strcmp(tmp_chroma_type,"420")==0||strcmp(tmp_chroma_type,"420jpeg")==0){
+    chroma_fmt = Y4O_C420jpeg;
+  }else if(strcmp(tmp_chroma_type,"420mpeg2")==0){
+    chroma_fmt = Y4O_C420mpeg2;
+  }else if(strcmp(tmp_chroma_type,"420paldv")==0){
+    chroma_fmt = Y4O_C420paldv;
+  }else if(strcmp(tmp_chroma_type,"422")==0){
+    chroma_fmt = Y4O_C422smpte;
+  }else if(strcmp(tmp_chroma_type,"411")==0){
+    chroma_fmt = Y4O_C411ntscdv;
+  }else if(strcmp(tmp_chroma_type,"444")==0){
+    chroma_fmt = Y4O_C444;
+  }else if(strcmp(tmp_chroma_type,"444alpha")==0){
+    chroma_fmt = Y4O_C444alpha;
+  }else if(strcmp(tmp_chroma_type,"mono")==0){
+    chroma_fmt = Y4O_Cmono;
+  }else{
+    fprintf(stderr,"Unknown chroma sampling type: %s\n",tmp_chroma_type);
+    exit(1);
+  }
+
   return 0;
 }
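
An equivalent table-driven form of the string-to-enum mapping above (a
sketch only; the y4o_chromafmt values are those from y4o.h as used in
this patch):

    static const struct {
      const char    *tag;
      y4o_chromafmt  fmt;
    } y4m_chroma_map[]={
      {"420",     Y4O_C420jpeg},  {"420jpeg", Y4O_C420jpeg},
      {"420mpeg2",Y4O_C420mpeg2}, {"420paldv",Y4O_C420paldv},
      {"422",     Y4O_C422smpte}, {"411",     Y4O_C411ntscdv},
      {"444",     Y4O_C444},      {"444alpha",Y4O_C444alpha},
      {"mono",    Y4O_Cmono},
    };

    int i;
    for(i=0;i<(int)(sizeof(y4m_chroma_map)/sizeof(*y4m_chroma_map));i++)
      if(!strcmp(tmp_chroma_type,y4m_chroma_map[i].tag))break;
    if(i==(int)(sizeof(y4m_chroma_map)/sizeof(*y4m_chroma_map))){
      fprintf(stderr,"Unknown chroma sampling type: %s\n",tmp_chroma_type);
      exit(1);
    }
    chroma_fmt=y4m_chroma_map[i].fmt;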
 
@@ -342,7 +380,7 @@
   The 4:2:2 modes look exactly the same, except there are twice as many chroma
    lines, and they are vertically co-sited with the luma samples in both the
    mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
-static void y4m_convert_42xmpeg2_42xjpeg(unsigned char *_dst,
+static void y4x_convert_42xmpeg2_42xjpeg(unsigned char *_dst,
  unsigned char *_aux){
   int c_w;
   int c_h;
@@ -421,7 +459,7 @@
    the chroma plane's resolution) to the right.
   Then we use another filter to move the C_r location down one quarter pixel,
    and the C_b location up one quarter pixel.*/
-static void y4m_convert_42xpaldv_42xjpeg(unsigned char *_dst,
+static void y4x_convert_42xpaldv_42xjpeg(unsigned char *_dst,
  unsigned char *_aux){
   unsigned char *tmp;
   int            c_w;
@@ -564,7 +602,7 @@
   We use a filter to resample at site locations one eighth pixel (at the source
    chroma plane's horizontal resolution) and five eighths of a pixel to the
    right.*/
-static void y4m_convert_411_422jpeg(unsigned char *_dst,
+static void y4x_convert_411_422jpeg(unsigned char *_dst,
  unsigned char *_aux){
   int c_w;
   int dst_c_w;
@@ -610,7 +648,7 @@
 
 /*The image is padded with empty chroma components at 4:2:0.
   This costs about 17 bits a frame to code.*/
-static void y4m_convert_mono_420jpeg(unsigned char *_dst,
+static void y4x_convert_mono_420jpeg(unsigned char *_dst,
  unsigned char *_aux){
   int c_sz;
   _dst+=pic_w*pic_h;
@@ -621,7 +659,7 @@
 #if 0
 /*Right now just 444 to 420.
   Not too hard to generalize.*/
-static void y4m_convert_4xxjpeg_42xjpeg(unsigned char *_dst,
+static void y4x_convert_4xxjpeg_42xjpeg(unsigned char *_dst,
  unsigned char *_aux){
   unsigned char *tmp;
   int            c_w;
@@ -692,10 +730,109 @@
 
 
 /*No conversion function needed.*/
-static void y4m_convert_null(unsigned char *_dst,
- unsigned char *_aux){
+static void y4x_convert_null(unsigned char *_dst, unsigned char *_aux){
 }
 
+static void setup_video(char *f, FILE *test){
+
+  if(interlace!='p'){
+    fprintf(stderr,"Input video is interlaced; Theora handles only progressive scan\n\n");
+    exit(1);
+  }
+
+  switch(chroma_fmt){
+  case Y4O_C420jpeg:
+    src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=2;
+    y4x_dst_buf_read_sz=pic_w*pic_h+2*((pic_w+1)/2)*((pic_h+1)/2);
+    y4x_aux_buf_sz=y4x_aux_buf_read_sz=0;
+    y4x_convert=y4x_convert_null;
+    break;
+  case Y4O_C420unknown:
+    fprintf(stderr,"WARNING: Unknown 4:2:0 chroma subsampling!\n"
+            "         Assuming mpeg2 chroma pixel positioning.\n\n");
+    /* fall through */
+  case Y4O_C420mpeg2:
+    src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=2;
+    y4x_dst_buf_read_sz=pic_w*pic_h;
+    /*Chroma filter required: read into the aux buf first.*/
+    y4x_aux_buf_sz=y4x_aux_buf_read_sz=2*((pic_w+1)/2)*((pic_h+1)/2);
+    y4x_convert=y4x_convert_42xmpeg2_42xjpeg;
+    break;
+  case Y4O_C420paldv:
+    src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=2;
+    y4x_dst_buf_read_sz=pic_w*pic_h;
+    /*Chroma filter required: read into the aux buf first.
+      We need to make two filter passes, so we need some extra space in the
+      aux buffer.*/
+    y4x_aux_buf_sz=3*((pic_w+1)/2)*((pic_h+1)/2);
+    y4x_aux_buf_read_sz=2*((pic_w+1)/2)*((pic_h+1)/2);
+    y4x_convert=y4x_convert_42xpaldv_42xjpeg;
+    break;
+  case Y4O_C422smpte:
+    src_c_dec_h=dst_c_dec_h=2;
+    src_c_dec_v=dst_c_dec_v=1;
+    y4x_dst_buf_read_sz=pic_w*pic_h;
+    /*Chroma filter required: read into the aux buf first.*/
+    y4x_aux_buf_sz=y4x_aux_buf_read_sz=2*((pic_w+1)/2)*pic_h;
+    y4x_convert=y4x_convert_42xmpeg2_42xjpeg;
+    break;
+  case Y4O_C422jpeg:
+    src_c_dec_h=dst_c_dec_h=2;
+    src_c_dec_v=dst_c_dec_v=1;
+    y4x_dst_buf_read_sz=pic_w*pic_h;
+    y4x_aux_buf_sz=y4x_aux_buf_read_sz=0;
+    y4x_convert=y4x_convert_null;
+    break;
+  case Y4O_C411ntscdv:
+    src_c_dec_h=4;
+    /*We don't want to introduce any additional sub-sampling, so we
+      promote 4:1:1 material to 4:2:2, as the closest format Theora can
+      handle.*/
+    dst_c_dec_h=2;
+    src_c_dec_v=dst_c_dec_v=1;
+    y4x_dst_buf_read_sz=pic_w*pic_h;
+    /*Chroma filter required: read into the aux buf first.*/
+    y4x_aux_buf_sz=y4x_aux_buf_read_sz=2*((pic_w+3)/4)*pic_h;
+    y4x_convert=y4x_convert_411_422jpeg;
+    break;
+  case Y4O_C444:
+    src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=1;
+    y4x_dst_buf_read_sz=pic_w*pic_h*3;
+    y4x_aux_buf_sz=y4x_aux_buf_read_sz=0;
+    y4x_convert=y4x_convert_null;
+    break;
+  case Y4O_C444alpha:
+    src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=1;
+    y4x_dst_buf_read_sz=pic_w*pic_h*3;
+    /*Read the extra alpha plane into the aux buf.
+      It will be discarded.*/
+    y4x_aux_buf_sz=y4x_aux_buf_read_sz=pic_w*pic_h;
+    y4x_convert=y4x_convert_null;
+    break;
+  case Y4O_Cmono:
+    src_c_dec_h=src_c_dec_v=0;
+    dst_c_dec_h=dst_c_dec_v=2;
+    y4x_dst_buf_read_sz=pic_w*pic_h;
+    y4x_aux_buf_sz=y4x_aux_buf_read_sz=0;
+    y4x_convert=y4x_convert_mono_420jpeg;
+    break;
+  default:
+    /* can't get here */
+    exit(1);
+  }
+
+  /*The size of the final frame buffers is always computed from the
+    destination chroma decimation type.*/
+  y4x_dst_buf_sz=pic_w*pic_h+2*((pic_w+dst_c_dec_h-1)/dst_c_dec_h)*
+    ((pic_h+dst_c_dec_v-1)/dst_c_dec_v);
+
+  video=test;
+
+  fprintf(stderr,"File %s contains %dx%d %.02f fps %s video.\n",
+          f,pic_w,pic_h,(double)video_fps_n/video_fps_d,
+          y4o_chromaformat_long[chroma_fmt]);
+}
+
 static void id_file(char *f){
   FILE *test;
   unsigned char buffer[80];
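
A worked example of the sizing formula in setup_video() above, assuming a
640x480 4:2:0 destination (dst_c_dec_h==dst_c_dec_v==2):

    y4x_dst_buf_sz = 640*480 + 2*((640+2-1)/2)*((480+2-1)/2)
                   = 307200 + 2*320*240
                   = 460800 bytes  /* one Y plane plus two quarter-size C planes */
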
@@ -756,9 +893,13 @@
           audio_ch=buffer[6]+(buffer[7]<<8);
           audio_hz=buffer[8]+(buffer[9]<<8)+
             (buffer[10]<<16)+(buffer[11]<<24);
-
-          if(buffer[18]+(buffer[19]<<8)!=16){
-            fprintf(stderr,"Can only read 16 bit WAV files for now.\n");
+          audio_b=buffer[18]+(buffer[19]<<8);
+          switch(audio_b){
+          case 16: case 24:
+            audio_b/=8;
+            break;
+          default:
+            fprintf(stderr,"Can only read 16 and 24 bit WAV files for now.\n");
             exit(1);
           }
 
@@ -772,8 +913,8 @@
               ret=fread(buffer,1,4,test);
               if(ret<4)goto riff_err;
 
-              fprintf(stderr,"File %s is 16 bit %d channel %d Hz RIFF WAV audio.\n",
-                      f,audio_ch,audio_hz);
+              fprintf(stderr,"File %s contains %d bit %d channel %d Hz RIFF WAV audio.\n",
+                      f,audio_b*8,audio_ch,audio_hz);
 
               return;
             }
@@ -787,124 +928,99 @@
 
   }
   if(!memcmp(buffer,"YUV4",4)){
-    /* possible YUV2MPEG2 format file */
+    /* possible YUV4MPEG2 or YUV4OGG format file */
     /* read until newline, or 80 cols, whichever happens first */
     int i;
-    for(i=0;i<79;i++){
+    for(i=4;i<79;i++){
       ret=fread(buffer+i,1,1,test);
       if(ret<1)goto yuv_err;
       if(buffer[i]=='\n')break;
     }
     if(i==79){
-      fprintf(stderr,"Error parsing %s header; not a YUV2MPEG2 file?\n",f);
+      fprintf(stderr,"Error parsing %s header; not a YUV2MPEG2 or YUV4OGG file?\n",f);
     }
+
     buffer[i]='\0';
 
-    if(!memcmp(buffer,"MPEG",4)){
+    if(!memcmp(buffer+4,"OGG ",4)){
+      y4o_in_t *yf=y4o_init(test, (char *)buffer);
 
+      if(!yf)
+        goto y4o_err;
+
+      for(i=0;i<yf->num_streams;i++){
+        y4o_stream_t *s=yf->streams[i];
+        if(s->type==Y4O_STREAM_VIDEO){
+          if(video){
+            /* umm, we already have one */
+            fprintf(stderr,"Multiple video sources specified on command line.\n");
+            exit(1);
+          }
+
+          pic_w = s->m.video.w;
+          pic_h = s->m.video.h;
+          video_fps_n = s->m.video.fps_n;
+          video_fps_d = s->m.video.fps_d;
+          interlace = (s->m.video.i == Y4O_I_PROGRESSIVE ? 'p' : 'i');
+          video_par_n = s->m.video.pa_n;
+          video_par_d = s->m.video.pa_d;
+          chroma_fmt = s->m.video.format;
+
+          setup_video(f,test);
+          y4o_video = yf;
+          y4o_video_stream = i;
+
+        }else if(s->type==Y4O_STREAM_AUDIO){
+          if(audio){
+            /* umm, we already have one */
+            fprintf(stderr,"Multiple audio sources specified on command line.\n");
+            exit(1);
+          }
+
+          audio_ch = s->m.audio.ch;
+          audio_hz = s->m.audio.rate;
+          audio_b  = 3;
+
+          fprintf(stderr,"File %s contains 24 bit %d channel %d Hz RIFF WAV audio.\n",
+                  f,audio_ch,audio_hz);
+
+          audio=test;
+          y4o_audio = yf;
+          y4o_audio_stream = i;
+        }
+      }
+
+      if(!yf->synced && y4o_video && y4o_audio){
+        fprintf(stderr,"\nWARNING: YUV4OGG input stream is unsynced! Audio and video\n"
+                "         timing may be out of sync and input buffering may\n"
+                "         require larger than normal amounts of memory.\n");
+      }
+
+      return;
+    }else if(!memcmp(buffer+4,"MPEG",4)){
+
       if(video){
         /* umm, we already have one */
         fprintf(stderr,"Multiple video files specified on command line.\n");
         exit(1);
       }
 
-      if(buffer[4]!='2'){
+      if(buffer[8]!='2'){
         fprintf(stderr,"Incorrect YUV input file version; YUV4MPEG2 required.\n");
       }
 
-      ret=y4m_parse_tags((char *)buffer+5);
+      ret=y4m_parse_tags((char *)buffer+9);
       if(ret<0){
         fprintf(stderr,"Error parsing YUV4MPEG2 header in file %s.\n",f);
         exit(1);
       }
 
-      if(interlace!='p'){
-        fprintf(stderr,"Input video is interlaced; Theora handles only progressive scan\n");
-        exit(1);
-      }
+      setup_video(f,test);
 
-      if(strcmp(chroma_type,"420")==0||strcmp(chroma_type,"420jpeg")==0){
-        src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=2;
-        y4m_dst_buf_read_sz=pic_w*pic_h+2*((pic_w+1)/2)*((pic_h+1)/2);
-        y4m_aux_buf_sz=y4m_aux_buf_read_sz=0;
-        y4m_convert=y4m_convert_null;
-      }
-      else if(strcmp(chroma_type,"420mpeg2")==0){
-        src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=2;
-        y4m_dst_buf_read_sz=pic_w*pic_h;
-        /*Chroma filter required: read into the aux buf first.*/
-        y4m_aux_buf_sz=y4m_aux_buf_read_sz=2*((pic_w+1)/2)*((pic_h+1)/2);
-        y4m_convert=y4m_convert_42xmpeg2_42xjpeg;
-      }
-      else if(strcmp(chroma_type,"420paldv")==0){
-        src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=2;
-        y4m_dst_buf_read_sz=pic_w*pic_h;
-        /*Chroma filter required: read into the aux buf first.
-          We need to make two filter passes, so we need some extra space in the
-           aux buffer.*/
-        y4m_aux_buf_sz=3*((pic_w+1)/2)*((pic_h+1)/2);
-        y4m_aux_buf_read_sz=2*((pic_w+1)/2)*((pic_h+1)/2);
-        y4m_convert=y4m_convert_42xpaldv_42xjpeg;
-      }
-      else if(strcmp(chroma_type,"422")==0){
-        src_c_dec_h=dst_c_dec_h=2;
-        src_c_dec_v=dst_c_dec_v=1;
-        y4m_dst_buf_read_sz=pic_w*pic_h;
-        /*Chroma filter required: read into the aux buf first.*/
-        y4m_aux_buf_sz=y4m_aux_buf_read_sz=2*((pic_w+1)/2)*pic_h;
-        y4m_convert=y4m_convert_42xmpeg2_42xjpeg;
-      }
-      else if(strcmp(chroma_type,"411")==0){
-        src_c_dec_h=4;
-        /*We don't want to introduce any additional sub-sampling, so we
-           promote 4:1:1 material to 4:2:2, as the closest format Theora can
-           handle.*/
-        dst_c_dec_h=2;
-        src_c_dec_v=dst_c_dec_v=1;
-        y4m_dst_buf_read_sz=pic_w*pic_h;
-        /*Chroma filter required: read into the aux buf first.*/
-        y4m_aux_buf_sz=y4m_aux_buf_read_sz=2*((pic_w+3)/4)*pic_h;
-        y4m_convert=y4m_convert_411_422jpeg;
-      }
-      else if(strcmp(chroma_type,"444")==0){
-        src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=1;
-        y4m_dst_buf_read_sz=pic_w*pic_h*3;
-        y4m_aux_buf_sz=y4m_aux_buf_read_sz=0;
-        y4m_convert=y4m_convert_null;
-      }
-      else if(strcmp(chroma_type,"444alpha")==0){
-        src_c_dec_h=dst_c_dec_h=src_c_dec_v=dst_c_dec_v=1;
-        y4m_dst_buf_read_sz=pic_w*pic_h*3;
-        /*Read the extra alpha plane into the aux buf.
-          It will be discarded.*/
-        y4m_aux_buf_sz=y4m_aux_buf_read_sz=pic_w*pic_h;
-        y4m_convert=y4m_convert_null;
-      }
-      else if(strcmp(chroma_type,"mono")==0){
-        src_c_dec_h=src_c_dec_v=0;
-        dst_c_dec_h=dst_c_dec_v=2;
-        y4m_dst_buf_read_sz=pic_w*pic_h;
-        y4m_aux_buf_sz=y4m_aux_buf_read_sz=0;
-        y4m_convert=y4m_convert_mono_420jpeg;
-      }
-      else{
-        fprintf(stderr,"Unknown chroma sampling type: %s\n",chroma_type);
-        exit(1);
-      }
-      /*The size of the final frame buffers is always computed from the
-         destination chroma decimation type.*/
-      y4m_dst_buf_sz=pic_w*pic_h+2*((pic_w+dst_c_dec_h-1)/dst_c_dec_h)*
-       ((pic_h+dst_c_dec_v-1)/dst_c_dec_v);
-
-      video=test;
-
-      fprintf(stderr,"File %s is %dx%d %.02f fps %s video.\n",
-              f,pic_w,pic_h,(double)video_fps_n/video_fps_d,chroma_type);
-
       return;
     }
   }
-  fprintf(stderr,"Input file %s is neither a WAV nor YUV4MPEG2 file.\n",f);
+  fprintf(stderr,"Input file %s is not a WAV, YUV4MPEG2, or YUV4OGG file.\n",f);
   exit(1);
 
  riff_err:
@@ -913,6 +1029,8 @@
  yuv_err:
   fprintf(stderr,"EOF parsing YUV4MPEG2 file %s.\n",f);
   exit(1);
+ y4o_err:
+  exit(1);
 
 }
 
@@ -924,88 +1042,100 @@
   fprintf(stderr,"\r%c",spinascii[spinner]);
 }
 
-int fetch_and_process_audio(FILE *audio,ogg_page *audiopage,
-                            ogg_stream_state *vo,
-                            vorbis_dsp_state *vd,
-                            vorbis_block *vb,
-                            int audioflag){
-  static ogg_int64_t samples_sofar=0;
+static double last_video_time=0;
+static double last_audio_time=0;
+static ogg_int64_t samples_sofar=0;
+
+/* Audio input buffering is handled by libvorbis, so unlike video we
+   don't need to implement both input and output buffering here.  Once
+   we have a block of audio, we can push it into libvorbis immediately. */
+void push_raw_audio_block(unsigned char *readbuffer, int bytesread,
+                          ogg_stream_state *vo,
+                          vorbis_dsp_state *vd,
+                          vorbis_block *vb){
+
+  ogg_int64_t beginsample = audio_hz*begin_sec + audio_hz*begin_usec*.000001;
+  ogg_int64_t endsample = audio_hz*end_sec + audio_hz*end_usec*.000001;
   ogg_packet op;
   int i,j;
-  ogg_int64_t beginsample = audio_hz*begin_sec + audio_hz*begin_usec*.000001;
-  ogg_int64_t endsample = audio_hz*end_sec + audio_hz*end_usec*.000001;
 
-  while(audio && !audioflag){
-    /* process any audio already buffered */
-    spinnit();
-    if(ogg_stream_pageout(vo,audiopage)>0) return 1;
-    if(ogg_stream_eos(vo))return 0;
+  unsigned char *readptr=readbuffer;
+  int sampread=bytesread/audio_b/audio_ch;
+  float **vorbis_buffer;
+  int count=0;
 
-    {
-      /* read and process more audio */
-      signed char readbuffer[4096];
-      signed char *readptr=readbuffer;
-      int toread=4096/2/audio_ch;
-      int bytesread=fread(readbuffer,1,toread*2*audio_ch,audio);
-      int sampread=bytesread/2/audio_ch;
-      float **vorbis_buffer;
-      int count=0;
-
-      if(bytesread<=0 ||
-         (samples_sofar>=endsample && endsample>0)){
-        /* end of file.  this can be done implicitly, but it's
-           easier to see here in non-clever fashion.  Tell the
-           library we're at end of stream so that it can handle the
-           last frame and mark end of stream in the output properly */
-        vorbis_analysis_wrote(vd,0);
+  if(bytesread<=0 || (samples_sofar>=endsample && endsample>0)){
+    /* end of file.  this can be done implicitly, but it's
+       easier to see here in non-clever fashion.  Tell the
+       library we're at end of stream so that it can handle the
+       last frame and mark end of stream in the output properly */
+    vorbis_analysis_wrote(vd,0);
+  }else{
+    if(samples_sofar < beginsample){
+      if(samples_sofar+sampread > beginsample){
+        readptr += (beginsample-samples_sofar)*audio_b*audio_ch;
+        sampread += samples_sofar-beginsample;
+        samples_sofar = sampread+beginsample;
       }else{
-        if(samples_sofar < beginsample){
-          if(samples_sofar+sampread > beginsample){
-            readptr += (beginsample-samples_sofar)*2*audio_ch;
-            sampread += samples_sofar-beginsample;
-            samples_sofar = sampread+beginsample;
-          }else{
-            samples_sofar += sampread;
-            sampread = 0;
-          }
-        }else{
-          samples_sofar += sampread;
-        }
+        samples_sofar += sampread;
+        sampread = 0;
+      }
+    }else{
+      samples_sofar += sampread;
+    }
 
-        if(samples_sofar > endsample && endsample > 0)
-          sampread-= (samples_sofar - endsample);
+    if(samples_sofar > endsample && endsample > 0)
+      sampread-= (samples_sofar - endsample);
 
-        if(sampread>0){
+    if(sampread>0){
 
-          vorbis_buffer=vorbis_analysis_buffer(vd,sampread);
-          /* uninterleave samples */
-          for(i=0;i<sampread;i++){
-            for(j=0;j<audio_ch;j++){
-              vorbis_buffer[j][i]=((readptr[count+1]<<8)|
-                                   (0x00ff&(int)readptr[count]))/32768.f;
-              count+=2;
-            }
+      vorbis_buffer=vorbis_analysis_buffer(vd,sampread);
+      /* uninterleave samples */
+      switch(audio_b){
+      case 2:
+        for(i=0;i<sampread;i++){
+          for(j=0;j<audio_ch;j++){
+            vorbis_buffer[j][i]=(((signed char)readptr[count+1]<<8)|
+                                 (readptr[count]))/32768.f;
+            count+=2;
           }
-
-          vorbis_analysis_wrote(vd,sampread);
         }
+        break;
+      case 3:
+        for(i=0;i<sampread;i++){
+          for(j=0;j<audio_ch;j++){
+            vorbis_buffer[j][i]=(((signed char)readptr[count+2]<<16)|
+                                 (readptr[count+1]<<8)|
+                                 (readptr[count]))/8388608.f;
+            count+=3;
+          }
+        }
+        break;
       }
 
-      while(vorbis_analysis_blockout(vd,vb)==1){
+      vorbis_analysis_wrote(vd,sampread);
+    }
+  }
 
-        /* analysis, assume we want to use bitrate management */
-        vorbis_analysis(vb,NULL);
-        vorbis_bitrate_addblock(vb);
+  /* while we're at it, pull all data through the encoder and into
+     output stream buffering.  The compressed audio in the output
+     buffer takes up less space than the uncompressed audio in the
+     input buffer */
+  while(vorbis_analysis_blockout(vd,vb)==1){
 
-        /* weld packets into the bitstream */
-        while(vorbis_bitrate_flushpacket(vd,&op))
-          ogg_stream_packetin(vo,&op);
+    /* analysis, assume we want to use bitrate management */
+    vorbis_analysis(vb,NULL);
+    vorbis_bitrate_addblock(vb);
 
+    /* weld packets into the bitstream */
+    while(vorbis_bitrate_flushpacket(vd,&op)){
+      if(vorbis_granule_time(vd,op.granulepos)>last_audio_time+MAX_BUFFER_SKEW){
+        fprintf(stderr,"ERROR: Audio / Video buffer skew has exceeded maximum limit.\n");
+        exit(1);
       }
+      ogg_stream_packetin(vo,&op);
     }
   }
-
-  return audioflag;
 }
 
 static int                 frame_state=-1;
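
The sample scaling above follows the usual signed-PCM convention: an n-bit
sample s maps to s/2^(n-1), hence the /32768.f for 16-bit and /8388608.f
for 24-bit, with the top byte cast to signed char so the sign bit extends.
As a standalone sketch (pcm_to_float is a hypothetical helper, not part of
this patch):

    static float pcm_to_float(const unsigned char *p,int bytes){
      if(bytes==2) /* 16-bit little-endian */
        return (((signed char)p[1]<<8)|p[0])/32768.f;
      /* bytes==3: 24-bit little-endian */
      return (((signed char)p[2]<<16)|(p[1]<<8)|p[0])/8388608.f;
    }
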
@@ -1013,87 +1143,22 @@
 static unsigned char      *yuvframe[3];
 static th_ycbcr_buffer     ycbcr;
 
-int fetch_and_process_video_packet(FILE *video,FILE *twopass_file,int passno,
- th_enc_ctx *td,ogg_packet *op){
-  int                        ret;
-  int                        pic_sz;
-  int                        frame_c_w;
-  int                        frame_c_h;
-  int                        c_w;
-  int                        c_h;
-  int                        c_sz;
-  ogg_int64_t                beginframe;
-  ogg_int64_t                endframe;
-  spinnit();
-  beginframe=(video_fps_n*begin_sec+video_fps_n*begin_usec*.000001)/video_fps_d;
-  endframe=(video_fps_n*end_sec+video_fps_n*end_usec*.000001)/video_fps_d;
-  if(frame_state==-1){
-    /* initialize the double frame buffer */
-    yuvframe[0]=(unsigned char *)malloc(y4m_dst_buf_sz);
-    yuvframe[1]=(unsigned char *)malloc(y4m_dst_buf_sz);
-    yuvframe[2]=(unsigned char *)malloc(y4m_aux_buf_sz);
-    frame_state=0;
-  }
-  pic_sz=pic_w*pic_h;
-  frame_c_w=frame_w/dst_c_dec_h;
-  frame_c_h=frame_h/dst_c_dec_v;
-  c_w=(pic_w+dst_c_dec_h-1)/dst_c_dec_h;
-  c_h=(pic_h+dst_c_dec_v-1)/dst_c_dec_v;
-  c_sz=c_w*c_h;
-  /* read and process more video */
-  /* video strategy reads one frame ahead so we know when we're
-     at end of stream and can mark last video frame as such
-     (vorbis audio has to flush one frame past last video frame
-     due to overlap and thus doesn't need this extra work */
+void process_video_block(FILE *twopass_file,int passno,
+                         th_enc_ctx *td, ogg_stream_state *to){
+  int pic_sz=pic_w*pic_h;
+  int frame_c_w=frame_w/dst_c_dec_h;
+  int frame_c_h=frame_h/dst_c_dec_v;
+  int c_w=(pic_w+dst_c_dec_h-1)/dst_c_dec_h;
+  int c_h=(pic_h+dst_c_dec_v-1)/dst_c_dec_v;
+  int c_sz=c_w*c_h;
+  ogg_packet op;
+  int ret;
 
-  /* have two frame buffers full (if possible) before
-     proceeding.  after first pass and until eos, one will
-     always be full when we get here */
-  for(;frame_state<2 && (frames<endframe || endframe<0);){
-    char c,frame[6];
-    int ret=fread(frame,1,6,video);
-    /* match and skip the frame header */
-    if(ret<6)break;
-    if(memcmp(frame,"FRAME",5)){
-      fprintf(stderr,"Loss of framing in YUV input data\n");
-      exit(1);
-    }
-    if(frame[5]!='\n'){
-      int j;
-      for(j=0;j<79;j++)
-        if(fread(&c,1,1,video)&&c=='\n')break;
-      if(j==79){
-        fprintf(stderr,"Error parsing YUV frame header\n");
-        exit(1);
-      }
-    }
-    /*Read the frame data that needs no conversion.*/
-    if(fread(yuvframe[frame_state],1,y4m_dst_buf_read_sz,video)!=
-     y4m_dst_buf_read_sz){
-      fprintf(stderr,"Error reading YUV frame data.\n");
-      exit(1);
-    }
-    /*Read the frame data that does need conversion.*/
-    if(fread(yuvframe[2],1,y4m_aux_buf_read_sz,video)!=y4m_aux_buf_read_sz){
-      fprintf(stderr,"Error reading YUV frame data.\n");
-      exit(1);
-    }
-    /*Now convert the just read frame.*/
-    (*y4m_convert)(yuvframe[frame_state],yuvframe[2]);
-    frames++;
-    if(frames>=beginframe)
-    frame_state++;
-  }
-  /* check to see if there are dupes to flush */
-  if(th_encode_packetout(td,frame_state<1,op)>0)return 1;
-  if(frame_state<1){
-    /* can't get here unless YUV4MPEG stream has no video */
-    fprintf(stderr,"Video input contains no frames.\n");
-    exit(1);
-  }
   /* Theora is a one-frame-in,one-frame-out system; submit a frame
      for compression and pull out the packet */
-  /* in two-pass mode's second pass, we need to submit first-pass data */
+  /* in two-pass mode's second pass, we need to submit first-pass
+     data; this can be pulled on-demand as it's separate local data */
+
   if(passno==2){
     for(;;){
       static unsigned char buffer[80];
@@ -1142,12 +1207,15 @@
   ycbcr[2].stride=c_w;
   ycbcr[2].data=ycbcr[1].data+c_sz;
   th_encode_ycbcr_in(td,ycbcr);
+
+  /* flip frame buffers */
   {
     unsigned char *temp=yuvframe[0];
     yuvframe[0]=yuvframe[1];
     yuvframe[1]=temp;
     frame_state--;
   }
+
   /* in two-pass mode's first pass we need to extract and save the pass data */
   if(passno==1){
     unsigned char *buffer;
@@ -1162,8 +1230,9 @@
     }
     fflush(twopass_file);
   }
-  /* if there was only one frame, it's the last in the stream */
-  ret = th_encode_packetout(td,frame_state<1,op);
+  /* if there was only one frame in the buffer, it was the last in the stream */
+  ret = th_encode_packetout(td,frame_state<1,&op);
+
   if(passno==1 && frame_state<1){
     /* need to read the final (summary) packet */
     unsigned char *buffer;
@@ -1182,26 +1251,274 @@
     }
     fflush(twopass_file);
   }
-  return ret;
+
+  if(ret){
+    if(passno==1){
+      /* first pass does not push pages, so we need to track time separately */
+      last_video_time = th_granule_time(td,op.granulepos);
+    }else{
+      if(th_granule_time(td,op.granulepos)>last_video_time+MAX_BUFFER_SKEW){
+        fprintf(stderr,"ERROR: Audio / Video buffer skew has exceeded maximum limit.\n");
+        exit(1);
+      }
+      ogg_stream_packetin(to,&op);
+    }
+  }
+
+  /* fetch dupe frames if any */
+  while(th_encode_packetout(td,frame_state<1,&op)>0){
+    if(passno!=1){
+      ogg_stream_packetin(to,&op);
+    }else{
+      /* first pass does not push pages, so we need to track time separately */
+      last_video_time=th_granule_time(td,op.granulepos);
+    }
+    /* no need to re-check buffer depth guard here */
+  }
 }
 
 
-int fetch_and_process_video(FILE *video,ogg_page *videopage,
- ogg_stream_state *to,th_enc_ctx *td,FILE *twopass_file,int passno,
- int videoflag){
-  ogg_packet op;
-  int ret;
-  /* is there a video page flushed?  If not, work until there is. */
-  while(!videoflag){
-    if(ogg_stream_pageout(to,videopage)>0) return 1;
-    if(ogg_stream_eos(to)) return 0;
-    ret=fetch_and_process_video_packet(video,twopass_file,passno,td,&op);
-    if(ret<=0)return 0;
-    ogg_stream_packetin(to,&op);
+/* This is a triggered push, not an on-demand pull.  Rather than
+   requesting a block of compressed video, what we actually have is a
+   frame of uncompressed video that has to be pushed into buffering
+   right now.  This may require pulling data out of the encoder/input
+   buffer into the output buffer to make space for a frame. */
+void push_raw_video_block(FILE *twopass_file,int passno,
+                          th_enc_ctx *td, ogg_stream_state *to,
+                          y4o_frame_t *p){
+
+  ogg_int64_t beginframe=(video_fps_n*begin_sec+video_fps_n*begin_usec*.000001)/video_fps_d;
+  ogg_int64_t endframe=(video_fps_n*end_sec+video_fps_n*end_usec*.000001)/video_fps_d;
+
+  if(frame_state==-1){
+    /* initialize the double frame buffer */
+    yuvframe[0]=(unsigned char *)malloc(y4x_dst_buf_sz);
+    yuvframe[1]=(unsigned char *)malloc(y4x_dst_buf_sz);
+    yuvframe[2]=(unsigned char *)malloc(y4x_aux_buf_sz);
+    frame_state=0;
   }
-  return videoflag;
+
+  /* Do we actually need to read this frame into buffering?  If it's
+     outside our read range, just pull and discard. */
+  if(frames<beginframe || (frames>=endframe && endframe>=0)){
+    if(y4o_video){
+      /* in the y4o case, the frame header has already been read */
+      y4o_read_frame_data(y4o_video,p);
+      y4o_free_frame(p);
+    } /* else y4m is not interleaved; do nothing */
+    return;
+  }
+
+  /* if the frame input buffer is full, we need to pull a frame
+     through the encoder to make space before reading a new one into
+     buffering */
+  while(frame_state>=2)
+    process_video_block(twopass_file,passno,td,to);
+
+  /* now we have space to push the video input */
+  /* header read/check */
+  if(y4o_video){
+    /* verify the payload is what we're expecting */
+    if(p->len != y4x_dst_buf_read_sz+y4x_aux_buf_read_sz){
+      fprintf(stderr,"Error in YUV frame size:  Expected %d, got %d.\n",
+              (int)(y4x_dst_buf_read_sz+y4x_aux_buf_read_sz),(int)(p->len));
+      exit(1);
+    }
+  }else{
+    /* y4o header already read, y4m needs to read it still */
+    char c,frame[6];
+    int ret=fread(frame,1,6,video);
+    /* match and skip the frame header */
+    if(ret<6)return;
+    if(memcmp(frame,"FRAME",5)){
+      fprintf(stderr,"Loss of framing in YUV input data\n");
+      exit(1);
+    }
+    if(frame[5]!='\n'){
+      int j;
+      for(j=0;j<79;j++)
+        if(fread(&c,1,1,video)&&c=='\n')break;
+      if(j==79){
+        fprintf(stderr,"Error parsing YUV frame header\n");
+        exit(1);
+      }
+    }
+  }
+
+  /*Read the frame data that needs no conversion.*/
+  if(fread(yuvframe[frame_state],1,y4x_dst_buf_read_sz,video)!=
+     y4x_dst_buf_read_sz){
+    fprintf(stderr,"Error reading YUV frame data.\n");
+    exit(1);
+  }
+  /*Read the frame data that does need conversion.*/
+  if(fread(yuvframe[2],1,y4x_aux_buf_read_sz,video)!=y4x_aux_buf_read_sz){
+    fprintf(stderr,"Error reading YUV frame data.\n");
+    exit(1);
+  }
+  /*Now convert the just read frame.*/
+  (*y4x_convert)(yuvframe[frame_state],yuvframe[2]);
+  frames++;
+  frame_state++;
 }
 
+static unsigned char audioheader[27+255];
+static unsigned char audiobody[255*255];
+
+int fetch_audio(ogg_stream_state *to,
+                ogg_stream_state *vo,
+                th_enc_ctx *td,
+                vorbis_dsp_state *vd,
+                vorbis_block *vb,
+                FILE *twopass_file,
+                int passno,
+                ogg_page *audiopage){
+  ogg_page og;
+  while(audio){
+    /* process any audio already buffered */
+    spinnit();
+    if(ogg_stream_pageout(vo,&og)>0){
+      double t=vorbis_granule_time(vd,ogg_page_granulepos(&og));
+
+      /* Ogg will reclaim the memory associated with the page the next
+         time the stream is accessed.  Unfortunately, because we're
+         using push buffering (to avoid having to implement an async
+         input buffer to handle interleaved Y4O input streams),
+         working ahead to prime the video pipe may cause another
+         stream push in the audio stream before this page is used.
+         Thus we copy the data into local storage.  There are
+         obviously more efficient ways to handle this, but the extra
+         effort would be misplaced here. */
+
+      memcpy(audiopage,&og,sizeof(og));
+      audiopage->header=audioheader;
+      audiopage->body=audiobody;
+      memcpy(audiopage->header,og.header,og.header_len);
+      memcpy(audiopage->body,og.body,og.body_len);
+
+      if(t!=-1)last_audio_time=t;
+      return 1;
+    }
+    if(ogg_stream_eos(vo))return 0;
+
+    /* read and process more audio; because some inputs may have audio
+       and video in the same stream, this may result in needing to
+       push video into buffering as well. */
+
+    if(y4o_audio){
+      /* y4o streams are structured and may have other data types mixed in */
+      y4o_frame_t *p=y4o_read_frame_header(y4o_audio);
+      if(!p){
+        push_raw_audio_block(NULL, 0, vo, vd, vb);
+      }else{
+        if(p->streamno == y4o_audio_stream){
+          y4o_read_frame_data(y4o_audio,p);
+          push_raw_audio_block((unsigned char *)p->data, p->len, vo, vd, vb);
+        }else if(y4o_audio == y4o_video && p->streamno == y4o_video_stream){
+          push_raw_video_block(twopass_file,passno,td,to,p);
+        }else{
+          /* unknown frame type, discard */
+          y4o_read_frame_data(y4o_audio,p);
+          y4o_free_frame(p);
+        }
+      }
+    }else{
+      /* the only other audio source is wav, so it's a raw blob of data */
+      unsigned char readbuffer[4096];
+      int toread=4096/audio_b/audio_ch;
+      int bytesread=fread(readbuffer,1,toread*audio_b*audio_ch,audio);
+
+      push_raw_audio_block(readbuffer, bytesread, vo, vd, vb);
+    }
+  }
+
+  return 0;
+}
+
+static unsigned char videoheader[27+255];
+static unsigned char videobody[255*255];
+
+int fetch_video(ogg_stream_state *to,
+                ogg_stream_state *vo,
+                th_enc_ctx *td,
+                vorbis_dsp_state *vd,
+                vorbis_block *vb,
+                FILE *twopass_file,
+                int passno,
+                ogg_page *videopage){
+
+  ogg_int64_t endframe=(video_fps_n*end_sec+video_fps_n*end_usec*.000001)/video_fps_d;
+  ogg_page og;
+
+  while(video){
+    /* process any video already buffered */
+    spinnit();
+    if(passno!=1){
+      if(ogg_stream_pageout(to,&og)>0){
+        double t=th_granule_time(td,ogg_page_granulepos(&og));
+
+        /* Ogg will reclaim the memory associated with the page the next
+           time the stream is accessed.  Unfortunately, because we're
+           using push buffering (to avoid having to implement an async
+           input buffer to handle interleaved Y4O input streams),
+           working ahead to prime the audio pipe may cause another
+           stream push in the video stream before this page is used.
+           Thus we copy the data into local storage.  There are
+           obviously more efficient ways to handle this, but the extra
+           effort would be misplaced here. */
+
+        memcpy(videopage,&og,sizeof(og));
+        videopage->header=videoheader;
+        videopage->body=videobody;
+        memcpy(videopage->header,og.header,og.header_len);
+        memcpy(videopage->body,og.body,og.body_len);
+
+        if(t!=-1)last_video_time=t;
+        return 1;
+      }
+      if(ogg_stream_eos(to))return 0;
+    }
+    /* Are we draining the stream at encode end? */
+    if((frames>=endframe && endframe>=0) || feof(video)){
+      if(frame_state>0){ /* this guards startup as well as two-pass
+                            first-pass packet drain */
+        process_video_block(twopass_file,passno,td,to);
+        if(passno==1) return 1;
+        continue;
+      }else{
+        return 0;
+      }
+    }
+
+    /* read and process more video; because some inputs may have audio
+       and video in the same stream, this may result in needing to
+       push audio as well. */
+
+    if(y4o_video){
+      /* y4o streams are structured and may have other data types mixed in */
+      y4o_frame_t *p=y4o_read_frame_header(y4o_video);
+      if(p){
+        if(p->streamno == y4o_video_stream){
+          push_raw_video_block(twopass_file,passno,td,to,p);
+          if(passno==1) return 1;
+        }else if(y4o_audio == y4o_video && p->streamno == y4o_audio_stream && passno!=1){
+          y4o_read_frame_data(y4o_audio,p);
+          push_raw_audio_block((unsigned char *)p->data, p->len, vo, vd, vb);
+        }else{
+          /* unknown frame type, discard */
+          y4o_read_frame_data(y4o_video,p);
+          y4o_free_frame(p);
+        }
+      }
+    }else{
+      /* the only other video source is y4m */
+      push_raw_video_block(twopass_file,passno,td,to,NULL);
+      if(passno==1) return 1;
+    }
+  }
+  return 0;
+}
+
 static int ilog(unsigned _v){
   int ret;
   for(ret=0;_v;ret++)_v>>=1;
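
Both fetch paths above use the same y4o pull pattern (a sketch; yf and
wanted are hypothetical locals, the API names are those used in this
patch):

    y4o_frame_t *p=y4o_read_frame_header(yf);   /* NULL at end of stream */
    if(p){
      if(p->streamno==wanted){
        y4o_read_frame_data(yf,p);    /* payload now at p->data / p->len */
        /* ...hand the payload to the matching push_raw_*_block()... */
      }else{
        /* not ours: consume the payload so framing stays intact, discard */
        y4o_read_frame_data(yf,p);
        y4o_free_frame(p);
      }
    }
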
@@ -1230,15 +1547,12 @@
   vorbis_block     vb; /* local working space for packet->PCM decode */
 
   int speed=-1;
-  int audioflag=0;
-  int videoflag=0;
   int akbps=0;
   int vkbps=0;
   int soft_target=0;
 
   ogg_int64_t audio_bytesout=0;
   ogg_int64_t video_bytesout=0;
-  double timebase;
 
   FILE *outfile = stdout;
 
@@ -1256,6 +1570,8 @@
   _setmode( _fileno( stdout ), _O_BINARY );
 #endif
 
+  fprintf(stderr,"\n");
+
   while((c=getopt_long(argc,argv,optstring,options,&long_option_index))!=EOF){
     switch(c){
     case 'o':
@@ -1453,6 +1769,7 @@
     id_file(argv[optind]);
     optind++;
   }
+  fprintf(stderr,"\n");
 
   if(twopass==3){
     /* verify that the input is seekable! */
@@ -1625,7 +1942,7 @@
       ret=th_encode_ctl(td,TH_ENCCTL_SET_RATE_BUFFER,
        &buf_delay,sizeof(buf_delay));
       if(ret<0){
-        fprintf(stderr,"Warning: could not set desired buffer delay.\n");
+        fprintf(stderr,"WARNING: could not set desired buffer delay.\n");
       }
     }
     /*Speed should also be set after the current encoder mode is established,
@@ -1727,80 +2044,80 @@
       }
     }
     /* setup complete.  Raw processing loop */
-      switch(passno){
-      case 0: case 2:
-        fprintf(stderr,"\rCompressing....                                          \n");
-        break;
-      case 1:
-        fprintf(stderr,"\rScanning first pass....                                  \n");
-        break;
-      }
-    for(;;){
-      int audio_or_video=-1;
-      if(passno==1){
-        ogg_packet op;
-        int ret=fetch_and_process_video_packet(video,twopass_file,passno,td,&op);
-        if(ret<0)break;
-        if(op.e_o_s)break; /* end of stream */
-        timebase=th_granule_time(td,op.granulepos);
-        audio_or_video=1;
-      }else{
-        double audiotime;
-        double videotime;
-        ogg_page audiopage;
-        ogg_page videopage;
-        /* is there an audio page flushed?  If not, fetch one if possible */
-        audioflag=fetch_and_process_audio(audio,&audiopage,&vo,&vd,&vb,audioflag);
-        /* is there a video page flushed?  If not, fetch one if possible */
-        videoflag=fetch_and_process_video(video,&videopage,&to,td,twopass_file,passno,videoflag);
-        /* no pages of either?  Must be end of stream. */
-        if(!audioflag && !videoflag)break;
-        /* which is earlier; the end of the audio page or the end of the
-           video page? Flush the earlier to stream */
-        audiotime=
-        audioflag?vorbis_granule_time(&vd,ogg_page_granulepos(&audiopage)):-1;
-        videotime=
-        videoflag?th_granule_time(td,ogg_page_granulepos(&videopage)):-1;
-        if(!audioflag){
-          audio_or_video=1;
-        } else if(!videoflag) {
-          audio_or_video=0;
-        } else {
-          if(audiotime<videotime)
-            audio_or_video=0;
-          else
-            audio_or_video=1;
-        }
-        if(audio_or_video==1){
-          /* flush a video page */
-          video_bytesout+=fwrite(videopage.header,1,videopage.header_len,outfile);
-          video_bytesout+=fwrite(videopage.body,1,videopage.body_len,outfile);
-          videoflag=0;
-          timebase=videotime;
+    switch(passno){
+    case 0: case 2:
+      fprintf(stderr,"\rCompressing....                                          \n");
+      break;
+    case 1:
+      fprintf(stderr,"\rScanning first pass....                                  \n");
+      break;
+    }
+
+    {
+      int have_audio_page=0;
+      int have_video_page=0;
+      ogg_page audiopage;
+      ogg_page videopage;
+      double audiotime;
+      double videotime;
+      double timebase=-1;
+
+      for(;;){
+        if(passno==1){
+          if(fetch_video(&to,&vo,td,&vd,&vb,twopass_file,1,NULL)<=0) break;
+          timebase = last_video_time;
         }else{
-          /* flush an audio page */
-          audio_bytesout+=fwrite(audiopage.header,1,audiopage.header_len,outfile);
-          audio_bytesout+=fwrite(audiopage.body,1,audiopage.body_len,outfile);
-          audioflag=0;
-          timebase=audiotime;
+
+          if(!have_video_page)
+            have_video_page = fetch_video(&to,&vo,td,&vd,&vb,twopass_file,passno,&videopage);
+          if(!have_audio_page)
+            have_audio_page = fetch_audio(&to,&vo,td,&vd,&vb,twopass_file,passno,&audiopage);
+
+          /* no pages of either?  Must be end of stream. */
+          if(!have_audio_page && !have_video_page)break;
+
+          /* if we have both audio and video to flush, which is
+             earlier; the end of the audio page or the end of the
+             video page? Flush the earlier to stream. */
+          audiotime=have_audio_page?vorbis_granule_time(&vd,ogg_page_granulepos(&audiopage)):-1;
+          videotime=have_video_page?th_granule_time(td,ogg_page_granulepos(&videopage)):-1;
+
+          if(have_video_page && (!have_audio_page || videotime<audiotime)){
+            /* flush a video page */
+            video_bytesout+=fwrite(videopage.header,1,videopage.header_len,outfile);
+            video_bytesout+=fwrite(videopage.body,1,videopage.body_len,outfile);
+            have_video_page=0;
+            if(videotime>0)vkbps=(int)rint(video_bytesout*8./videotime*.001);
+            timebase=videotime;
+          }else{
+            /* flush an audio page */
+            audio_bytesout+=fwrite(audiopage.header,1,audiopage.header_len,outfile);
+            audio_bytesout+=fwrite(audiopage.body,1,audiopage.body_len,outfile);
+            have_audio_page=0;
+            if(audiotime>0)akbps=(int)rint(audio_bytesout*8./audiotime*.001);
+            timebase=audiotime;
+          }
         }
+
+        if(timebase>0){
+          int hundredths=(int)(timebase*100-(long)timebase*100);
+          int seconds=(long)timebase%60;
+          int minutes=((long)timebase/60)%60;
+          int hours=(long)timebase/3600;
+          fprintf(stderr,
+                  "\r      %d:%02d:%02d.%02d audio: %dkbps video: %dkbps                 ",
+                  hours,minutes,seconds,hundredths,akbps,vkbps);
+        }
       }
-      if(timebase > 0){
-        int hundredths=(int)(timebase*100-(long)timebase*100);
-        int seconds=(long)timebase%60;
-        int minutes=((long)timebase/60)%60;
-        int hours=(long)timebase/3600;
-        if(audio_or_video)vkbps=(int)rint(video_bytesout*8./timebase*.001);
-        else akbps=(int)rint(audio_bytesout*8./timebase*.001);
-        fprintf(stderr,
-                "\r      %d:%02d:%02d.%02d audio: %dkbps video: %dkbps                 ",
-                hours,minutes,seconds,hundredths,akbps,vkbps);
-      }
     }
     if(video)th_encode_free(td);
   }
 
   /* clear out state */
+  if(y4o_video && y4o_video!=y4o_audio)
+    y4o_free(y4o_video);
+  if(y4o_audio)
+    y4o_free(y4o_audio);
   if(audio){
     ogg_stream_clear(&vo);
     vorbis_block_clear(&vb);
@@ -1812,7 +2129,7 @@
   if(video){
     ogg_stream_clear(&to);
     th_comment_clear(&tc);
-    if(video!=stdin)fclose(video);
+    if(video!=stdin && audio!=video)fclose(video);
   }
 
   if(outfile && outfile!=stdout)fclose(outfile);
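
(The running bitrate figures computed above are bytes*8/seconds*.001, i.e.
kilobits per second: for example, 1250000 bytes flushed by t=10s gives
rint(1250000*8./10*.001) = 1000 kbps.)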


