#include <stdio.h>
#include <getopt.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/time.h>
#include <math.h>
#include "theora/theoraenc.h"
#include "vorbis/codec.h"
#include "vorbis/vorbisenc.h"
/* --- Encoder configuration (file scope; filled in by init_param()/main()) --- */
FILE *audio=NULL;        /* raw 16-bit little-endian interleaved PCM input */
FILE *video=NULL;        /* raw YUV input: "FRAME\n" tag + planar frame data */
int audio_ch=0;          /* audio channel count */
int audio_hz=0;          /* audio sample rate in Hz */
float audio_q=.1f;       /* Vorbis VBR base quality passed to vorbis_encode_init_vbr() */
int audio_r=-1;          /* audio bitrate target; only used when audio_q<=-99 */
int frame_w=0;           /* padded (encoded) frame width — multiple of 16 */
int frame_h=0;           /* padded (encoded) frame height — multiple of 16 */
int pic_w=0;             /* visible picture width */
int pic_h=0;             /* visible picture height */
int pic_x=0;             /* x offset of the picture inside the padded frame (kept even) */
int pic_y=0;             /* y offset of the picture inside the padded frame (kept even) */
int video_fps_n=-1;      /* frame rate numerator */
int video_fps_d=-1;      /* frame rate denominator */
int video_par_n=-1;      /* pixel aspect ratio numerator (0 = unspecified) */
int video_par_d=-1;      /* pixel aspect ratio denominator (0 = unspecified) */
int src_c_dec_h=2;       /* source chroma decimation, horizontal (2 = 4:2:0) */
int src_c_dec_v=2;       /* source chroma decimation, vertical */
int dst_c_dec_h=2;       /* output chroma decimation, horizontal */
int dst_c_dec_v=2;       /* output chroma decimation, vertical */
size_t y4m_dst_buf_sz;       /* bytes per destination frame buffer */
size_t y4m_dst_buf_read_sz;  /* bytes of raw input consumed per frame */
int video_r=-1;          /* video bitrate target; -1 = quality-based encoding */
int video_q=-1;          /* Theora quality (0..63 internal scale) */
ogg_uint32_t keyframe_frequency=0;  /* forced keyframe interval in frames */
int buf_delay=-1;        /* rate-control buffer delay; -1 = library default */
/* Set the fixed demo parameters and derive the per-frame buffer sizes.
   Must be called before any frames are read or the Theora info is set up. */
void init_param()
{
  audio_ch = 2;        /* stereo */
  audio_hz = 44100;
  pic_w = 384;         /* 384x288 picture */
  pic_h = 288;
  video_fps_n = 25;    /* 25 fps */
  video_fps_d = 1;
  video_par_n = 0;     /* 0/0: pixel aspect ratio unspecified */
  video_par_d = 0;
  src_c_dec_h = 2;     /* 4:2:0 chroma both in and out */
  dst_c_dec_h = 2;
  src_c_dec_v = 2;
  dst_c_dec_v = 2;
  /* Bytes of raw input per frame: luma plane plus two chroma planes at the
     SOURCE decimation.  (Previously hard-coded "2"; tied to src_c_dec_* so
     the formula stays consistent with the dst computation below.) */
  y4m_dst_buf_read_sz = pic_w*pic_h+
    2*((pic_w+src_c_dec_h-1)/src_c_dec_h)*((pic_h+src_c_dec_v-1)/src_c_dec_v);
  /* Bytes per destination frame buffer at the OUTPUT decimation. */
  y4m_dst_buf_sz = pic_w*pic_h+
    2*((pic_w+dst_c_dec_h-1)/dst_c_dec_h)*((pic_h+dst_c_dec_v-1)/dst_c_dec_v);
}
/* Pump audio through the Vorbis encoder until a complete Ogg page is ready.
   Reads 16-bit little-endian interleaved PCM from 'audio', deinterleaves it
   into the analysis buffer, and queues any finished packets on 'vo'.
   Returns 1 when *audiopage holds a flushed page, 0 at end of stream, or the
   incoming audioflag when a page was already pending. */
int fetch_and_process_audio(FILE *audio,
ogg_page *audiopage,
ogg_stream_state *vo,
vorbis_dsp_state *vd,
vorbis_block *vb,
int audioflag)
{
  static ogg_int64_t samples_sofar=0;
  ogg_packet op;
  while(audio&&!audioflag){
    /* hand out a page as soon as one is available */
    if(ogg_stream_pageout(vo,audiopage)>0)return 1;
    if(ogg_stream_eos(vo))return 0;
    {
      signed char pcm[4096];
      signed char *src=pcm;
      int want_samples=4096/2/audio_ch;
      int got_bytes=fread(pcm,1,want_samples*2*audio_ch,audio);
      int got_samples=got_bytes/2/audio_ch;
      if(got_bytes<=0){
        /* end of input: tell Vorbis no more data is coming */
        vorbis_analysis_wrote(vd,0);
      }else{
        samples_sofar+=got_samples;
        if(got_samples>0){
          float **analysis=vorbis_analysis_buffer(vd,got_samples);
          int pos=0;
          int samp;
          /* deinterleave signed 16-bit LE samples into per-channel floats */
          for(samp=0;samp<got_samples;samp++){
            int ch;
            for(ch=0;ch<audio_ch;ch++){
              analysis[ch][samp]=((src[pos+1]<<8)|(0x00ff&(int)src[pos]))/32768.f;
              pos+=2;
            }
          }
          vorbis_analysis_wrote(vd,got_samples);
        }
      }
    }
    /* run analysis (with bitrate management) and queue finished packets */
    while(vorbis_analysis_blockout(vd,vb)==1){
      vorbis_analysis(vb,NULL);
      vorbis_bitrate_addblock(vb);
      while(vorbis_bitrate_flushpacket(vd,&op))
        ogg_stream_packetin(vo,&op);
    }
  }
  return audioflag;
}
/* Two-frame pipeline used to detect the final frame before submission. */
static int frame_state=-1;          /* -1: buffers unallocated; else # of buffered frames */
static unsigned char *yuvframe[2];  /* raw frame buffers (luma + 2 chroma planes) */
static th_ycbcr_buffer ycbcr;       /* plane descriptors handed to th_encode_ycbcr_in() */
/* Read up to two YUV frames from 'video', submit the oldest one to the Theora
   encoder 'td', and retrieve one compressed packet into *op.
   Returns th_encode_packetout()'s result: >0 packet ready, 0 none, <0 error. */
int fetch_and_process_video_packet(FILE *video,th_enc_ctx *td,ogg_packet *op)
{
  int pic_sz;
  int frame_c_w;
  int frame_c_h;
  int c_w;
  int c_h;
  int c_sz;
  if(frame_state==-1)
  {
    /* lazy one-time allocation of the two frame buffers */
    yuvframe[0]=(unsigned char *)malloc(y4m_dst_buf_sz);
    yuvframe[1]=(unsigned char *)malloc(y4m_dst_buf_sz);
    if(yuvframe[0]==NULL||yuvframe[1]==NULL)
    {
      fprintf(stderr,"Could not allocate frame buffers.\n");
      exit(1);
    }
    frame_state=0;
  }
  pic_sz=pic_w*pic_h;
  frame_c_w=frame_w/dst_c_dec_h;
  frame_c_h=frame_h/dst_c_dec_v;
  c_w=(pic_w+dst_c_dec_h-1)/dst_c_dec_h;
  c_h=(pic_h+dst_c_dec_v-1)/dst_c_dec_v;
  c_sz=c_w*c_h;
  /* Keep two frames buffered so we know when the last one has arrived. */
  for(;frame_state<2;)
  {
    char frame[6];
    /* renamed from 'ret': the old name shadowed the function-scope ret */
    int nread=fread(frame,1,6,video);
    if(nread<6)break;
    /* BUG FIX: was "FRAME/n" — '/' is not an escape character, so the tag
       could never match valid input whose header ends in '\n'. */
    if(memcmp(frame,"FRAME\n",6))
    {
      fprintf(stderr,"Loss of framing in YUV input data\n");
      exit(1);
    }
    /* Read the frame data that needs no conversion. */
    if(fread(yuvframe[frame_state],1,y4m_dst_buf_read_sz,video)!=y4m_dst_buf_read_sz)
    {
      fprintf(stderr,"Error reading YUV frame data.\n");
      exit(1);
    }
    frame_state++;
  }
  /* Describe the oldest buffered frame.  The data pointers back up by the
     picture offset so the encoder sees the full padded frame. */
  ycbcr[0].width=frame_w;
  ycbcr[0].height=frame_h;
  ycbcr[0].stride=pic_w;
  ycbcr[0].data=yuvframe[0]-pic_x-pic_y*pic_w;
  ycbcr[1].width=frame_c_w;
  ycbcr[1].height=frame_c_h;
  ycbcr[1].stride=c_w;
  ycbcr[1].data=yuvframe[0]+pic_sz-(pic_x/dst_c_dec_h)-(pic_y/dst_c_dec_v)*c_w;
  ycbcr[2].width=frame_c_w;
  ycbcr[2].height=frame_c_h;
  ycbcr[2].stride=c_w;
  ycbcr[2].data=ycbcr[1].data+c_sz;
  th_encode_ycbcr_in(td,ycbcr);
  {
    /* rotate the buffers: the frame just submitted becomes the spare */
    unsigned char *temp=yuvframe[0];
    yuvframe[0]=yuvframe[1];
    yuvframe[1]=temp;
    frame_state--;
  }
  if(frame_state<1)
  {
    printf("The last frame-----------------------------\n");
  }
  /* if there was only one frame buffered, it's the last in the stream */
  return th_encode_packetout(td,frame_state<1,op);
}
/* Work the Theora encoder until a complete video page is flushed.
   Returns 1 when *videopage holds a page, 0 at end of stream or on encoder
   error, or the incoming videoflag when a page was already pending. */
int fetch_and_process_video(FILE *video,
ogg_page *videopage,
ogg_stream_state *to,
th_enc_ctx *td,
int videoflag)
{
  ogg_packet op;
  int ret;
  /* is there a video page flushed? If not, work until there is. */
  while(!videoflag)
  {
    if(ogg_stream_pageout(to,videopage)>0)
      return 1;
    if(ogg_stream_eos(to))
    {
      /* BUG FIX: was "E/n" — '/' is not an escape character */
      printf("E\n");
      return 0;
    }
    ret=fetch_and_process_video_packet(video,td,&op);
    if(ret<=0)
      return 0;
    ogg_stream_packetin(to,&op);
  }
  return videoflag;
}
/* Number of bits needed to represent _v: ilog(0)==0, ilog(1)==1, ilog(64)==7.
   Used to size the keyframe granule shift. */
static int ilog(unsigned _v)
{
  int bits=0;
  while(_v!=0)
  {
    _v>>=1;
    bits++;
  }
  return bits;
}
/* Wall-clock time in milliseconds since the Unix epoch, truncated to long.
   The arithmetic is done in 64 bits first so tv_sec*1000 cannot overflow
   on platforms where long is 32 bits. */
long GetTickCount(void)
{
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return (long)((long long)tv.tv_sec * 1000 + tv.tv_usec / 1000);
}
/* Demo driver: encode video.dat (raw YUV frames) and audio.dat (raw PCM)
   into an interleaved Ogg Theora+Vorbis file "out.ogv" using fixed
   parameters.  Audio is optional; video is required. */
int main(int argc,char *argv[])
{
  int ret;
  ogg_stream_state to; /* Theora logical stream: packets welded into pages */
  ogg_stream_state vo; /* Vorbis logical stream */
  ogg_page og;         /* one Ogg bitstream page */
  ogg_packet op;       /* one raw packet of encoded data */
  th_enc_ctx *td;      /* Theora encoder context */
  th_info ti;
  th_comment tc;
  vorbis_info vi;      /* static Vorbis bitstream settings */
  vorbis_comment vc;   /* user comments */
  vorbis_dsp_state vd; /* central working state for the Vorbis encoder */
  vorbis_block vb;     /* local working space */
  int speed=-1;
  int audioflag=0;
  int videoflag=0;
  int akbps=0;
  int vkbps=0;
  ogg_int64_t audio_bytesout=0;
  ogg_int64_t video_bytesout=0;
  double timebase;
  FILE *outfile;
  long cur_time = 0;
  (void)argc;
  (void)argv;
  outfile = fopen("out.ogv", "wb");
  if(outfile==NULL)
  {
    fprintf(stderr,"Could not open out.ogv for writing.\n");
    exit(1);
  }
  audio_q=(float)(3*.099);  /* Vorbis quality "3" on the -1..10 user scale */
  video_q=(int)rint(6.3*5); /* Theora quality "5" on the 0..10 user scale */
  keyframe_frequency=64;
  speed=2;
  video = fopen("video.dat", "rb");
  audio = fopen("audio.dat", "rb");
  if(video==NULL)
  {
    fprintf(stderr,"Could not open video.dat.\n");
    exit(1);
  }
  init_param();
  srand(time(NULL));
  ogg_stream_init(&to,rand());
  /* initialize Vorbis only if we actually have audio to compress */
  if(audio)
  {
    ogg_stream_init(&vo,rand());
    vorbis_info_init(&vi);
    if(audio_q>-99)
      ret = vorbis_encode_init_vbr(&vi,audio_ch,audio_hz,audio_q);
    else
      ret = vorbis_encode_init(&vi,audio_ch,audio_hz,-1,
        (int)(64870*(ogg_int64_t)audio_r>>16),-1);
    if(ret){
      fprintf(stderr,"The Vorbis encoder could not set up a mode according to\n"
        "the requested quality or bitrate.\n\n");
      exit(1);
    }
    vorbis_comment_init(&vc);
    vorbis_analysis_init(&vd,&vi);
    vorbis_block_init(&vd,&vb);
  }
  /* Theora has a divisible-by-sixteen restriction for the encoded frame size:
     scale the picture size up to the nearest /16 and calculate offsets.
     (Parentheses added for clarity; '+' binds tighter than '&' anyway.) */
  frame_w=(pic_w+15)&~0xF;
  frame_h=(pic_h+15)&~0xF;
  /* Force the offsets to be even so that chroma samples line up as expected. */
  pic_x=((frame_w-pic_w)>>1)&~1;
  pic_y=((frame_h-pic_h)>>1)&~1;
  th_info_init(&ti);
  ti.frame_width=frame_w;
  ti.frame_height=frame_h;
  ti.pic_width=pic_w;
  ti.pic_height=pic_h;
  ti.pic_x=pic_x;
  ti.pic_y=pic_y;
  ti.fps_numerator=video_fps_n;
  ti.fps_denominator=video_fps_d;
  ti.aspect_numerator=video_par_n;
  ti.aspect_denominator=video_par_d;
  ti.colorspace=TH_CS_UNSPECIFIED;
  ti.target_bitrate=(int)(64870*(ogg_int64_t)video_r>>16);
  ti.quality=video_q;
  ti.keyframe_granule_shift=ilog(keyframe_frequency-1);
  ti.pixel_fmt=TH_PF_420;
  td=th_encode_alloc(&ti);
  th_info_clear(&ti);
  if(td==NULL)
  {
    fprintf(stderr,"Could not allocate a Theora encoder context.\n");
    exit(1);
  }
  /* was sizeof(keyframe_frequency-1): same size, but misleading */
  ret=th_encode_ctl(td,TH_ENCCTL_SET_KEYFRAME_FREQUENCY_FORCE,
    &keyframe_frequency,sizeof(keyframe_frequency));
  if(ret<0)
  {
    fprintf(stderr,"Could not set keyframe interval to %d.\n",(int)keyframe_frequency);
  }
  if(buf_delay>=0)
  {
    ret=th_encode_ctl(td,TH_ENCCTL_SET_RATE_BUFFER, &buf_delay,sizeof(buf_delay));
    if(ret<0)
    {
      fprintf(stderr,"Warning: could not set desired buffer delay.\n");
    }
  }
  /* Speed should also be set after the current encoder mode is established,
     since the available speed levels may change depending. */
  if(speed>=0)
  {
    int speed_max;
    int ret;
    ret=th_encode_ctl(td,TH_ENCCTL_GET_SPLEVEL_MAX, &speed_max,sizeof(speed_max));
    if(ret<0)
    {
      fprintf(stderr,"Warning: could not determine maximum speed level.\n");
      speed_max=0;
    }
    ret=th_encode_ctl(td,TH_ENCCTL_SET_SPLEVEL,&speed,sizeof(speed));
    if(ret<0)
    {
      fprintf(stderr,"Warning: could not set speed level to %i of %i\n", speed,speed_max);
      if(speed>speed_max)
      {
        fprintf(stderr,"Setting it to %i instead\n",speed_max);
      }
      ret=th_encode_ctl(td,TH_ENCCTL_SET_SPLEVEL, &speed_max,sizeof(speed_max));
      if(ret<0)
      {
        fprintf(stderr,"Warning: could not set speed level to %i of %i\n",speed_max,speed_max);
      }
    }
  }
  /* write the bitstream header packets with proper page interleave:
     the Theora BOS page must be first in the file */
  th_comment_init(&tc);
  /* first packet will get its own page automatically */
  if(th_encode_flushheader(td,&tc,&op)<=0)
  {
    fprintf(stderr,"Internal Theora library error.\n");
    exit(1);
  }
  ogg_stream_packetin(&to,&op);
  if(ogg_stream_pageout(&to,&og)!=1)
  {
    fprintf(stderr,"Internal Ogg library error.\n");
    exit(1);
  }
  fwrite(og.header,1,og.header_len,outfile);
  fwrite(og.body,1,og.body_len,outfile);
  /* create the remaining Theora headers */
  for(;;)
  {
    ret=th_encode_flushheader(td,&tc,&op);
    if(ret<0)
    {
      fprintf(stderr,"Internal Theora library error.\n");
      exit(1);
    }
    else if(!ret)
      break;
    ogg_stream_packetin(&to,&op);
  }
  if(audio)
  {
    ogg_packet header;
    ogg_packet header_comm;
    ogg_packet header_code;
    vorbis_analysis_headerout(&vd,&vc,&header,&header_comm,&header_code);
    ogg_stream_packetin(&vo,&header); /* automatically placed in its own page */
    if(ogg_stream_pageout(&vo,&og)!=1)
    {
      fprintf(stderr,"Internal Ogg library error.\n");
      exit(1);
    }
    fwrite(og.header,1,og.header_len,outfile);
    fwrite(og.body,1,og.body_len,outfile);
    /* remaining Vorbis header packets */
    ogg_stream_packetin(&vo,&header_comm);
    ogg_stream_packetin(&vo,&header_code);
  }
  /* flush all remaining Theora header pages before any data pages */
  for(;;)
  {
    int result = ogg_stream_flush(&to,&og);
    if(result<0)
    {
      /* can't get here */
      fprintf(stderr,"Internal Ogg library error.\n");
      exit(1);
    }
    if(result==0)
      break;
    fwrite(og.header,1,og.header_len,outfile);
    fwrite(og.body,1,og.body_len,outfile);
  }
  if(audio)
  {
    for(;;)
    {
      int result=ogg_stream_flush(&vo,&og);
      if(result<0)
      {
        /* can't get here */
        fprintf(stderr,"Internal Ogg library error.\n");
        exit(1);
      }
      if(result==0)break;
      fwrite(og.header,1,og.header_len,outfile);
      fwrite(og.body,1,og.body_len,outfile);
    }
  }
  cur_time = GetTickCount();
  fprintf(stderr, "encoder start\n");
  /* main loop: interleave audio and video pages in timestamp order */
  for(;;)
  {
    int audio_or_video=-1;
    double audiotime;
    double videotime;
    ogg_page audiopage;
    ogg_page videopage;
    audioflag=fetch_and_process_audio(audio,&audiopage,&vo,&vd,&vb,audioflag);
    videoflag=fetch_and_process_video(video,&videopage,&to,td,videoflag);
    if(!audioflag && !videoflag)
      break;
    audiotime= audioflag?vorbis_granule_time(&vd,ogg_page_granulepos(&audiopage)):-1;
    videotime= videoflag?th_granule_time(td,ogg_page_granulepos(&videopage)):-1;
    /* pick whichever stream's pending page has the earlier timestamp */
    if(!audioflag)
      audio_or_video=1;
    else if(!videoflag)
      audio_or_video=0;
    else
      audio_or_video=audiotime<videotime?0:1;
    if(audio_or_video==1){
      /* flush a video page */
      video_bytesout+=fwrite(videopage.header,1,videopage.header_len,outfile);
      video_bytesout+=fwrite(videopage.body,1,videopage.body_len,outfile);
      videoflag=0;
      timebase=videotime;
    }
    else
    {
      /* flush an audio page */
      audio_bytesout+=fwrite(audiopage.header,1,audiopage.header_len,outfile);
      audio_bytesout+=fwrite(audiopage.body,1,audiopage.body_len,outfile);
      audioflag=0;
      timebase=audiotime;
    }
    if(timebase > 0)
    {
      /* progress report: elapsed stream time plus running bitrates */
      int hundredths=(int)(timebase*100-(long)timebase*100);
      int seconds=(long)timebase%60;
      int minutes=((long)timebase/60)%60;
      int hours=(long)timebase/3600;
      if(audio_or_video)vkbps=(int)rint(video_bytesout*8./timebase*.001);
      else akbps=(int)rint(audio_bytesout*8./timebase*.001);
      fprintf(stderr,
        "\r %d:%02d:%02d.%02d audio: %dkbps video: %dkbps\n",
        hours,minutes,seconds,hundredths,akbps,vkbps);
    }
  }
  if(video)th_encode_free(td);
  fprintf(stderr, "\nencoder last time = %ld\n", GetTickCount()- cur_time);
  /* clear out state */
  if(audio)
  {
    ogg_stream_clear(&vo);
    vorbis_block_clear(&vb);
    vorbis_dsp_clear(&vd);
    vorbis_comment_clear(&vc);
    vorbis_info_clear(&vi);
    fclose(audio);
  }
  if(video)
  {
    ogg_stream_clear(&to);
    th_comment_clear(&tc);
    fclose(video);
  }
  if(outfile)
    fclose(outfile);
  return(0);
}