multimedia/ffmpeg2theora/patches/30-avcodec_decode_audio4.diff



diff -Naur ffmpeg2theora-0.29/SConstruct ffmpeg2theora-0.29.patched/SConstruct
--- ffmpeg2theora-0.29/SConstruct	2012-06-25 13:15:16.000000000 -0400
+++ ffmpeg2theora-0.29.patched/SConstruct	2014-05-14 15:02:17.000000000 -0400
@@ -162,6 +162,14 @@
       '-Iffmpeg'
     ])
 
+  if conf.CheckPKG('libavresample'):
+    FFMPEG_LIBS.append('libavresample')
+  else:
+    FFMPEG_LIBS.append('libswresample')
+    env.Append(CCFLAGS=[
+      '-DUSE_SWRESAMPLE'
+    ])
+
   if not conf.CheckPKG(' '.join(FFMPEG_LIBS)): 
     print """
         Could not find %s.
diff -Naur ffmpeg2theora-0.29/src/ffmpeg2theora.c ffmpeg2theora-0.29.patched/src/ffmpeg2theora.c
--- ffmpeg2theora-0.29/src/ffmpeg2theora.c	2014-05-14 14:57:30.000000000 -0400
+++ ffmpeg2theora-0.29.patched/src/ffmpeg2theora.c	2014-05-14 14:59:43.000000000 -0400
@@ -33,6 +33,11 @@
 #include "libswscale/swscale.h"
 #include "libpostproc/postprocess.h"
 
+#include "libavutil/opt.h"
+#include "libavutil/channel_layout.h"
+#include "libavutil/samplefmt.h"
+#include "libswresample_compat.h"
+
 #include "theora/theoraenc.h"
 #include "vorbis/codec.h"
 #include "vorbis/vorbisenc.h"
@@ -537,6 +542,11 @@
     int synced = this->start_time == 0.0;
     AVRational display_aspect_ratio, sample_aspect_ratio;
 
+    struct SwrContext *swr_ctx;
+    uint8_t **dst_audio_data = NULL;
+    int dst_linesize;
+    int src_nb_samples = 1024, dst_nb_samples, max_dst_nb_samples;
+
     if (this->audiostream >= 0 && this->context->nb_streams > this->audiostream) {
         AVCodecContext *enc = this->context->streams[this->audiostream]->codec;
         if (enc->codec_type == AVMEDIA_TYPE_AUDIO) {
@@ -961,22 +971,43 @@
         if (acodec != NULL && avcodec_open2 (aenc, acodec, NULL) >= 0) {
             if (this->sample_rate != sample_rate
                 || this->channels != aenc->channels
-                || aenc->sample_fmt != AV_SAMPLE_FMT_S16) {
-                // values take from libavcodec/resample.c
-                this->audio_resample_ctx = av_audio_resample_init(this->channels,    aenc->channels,
-                                                                  this->sample_rate, sample_rate,
-                                                                  AV_SAMPLE_FMT_S16,    aenc->sample_fmt,
-                                                                  16, 10, 0, 0.8);
-                if (!this->audio_resample_ctx) {
-                    this->channels = aenc->channels;
+                || aenc->sample_fmt != AV_SAMPLE_FMT_FLTP) {
+                swr_ctx = swr_alloc();
+                /* set options */
+                if (aenc->channel_layout) {
+                    av_opt_set_int(swr_ctx, "in_channel_layout",    aenc->channel_layout, 0);
+                } else {
+                    av_opt_set_int(swr_ctx, "in_channel_layout", av_get_default_channel_layout(aenc->channels), 0);
+                }
+                av_opt_set_int(swr_ctx, "in_sample_rate",       aenc->sample_rate, 0);
+                av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", aenc->sample_fmt, 0);
+
+                av_opt_set_int(swr_ctx, "out_channel_layout", av_get_default_channel_layout(this->channels), 0);
+                av_opt_set_int(swr_ctx, "out_sample_rate",       this->sample_rate, 0);
+                av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
+
+                /* initialize the resampling context */
+                if (swr_init(swr_ctx) < 0) {
+                    fprintf(stderr, "Failed to initialize the resampling context\n");
+                    exit(1);
                 }
+
+                max_dst_nb_samples = dst_nb_samples =
+                    av_rescale_rnd(src_nb_samples, this->sample_rate, sample_rate, AV_ROUND_UP);
+
+                if (av_samples_alloc_array_and_samples(&dst_audio_data, &dst_linesize, this->channels,
+                                                         dst_nb_samples, AV_SAMPLE_FMT_FLTP, 0) < 0) {
+                    fprintf(stderr, "Could not allocate destination samples\n");
+                    exit(1);
+                }
+
                 if (!info.frontend && this->sample_rate!=sample_rate)
                     fprintf(stderr, "  Resample: %dHz => %dHz\n", sample_rate,this->sample_rate);
                 if (!info.frontend && this->channels!=aenc->channels)
                     fprintf(stderr, "  Channels: %d => %d\n",aenc->channels,this->channels);
             }
             else{
-                this->audio_resample_ctx=NULL;
+                swr_ctx = NULL;
             }
         }
         else{
@@ -1067,13 +1098,12 @@
         AVPacket pkt;
         AVPacket avpkt;
         int len1;
-        int got_picture;
+        int got_frame;
         int first = 1;
         int audio_eos = 0, video_eos = 0, audio_done = 0, video_done = 0;
         int ret;
-        int16_t *audio_buf=av_malloc(4*MAX_AUDIO_FRAME_SIZE);
-        int16_t *resampled=av_malloc(4*MAX_AUDIO_FRAME_SIZE);
-        int16_t *audio_p=NULL;
+        AVFrame *audio_frame = NULL;
+        uint8_t **audio_p = NULL;
         int no_frames;
         int no_samples;
 
@@ -1369,7 +1399,7 @@
                       first frame decodec in case its not a keyframe
                     */
                     if (pkt.stream_index == this->video_index) {
-                      avcodec_decode_video2(venc, frame, &got_picture, &pkt);
+                      avcodec_decode_video2(venc, frame, &got_frame, &pkt);
                     }
                     av_free_packet (&pkt);
                     continue;
@@ -1388,9 +1418,9 @@
                 while(video_eos || avpkt.size > 0) {
                     int dups = 0;
                     static th_ycbcr_buffer ycbcr;
-                    len1 = avcodec_decode_video2(venc, frame, &got_picture, &avpkt);
+                    len1 = avcodec_decode_video2(venc, frame, &got_frame, &avpkt);
                     if (len1>=0) {
-                        if (got_picture) {
+                        if (got_frame) {
                             // this is disabled by default since it does not work
                             // for all input formats the way it should.
                             if (this->sync == 1 && pkt.dts != AV_NOPTS_VALUE) {
@@ -1427,7 +1457,7 @@
 
                             if (venc_pix_fmt != this->pix_fmt) {
                                 sws_scale(this->sws_colorspace_ctx,
-                                frame->data, frame->linesize, 0, display_height,
+                                (const uint8_t * const*)frame->data, frame->linesize, 0, display_height,
                                 output_tmp->data, output_tmp->linesize);
                             }
                             else{
@@ -1471,7 +1501,7 @@
                             }
                             if (this->sws_scale_ctx) {
                                 sws_scale(this->sws_scale_ctx,
-                                    output_cropped->data,
+                                    (const uint8_t * const*)output_cropped->data,
                                     output_cropped->linesize, 0,
                                     display_height - (this->frame_topBand + this->frame_bottomBand),
                                     output_resized->data,
@@ -1499,7 +1529,7 @@
                     //now output_resized
 
                     if (!first) {
-                        if (got_picture || video_eos) {
+                        if (got_frame || video_eos) {
                             prepare_ycbcr_buffer(this, ycbcr, output_buffered);
                             if(dups>0) {
                                 //this only works if dups < keyint,
@@ -1519,11 +1549,11 @@
                                 info.videotime = this->frame_count / av_q2d(this->framerate);
                         }
                     }
-                    if (got_picture) {
+                    if (got_frame) {
                         first=0;
                         av_picture_copy((AVPicture *)output_buffered, (AVPicture *)output_padded, this->pix_fmt, this->frame_width, this->frame_height);
                     }
-                    if (!got_picture) {
+                    if (!got_frame) {
                         break;
                     }
                 }
@@ -1531,42 +1561,62 @@
             if (info.passno!=1)
               if ((audio_eos && !audio_done) || (ret >= 0 && pkt.stream_index == this->audio_index)) {
                 while((audio_eos && !audio_done) || avpkt.size > 0 ) {
-                    int samples=0;
-                    int samples_out=0;
-                    int data_size = 4*MAX_AUDIO_FRAME_SIZE;
                     int bytes_per_sample = av_get_bytes_per_sample(aenc->sample_fmt);
 
                     if (avpkt.size > 0) {
-                        len1 = avcodec_decode_audio3(astream->codec, audio_buf, &data_size, &avpkt);
+                        if (!audio_frame && !(audio_frame = avcodec_alloc_frame())) {
+                            fprintf(stderr, "Failed to allocate memory\n");
+                            exit(1);
+                        }
+                        len1 = avcodec_decode_audio4(astream->codec, audio_frame, &got_frame, &avpkt);
                         if (len1 < 0) {
                             /* if error, we skip the frame */
                             break;
                         }
-                        avpkt.size -= len1;
-                        avpkt.data += len1;
-                        if (data_size >0) {
-                            samples = data_size / (aenc->channels * bytes_per_sample);
-                            samples_out = samples;
-                            if (this->audio_resample_ctx) {
-                                samples_out = audio_resample(this->audio_resample_ctx, resampled, audio_buf, samples);
-                                audio_p = resampled;
+                        /* Some audio decoders decode only part of the packet, and have to be
+                         * called again with the remainder of the packet data.
+                         * Sample: http://fate-suite.libav.org/lossless-audio/luckynight-partial.shn
+                         * Also, some decoders might over-read the packet. */
+                        len1 = FFMIN(len1, avpkt.size);
+                        if (got_frame) {
+                            dst_nb_samples = audio_frame->nb_samples;
+                            if (swr_ctx) {
+                                dst_nb_samples = av_rescale_rnd(audio_frame->nb_samples,
+                                    this->sample_rate, aenc->sample_rate, AV_ROUND_UP);
+                                if (dst_nb_samples > max_dst_nb_samples) {
+                                    av_free(dst_audio_data[0]);
+                                    if (av_samples_alloc(dst_audio_data, &dst_linesize, this->channels,
+                                                           dst_nb_samples, AV_SAMPLE_FMT_FLTP, 1) < 0) {
+                                        fprintf(stderr, "Error while converting audio\n");
+                                        exit(1);
+                                    }
+                                    max_dst_nb_samples = dst_nb_samples;
+                                }
+                                if (swr_convert(swr_ctx, dst_audio_data, dst_nb_samples,
+                                    (const uint8_t**)audio_frame->extended_data, audio_frame->nb_samples) < 0) {
+                                    fprintf(stderr, "Error while converting audio\n");
+                                    exit(1);
+                                }
+                                audio_p = dst_audio_data;
+                            } else {
+                                audio_p = audio_frame->extended_data;
                             }
-                            else
-                                audio_p = audio_buf;
                         }
+                        avpkt.size -= len1;
+                        avpkt.data += len1;
                     }
-
-                    if (no_samples > 0 && this->sample_count + samples_out > no_samples) {
-                        audio_eos = 1;
-                        samples_out = no_samples - this->sample_count;
-                        if (samples_out <= 0) {
-                            break;
+                    if(got_frame || audio_eos) {
+                        if (no_samples > 0 && this->sample_count + dst_nb_samples > no_samples) {
+                            audio_eos = 1;
+                            dst_nb_samples = no_samples - this->sample_count;
+                            if (dst_nb_samples <= 0) {
+                                break;
+                            }
                         }
+                        oggmux_add_audio(&info, audio_p, dst_nb_samples, audio_eos);
+                        avcodec_free_frame(&audio_frame);
+                        this->sample_count += dst_nb_samples;
                     }
-
-                    oggmux_add_audio(&info, audio_p,
-                        samples_out * (this->channels), samples_out, audio_eos);
-                    this->sample_count += samples_out;
                     if(audio_eos) {
                         audio_done = 1;
                     }
@@ -1751,8 +1801,8 @@
             avcodec_close(venc);
         }
         if (this->audio_index >= 0) {
-            if (this->audio_resample_ctx)
-                audio_resample_close(this->audio_resample_ctx);
+            if (swr_ctx)
+                swr_free(&swr_ctx);
             avcodec_close(aenc);
         }
 
@@ -1773,8 +1823,12 @@
             frame_dealloc(output_cropped_p);
             frame_dealloc(output_padded_p);
         }
-        av_free(audio_buf);
-        av_free(resampled);
+        if (dst_audio_data)
+            av_freep(&dst_audio_data[0]);
+        av_freep(&dst_audio_data);
+        if(swr_ctx) {
+            swr_close(swr_ctx);
+        }
     }
     else{
         fprintf(stderr, "No video or audio stream found.\n");
diff -Naur ffmpeg2theora-0.29/src/ffmpeg2theora.c.orig ffmpeg2theora-0.29.patched/src/ffmpeg2theora.c.orig
--- ffmpeg2theora-0.29/src/ffmpeg2theora.c.orig	2014-05-14 14:57:25.000000000 -0400
+++ ffmpeg2theora-0.29.patched/src/ffmpeg2theora.c.orig	2014-05-14 14:57:30.000000000 -0400
@@ -2772,6 +2772,9 @@
             outputfile_set=1;
         }
         optind++;
+    } else {
+        fprintf(stderr, "ERROR: no input specified\n");
+        exit(1);
     }
     if(optind<argc) {
         fprintf(stderr, "WARNING: Only one input file supported, others will be ignored\n");
diff -Naur ffmpeg2theora-0.29/src/ffmpeg2theora.h ffmpeg2theora-0.29.patched/src/ffmpeg2theora.h
--- ffmpeg2theora-0.29/src/ffmpeg2theora.h	2010-10-10 10:56:00.000000000 -0400
+++ ffmpeg2theora-0.29.patched/src/ffmpeg2theora.h	2014-05-14 14:59:43.000000000 -0400
@@ -62,7 +62,6 @@
     double fps;
     struct SwsContext *sws_colorspace_ctx; /* for image resampling/resizing */
     struct SwsContext *sws_scale_ctx; /* for image resampling/resizing */
-    ReSampleContext *audio_resample_ctx;
     ogg_int32_t aspect_numerator;
     ogg_int32_t aspect_denominator;
     int colorspace;
diff -Naur ffmpeg2theora-0.29/src/libswresample_compat.h ffmpeg2theora-0.29.patched/src/libswresample_compat.h
--- ffmpeg2theora-0.29/src/libswresample_compat.h	1969-12-31 19:00:00.000000000 -0500
+++ ffmpeg2theora-0.29.patched/src/libswresample_compat.h	2014-05-14 14:59:43.000000000 -0400
@@ -0,0 +1,23 @@
+// This header serves to smooth out the differences in FFmpeg and LibAV.
+
+#ifdef USE_SWRESAMPLE
+
+    #include <libswresample/swresample.h>
+
+    //swr does not have the equivalent so this does nothing
+    void swr_close(SwrContext *ctx) {};
+
+#else
+
+    #include <libavresample/avresample.h>
+
+    #define SwrContext AVAudioResampleContext
+    #define swr_init(ctx) avresample_open(ctx)
+    #define swr_close(ctx) avresample_close(ctx)
+    #define swr_free(ctx) avresample_free(ctx)
+    #define swr_alloc() avresample_alloc_context()
+    #define swr_get_delay(ctx, ...) avresample_get_delay(ctx)
+    #define swr_convert(ctx, out, out_count, in, in_count) \
+       avresample_convert(ctx, out, 0, out_count, (uint8_t **)in, 0, in_count)
+
+#endif
diff -Naur ffmpeg2theora-0.29/src/theorautils.c ffmpeg2theora-0.29.patched/src/theorautils.c
--- ffmpeg2theora-0.29/src/theorautils.c	2012-06-21 17:36:01.000000000 -0400
+++ ffmpeg2theora-0.29.patched/src/theorautils.c	2014-05-14 14:59:43.000000000 -0400
@@ -1219,17 +1219,16 @@
 /**
  * adds audio samples to encoding sink
  * @param buffer pointer to buffer
- * @param bytes bytes in buffer
  * @param samples samples in buffer
  * @param e_o_s 1 indicates end of stream.
  */
-void oggmux_add_audio (oggmux_info *info, int16_t * buffer, int bytes, int samples, int e_o_s) {
+void oggmux_add_audio (oggmux_info *info, uint8_t **buffer, int samples, int e_o_s) {
     ogg_packet op;
 
     int i, j, k, count = 0;
     float **vorbis_buffer;
 
-    if (bytes <= 0 && samples <= 0) {
+    if (samples <= 0) {
         /* end of audio stream */
         if (e_o_s)
             vorbis_analysis_wrote (&info->vd, 0);
@@ -1252,7 +1251,7 @@
                         default: k = j;
                     }
                 }
-                vorbis_buffer[k][i] = buffer[count++] / 32768.f;
+                vorbis_buffer[k][i] = ((const float  *)buffer[j])[i];
             }
         }
         vorbis_analysis_wrote (&info->vd, samples);
@@ -1291,8 +1290,8 @@
                 if (op.packetno != 4) {
                     /* We only expect negative start granule in the first content
                        packet, not any of the others... */
-                    fprintf(stderr, "WARNING: vorbis packet %lld has calculated start"
-                            " granule of %lld, but it should be non-negative!",
+                    fprintf(stderr, "WARNING: vorbis packet %" PRId64 " has calculated start"
+                            " granule of %" PRId64 ", but it should be non-negative!",
                             op.packetno, start_granule);
                 }
                 start_granule = 0;
@@ -1302,7 +1301,7 @@
                    allowed by the specification in the last packet only, and the
                    trailing samples should be discarded and not played/indexed. */
                 if (!op.e_o_s) {
-                    fprintf(stderr, "WARNING: vorbis packet %lld (granulepos %lld) starts before"
+                    fprintf(stderr, "WARNING: vorbis packet %" PRId64 " (granulepos %" PRId64 ") starts before"
                             " the end of the preceeding packet!", op.packetno, op.granulepos);
                 }
                 start_granule = info->vorbis_granulepos;
diff -Naur ffmpeg2theora-0.29/src/theorautils.h ffmpeg2theora-0.29.patched/src/theorautils.h
--- ffmpeg2theora-0.29/src/theorautils.h	2011-09-15 16:20:46.000000000 -0400
+++ ffmpeg2theora-0.29.patched/src/theorautils.h	2014-05-14 14:59:43.000000000 -0400
@@ -168,7 +168,7 @@
 extern void oggmux_setup_kate_streams(oggmux_info *info, int n_kate_streams);
 extern void oggmux_init (oggmux_info *info);
 extern void oggmux_add_video (oggmux_info *info, th_ycbcr_buffer ycbcr, int e_o_s);
-extern void oggmux_add_audio (oggmux_info *info, int16_t * readbuffer, int bytesread, int samplesread,int e_o_s);
+extern void oggmux_add_audio (oggmux_info *info, uint8_t **buffer, int samples,int e_o_s);
 #ifdef HAVE_KATE
 extern void oggmux_add_kate_text (oggmux_info *info, int idx, double t0, double t1, const char *text, size_t len, int x1, int x2, int y1, int y2);
 extern void oggmux_add_kate_image (oggmux_info *info, int idx, double t0, double t1, const kate_region *kr, const kate_palette *kp, const kate_bitmap *kb);