当前位置: 首页 > 工具软件 > SoundTouch > 使用案例 >

使用soundtouch开源库实现ffplay倍速播放功能

夏侯博
2023-12-01


一、SoundTouch音频处理库的编译

soundtouch项目官网 https://www.surina.net/soundtouch/
SoundTouch git repository: https://codeberg.org/soundtouch/soundtouch.git

windows版本编译步骤

克隆项目代码

工具:git、Visual Studio 2019

// 下载源码 当前版本 2.3.1
git clone https://codeberg.org/soundtouch/soundtouch.git

修改soundtouch\include\STTypes.h下的宏定义
注意:默认是使用32bit float samples,需要更改成使用16bit integer samples

 #if !(SOUNDTOUCH_INTEGER_SAMPLES || SOUNDTOUCH_FLOAT_SAMPLES)
       
        /// Choose either 32bit floating point or 16bit integer sampletype
        /// by choosing one of the following defines, unless this selection 
        /// has already been done in some other file.
        
        /// Notes:
        /// - In Windows environment, choose the sample format with the
        ///   following defines.
        /// - In GNU environment, the floating point samples are used by 
        ///   default, but integer samples can be chosen by giving the 
        ///   following switch to the configure script:
        ///       ./configure --enable-integer-samples
        ///   However, if you still prefer to select the sample format here 
        ///   also in GNU environment, then please #undef the INTEGER_SAMPLE
        ///   and FLOAT_SAMPLE defines first as in comments above.
        ///
        /// Tutorial setting: the integer define is enabled and the float
        /// define is commented out, so the library is built for 16bit
        /// integer samples instead of 32bit float samples.
        #define SOUNDTOUCH_INTEGER_SAMPLES     1    //< 16bit integer samples
        //#define SOUNDTOUCH_FLOAT_SAMPLES       1    //< 32bit float samples
     
    #endif

vs2019编译soundtouch静态库和动态库

打开vs命令行工具 Developer Command Prompt for VS 2019
切换到源码路径执行命令 make-win.bat
生成的静态库动态库在 lib文件夹下面
vs解决方案在: soundtouch\source\SoundTouch\SoundTouch.sln

二、封装soundtouch库变速接口

对soundtouch库进行c风格api封装,提供变速接口给ffplay调用

头文件:soundtouch_wrap.h
#ifndef IJKSOUNDTOUCHWRAP_H
#define IJKSOUNDTOUCHWRAP_H

#include <stdint.h>

/*
 * C-style wrapper around the SoundTouch library, used to change playback
 * speed of interleaved 16-bit PCM audio.
 *
 * NOTE(review): the declarations carry no extern "C" linkage specification.
 * If the implementation is built as C++ and the caller as C, both sides need
 * matching C linkage -- confirm against the build setup.
 */

/*
 * Create a SoundTouch instance.
 *
 * Returns an opaque handle to pass to the other functions. Release it with
 * soundtouch_destroy().
 */
void* soundtouch_create(void);

/*
 * Push PCM samples through SoundTouch and read the processed audio back into
 * the same buffer.
 *
 *   handle           handle from soundtouch_create()
 *   data             interleaved 16-bit samples; overwritten with the output
 *   speed            rate factor handed to SoundTouch
 *   pitch            pitch factor handed to SoundTouch
 *   len              number of 16-bit samples in data, counted over all channels
 *   bytes_per_sample size of one sample in bytes
 *   n_channel        number of interleaved channels
 *   n_sampleRate     sample rate of data in Hz
 *
 * Returns the number of bytes of processed audio now stored in data; 0 when
 * the handle is NULL or no output is available yet.
 */
int soundtouch_translate(void *handle, short* data, float speed, float pitch,
						int len, int bytes_per_sample, int n_channel, int n_sampleRate);

/* Destroy a handle created by soundtouch_create(). A NULL handle is ignored. */
void soundtouch_destroy(void *handle);

#endif /* IJKSOUNDTOUCHWRAP_H */
实现文件:soundtouch_wrap.cpp
#include "SoundTouch.h"

using namespace std;
using namespace soundtouch;

void* soundtouch_create() 
{
    SoundTouch *handle_ptr = new SoundTouch();
    const char *version = handle_ptr->getVersionString();
    return handle_ptr;
}

/**
 * Feed interleaved PCM samples into SoundTouch and read the processed audio
 * back into the same buffer.
 *
 * @param handle           SoundTouch instance from soundtouch_create()
 * @param data             interleaved samples; overwritten with the output
 * @param speed            value passed to SoundTouch::setRate()
 * @param pitch            value passed to SoundTouch::setPitch()
 * @param len              number of samples in data, counted over all channels
 * @param bytes_per_sample size of one sample in bytes
 * @param n_channel        number of interleaved channels
 * @param n_sampleRate     sample rate of data in Hz
 * @return number of bytes of processed audio stored in data; 0 when an
 *         argument is invalid or SoundTouch has no output ready yet.
 *
 * NOTE(review): the capacity of data is not part of the interface. The output
 * can be longer than the input (e.g. when slowing down), so the caller must
 * size the buffer for the stretched result.
 */
int soundtouch_translate(void *handle, short* data, float speed, float pitch,
	int len, int bytes_per_sample, int n_channel, int n_sampleRate) 
{
    SoundTouch *handle_ptr = (SoundTouch*)handle;

    // Validate before any division by n_channel.
    if (handle_ptr == NULL || data == NULL || len < 0 ||
        n_channel <= 0 || n_sampleRate <= 0)
        return 0;

    const int put_n_sample = len / n_channel;       // frames handed in
    const int max_chunk = n_sampleRate / n_channel; // frames requested per read
    int total_frames = 0;
    int nb = 0;

    handle_ptr->setPitch(pitch);
    handle_ptr->setRate(speed);

    handle_ptr->setSampleRate(n_sampleRate);
    handle_ptr->setChannels(n_channel);

    handle_ptr->putSamples((SAMPLETYPE*)data, put_n_sample);

    // Drain everything that is ready. Each chunk is appended after the
    // previous one so the returned byte count describes contiguous data
    // rather than a buffer whose start was overwritten by a later read.
    do {
        nb = handle_ptr->receiveSamples((SAMPLETYPE*)data + total_frames * n_channel, max_chunk);
        total_frames += nb;
    } while (nb != 0);

    return total_frames * n_channel * bytes_per_sample;
}

void soundtouch_destroy(void *handle) 
{
    SoundTouch *handle_ptr = (SoundTouch*)handle;
    if (handle_ptr == NULL)
        return;
    handle_ptr->clear();
    delete handle_ptr;
	handle_ptr = NULL;
}

三、ffplay调用soundtouch实现倍速播放

  • VideoState 结构体添加soundtouch变速播放相关字段
#include "soundtouch_wrap.h"

/*
 * Excerpt of ffplay's VideoState showing only the members added for
 * SoundTouch speed control; the existing ffplay members are elided.
 */
typedef struct VideoState {
	/* ... existing ffplay members (elided in this excerpt) ... */
	void* soundTouchHandle;          /* opaque handle from soundtouch_create() */
	short *audio_new_buf;            /* scratch buffer for SoundTouch input/output */
	unsigned int audio_new_buf_size; /* allocated size of audio_new_buf in bytes */
	double play_rate;                /* playback speed, 1.0 by default */
} VideoState;
  • 在ffplay的stream_open函数中初始化soundtouch相关字段
/*
 * Excerpt of ffplay's stream_open(): allocates the VideoState and initializes
 * the SoundTouch-related members. The rest of the original function body is
 * elided.
 *
 * Returns NULL when the VideoState cannot be allocated.
 */
static VideoState *stream_open(const char *filename, AVInputFormat *iformat)
{
	VideoState *is;
    is = av_mallocz(sizeof(VideoState));
    if (!is)
        return NULL;
	/* May be NULL on failure; users of the handle test it before use. */
	is->soundTouchHandle = soundtouch_create();
	is->audio_new_buf = NULL;
	is->audio_new_buf_size = 0;
	is->play_rate = 1.0;	/* normal speed until a key selects another rate */
	/* ... remainder of the original stream_open() body, including its return, is unchanged ... */
}
  • 在stream_component_close中的case AVMEDIA_TYPE_AUDIO 中关闭soundtouch
	if (is->soundTouchHandle)
	{
		soundtouch_destroy(is->soundTouchHandle);
		is->soundTouchHandle = nullptr;
	}
	if (is->audio_new_buf)
    {
        av_freep(&is->audio_new_buf);
        is->audio_new_buf = NULL;
    }

  • 在audio_decode_frame函数进行倍速播放控制
/**
 * Take one decoded audio frame from the sample queue, convert it to the
 * output format and, when a playback rate other than 1.0 is selected,
 * time-stretch it with SoundTouch.
 *
 * The resulting data is referenced by is->audio_buf and the audio clock is
 * updated from the frame's pts.
 *
 * NOTE(review): speed processing only runs on the resampler path (swr_ctx
 * set); frames that already match the output format are played unchanged.
 *
 * @return size in bytes of the data in is->audio_buf, or a negative value
 *         when paused, when no frame is available, or on error.
 */
static int audio_decode_frame(VideoState *is)
{
    int data_size, resampled_data_size;
    int64_t dec_channel_layout;
    av_unused double audio_clock0;
    int wanted_nb_samples;
    Frame *af;
	/* Number of frames fed to SoundTouch without output so far; scales the scratch buffer. */
	int translate_time = 1;
    if (is->paused)
        return -1;

reload:
    do {
#if defined(_WIN32)
        while (frame_queue_nb_remaining(&is->sampq) == 0) {
            if ((av_gettime_relative() - is->audio_callback_time) > 1000000LL * is->audio_hw_buf_size / is->audio_tgt.bytes_per_sec / 2)
                return -1;
            av_usleep (1000);
        }
#endif
        if (!(af = frame_queue_peek_readable(&is->sampq)))
            return -1;
        frame_queue_next(&is->sampq);
    } while (af->serial != is->audioq.serial);

    data_size = av_samples_get_buffer_size(NULL, af->frame->channels,
                                           af->frame->nb_samples,
                                           (enum AVSampleFormat)af->frame->format, 1);

    dec_channel_layout =
        (af->frame->channel_layout && af->frame->channels == av_get_channel_layout_nb_channels(af->frame->channel_layout)) ?
        af->frame->channel_layout : av_get_default_channel_layout(af->frame->channels);
    wanted_nb_samples = synchronize_audio(is, af->frame->nb_samples);

    /* (Re)create the resampler whenever the source parameters change. */
    if (af->frame->format        != is->audio_src.fmt            ||
        dec_channel_layout       != is->audio_src.channel_layout ||
        af->frame->sample_rate   != is->audio_src.freq           ||
        (wanted_nb_samples       != af->frame->nb_samples && !is->swr_ctx)) {
        swr_free(&is->swr_ctx);
        is->swr_ctx = swr_alloc_set_opts(NULL,
                                         is->audio_tgt.channel_layout, is->audio_tgt.fmt, is->audio_tgt.freq,
                                         dec_channel_layout, (enum AVSampleFormat)af->frame->format, af->frame->sample_rate,
                                         0, NULL);
        if (!is->swr_ctx || swr_init(is->swr_ctx) < 0) {
            av_log(NULL, AV_LOG_ERROR,
                   "Cannot create sample rate converter for conversion of %d Hz %s %d channels to %d Hz %s %d channels!\n",
                    af->frame->sample_rate, av_get_sample_fmt_name((enum AVSampleFormat)af->frame->format), af->frame->channels,
                    is->audio_tgt.freq, av_get_sample_fmt_name(is->audio_tgt.fmt), is->audio_tgt.channels);
            swr_free(&is->swr_ctx);
            return -1;
        }
        is->audio_src.channel_layout = dec_channel_layout;
        is->audio_src.channels       = af->frame->channels;
        is->audio_src.freq           = af->frame->sample_rate;
        is->audio_src.fmt            = (enum AVSampleFormat)af->frame->format;
    }

    if (is->swr_ctx) {
        const uint8_t **in = (const uint8_t **)af->frame->extended_data;
        uint8_t **out = &is->audio_buf1;
        int out_count = (int64_t)wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate + 256;
        int out_size  = av_samples_get_buffer_size(NULL, is->audio_tgt.channels, out_count, is->audio_tgt.fmt, 0);
        int len2;
        if (out_size < 0) {
            av_log(NULL, AV_LOG_ERROR, "av_samples_get_buffer_size() failed\n");
            return -1;
        }
        if (wanted_nb_samples != af->frame->nb_samples) {
            if (swr_set_compensation(is->swr_ctx, (wanted_nb_samples - af->frame->nb_samples) * is->audio_tgt.freq / af->frame->sample_rate,
                                        wanted_nb_samples * is->audio_tgt.freq / af->frame->sample_rate) < 0) {
                av_log(NULL, AV_LOG_ERROR, "swr_set_compensation() failed\n");
                return -1;
            }
        }
        av_fast_malloc(&is->audio_buf1, &is->audio_buf1_size, out_size);
        if (!is->audio_buf1)
            return AVERROR(ENOMEM);
        len2 = swr_convert(is->swr_ctx, out, out_count, in, af->frame->nb_samples);
        if (len2 < 0) {
            av_log(NULL, AV_LOG_ERROR, "swr_convert() failed\n");
            return -1;
        }
        if (len2 == out_count) {
            av_log(NULL, AV_LOG_WARNING, "audio buffer is probably too small\n");
            if (swr_init(is->swr_ctx) < 0)
                swr_free(&is->swr_ctx);
        }
        is->audio_buf = is->audio_buf1;
        resampled_data_size = len2 * is->audio_tgt.channels * av_get_bytes_per_sample(is->audio_tgt.fmt);

		//===================== speed-change processing begin =====================
		int bytes_per_sample = av_get_bytes_per_sample(is->audio_tgt.fmt);
		/* Read the rate once so the check, the buffer sizing and the SoundTouch call all use the same value. */
		double play_rate = is->play_rate;
		/* The packing loop below builds 16-bit samples from byte pairs, so it is only valid for 2-byte samples. */
		if (is->soundTouchHandle && bytes_per_sample == 2 &&
			play_rate > 0.0 && play_rate != 1.0 && !is->abort_request)
		{
			/*
			 * Slowing down yields more output than input, so enlarge the
			 * scratch buffer by the stretch factor and keep one extra
			 * block of headroom. This is a heuristic bound, since
			 * soundtouch_translate() is not told the buffer capacity.
			 */
			double stretch = play_rate < 1.0 ? 1.0 / play_rate : 1.0;
			int scratch_size = (int)(out_size * (double)translate_time * stretch) + out_size;
			av_fast_malloc(&is->audio_new_buf, &is->audio_new_buf_size, scratch_size);
			if (!is->audio_new_buf) {
				// Allocation failed; buf already freed
				return AVERROR(ENOMEM);
			}
			/* Assemble 16-bit samples from the resampled bytes, low byte first. */
			for (int i = 0; i < (resampled_data_size / 2); i++)
			{
				is->audio_new_buf[i] = (is->audio_buf1[i * 2] | (is->audio_buf1[i * 2 + 1] << 8));
			}
			/* The samples were resampled to the output format, so pass the output sample rate, not the source frame's. */
			int ret_len = soundtouch_translate(is->soundTouchHandle, is->audio_new_buf, (float)play_rate, (float)(1.0 / play_rate),
				resampled_data_size / 2, bytes_per_sample, is->audio_tgt.channels, is->audio_tgt.freq);
			if (ret_len > 0) {
				is->audio_buf = (uint8_t*)is->audio_new_buf;
				resampled_data_size = ret_len;
			}
			else {
				/* No output yet: feed the next frame and allow for a larger result. */
				translate_time++;
				goto reload;
			}
		}
		//===================== speed-change processing end =====================
    } else {
        is->audio_buf = af->frame->data[0];
        resampled_data_size = data_size;
    }

    audio_clock0 = is->audio_clock;
    /* update the audio clock with the pts */
    if (!isnan(af->pts))
        is->audio_clock = af->pts + (double) af->frame->nb_samples / af->frame->sample_rate;
    else
        is->audio_clock = NAN;
    is->audio_clock_serial = af->serial;
#ifdef DEBUG
    {
        static double last_clock;
        printf("audio: delay=%0.3f clock=%0.3f clock0=%0.3f\n",
               is->audio_clock - last_clock,
               is->audio_clock, audio_clock0);
        last_clock = is->audio_clock;
    }
#endif
    return resampled_data_size;
}

  • 在event_loop函数添加按键消息处理,实现按键控制播放速度切换
/*
 * Excerpt of ffplay's event_loop(): key handling that selects the playback
 * speed. The surrounding loop and the declaration of `event` are elided.
 *
 * SDLK_* values are key codes, so they are matched against the key symbol of
 * the keyboard event rather than against event.type. This switch belongs in
 * the branch that handles key-down events.
 */
static void event_loop(VideoState *cur_stream)
{
			switch (event.key.keysym.sym) {
			case SDLK_1:
				cur_stream->play_rate = 1.0;
				break;
			case SDLK_2:
				cur_stream->play_rate = 1.25;
				break;
			case SDLK_3:
				cur_stream->play_rate = 1.5;
				break;
			case SDLK_4:
				cur_stream->play_rate = 2.0;
				break;
			case SDLK_5:
				cur_stream->play_rate = 0.75;
				break;
			case SDLK_6:
				cur_stream->play_rate = 0.5;
				break;
			}
}

到此为止,ffplay增加了倍速播放的功能,通过键盘123456按键切换播放速度。


推荐免费直播学习课程:c/c++Linux后台服务器开发高级架构师学习视频

音视频流媒体权威资料整理,500+份文章,论文,视频,实践项目,协议,业界大神名单

参考

音频变速变调原理及soundtouch代码分析

 类似资料: