Audio Resampler Implement

晋坚
2023-12-01

前些日子无聊实现的一个Audio PCM Resampler的代码,仅仅支持采样率为44.1khz的源数据的向下转换,可转换成8khz/11.025khz/16khz/22.050khz。

接口设计参考android-4.0.3_r1\system\media\audio_utils\include\audio_utils\resampler.h,因此使用方法也和Android的那套接口保持一致。

// File:   resampler.h
// Author: Loon <sepnic@gmail.com>

#ifndef __AUDIO_RESAMPLER_H
#define __AUDIO_RESAMPLER_H


#include <stdint.h>

struct resampler_buffer {
    union {
        void*       raw;
        short*      i16;
        int8_t*     i8;
    };
    size_t frame_count;
};

/* call back interface used by the resampler to get new data */
struct resampler_buffer_provider {
    /**
     *  get a new buffer of data:
     *   as input: buffer->frame_count is the number of frames requested
     *   as output: buffer->frame_count is the number of frames returned
     *              buffer->raw points to data returned
     */
    int (*get_next_buffer)(struct resampler_buffer_provider *provider,
            struct resampler_buffer *buffer);
    /**
     *  release a consumed buffer of data:
     *   as input: buffer->frame_count is the number of frames released
     *             buffer->raw points to data released
     */
    void (*release_buffer)(struct resampler_buffer_provider *provider,
            struct resampler_buffer *buffer);
};

struct resampler_itfe {
    /**
     * reset resampler state
     */
    void (*reset)(struct resampler_itfe *resampler);
    /**
     * resample input from buffer provider and output at most *outFrameCount to out buffer.
     * *outFrameCount is updated with the actual number of frames produced.
     */
    int (*resample_from_provider)(struct resampler_itfe *resampler,
                    int16_t *out,
                    size_t *outFrameCount);
    /**
     * resample at most *inFrameCount frames from in buffer and output at most
     * *outFrameCount to out buffer. *inFrameCount and *outFrameCount are updated respectively
     * with the number of frames remaining in input and written to output.
     */
    int (*resample_from_input)(struct resampler_itfe *resampler,
                    int16_t *in,
                    size_t *inFrameCount,
                    int16_t *out,
                    size_t *outFrameCount);
};

/**
 * create a resampler according to input parameters passed.
 * If resampler_buffer_provider is not NULL only resample_from_provider() can be called.
 * If resampler_buffer_provider is NULL only resample_from_input() can be called.
 */
int create_down_resampler(uint32_t outSampleRate,
          uint32_t channelCount,
          uint32_t frame_count,
          struct resampler_buffer_provider *provider,
          struct resampler_itfe **);

/**
 * release resampler resources.
 */
void release_down_resampler(struct resampler_itfe *);


#endif /** __AUDIO_RESAMPLER_H */


// File:   resampler.c
// Author: Loon <sepnic@gmail.com>

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <fcntl.h>
#include <errno.h>
#include "resampler.h"

struct down_resampler {
	struct resampler_itfe itfe;
	
	struct resampler_buffer_provider *provider; // buffer provider installed by client
	uint32_t out_sample_rate;                   // output sampling rate in Hz
	uint32_t channel_count;                     // number of channels (interleaved)
	uint32_t frame_count;                       // number of frames
	
	int16_t *mInLeft;
	int16_t *mInRight;
	int16_t *mTmpLeft;
	int16_t *mTmpRight;
	int16_t *mTmp2Left;
	int16_t *mTmp2Right;
	int16_t *mOutLeft;
	int16_t *mOutRight;
	int mInInBuf;
	int mInTmpBuf;
	int mInTmp2Buf;
	int mOutBufPos;
	int mInOutBuf;
};


/*------------------------------------------------------------------------------*/


/*------------------------------------------------------------------------------*/

/*  DownSampler Algorithm Implement */

/*
 * 2.30 fixed point FIR filter coefficients for conversion 44100 -> 22050.
 * (Works equivalently for 22010 -> 11025 or any other halving, of course.)
 *
 * Transition band from about 18 kHz, passband ripple < 0.1 dB,
 * stopband ripple at about -55 dB, linear phase.
 *
 * Design and display in MATLAB or Octave using:
 *
 * filter = fir1(19, 0.5); filter = round(filter * 2**30); freqz(filter * 2**-30);
 */
static const int32_t filter_22khz_coeff[] = {
    2089257, 2898328, -5820678, -10484531,
    19038724, 30542725, -50469415, -81505260,
    152544464, 478517512, 478517512, 152544464,
    -81505260, -50469415, 30542725, 19038724,
    -10484531, -5820678, 2898328, 2089257,
};
#define NUM_COEFF_22KHZ (sizeof(filter_22khz_coeff) / sizeof(filter_22khz_coeff[0]))
#define OVERLAP_22KHZ (NUM_COEFF_22KHZ - 2)

/*
 * Convolution of signals A and reverse(B). (In our case, the filter response
 * is symmetric, so the reversing doesn't matter.)
 * A is taken to be in 0.16 fixed-point, and B is taken to be in 2.30 fixed-point.
 * The answer will be in 16.16 fixed-point, unclipped.
 *
 * This function would probably be the prime candidate for SIMD conversion if
 * you want more speed.
 */
int32_t fir_convolve(const int16_t* a, const int32_t* b, int num_samples)
{
        int32_t sum = 1 << 13;
        for (int i = 0; i < num_samples; ++i) {
                sum += a[i] * (b[i] >> 16);
        }
        return sum >> 14;
}

/* Clip from 16.16 fixed-point to 0.16 fixed-point. */
int16_t clip(int32_t x)
{
    if (x < -32768) {
        return -32768;
    } else if (x > 32767) {
        return 32767;
    } else {
        return x;
    }
}

/*
 * Convert a chunk from 44 kHz to 22 kHz. Will update num_samples_in and num_samples_out
 * accordingly, since it may leave input samples in the buffer due to overlap.
 *
 * Input and output are taken to be in 0.16 fixed-point.
 */
void resample_2_1(int16_t* input, int16_t* output, int* num_samples_in, int* num_samples_out)
{
    if (*num_samples_in < (int)NUM_COEFF_22KHZ) {
        *num_samples_out = 0;
        return;
    }

    int odd_smp = *num_samples_in & 0x1;
    int num_samples = *num_samples_in - odd_smp - OVERLAP_22KHZ;

    for (int i = 0; i < num_samples; i += 2) {
            output[i / 2] = clip(fir_convolve(input + i, filter_22khz_coeff, NUM_COEFF_22KHZ));
    }

    memmove(input, input + num_samples, (OVERLAP_22KHZ + odd_smp) * sizeof(*input));
    *num_samples_out = num_samples / 2;
    *num_samples_in = OVERLAP_22KHZ + odd_smp;
}

/*
 * 2.30 fixed point FIR filter coefficients for conversion 22050 -> 16000,
 * or 11025 -> 8000.
 *
 * Transition band from about 14 kHz, passband ripple < 0.1 dB,
 * stopband ripple at about -50 dB, linear phase.
 *
 * Design and display in MATLAB or Octave using:
 *
 * filter = fir1(23, 16000 / 22050); filter = round(filter * 2**30); freqz(filter * 2**-30);
 */
static const int32_t filter_16khz_coeff[] = {
    2057290, -2973608, 1880478, 4362037,
    -14639744, 18523609, -1609189, -38502470,
    78073125, -68353935, -59103896, 617555440,
    617555440, -59103896, -68353935, 78073125,
    -38502470, -1609189, 18523609, -14639744,
    4362037, 1880478, -2973608, 2057290,
};
#define NUM_COEFF_16KHZ (sizeof(filter_16khz_coeff) / sizeof(filter_16khz_coeff[0]))
#define OVERLAP_16KHZ (NUM_COEFF_16KHZ - 1)

/*
 * Convert a chunk from 22 kHz to 16 kHz. Will update num_samples_in and
 * num_samples_out accordingly, since it may leave input samples in the buffer
 * due to overlap.
 *
 * This implementation is rather ad-hoc; it first low-pass filters the data
 * into a temporary buffer, and then converts chunks of 441 input samples at a
 * time into 320 output samples by simple linear interpolation. A better
 * implementation would use a polyphase filter bank to do these two operations
 * in one step.
 *
 * Input and output are taken to be in 0.16 fixed-point.
 */

#define RESAMPLE_16KHZ_SAMPLES_IN 441
#define RESAMPLE_16KHZ_SAMPLES_OUT 320

void resample_441_320(int16_t* input, int16_t* output, int* num_samples_in, int* num_samples_out)
{
    const int num_blocks = (*num_samples_in - OVERLAP_16KHZ) / RESAMPLE_16KHZ_SAMPLES_IN;
    if (num_blocks < 1) {
        *num_samples_out = 0;
        return;
    }

    for (int i = 0; i < num_blocks; ++i) {
        uint32_t tmp[RESAMPLE_16KHZ_SAMPLES_IN];
        for (int j = 0; j < RESAMPLE_16KHZ_SAMPLES_IN; ++j) {
            tmp[j] = fir_convolve(input + i * RESAMPLE_16KHZ_SAMPLES_IN + j,
                          filter_16khz_coeff,
                          NUM_COEFF_16KHZ);
        }

        const float step_float = (float)RESAMPLE_16KHZ_SAMPLES_IN / (float)RESAMPLE_16KHZ_SAMPLES_OUT;

        uint32_t in_sample_num = 0;   // 16.16 fixed point
        const uint32_t step = (uint32_t)(step_float * 65536.0f + 0.5f);  // 16.16 fixed point
        for (int j = 0; j < RESAMPLE_16KHZ_SAMPLES_OUT; ++j, in_sample_num += step) {
            const uint32_t whole = in_sample_num >> 16;
            const uint32_t frac = (in_sample_num & 0xffff);  // 0.16 fixed point
            const int32_t s1 = tmp[whole];
            const int32_t s2 = tmp[whole + 1];
            *output++ = clip(s1 + (((s2 - s1) * (int32_t)frac) >> 16));
        }
    }

    const int samples_consumed = num_blocks * RESAMPLE_16KHZ_SAMPLES_IN;
    memmove(input, input + samples_consumed, (*num_samples_in - samples_consumed) * sizeof(*input));
    *num_samples_in -= samples_consumed;
    *num_samples_out = RESAMPLE_16KHZ_SAMPLES_OUT * num_blocks;
}


/*------------------------------------------------------------------------------*/


/*------------------------------------------------------------------------------*/

/* DownSampler Callback Functions */

static void down_resampler_reset(struct resampler_itfe *resampler)
{
	struct down_resampler *rsmp = 
		(struct down_resampler *)((char *)resampler - offsetof(struct down_resampler, itfe));

	if (rsmp == NULL) {
		return;
	}

	rsmp->mInInBuf = 0;
	rsmp->mInTmpBuf = 0;
	rsmp->mInTmp2Buf = 0;
	rsmp->mOutBufPos = 0;
	rsmp->mInOutBuf = 0;
}

// outputs a number of frames less or equal to *outFrameCount and updates *outFrameCount
// with the actual number of frames produced.
static int down_resampler_resample_from_provider(struct resampler_itfe *resampler,
                       int16_t *out,
                       size_t *outFrameCount)
{
	struct down_resampler *rsmp = 
		(struct down_resampler *)((char *)resampler - offsetof(struct down_resampler, itfe));
	
	if (rsmp == NULL || out == NULL || outFrameCount == NULL) {
		return -EINVAL;
	}
	if (rsmp->provider == NULL) {
		*outFrameCount = 0;
		return -ENOSYS;
	}
	
	int16_t *outLeft = rsmp->mTmp2Left;
	int16_t *outRight = rsmp->mTmp2Left;
	if (rsmp->out_sample_rate == 22050) {
		outLeft = rsmp->mTmpLeft;
		outRight = rsmp->mTmpRight;
	} else if (rsmp->out_sample_rate == 8000){
		outLeft = rsmp->mOutLeft;
		outRight = rsmp->mOutRight;
	}

	int outFrames = 0;
	int remaingFrames = *outFrameCount;

	if (rsmp->mInOutBuf) {
		int frames = (remaingFrames > rsmp->mInOutBuf) ? rsmp->mInOutBuf : remaingFrames;

		for (int i = 0; i < frames; ++i) {
			out[i] = outLeft[rsmp->mOutBufPos + i];
		}
		if (rsmp->channel_count == 2) {
			for (int i = 0; i < frames; ++i) {
				out[i * 2] = outLeft[rsmp->mOutBufPos + i];
				out[i * 2 + 1] = outRight[rsmp->mOutBufPos + i];
			}
		}
		remaingFrames -= frames;
		rsmp->mInOutBuf -= frames;
		rsmp->mOutBufPos += frames;
		outFrames += frames;
	}

	while (remaingFrames) {
		struct resampler_buffer buf;
		buf.frame_count =  rsmp->frame_count - rsmp->mInInBuf;
		int ret = rsmp->provider->get_next_buffer(rsmp->provider, &buf);
		if (buf.raw == NULL) {
			*outFrameCount = outFrames;
			return ret;
		}

		for (size_t i = 0; i < buf.frame_count; ++i) {
			rsmp->mInLeft[i + rsmp->mInInBuf] = buf.i16[i];
		}
		if (rsmp->channel_count == 2) {
			for (size_t i = 0; i < buf.frame_count; ++i) {
				rsmp->mInLeft[i + rsmp->mInInBuf] = buf.i16[i * 2];
				rsmp->mInRight[i + rsmp->mInInBuf] = buf.i16[i * 2 + 1];
			}
		}
		rsmp->mInInBuf += buf.frame_count;
		rsmp->provider->release_buffer(rsmp->provider, &buf);

		/* 44010 -> 22050 */
		{
			int samples_in_left = rsmp->mInInBuf;
			int samples_out_left;
			resample_2_1(rsmp->mInLeft, rsmp->mTmpLeft + rsmp->mInTmpBuf, &samples_in_left, &samples_out_left);

			if (rsmp->channel_count == 2) {
				int samples_in_right = rsmp->mInInBuf;
				int samples_out_right;
				resample_2_1(rsmp->mInRight, rsmp->mTmpRight + rsmp->mInTmpBuf, &samples_in_right, &samples_out_right);
			}

			rsmp->mInInBuf = samples_in_left;
			rsmp->mInTmpBuf += samples_out_left;
			rsmp->mInOutBuf = samples_out_left;
		}

		if (rsmp->out_sample_rate == 11025 || rsmp->out_sample_rate == 8000) {
			/* 22050 - > 11025 */
			int samples_in_left = rsmp->mInTmpBuf;
			int samples_out_left;
			resample_2_1(rsmp->mTmpLeft, rsmp->mTmp2Left + rsmp->mInTmp2Buf, &samples_in_left, &samples_out_left);

			if (rsmp->channel_count == 2) {
				int samples_in_right = rsmp->mInTmpBuf;
				int samples_out_right;
				resample_2_1(rsmp->mTmpRight, rsmp->mTmp2Right + rsmp->mInTmp2Buf, &samples_in_right, &samples_out_right);
			}


			rsmp->mInTmpBuf = samples_in_left;
			rsmp->mInTmp2Buf += samples_out_left;
			rsmp->mInOutBuf = samples_out_left;

			if (rsmp->out_sample_rate == 8000) {
				/* 11025 -> 8000*/
				int samples_in_left = rsmp->mInTmp2Buf;
				int samples_out_left;
				resample_441_320(rsmp->mTmp2Left, rsmp->mOutLeft, &samples_in_left, &samples_out_left);

				if (rsmp->channel_count == 2) {
					int samples_in_right = rsmp->mInTmp2Buf;
					int samples_out_right;
					resample_441_320(rsmp->mTmp2Right, rsmp->mOutRight, &samples_in_right, &samples_out_right);
				}

				rsmp->mInTmp2Buf = samples_in_left;
				rsmp->mInOutBuf = samples_out_left;
			} else {
				rsmp->mInTmp2Buf = 0;
			}

		} else if (rsmp->out_sample_rate == 16000) { 
			/* 22050 -> 16000*/
			int samples_in_left = rsmp->mInTmpBuf;
			int samples_out_left;
			resample_441_320(rsmp->mTmpLeft, rsmp->mTmp2Left, &samples_in_left, &samples_out_left);

			if (rsmp->channel_count == 2) {
				int samples_in_right = rsmp->mInTmpBuf;
				int samples_out_right;
				resample_441_320(rsmp->mTmpRight, rsmp->mTmp2Right, &samples_in_right, &samples_out_right);
			}

			rsmp->mInTmpBuf = samples_in_left;
			rsmp->mInOutBuf = samples_out_left;
		} else {
			rsmp->mInTmpBuf = 0;
		}

		int frames = (remaingFrames > rsmp->mInOutBuf) ? rsmp->mInOutBuf : remaingFrames;

		for (int i = 0; i < frames; ++i) {
			out[outFrames + i] = outLeft[i];
		}
		if (rsmp->channel_count == 2) {
			for (int i = 0; i < frames; ++i) {
				out[(outFrames + i) * 2] = outLeft[i];
				out[(outFrames + i) * 2 + 1] = outRight[i];
			}
		}
		remaingFrames -= frames;
		outFrames += frames;
		rsmp->mOutBufPos = frames;
		rsmp->mInOutBuf -= frames;
	}

	return 0;
}

static int down_resampler_resample_from_input(struct resampler_itfe *resampler,
                                  int16_t *in,
                                  size_t *inFrameCount,
                                  int16_t *out,
                                  size_t *outFrameCount)
{
	struct down_resampler *rsmp = 
		(struct down_resampler *)((char *)resampler - offsetof(struct down_resampler, itfe));
	
	if (rsmp == NULL || in == NULL || inFrameCount == NULL ||
		out == NULL || outFrameCount == NULL) {
		return -EINVAL;
	}
	if (rsmp->provider != NULL) {
		*outFrameCount = 0;
		return -ENOSYS;
	}
	
	// Not implement yet!!!
	
	return 0;
}


/*------------------------------------------------------------------------------*/


/*------------------------------------------------------------------------------*/

/* DownSampler Export Interfaces */

int create_down_resampler(uint32_t outSampleRate,
                    uint32_t channelCount,
                    uint32_t frame_count,
                    struct resampler_buffer_provider* provider,
                    struct resampler_itfe **resampler)
{
	int error;
	struct down_resampler *rsmp;
	
	if (resampler == NULL) {
		return -EINVAL;
	}

	if (outSampleRate != 8000 && outSampleRate != 11025 && outSampleRate != 16000 &&
			outSampleRate != 22050) {
		return -EINVAL;
	}
	
	*resampler = NULL;
	
	rsmp = (struct down_resampler *)malloc(sizeof(struct down_resampler));
	if (rsmp == NULL) return -ENOMEM;

	rsmp->itfe.reset = down_resampler_reset;
	rsmp->itfe.resample_from_provider = down_resampler_resample_from_provider;
	rsmp->itfe.resample_from_input = down_resampler_resample_from_input;

	rsmp->provider = provider;
	rsmp->out_sample_rate = outSampleRate;
	rsmp->channel_count = channelCount;
	rsmp->frame_count = frame_count;

	rsmp->mInLeft = malloc(sizeof(int16_t) * frame_count);
	rsmp->mInRight = malloc(sizeof(int16_t) * frame_count);
	rsmp->mTmpLeft = malloc(sizeof(int16_t) * frame_count);
	rsmp->mTmpRight = malloc(sizeof(int16_t) * frame_count);
	rsmp->mTmp2Left = malloc(sizeof(int16_t) * frame_count);
	rsmp->mTmp2Right = malloc(sizeof(int16_t) * frame_count);
	rsmp->mOutLeft = malloc(sizeof(int16_t) * frame_count);
	rsmp->mOutRight = malloc(sizeof(int16_t) * frame_count);
	if (rsmp->mInLeft == NULL || rsmp->mInRight == NULL ||
			rsmp->mTmpLeft == NULL || rsmp->mTmpRight == NULL ||
			rsmp->mTmp2Left == NULL || rsmp->mTmp2Right == NULL ||
			rsmp->mOutLeft == NULL || rsmp->mOutRight == NULL)
		return -ENOMEM;

	down_resampler_reset(&rsmp->itfe);
	
	*resampler = &rsmp->itfe;
	
	return 0;
}

void release_down_resampler(struct resampler_itfe *resampler)
{
	struct down_resampler *rsmp = 
		(struct down_resampler *)((char *)resampler - offsetof(struct down_resampler, itfe));
	
	if (rsmp == NULL) {
		return;
	}
	
	if (rsmp->mInLeft) free(rsmp->mInLeft);
    if (rsmp->mInRight) free(rsmp->mInRight);
    if (rsmp->mTmpLeft) free(rsmp->mTmpLeft);
    if (rsmp->mTmpRight) free(rsmp->mTmpRight);
    if (rsmp->mTmp2Left) free(rsmp->mTmp2Left);
    if (rsmp->mTmp2Right) free(rsmp->mTmp2Right);
    if (rsmp->mOutLeft) free(rsmp->mOutLeft);
    if (rsmp->mOutRight) free(rsmp->mOutRight);
	
	free(rsmp);
}

 类似资料:

相关阅读

相关文章

相关问答