From 60f767713227b5405b855e6e6e2a0475ecd96bcc Mon Sep 17 00:00:00 2001 From: jacqueline Date: Fri, 4 Aug 2023 20:07:44 +1000 Subject: Do our own resampling --- src/audio/resample.cpp | 260 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 260 insertions(+) create mode 100644 src/audio/resample.cpp (limited to 'src/audio/resample.cpp') diff --git a/src/audio/resample.cpp b/src/audio/resample.cpp new file mode 100644 index 00000000..93ea1034 --- /dev/null +++ b/src/audio/resample.cpp @@ -0,0 +1,260 @@ +#include "resample.hpp" + +#include +#include +#include +#include +#include + +#include "esp_log.h" + +#include "sample.hpp" +#include "stream_info.hpp" + +namespace audio { + +static constexpr size_t kFilterSize = 1536; + +constexpr auto calc_deltas(const std::array& filter) + -> std::array { + std::array deltas; + for (size_t n = 0; n < kFilterSize - 1; n++) + deltas[n] = filter[n + 1] - filter[n]; + return deltas; +} + +static const std::array kFilter{ +#include "fir.h" +}; + +static const std::array kFilterDeltas = + calc_deltas(kFilter); + +class Channel { + public: + Channel(uint32_t src_rate, + uint32_t dest_rate, + size_t chunk_size, + size_t skip); + ~Channel(); + + auto output_chunk_size() -> size_t { return output_chunk_size_; } + + auto FlushSamples(cpp::span out) -> size_t; + auto AddSample(sample::Sample, cpp::span out) -> std::size_t; + auto ApplyFilter() -> sample::Sample; + + private: + size_t output_chunk_size_; + size_t skip_; + + uint32_t factor_; /* factor */ + + uint32_t time_; /* time */ + + uint32_t time_per_filter_iteration_; /* output step */ + uint32_t filter_step_; /* filter step */ + uint32_t filter_end_; /* filter end */ + + int32_t unity_scale_; /* unity scale */ + + int32_t samples_per_filter_wing_; /* extra samples */ + int32_t latest_sample_; /* buffer index */ + cpp::span sample_buffer_; /* the buffer */ +}; + +enum { + Nl = 8, /* 2^Nl samples per zero crossing in fir */ + Nη = 8, /* phase bits for filter interpolation */ + kPhaseBits = Nl + Nη, /* phase bits (fract of fixed point) */ + One = 1 << kPhaseBits, +}; + +Channel::Channel(uint32_t irate, uint32_t orate, size_t count, size_t skip) + : skip_(skip) { + factor_ = ((uint64_t)orate << kPhaseBits) / irate; + if (factor_ != One) { + time_per_filter_iteration_ = ((uint64_t)irate << kPhaseBits) / orate; + filter_step_ = 1 << (Nl + Nη); + filter_end_ = kFilterSize << Nη; + samples_per_filter_wing_ = 1 + (filter_end_ / filter_step_); + unity_scale_ = 13128; /* unity scale factor for fir */ + if (factor_ < One) { + unity_scale_ *= factor_; + unity_scale_ >>= kPhaseBits; + filter_step_ *= factor_; + filter_step_ >>= kPhaseBits; + samples_per_filter_wing_ *= time_per_filter_iteration_; + samples_per_filter_wing_ >>= kPhaseBits; + } + latest_sample_ = samples_per_filter_wing_; + time_ = latest_sample_ << kPhaseBits; + + size_t buf_size = samples_per_filter_wing_ * 2 + count; + int32_t* buf = new int32_t[buf_size]; + sample_buffer_ = {buf, buf_size}; + count += buf_size; /* account for buffer accumulation */ + } + output_chunk_size_ = ((uint64_t)count * factor_) >> kPhaseBits; +} + +Channel::~Channel() { + delete sample_buffer_.data(); +} + +auto Channel::ApplyFilter() -> sample::Sample { + uint32_t iteration, p, i; + int32_t *sample, a; + + int64_t value = 0; + + // I did my best, but I'll be honest with you I've no idea about any of this + // maths stuff. + + // Left wing of the filter. + sample = &sample_buffer_[time_ >> kPhaseBits]; + p = time_ & ((1 << kPhaseBits) - 1); + iteration = factor_ < One ? (factor_ * p) >> kPhaseBits : p; + while (iteration < filter_end_) { + i = iteration >> Nη; + a = iteration & ((1 << Nη) - 1); + iteration += filter_step_; + a *= kFilterDeltas[i]; + a >>= Nη; + a += kFilter[i]; + value += static_cast(*--sample) * a; + } + + // Right wing of the filter. + sample = &sample_buffer_[time_ >> kPhaseBits]; + p = (One - p) & ((1 << kPhaseBits) - 1); + iteration = factor_ < One ? (factor_ * p) >> kPhaseBits : p; + if (p == 0) /* skip h[0] as it was already been summed above if p == 0 */ + iteration += filter_step_; + while (iteration < filter_end_) { + i = iteration >> Nη; + a = iteration & ((1 << Nη) - 1); + iteration += filter_step_; + a *= kFilterDeltas[i]; + a >>= Nη; + a += kFilter[i]; + value += static_cast(*sample++) * a; + } + + /* scale */ + value >>= 2; + value *= unity_scale_; + value >>= 27; + + return sample::Clip(value); +} + +auto Channel::FlushSamples(cpp::span out) -> size_t { + size_t zeroes_needed = (2 * samples_per_filter_wing_) - latest_sample_; + size_t produced = 0; + while (zeroes_needed > 0) { + produced += AddSample(0, out.subspan(produced)); + zeroes_needed--; + } + return produced; +} + +auto Channel::AddSample(sample::Sample in, cpp::span out) + -> size_t { + // Add the latest sample to our working buffer. + sample_buffer_[latest_sample_++] = in; + + // If we don't have enough samples to run the filter, then bail out and wait + // for more. + if (latest_sample_ < 2 * samples_per_filter_wing_) { + return 0; + } + + // Apply the filter to the buffered samples. First, we work out how long (in + // samples) we can run the filter for before running out. This isn't as + // trivial as it might look; e.g. depending on the resampling factor we might + // be doubling the number of samples, or halving them. + uint32_t max_time = (latest_sample_ - samples_per_filter_wing_) << kPhaseBits; + size_t samples_output = 0; + while (time_ < max_time) { + out[skip_ * samples_output++] = ApplyFilter(); + time_ += time_per_filter_iteration_; + } + + // If we are approaching the end of our buffer, we need to shift all the data + // in it down to the front to make room for more samples. + int32_t current_sample = time_ >> kPhaseBits; + if (current_sample >= (sample_buffer_.size() - samples_per_filter_wing_)) { + // NB: bit shifting back and forth means we're only modifying `time` by + // whole samples. + time_ -= current_sample << kPhaseBits; + time_ += samples_per_filter_wing_ << kPhaseBits; + + int32_t new_current_sample = time_ >> kPhaseBits; + new_current_sample -= samples_per_filter_wing_; + current_sample -= samples_per_filter_wing_; + + int32_t samples_to_move = latest_sample_ - current_sample; + if (samples_to_move > 0) { + auto samples = sample_buffer_.subspan(current_sample, samples_to_move); + std::copy_backward(samples.begin(), samples.end(), + sample_buffer_.first(new_current_sample).end()); + latest_sample_ = new_current_sample + samples_to_move; + } else { + latest_sample_ = new_current_sample; + } + } + + return samples_output; +} + +static const size_t kChunkSizeSamples = 256; + +Resampler::Resampler(uint32_t source_sample_rate, + uint32_t target_sample_rate, + uint8_t num_channels) + : source_sample_rate_(source_sample_rate), + target_sample_rate_(target_sample_rate), + factor_(((uint64_t)target_sample_rate << kPhaseBits) / + source_sample_rate), + num_channels_(num_channels), + channels_() { + for (int i = 0; i < num_channels; i++) { + channels_.emplace_back(source_sample_rate, target_sample_rate, + kChunkSizeSamples, num_channels); + } +} + +Resampler::~Resampler() {} + +auto Resampler::Process(cpp::span input, + cpp::span output, + bool end_of_data) -> std::pair { + size_t samples_used = 0; + std::vector samples_produced = {num_channels_, 0}; + size_t total_samples_produced = 0; + + size_t slop = (factor_ >> kPhaseBits) + 1; + + uint_fast8_t cur_channel = 0; + + while (input.size() > samples_used && + output.size() > total_samples_produced + slop) { + // Work out where the next set of samples should be placed. + size_t next_output_index = + (samples_produced[cur_channel] * num_channels_) + cur_channel; + + // Generate the next samples + size_t new_samples = channels_[cur_channel].AddSample( + input[samples_used++], output.subspan(next_output_index)); + + samples_produced[cur_channel] += new_samples; + total_samples_produced += new_samples; + + cur_channel = (cur_channel + 1) % num_channels_; + } + + return {samples_used, total_samples_produced}; +} + +} // namespace audio -- cgit v1.2.3 From 4118d880c3f20dbd9304a3f50d6d111f194592c8 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Mon, 7 Aug 2023 09:47:44 +1000 Subject: Fix dangle build issues, do some tweaks to investigate performance --- src/audio/resample.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/audio/resample.cpp') diff --git a/src/audio/resample.cpp b/src/audio/resample.cpp index 93ea1034..6f7e670e 100644 --- a/src/audio/resample.cpp +++ b/src/audio/resample.cpp @@ -231,7 +231,8 @@ auto Resampler::Process(cpp::span input, cpp::span output, bool end_of_data) -> std::pair { size_t samples_used = 0; - std::vector samples_produced = {num_channels_, 0}; + std::vector samples_produced = {}; + samples_produced.resize(num_channels_, 0); size_t total_samples_produced = 0; size_t slop = (factor_ >> kPhaseBits) + 1; -- cgit v1.2.3 From a66c3428063017f2233b6b15d5ce6c920d5c9095 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Mon, 7 Aug 2023 14:26:04 +1000 Subject: Resampling *basically* working? Just cleanup and buffering issues --- src/audio/resample.cpp | 343 +++++++++++++++++++------------------------------ 1 file changed, 134 insertions(+), 209 deletions(-) (limited to 'src/audio/resample.cpp') diff --git a/src/audio/resample.cpp b/src/audio/resample.cpp index 6f7e670e..aa4c8f2a 100644 --- a/src/audio/resample.cpp +++ b/src/audio/resample.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "esp_log.h" @@ -13,249 +14,173 @@ namespace audio { -static constexpr size_t kFilterSize = 1536; +static constexpr char kTag[] = "resample"; -constexpr auto calc_deltas(const std::array& filter) - -> std::array { - std::array deltas; - for (size_t n = 0; n < kFilterSize - 1; n++) - deltas[n] = filter[n + 1] - filter[n]; - return deltas; -} +static constexpr double kLowPassRatio = 0.5; +static constexpr size_t kNumFilters = 8; +static constexpr size_t kTapsPerFilter = 8; -static const std::array kFilter{ -#include "fir.h" -}; - -static const std::array kFilterDeltas = - calc_deltas(kFilter); - -class Channel { - public: - Channel(uint32_t src_rate, - uint32_t dest_rate, - size_t chunk_size, - size_t skip); - ~Channel(); - - auto output_chunk_size() -> size_t { return output_chunk_size_; } - - auto FlushSamples(cpp::span out) -> size_t; - auto AddSample(sample::Sample, cpp::span out) -> std::size_t; - auto ApplyFilter() -> sample::Sample; - - private: - size_t output_chunk_size_; - size_t skip_; - - uint32_t factor_; /* factor */ - - uint32_t time_; /* time */ - - uint32_t time_per_filter_iteration_; /* output step */ - uint32_t filter_step_; /* filter step */ - uint32_t filter_end_; /* filter end */ - - int32_t unity_scale_; /* unity scale */ - - int32_t samples_per_filter_wing_; /* extra samples */ - int32_t latest_sample_; /* buffer index */ - cpp::span sample_buffer_; /* the buffer */ -}; - -enum { - Nl = 8, /* 2^Nl samples per zero crossing in fir */ - Nη = 8, /* phase bits for filter interpolation */ - kPhaseBits = Nl + Nη, /* phase bits (fract of fixed point) */ - One = 1 << kPhaseBits, -}; - -Channel::Channel(uint32_t irate, uint32_t orate, size_t count, size_t skip) - : skip_(skip) { - factor_ = ((uint64_t)orate << kPhaseBits) / irate; - if (factor_ != One) { - time_per_filter_iteration_ = ((uint64_t)irate << kPhaseBits) / orate; - filter_step_ = 1 << (Nl + Nη); - filter_end_ = kFilterSize << Nη; - samples_per_filter_wing_ = 1 + (filter_end_ / filter_step_); - unity_scale_ = 13128; /* unity scale factor for fir */ - if (factor_ < One) { - unity_scale_ *= factor_; - unity_scale_ >>= kPhaseBits; - filter_step_ *= factor_; - filter_step_ >>= kPhaseBits; - samples_per_filter_wing_ *= time_per_filter_iteration_; - samples_per_filter_wing_ >>= kPhaseBits; - } - latest_sample_ = samples_per_filter_wing_; - time_ = latest_sample_ << kPhaseBits; +typedef std::array Filter; +static std::array sFilters{}; +static bool sFiltersInitialised = false; - size_t buf_size = samples_per_filter_wing_ * 2 + count; - int32_t* buf = new int32_t[buf_size]; - sample_buffer_ = {buf, buf_size}; - count += buf_size; /* account for buffer accumulation */ - } - output_chunk_size_ = ((uint64_t)count * factor_) >> kPhaseBits; -} +auto InitFilter(int index) -> void; -Channel::~Channel() { - delete sample_buffer_.data(); -} +Resampler::Resampler(uint32_t source_sample_rate, + uint32_t target_sample_rate, + uint8_t num_channels) + : source_sample_rate_(source_sample_rate), + target_sample_rate_(target_sample_rate), + factor_(static_cast(target_sample_rate) / + static_cast(source_sample_rate)), + num_channels_(num_channels) { + channel_buffers_.resize(num_channels); + channel_buffer_size_ = kTapsPerFilter * 16; -auto Channel::ApplyFilter() -> sample::Sample { - uint32_t iteration, p, i; - int32_t *sample, a; - - int64_t value = 0; - - // I did my best, but I'll be honest with you I've no idea about any of this - // maths stuff. - - // Left wing of the filter. - sample = &sample_buffer_[time_ >> kPhaseBits]; - p = time_ & ((1 << kPhaseBits) - 1); - iteration = factor_ < One ? (factor_ * p) >> kPhaseBits : p; - while (iteration < filter_end_) { - i = iteration >> Nη; - a = iteration & ((1 << Nη) - 1); - iteration += filter_step_; - a *= kFilterDeltas[i]; - a >>= Nη; - a += kFilter[i]; - value += static_cast(*--sample) * a; + for (int i = 0; i < num_channels; i++) { + channel_buffers_[i] = + static_cast(calloc(sizeof(float), channel_buffer_size_)); } - // Right wing of the filter. - sample = &sample_buffer_[time_ >> kPhaseBits]; - p = (One - p) & ((1 << kPhaseBits) - 1); - iteration = factor_ < One ? (factor_ * p) >> kPhaseBits : p; - if (p == 0) /* skip h[0] as it was already been summed above if p == 0 */ - iteration += filter_step_; - while (iteration < filter_end_) { - i = iteration >> Nη; - a = iteration & ((1 << Nη) - 1); - iteration += filter_step_; - a *= kFilterDeltas[i]; - a >>= Nη; - a += kFilter[i]; - value += static_cast(*sample++) * a; + output_offset_ = kTapsPerFilter / 2.0f; + input_index_ = kTapsPerFilter; + + if (!sFiltersInitialised) { + sFiltersInitialised = true; + for (int i = 0; i < kNumFilters + 1; i++) { + InitFilter(i); + } } +} - /* scale */ - value >>= 2; - value *= unity_scale_; - value >>= 27; +Resampler::~Resampler() {} - return sample::Clip(value); -} +auto Resampler::Process(cpp::span input, + cpp::span output, + bool end_of_data) -> std::pair { + size_t samples_used = 0; + size_t samples_produced = 0; + + size_t input_frames = input.size() / num_channels_; + size_t output_frames = output.size() / num_channels_; + + int half_taps = kTapsPerFilter / 2, i; + while (output_frames > 0) { + if (output_offset_ >= input_index_ - half_taps) { + if (input_frames > 0) { + if (input_index_ == channel_buffer_size_) { + for (i = 0; i < num_channels_; ++i) { + memmove(channel_buffers_[i], + channel_buffers_[i] + channel_buffer_size_ - kTapsPerFilter, + kTapsPerFilter * sizeof(float)); + } + + output_offset_ -= channel_buffer_size_ - kTapsPerFilter; + input_index_ -= channel_buffer_size_ - kTapsPerFilter; + } + + for (i = 0; i < num_channels_; ++i) { + channel_buffers_[i][input_index_] = + sample::ToFloat(input[samples_used++]); + } + + input_index_++; + input_frames--; + } else + break; + } else { + for (i = 0; i < num_channels_; i++) { + output[samples_produced++] = sample::FromFloat(Subsample(i)); + } -auto Channel::FlushSamples(cpp::span out) -> size_t { - size_t zeroes_needed = (2 * samples_per_filter_wing_) - latest_sample_; - size_t produced = 0; - while (zeroes_needed > 0) { - produced += AddSample(0, out.subspan(produced)); - zeroes_needed--; + output_offset_ += (1.0f / factor_); + } } - return produced; + + return {samples_used, samples_produced}; } -auto Channel::AddSample(sample::Sample in, cpp::span out) - -> size_t { - // Add the latest sample to our working buffer. - sample_buffer_[latest_sample_++] = in; +auto InitFilter(int index) -> void { + const double a0 = 0.35875; + const double a1 = 0.48829; + const double a2 = 0.14128; + const double a3 = 0.01168; - // If we don't have enough samples to run the filter, then bail out and wait - // for more. - if (latest_sample_ < 2 * samples_per_filter_wing_) { - return 0; - } + double fraction = + static_cast(index) / static_cast(kNumFilters); + double filter_sum = 0.0; - // Apply the filter to the buffered samples. First, we work out how long (in - // samples) we can run the filter for before running out. This isn't as - // trivial as it might look; e.g. depending on the resampling factor we might - // be doubling the number of samples, or halving them. - uint32_t max_time = (latest_sample_ - samples_per_filter_wing_) << kPhaseBits; - size_t samples_output = 0; - while (time_ < max_time) { - out[skip_ * samples_output++] = ApplyFilter(); - time_ += time_per_filter_iteration_; - } + // "dist" is the absolute distance from the sinc maximum to the filter tap to + // be calculated, in radians "ratio" is that distance divided by half the tap + // count such that it reaches π at the window extremes + + // Note that with this scaling, the odd terms of the Blackman-Harris + // calculation appear to be negated with respect to the reference formula + // version. + + Filter& filter = sFilters[index]; + std::array working_buffer{}; + for (int i = 0; i < kTapsPerFilter; ++i) { + double dist = fabs((kTapsPerFilter / 2.0 - 1.0) + fraction - i) * M_PI; + double ratio = dist / (kTapsPerFilter / 2.0); + double value; + + if (dist != 0.0) { + value = sin(dist * kLowPassRatio) / (dist * kLowPassRatio); - // If we are approaching the end of our buffer, we need to shift all the data - // in it down to the front to make room for more samples. - int32_t current_sample = time_ >> kPhaseBits; - if (current_sample >= (sample_buffer_.size() - samples_per_filter_wing_)) { - // NB: bit shifting back and forth means we're only modifying `time` by - // whole samples. - time_ -= current_sample << kPhaseBits; - time_ += samples_per_filter_wing_ << kPhaseBits; - - int32_t new_current_sample = time_ >> kPhaseBits; - new_current_sample -= samples_per_filter_wing_; - current_sample -= samples_per_filter_wing_; - - int32_t samples_to_move = latest_sample_ - current_sample; - if (samples_to_move > 0) { - auto samples = sample_buffer_.subspan(current_sample, samples_to_move); - std::copy_backward(samples.begin(), samples.end(), - sample_buffer_.first(new_current_sample).end()); - latest_sample_ = new_current_sample + samples_to_move; + // Blackman-Harris window + value *= a0 + a1 * cos(ratio) + a2 * cos(2 * ratio) + a3 * cos(3 * ratio); } else { - latest_sample_ = new_current_sample; + value = 1.0; } + + working_buffer[i] = value; + filter_sum += value; } - return samples_output; -} + // filter should have unity DC gain -static const size_t kChunkSizeSamples = 256; + double scaler = 1.0 / filter_sum; + double error = 0.0; -Resampler::Resampler(uint32_t source_sample_rate, - uint32_t target_sample_rate, - uint8_t num_channels) - : source_sample_rate_(source_sample_rate), - target_sample_rate_(target_sample_rate), - factor_(((uint64_t)target_sample_rate << kPhaseBits) / - source_sample_rate), - num_channels_(num_channels), - channels_() { - for (int i = 0; i < num_channels; i++) { - channels_.emplace_back(source_sample_rate, target_sample_rate, - kChunkSizeSamples, num_channels); + for (int i = kTapsPerFilter / 2; i < kTapsPerFilter; + i = kTapsPerFilter - i - (i >= kTapsPerFilter / 2)) { + working_buffer[i] *= scaler; + filter[i] = working_buffer[i] - error; + error += static_cast(filter[i]) - working_buffer[i]; } } -Resampler::~Resampler() {} +auto Resampler::Subsample(int channel) -> float { + float sum1, sum2; -auto Resampler::Process(cpp::span input, - cpp::span output, - bool end_of_data) -> std::pair { - size_t samples_used = 0; - std::vector samples_produced = {}; - samples_produced.resize(num_channels_, 0); - size_t total_samples_produced = 0; + cpp::span source{channel_buffers_[channel], channel_buffer_size_}; - size_t slop = (factor_ >> kPhaseBits) + 1; + int offset_integral = std::floor(output_offset_); + source = source.subspan(offset_integral); + float offset_fractional = output_offset_ - offset_integral; - uint_fast8_t cur_channel = 0; + int filter_index = offset_fractional * kNumFilters; + offset_fractional *= kNumFilters; - while (input.size() > samples_used && - output.size() > total_samples_produced + slop) { - // Work out where the next set of samples should be placed. - size_t next_output_index = - (samples_produced[cur_channel] * num_channels_) + cur_channel; + sum1 = ApplyFilter(sFilters[filter_index], + {source.data() - kTapsPerFilter / 2 + 1, kTapsPerFilter}); - // Generate the next samples - size_t new_samples = channels_[cur_channel].AddSample( - input[samples_used++], output.subspan(next_output_index)); + offset_fractional -= filter_index; - samples_produced[cur_channel] += new_samples; - total_samples_produced += new_samples; + sum2 = ApplyFilter(sFilters[filter_index + 1], + {source.data() - kTapsPerFilter / 2 + 1, kTapsPerFilter}); - cur_channel = (cur_channel + 1) % num_channels_; - } + return (sum2 * offset_fractional) + (sum1 * (1.0f - offset_fractional)); +} - return {samples_used, total_samples_produced}; +auto Resampler::ApplyFilter(cpp::span filter, cpp::span input) + -> float { + float sum = 0.0; + for (int i = 0; i < kTapsPerFilter; i++) { + sum += filter[i] * input[i]; + } + return sum; } } // namespace audio -- cgit v1.2.3 From 6c99f9f2fee0928987fe944c8ed29878064df87a Mon Sep 17 00:00:00 2001 From: jacqueline Date: Tue, 8 Aug 2023 11:36:10 +1000 Subject: Fix resampler issue, do a little performance tuning --- src/audio/resample.cpp | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) (limited to 'src/audio/resample.cpp') diff --git a/src/audio/resample.cpp b/src/audio/resample.cpp index aa4c8f2a..7accd0a1 100644 --- a/src/audio/resample.cpp +++ b/src/audio/resample.cpp @@ -17,8 +17,8 @@ namespace audio { static constexpr char kTag[] = "resample"; static constexpr double kLowPassRatio = 0.5; -static constexpr size_t kNumFilters = 8; -static constexpr size_t kTapsPerFilter = 8; +static constexpr size_t kNumFilters = 64; +static constexpr size_t kTapsPerFilter = 16; typedef std::array Filter; static std::array sFilters{}; @@ -64,12 +64,15 @@ auto Resampler::Process(cpp::span input, size_t input_frames = input.size() / num_channels_; size_t output_frames = output.size() / num_channels_; - int half_taps = kTapsPerFilter / 2, i; + int half_taps = kTapsPerFilter / 2; while (output_frames > 0) { if (output_offset_ >= input_index_ - half_taps) { if (input_frames > 0) { + // Check whether the channel buffers will overflow with the addition of + // this sample. If so, we need to move the remaining contents back to + // the beginning of the buffer. if (input_index_ == channel_buffer_size_) { - for (i = 0; i < num_channels_; ++i) { + for (int i = 0; i < num_channels_; ++i) { memmove(channel_buffers_[i], channel_buffers_[i] + channel_buffer_size_ - kTapsPerFilter, kTapsPerFilter * sizeof(float)); @@ -79,21 +82,23 @@ auto Resampler::Process(cpp::span input, input_index_ -= channel_buffer_size_ - kTapsPerFilter; } - for (i = 0; i < num_channels_; ++i) { + for (int i = 0; i < num_channels_; ++i) { channel_buffers_[i][input_index_] = sample::ToFloat(input[samples_used++]); } input_index_++; input_frames--; - } else + } else { break; + } } else { - for (i = 0; i < num_channels_; i++) { + for (int i = 0; i < num_channels_; i++) { output[samples_produced++] = sample::FromFloat(Subsample(i)); } output_offset_ += (1.0f / factor_); + output_frames--; } } @@ -160,8 +165,20 @@ auto Resampler::Subsample(int channel) -> float { source = source.subspan(offset_integral); float offset_fractional = output_offset_ - offset_integral; - int filter_index = offset_fractional * kNumFilters; + /* +// no interpolate +size_t filter_index = std::floor(offset_fractional * kNumFilters + 0.5f); +//ESP_LOGI(kTag, "selected filter %u of %u", filter_index, kNumFilters); +int start_offset = kTapsPerFilter / 2 + 1; +//ESP_LOGI(kTag, "using offset of %i, length %u", start_offset, kTapsPerFilter); + +return ApplyFilter( + sFilters[filter_index], + {source.data() - start_offset, kTapsPerFilter}); + */ + offset_fractional *= kNumFilters; + int filter_index = std::floor(offset_fractional); sum1 = ApplyFilter(sFilters[filter_index], {source.data() - kTapsPerFilter / 2 + 1, kTapsPerFilter}); -- cgit v1.2.3 From 93ccf11fc506b95221ce0c5eddaed9e0e6c8b3b5 Mon Sep 17 00:00:00 2001 From: jacqueline Date: Tue, 8 Aug 2023 13:47:08 +1000 Subject: Investigate and improve core pinning for resampler --- src/audio/resample.cpp | 116 +++++++++++++++++++++++++------------------------ 1 file changed, 59 insertions(+), 57 deletions(-) (limited to 'src/audio/resample.cpp') diff --git a/src/audio/resample.cpp b/src/audio/resample.cpp index 7accd0a1..430a6a26 100644 --- a/src/audio/resample.cpp +++ b/src/audio/resample.cpp @@ -1,4 +1,17 @@ +/* + * Copyright 2023 jacqueline + * + * SPDX-License-Identifier: GPL-3.0-only + */ #include "resample.hpp" +/* + * This file contains the implementation for a 32-bit floating point resampler. + * It is largely based on David Bryant's ART resampler, which is BSD-licensed, + * and available at https://github.com/dbry/audio-resampler/. + * + * This resampler uses windowed sinc interpolation filters, with an additional + * lowpass filter to reduce aliasing. + */ #include #include @@ -14,13 +27,11 @@ namespace audio { -static constexpr char kTag[] = "resample"; - static constexpr double kLowPassRatio = 0.5; static constexpr size_t kNumFilters = 64; -static constexpr size_t kTapsPerFilter = 16; +static constexpr size_t kFilterSize = 16; -typedef std::array Filter; +typedef std::array Filter; static std::array sFilters{}; static bool sFiltersInitialised = false; @@ -35,15 +46,15 @@ Resampler::Resampler(uint32_t source_sample_rate, static_cast(source_sample_rate)), num_channels_(num_channels) { channel_buffers_.resize(num_channels); - channel_buffer_size_ = kTapsPerFilter * 16; + channel_buffer_size_ = kFilterSize * 16; for (int i = 0; i < num_channels; i++) { channel_buffers_[i] = static_cast(calloc(sizeof(float), channel_buffer_size_)); } - output_offset_ = kTapsPerFilter / 2.0f; - input_index_ = kTapsPerFilter; + output_offset_ = kFilterSize / 2.0f; + input_index_ = kFilterSize; if (!sFiltersInitialised) { sFiltersInitialised = true; @@ -64,7 +75,7 @@ auto Resampler::Process(cpp::span input, size_t input_frames = input.size() / num_channels_; size_t output_frames = output.size() / num_channels_; - int half_taps = kTapsPerFilter / 2; + int half_taps = kFilterSize / 2; while (output_frames > 0) { if (output_offset_ >= input_index_ - half_taps) { if (input_frames > 0) { @@ -74,12 +85,12 @@ auto Resampler::Process(cpp::span input, if (input_index_ == channel_buffer_size_) { for (int i = 0; i < num_channels_; ++i) { memmove(channel_buffers_[i], - channel_buffers_[i] + channel_buffer_size_ - kTapsPerFilter, - kTapsPerFilter * sizeof(float)); + channel_buffers_[i] + channel_buffer_size_ - kFilterSize, + kFilterSize * sizeof(float)); } - output_offset_ -= channel_buffer_size_ - kTapsPerFilter; - input_index_ -= channel_buffer_size_ - kTapsPerFilter; + output_offset_ -= channel_buffer_size_ - kFilterSize; + input_index_ -= channel_buffer_size_ - kFilterSize; } for (int i = 0; i < num_channels_; ++i) { @@ -97,7 +108,11 @@ auto Resampler::Process(cpp::span input, output[samples_produced++] = sample::FromFloat(Subsample(i)); } - output_offset_ += (1.0f / factor_); + // NOTE: floating point division here is potentially slow due to FPU + // limitations. Consider explicitly bunding the xtensa libgcc divsion via + // reciprocal implementation if we care about portability between + // compilers. + output_offset_ += 1.0f / factor_; output_frames--; } } @@ -105,36 +120,34 @@ auto Resampler::Process(cpp::span input, return {samples_used, samples_produced}; } +/* + * Constructs the filter in-place for the given index of sFilters. This only + * needs to be done once, per-filter. 64-bit math is okay here, because filters + * will not be initialised within a performance critical path. + */ auto InitFilter(int index) -> void { - const double a0 = 0.35875; - const double a1 = 0.48829; - const double a2 = 0.14128; - const double a3 = 0.01168; + Filter& filter = sFilters[index]; + std::array working_buffer{}; - double fraction = - static_cast(index) / static_cast(kNumFilters); + double fraction = index / static_cast(kNumFilters); double filter_sum = 0.0; - // "dist" is the absolute distance from the sinc maximum to the filter tap to - // be calculated, in radians "ratio" is that distance divided by half the tap - // count such that it reaches π at the window extremes - - // Note that with this scaling, the odd terms of the Blackman-Harris - // calculation appear to be negated with respect to the reference formula - // version. + for (int i = 0; i < kFilterSize; ++i) { + // "dist" is the absolute distance from the sinc maximum to the filter tap + // to be calculated, in radians. + double dist = fabs((kFilterSize / 2.0 - 1.0) + fraction - i) * M_PI; + // "ratio" is that distance divided by half the tap count such that it + // reaches π at the window extremes + double ratio = dist / (kFilterSize / 2.0); - Filter& filter = sFilters[index]; - std::array working_buffer{}; - for (int i = 0; i < kTapsPerFilter; ++i) { - double dist = fabs((kTapsPerFilter / 2.0 - 1.0) + fraction - i) * M_PI; - double ratio = dist / (kTapsPerFilter / 2.0); double value; - if (dist != 0.0) { value = sin(dist * kLowPassRatio) / (dist * kLowPassRatio); - // Blackman-Harris window - value *= a0 + a1 * cos(ratio) + a2 * cos(2 * ratio) + a3 * cos(3 * ratio); + // Hann window. We could alternatively use a Blackman Harris window, + // however our unusually small filter size makes the Hann window's + // steeper cutoff more important. + value *= 0.5 * (1.0 + cos(ratio)); } else { value = 1.0; } @@ -143,50 +156,39 @@ auto InitFilter(int index) -> void { filter_sum += value; } - // filter should have unity DC gain - + // Filter should have unity DC gain double scaler = 1.0 / filter_sum; double error = 0.0; - for (int i = kTapsPerFilter / 2; i < kTapsPerFilter; - i = kTapsPerFilter - i - (i >= kTapsPerFilter / 2)) { + for (int i = kFilterSize / 2; i < kFilterSize; + i = kFilterSize - i - (i >= kFilterSize / 2)) { working_buffer[i] *= scaler; filter[i] = working_buffer[i] - error; error += static_cast(filter[i]) - working_buffer[i]; } } +/* + * Performs sub-sampling with interpolation for the given channel. Assumes that + * the channel buffer has already been filled with samples. + */ auto Resampler::Subsample(int channel) -> float { - float sum1, sum2; - cpp::span source{channel_buffers_[channel], channel_buffer_size_}; int offset_integral = std::floor(output_offset_); source = source.subspan(offset_integral); float offset_fractional = output_offset_ - offset_integral; - /* -// no interpolate -size_t filter_index = std::floor(offset_fractional * kNumFilters + 0.5f); -//ESP_LOGI(kTag, "selected filter %u of %u", filter_index, kNumFilters); -int start_offset = kTapsPerFilter / 2 + 1; -//ESP_LOGI(kTag, "using offset of %i, length %u", start_offset, kTapsPerFilter); - -return ApplyFilter( - sFilters[filter_index], - {source.data() - start_offset, kTapsPerFilter}); - */ - offset_fractional *= kNumFilters; int filter_index = std::floor(offset_fractional); - sum1 = ApplyFilter(sFilters[filter_index], - {source.data() - kTapsPerFilter / 2 + 1, kTapsPerFilter}); + float sum1 = ApplyFilter(sFilters[filter_index], + {source.data() - kFilterSize / 2 + 1, kFilterSize}); offset_fractional -= filter_index; - sum2 = ApplyFilter(sFilters[filter_index + 1], - {source.data() - kTapsPerFilter / 2 + 1, kTapsPerFilter}); + float sum2 = ApplyFilter(sFilters[filter_index + 1], + {source.data() - kFilterSize / 2 + 1, kFilterSize}); return (sum2 * offset_fractional) + (sum1 * (1.0f - offset_fractional)); } @@ -194,7 +196,7 @@ return ApplyFilter( auto Resampler::ApplyFilter(cpp::span filter, cpp::span input) -> float { float sum = 0.0; - for (int i = 0; i < kTapsPerFilter; i++) { + for (int i = 0; i < kFilterSize; i++) { sum += filter[i] * input[i]; } return sum; -- cgit v1.2.3