diff options
| author | jacqueline <me@jacqueline.id.au> | 2023-08-10 19:12:38 +1000 |
|---|---|---|
| committer | jacqueline <me@jacqueline.id.au> | 2023-08-10 19:12:38 +1000 |
| commit | 958160aa545e3d91b2a4f1a367817e73d298e8a9 (patch) | |
| tree | 190e6591a6dda1f0d9651c7e127666ead2a3373b /src/audio | |
| parent | d8fc77101dcf80a3643a00b3446dca1e390ce997 (diff) | |
| download | tangara-fw-958160aa545e3d91b2a4f1a367817e73d298e8a9.tar.gz | |
Use the libspeexdsp resampler
AFAICT it runs a little slower, but it's fixed-point and has much
better-understood audio characteristics.
Diffstat (limited to 'src/audio')
| -rw-r--r-- | src/audio/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | src/audio/audio_task.cpp | 2 | ||||
| -rw-r--r-- | src/audio/fatfs_source.cpp | 4 | ||||
| -rw-r--r-- | src/audio/include/resample.hpp | 21 | ||||
| -rw-r--r-- | src/audio/resample.cpp | 181 | ||||
| -rw-r--r-- | src/audio/sink_mixer.cpp | 41 |
6 files changed, 48 insertions, 204 deletions
diff --git a/src/audio/CMakeLists.txt b/src/audio/CMakeLists.txt index 02e84c3f..2d332a1e 100644 --- a/src/audio/CMakeLists.txt +++ b/src/audio/CMakeLists.txt @@ -8,6 +8,7 @@ idf_component_register( "stream_event.cpp" "stream_info.cpp" "audio_fsm.cpp" "sink_mixer.cpp" "resample.cpp" "fatfs_source.cpp" INCLUDE_DIRS "include" - REQUIRES "codecs" "drivers" "cbor" "result" "tasks" "span" "memory" "tinyfsm" "database" "system_fsm" "playlist") + REQUIRES "codecs" "drivers" "cbor" "result" "tasks" "span" "memory" "tinyfsm" + "database" "system_fsm" "playlist" "speexdsp") target_compile_options(${COMPONENT_LIB} PRIVATE ${EXTRA_WARNINGS}) diff --git a/src/audio/audio_task.cpp b/src/audio/audio_task.cpp index d880e6b1..797ab7f9 100644 --- a/src/audio/audio_task.cpp +++ b/src/audio/audio_task.cpp @@ -165,7 +165,7 @@ auto AudioTask::BeginDecoding(std::shared_ptr<codecs::IStream> stream) -> bool { current_sink_format_ = IAudioSink::Format{ .sample_rate = open_res->sample_rate_hz, .num_channels = open_res->num_channels, - .bits_per_sample = 32, + .bits_per_sample = 16, }; ESP_LOGI(kTag, "stream started ok"); events::Audio().Dispatch(internal::InputFileOpened{}); diff --git a/src/audio/fatfs_source.cpp b/src/audio/fatfs_source.cpp index 6a9aea47..58986648 100644 --- a/src/audio/fatfs_source.cpp +++ b/src/audio/fatfs_source.cpp @@ -31,7 +31,6 @@ FatfsSource::~FatfsSource() { auto FatfsSource::Read(cpp::span<std::byte> dest) -> ssize_t { if (f_eof(file_.get())) { - ESP_LOGI(kTag, "read from empty file"); return 0; } UINT bytes_read = 0; @@ -40,8 +39,6 @@ auto FatfsSource::Read(cpp::span<std::byte> dest) -> ssize_t { ESP_LOGE(kTag, "error reading from file"); return -1; } - ESP_LOGI(kTag, "read %u bytes into %p (%u)", bytes_read, dest.data(), - dest.size_bytes()); return bytes_read; } @@ -50,7 +47,6 @@ auto FatfsSource::CanSeek() -> bool { } auto FatfsSource::SeekTo(int64_t destination, SeekFrom from) -> void { - ESP_LOGI(kTag, "seeking to %llu", destination); switch (from) { case 
SeekFrom::kStartOfStream: f_lseek(file_.get(), destination); diff --git a/src/audio/include/resample.hpp b/src/audio/include/resample.hpp index 3855415a..7b114f59 100644 --- a/src/audio/include/resample.hpp +++ b/src/audio/include/resample.hpp @@ -4,6 +4,7 @@ #include <vector> #include "span.hpp" +#include "speex/speex_resampler.h" #include "sample.hpp" @@ -17,28 +18,14 @@ class Resampler { ~Resampler(); - auto source_sample_rate() -> uint32_t { return source_sample_rate_; } - auto target_sample_rate() -> uint32_t { return target_sample_rate_; } - auto channels() -> uint_fast8_t { return num_channels_; } - - auto Process(cpp::span<const sample::Sample> input, + auto Process(cpp::span<sample::Sample> input, cpp::span<sample::Sample> output, bool end_of_data) -> std::pair<size_t, size_t>; private: - auto Subsample(int channel) -> float; - auto ApplyFilter(cpp::span<float> filter, cpp::span<float> input) -> float; - - uint32_t source_sample_rate_; - uint32_t target_sample_rate_; - float factor_; + int err_; + SpeexResamplerState* resampler_; uint8_t num_channels_; - - std::vector<float*> channel_buffers_; - size_t channel_buffer_size_; - - float output_offset_; - int32_t input_index_; }; } // namespace audio
\ No newline at end of file diff --git a/src/audio/resample.cpp b/src/audio/resample.cpp index 430a6a26..bc2c7c51 100644 --- a/src/audio/resample.cpp +++ b/src/audio/resample.cpp @@ -23,183 +23,42 @@ #include "esp_log.h" #include "sample.hpp" +#include "speex/speex_resampler.h" #include "stream_info.hpp" namespace audio { -static constexpr double kLowPassRatio = 0.5; -static constexpr size_t kNumFilters = 64; -static constexpr size_t kFilterSize = 16; - -typedef std::array<float, kFilterSize> Filter; -static std::array<Filter, kNumFilters + 1> sFilters{}; -static bool sFiltersInitialised = false; - -auto InitFilter(int index) -> void; +static constexpr int kQuality = SPEEX_RESAMPLER_QUALITY_MIN; Resampler::Resampler(uint32_t source_sample_rate, uint32_t target_sample_rate, uint8_t num_channels) - : source_sample_rate_(source_sample_rate), - target_sample_rate_(target_sample_rate), - factor_(static_cast<double>(target_sample_rate) / - static_cast<double>(source_sample_rate)), + : err_(0), + resampler_(speex_resampler_init(num_channels, + source_sample_rate, + target_sample_rate, + kQuality, + &err_)), num_channels_(num_channels) { - channel_buffers_.resize(num_channels); - channel_buffer_size_ = kFilterSize * 16; - - for (int i = 0; i < num_channels; i++) { - channel_buffers_[i] = - static_cast<float*>(calloc(sizeof(float), channel_buffer_size_)); - } - - output_offset_ = kFilterSize / 2.0f; - input_index_ = kFilterSize; - - if (!sFiltersInitialised) { - sFiltersInitialised = true; - for (int i = 0; i < kNumFilters + 1; i++) { - InitFilter(i); - } - } + assert(err_ == 0); } -Resampler::~Resampler() {} +Resampler::~Resampler() { + speex_resampler_destroy(resampler_); +} -auto Resampler::Process(cpp::span<const sample::Sample> input, +auto Resampler::Process(cpp::span<sample::Sample> input, cpp::span<sample::Sample> output, bool end_of_data) -> std::pair<size_t, size_t> { - size_t samples_used = 0; - size_t samples_produced = 0; - - size_t input_frames = input.size() 
/ num_channels_; - size_t output_frames = output.size() / num_channels_; - - int half_taps = kFilterSize / 2; - while (output_frames > 0) { - if (output_offset_ >= input_index_ - half_taps) { - if (input_frames > 0) { - // Check whether the channel buffers will overflow with the addition of - // this sample. If so, we need to move the remaining contents back to - // the beginning of the buffer. - if (input_index_ == channel_buffer_size_) { - for (int i = 0; i < num_channels_; ++i) { - memmove(channel_buffers_[i], - channel_buffers_[i] + channel_buffer_size_ - kFilterSize, - kFilterSize * sizeof(float)); - } - - output_offset_ -= channel_buffer_size_ - kFilterSize; - input_index_ -= channel_buffer_size_ - kFilterSize; - } - - for (int i = 0; i < num_channels_; ++i) { - channel_buffers_[i][input_index_] = - sample::ToFloat(input[samples_used++]); - } - - input_index_++; - input_frames--; - } else { - break; - } - } else { - for (int i = 0; i < num_channels_; i++) { - output[samples_produced++] = sample::FromFloat(Subsample(i)); - } - - // NOTE: floating point division here is potentially slow due to FPU - // limitations. Consider explicitly bunding the xtensa libgcc divsion via - // reciprocal implementation if we care about portability between - // compilers. - output_offset_ += 1.0f / factor_; - output_frames--; - } - } - - return {samples_used, samples_produced}; -} - -/* - * Constructs the filter in-place for the given index of sFilters. This only - * needs to be done once, per-filter. 64-bit math is okay here, because filters - * will not be initialised within a performance critical path. 
- */ -auto InitFilter(int index) -> void { - Filter& filter = sFilters[index]; - std::array<double, kFilterSize> working_buffer{}; + uint32_t samples_used = input.size() / num_channels_; + uint32_t samples_produced = output.size() / num_channels_; - double fraction = index / static_cast<double>(kNumFilters); - double filter_sum = 0.0; - - for (int i = 0; i < kFilterSize; ++i) { - // "dist" is the absolute distance from the sinc maximum to the filter tap - // to be calculated, in radians. - double dist = fabs((kFilterSize / 2.0 - 1.0) + fraction - i) * M_PI; - // "ratio" is that distance divided by half the tap count such that it - // reaches π at the window extremes - double ratio = dist / (kFilterSize / 2.0); - - double value; - if (dist != 0.0) { - value = sin(dist * kLowPassRatio) / (dist * kLowPassRatio); - - // Hann window. We could alternatively use a Blackman Harris window, - // however our unusually small filter size makes the Hann window's - // steeper cutoff more important. - value *= 0.5 * (1.0 + cos(ratio)); - } else { - value = 1.0; - } - - working_buffer[i] = value; - filter_sum += value; - } - - // Filter should have unity DC gain - double scaler = 1.0 / filter_sum; - double error = 0.0; - - for (int i = kFilterSize / 2; i < kFilterSize; - i = kFilterSize - i - (i >= kFilterSize / 2)) { - working_buffer[i] *= scaler; - filter[i] = working_buffer[i] - error; - error += static_cast<double>(filter[i]) - working_buffer[i]; - } -} - -/* - * Performs sub-sampling with interpolation for the given channel. Assumes that - * the channel buffer has already been filled with samples. 
- */ -auto Resampler::Subsample(int channel) -> float { - cpp::span<float> source{channel_buffers_[channel], channel_buffer_size_}; - - int offset_integral = std::floor(output_offset_); - source = source.subspan(offset_integral); - float offset_fractional = output_offset_ - offset_integral; - - offset_fractional *= kNumFilters; - int filter_index = std::floor(offset_fractional); - - float sum1 = ApplyFilter(sFilters[filter_index], - {source.data() - kFilterSize / 2 + 1, kFilterSize}); - - offset_fractional -= filter_index; - - float sum2 = ApplyFilter(sFilters[filter_index + 1], - {source.data() - kFilterSize / 2 + 1, kFilterSize}); - - return (sum2 * offset_fractional) + (sum1 * (1.0f - offset_fractional)); -} + int err = speex_resampler_process_interleaved_int( + resampler_, input.data(), &samples_used, output.data(), + &samples_produced); + assert(err == 0); -auto Resampler::ApplyFilter(cpp::span<float> filter, cpp::span<float> input) - -> float { - float sum = 0.0; - for (int i = 0; i < kFilterSize; i++) { - sum += filter[i] * input[i]; - } - return sum; + return {samples_used * num_channels_, samples_produced * num_channels_}; } } // namespace audio diff --git a/src/audio/sink_mixer.cpp b/src/audio/sink_mixer.cpp index 9f973d4b..5e712582 100644 --- a/src/audio/sink_mixer.cpp +++ b/src/audio/sink_mixer.cpp @@ -47,10 +47,7 @@ SinkMixer::SinkMixer(IAudioSink* sink) kSampleBufferLength, sizeof(sample::Sample), MALLOC_CAP_SPIRAM)), kSampleBufferLength}; - // Pin to CORE0 because we need the FPU. - // FIXME: A fixed point implementation could run freely on either core, - // which should lead to a big performance increase. 
- tasks::StartPersistent<tasks::Type::kMixer>(0, [&]() { Main(); }); + tasks::StartPersistent<tasks::Type::kMixer>([&]() { Main(); }); } SinkMixer::~SinkMixer() { @@ -100,7 +97,6 @@ auto SinkMixer::Main() -> void { vTaskDelay(pdMS_TO_TICKS(10)); } - ESP_LOGI(kTag, "configuring sink"); sink_->Configure(new_target); } target_format_ = new_target; @@ -136,6 +132,7 @@ auto SinkMixer::Main() -> void { // bytes we read were half a frame. Either way, we need to calculate the // size of the remainder in bytes. size_t bytes_used = samples_used * sizeof(sample::Sample); + assert(bytes_used <= bytes_in_buffer); leftover_bytes_ = bytes_in_buffer - bytes_used; if (leftover_bytes_ == 0) { leftover_offset_ = 0; @@ -157,20 +154,22 @@ auto SinkMixer::HandleSamples(cpp::span<sample::Sample> input, bool is_eos) } size_t samples_used = 0; - while (input.size() < samples_used) { + while (samples_used < input.size()) { cpp::span<sample::Sample> output_source; if (source_format_.sample_rate != target_format_.sample_rate) { if (resampler_ == nullptr) { - ESP_LOGI(kTag, "creating new resampler"); + ESP_LOGI(kTag, "creating new resampler for %lu -> %lu", + source_format_.sample_rate, target_format_.sample_rate); resampler_.reset(new Resampler(source_format_.sample_rate, target_format_.sample_rate, source_format_.num_channels)); } size_t read, written; - std::tie(read, written) = - resampler_->Process(input, resampled_buffer_, is_eos); + std::tie(read, written) = resampler_->Process(input.subspan(samples_used), + resampled_buffer_, is_eos); samples_used += read; + if (read == 0 && written == 0) { // Zero samples used or written. We need more input. break; @@ -181,20 +180,22 @@ auto SinkMixer::HandleSamples(cpp::span<sample::Sample> input, bool is_eos) samples_used = input.size(); } - if (target_format_.bits_per_sample == 16) { - // FIXME: The source should have some kind of hint indicating whether it - // needs dither, since some codecs (e.g. opus) apply their own dither. 
- ApplyDither(output_source, 16); - - cpp::span<int16_t> dest{reinterpret_cast<int16_t*>(output_source.data()), - output_source.size()}; - for (size_t i = 0; i < output_source.size(); i++) { - dest[i] = sample::ToSigned16Bit(output_source[i]); - } + /* + if (target_format_.bits_per_sample == 16) { + // FIXME: The source should have some kind of hint indicating whether it + // needs dither, since some codecs (e.g. opus) apply their own dither. + ApplyDither(output_source, 16); - output_source = output_source.first(output_source.size() / 2); + cpp::span<int16_t> dest{reinterpret_cast<int16_t*>(output_source.data()), + output_source.size()}; + for (size_t i = 0; i < output_source.size(); i++) { + dest[i] = sample::ToSigned16Bit(output_source[i]); } + output_source = output_source.first(output_source.size() / 2); + } + */ + size_t bytes_sent = 0; size_t bytes_to_send = output_source.size_bytes(); while (bytes_sent < bytes_to_send) { |
