Use the libspeexdsp resampler

AFAICT it runs a little slower, but it's fixed-point and has much better-understood audio characteristics.
author: jacqueline <me@jacqueline.id.au> 2023-08-10 19:12:38 +1000
committer: jacqueline <me@jacqueline.id.au> 2023-08-10 19:12:38 +1000
commit: 958160aa545e3d91b2a4f1a367817e73d298e8a9 (patch)
tree: 190e6591a6dda1f0d9651c7e127666ead2a3373b /src/audio
parent: d8fc77101dcf80a3643a00b3446dca1e390ce997 (diff)
download: tangara-fw-958160aa545e3d91b2a4f1a367817e73d298e8a9.tar.gz
6 files changed, 48 insertions, 204 deletions
diff --git a/src/audio/CMakeLists.txt b/src/audio/CMakeLists.txt
index 02e84c3f..2d332a1e 100644
--- a/src/audio/CMakeLists.txt
+++ b/src/audio/CMakeLists.txt
@@ -8,6 +8,7 @@ idf_component_register(
   "stream_event.cpp" "stream_info.cpp" "audio_fsm.cpp" "sink_mixer.cpp" "resample.cpp"
   "fatfs_source.cpp"
   INCLUDE_DIRS "include"
-  REQUIRES "codecs" "drivers" "cbor" "result" "tasks" "span" "memory" "tinyfsm" "database" "system_fsm" "playlist")
+  REQUIRES "codecs" "drivers" "cbor" "result" "tasks" "span" "memory" "tinyfsm"
+  "database" "system_fsm" "playlist" "speexdsp")
 
 target_compile_options(${COMPONENT_LIB} PRIVATE ${EXTRA_WARNINGS})
diff --git a/src/audio/audio_task.cpp b/src/audio/audio_task.cpp
index d880e6b1..797ab7f9 100644
--- a/src/audio/audio_task.cpp
+++ b/src/audio/audio_task.cpp
@@ -165,7 +165,7 @@ auto AudioTask::BeginDecoding(std::shared_ptr<codecs::IStream> stream) -> bool {
   current_sink_format_ = IAudioSink::Format{
       .sample_rate = open_res->sample_rate_hz,
       .num_channels = open_res->num_channels,
-      .bits_per_sample = 32,
+      .bits_per_sample = 16,
   };
   ESP_LOGI(kTag, "stream started ok");
   events::Audio().Dispatch(internal::InputFileOpened{});
diff --git a/src/audio/fatfs_source.cpp b/src/audio/fatfs_source.cpp
index 6a9aea47..58986648 100644
--- a/src/audio/fatfs_source.cpp
+++ b/src/audio/fatfs_source.cpp
@@ -31,7 +31,6 @@ FatfsSource::~FatfsSource() {
 
 auto FatfsSource::Read(cpp::span<std::byte> dest) -> ssize_t {
   if (f_eof(file_.get())) {
-    ESP_LOGI(kTag, "read from empty file");
     return 0;
   }
   UINT bytes_read = 0;
@@ -40,8 +39,6 @@ auto FatfsSource::Read(cpp::span<std::byte> dest) -> ssize_t {
     ESP_LOGE(kTag, "error reading from file");
     return -1;
   }
-  ESP_LOGI(kTag, "read %u bytes into %p (%u)", bytes_read, dest.data(),
-           dest.size_bytes());
   return bytes_read;
 }
 
@@ -50,7 +47,6 @@ auto FatfsSource::CanSeek() -> bool {
 }
 
 auto FatfsSource::SeekTo(int64_t destination, SeekFrom from) -> void {
-  ESP_LOGI(kTag, "seeking to %llu", destination);
   switch (from) {
     case SeekFrom::kStartOfStream:
       f_lseek(file_.get(), destination);
diff --git a/src/audio/include/resample.hpp b/src/audio/include/resample.hpp
index 3855415a..7b114f59 100644
--- a/src/audio/include/resample.hpp
+++ b/src/audio/include/resample.hpp
@@ -4,6 +4,7 @@
 #include <vector>
 
 #include "span.hpp"
+#include "speex/speex_resampler.h"
 
 #include "sample.hpp"
 
@@ -17,28 +18,14 @@ class Resampler {
 
   ~Resampler();
 
-  auto source_sample_rate() -> uint32_t { return source_sample_rate_; }
-  auto target_sample_rate() -> uint32_t { return target_sample_rate_; }
-  auto channels() -> uint_fast8_t { return num_channels_; }
-
-  auto Process(cpp::span<const sample::Sample> input,
+  auto Process(cpp::span<sample::Sample> input,
                cpp::span<sample::Sample> output,
                bool end_of_data) -> std::pair<size_t, size_t>;
 
  private:
-  auto Subsample(int channel) -> float;
-  auto ApplyFilter(cpp::span<float> filter, cpp::span<float> input) -> float;
-
-  uint32_t source_sample_rate_;
-  uint32_t target_sample_rate_;
-  float factor_;
+  int err_;
+  SpeexResamplerState* resampler_;
   uint8_t num_channels_;
-
-  std::vector<float*> channel_buffers_;
-  size_t channel_buffer_size_;
-
-  float output_offset_;
-  int32_t input_index_;
 };
 
 }  // namespace audio
 \ No newline at end of file
diff --git a/src/audio/resample.cpp b/src/audio/resample.cpp
index 430a6a26..bc2c7c51 100644
--- a/src/audio/resample.cpp
+++ b/src/audio/resample.cpp
@@ -23,183 +23,42 @@
 #include "esp_log.h"
 
 #include "sample.hpp"
+#include "speex/speex_resampler.h"
 #include "stream_info.hpp"
 
 namespace audio {
 
-static constexpr double kLowPassRatio = 0.5;
-static constexpr size_t kNumFilters = 64;
-static constexpr size_t kFilterSize = 16;
-
-typedef std::array<float, kFilterSize> Filter;
-static std::array<Filter, kNumFilters + 1> sFilters{};
-static bool sFiltersInitialised = false;
-
-auto InitFilter(int index) -> void;
+static constexpr int kQuality = SPEEX_RESAMPLER_QUALITY_MIN;
 
 Resampler::Resampler(uint32_t source_sample_rate,
                      uint32_t target_sample_rate,
                      uint8_t num_channels)
-    : source_sample_rate_(source_sample_rate),
-      target_sample_rate_(target_sample_rate),
-      factor_(static_cast<double>(target_sample_rate) /
-              static_cast<double>(source_sample_rate)),
+    : err_(0),
+      resampler_(speex_resampler_init(num_channels,
+                                      source_sample_rate,
+                                      target_sample_rate,
+                                      kQuality,
+                                      &err_)),
       num_channels_(num_channels) {
-  channel_buffers_.resize(num_channels);
-  channel_buffer_size_ = kFilterSize * 16;
-
-  for (int i = 0; i < num_channels; i++) {
-    channel_buffers_[i] =
-        static_cast<float*>(calloc(sizeof(float), channel_buffer_size_));
-  }
-
-  output_offset_ = kFilterSize / 2.0f;
-  input_index_ = kFilterSize;
-
-  if (!sFiltersInitialised) {
-    sFiltersInitialised = true;
-    for (int i = 0; i < kNumFilters + 1; i++) {
-      InitFilter(i);
-    }
-  }
+  assert(err_ == 0);
 }
 
-Resampler::~Resampler() {}
+Resampler::~Resampler() {
+  speex_resampler_destroy(resampler_);
+}
 
-auto Resampler::Process(cpp::span<const sample::Sample> input,
+auto Resampler::Process(cpp::span<sample::Sample> input,
                         cpp::span<sample::Sample> output,
                         bool end_of_data) -> std::pair<size_t, size_t> {
-  size_t samples_used = 0;
-  size_t samples_produced = 0;
-
-  size_t input_frames = input.size() / num_channels_;
-  size_t output_frames = output.size() / num_channels_;
-
-  int half_taps = kFilterSize / 2;
-  while (output_frames > 0) {
-    if (output_offset_ >= input_index_ - half_taps) {
-      if (input_frames > 0) {
-        // Check whether the channel buffers will overflow with the addition of
-        // this sample. If so, we need to move the remaining contents back to
-        // the beginning of the buffer.
-        if (input_index_ == channel_buffer_size_) {
-          for (int i = 0; i < num_channels_; ++i) {
-            memmove(channel_buffers_[i],
-                    channel_buffers_[i] + channel_buffer_size_ - kFilterSize,
-                    kFilterSize * sizeof(float));
-          }
-
-          output_offset_ -= channel_buffer_size_ - kFilterSize;
-          input_index_ -= channel_buffer_size_ - kFilterSize;
-        }
-
-        for (int i = 0; i < num_channels_; ++i) {
-          channel_buffers_[i][input_index_] =
-              sample::ToFloat(input[samples_used++]);
-        }
-
-        input_index_++;
-        input_frames--;
-      } else {
-        break;
-      }
-    } else {
-      for (int i = 0; i < num_channels_; i++) {
-        output[samples_produced++] = sample::FromFloat(Subsample(i));
-      }
-
-      // NOTE: floating point division here is potentially slow due to FPU
-      // limitations. Consider explicitly bunding the xtensa libgcc divsion via
-      // reciprocal implementation if we care about portability between
-      // compilers.
-      output_offset_ += 1.0f / factor_;
-      output_frames--;
-    }
-  }
-
-  return {samples_used, samples_produced};
-}
-
-/*
- * Constructs the filter in-place for the given index of sFilters. This only
- * needs to be done once, per-filter. 64-bit math is okay here, because filters
- * will not be initialised within a performance critical path.
- */
-auto InitFilter(int index) -> void {
-  Filter& filter = sFilters[index];
-  std::array<double, kFilterSize> working_buffer{};
+  uint32_t samples_used = input.size() / num_channels_;
+  uint32_t samples_produced = output.size() / num_channels_;
 
-  double fraction = index / static_cast<double>(kNumFilters);
-  double filter_sum = 0.0;
-
-  for (int i = 0; i < kFilterSize; ++i) {
-    // "dist" is the absolute distance from the sinc maximum to the filter tap
-    //  to be calculated, in radians.
-    double dist = fabs((kFilterSize / 2.0 - 1.0) + fraction - i) * M_PI;
-    // "ratio" is that distance divided by half the tap count such that it
-    // reaches π at the window extremes
-    double ratio = dist / (kFilterSize / 2.0);
-
-    double value;
-    if (dist != 0.0) {
-      value = sin(dist * kLowPassRatio) / (dist * kLowPassRatio);
-
-      // Hann window. We could alternatively use a Blackman Harris window,
-      // however our unusually small filter size makes the Hann window's
-      // steeper cutoff more important.
-      value *= 0.5 * (1.0 + cos(ratio));
-    } else {
-      value = 1.0;
-    }
-
-    working_buffer[i] = value;
-    filter_sum += value;
-  }
-
-  // Filter should have unity DC gain
-  double scaler = 1.0 / filter_sum;
-  double error = 0.0;
-
-  for (int i = kFilterSize / 2; i < kFilterSize;
-       i = kFilterSize - i - (i >= kFilterSize / 2)) {
-    working_buffer[i] *= scaler;
-    filter[i] = working_buffer[i] - error;
-    error += static_cast<double>(filter[i]) - working_buffer[i];
-  }
-}
-
-/*
- * Performs sub-sampling with interpolation for the given channel. Assumes that
- * the channel buffer has already been filled with samples.
- */
-auto Resampler::Subsample(int channel) -> float {
-  cpp::span<float> source{channel_buffers_[channel], channel_buffer_size_};
-
-  int offset_integral = std::floor(output_offset_);
-  source = source.subspan(offset_integral);
-  float offset_fractional = output_offset_ - offset_integral;
-
-  offset_fractional *= kNumFilters;
-  int filter_index = std::floor(offset_fractional);
-
-  float sum1 = ApplyFilter(sFilters[filter_index],
-                           {source.data() - kFilterSize / 2 + 1, kFilterSize});
-
-  offset_fractional -= filter_index;
-
-  float sum2 = ApplyFilter(sFilters[filter_index + 1],
-                           {source.data() - kFilterSize / 2 + 1, kFilterSize});
-
-  return (sum2 * offset_fractional) + (sum1 * (1.0f - offset_fractional));
-}
+  int err = speex_resampler_process_interleaved_int(
+      resampler_, input.data(), &samples_used, output.data(),
+      &samples_produced);
+  assert(err == 0);
 
-auto Resampler::ApplyFilter(cpp::span<float> filter, cpp::span<float> input)
-    -> float {
-  float sum = 0.0;
-  for (int i = 0; i < kFilterSize; i++) {
-    sum += filter[i] * input[i];
-  }
-  return sum;
+  return {samples_used * num_channels_, samples_produced * num_channels_};
 }
 
 }  // namespace audio
diff --git a/src/audio/sink_mixer.cpp b/src/audio/sink_mixer.cpp
index 9f973d4b..5e712582 100644
--- a/src/audio/sink_mixer.cpp
+++ b/src/audio/sink_mixer.cpp
@@ -47,10 +47,7 @@ SinkMixer::SinkMixer(IAudioSink* sink)
           kSampleBufferLength, sizeof(sample::Sample), MALLOC_CAP_SPIRAM)),
       kSampleBufferLength};
 
-  // Pin to CORE0 because we need the FPU.
-  // FIXME: A fixed point implementation could run freely on either core,
-  // which should lead to a big performance increase.
-  tasks::StartPersistent<tasks::Type::kMixer>(0, [&]() { Main(); });
+  tasks::StartPersistent<tasks::Type::kMixer>([&]() { Main(); });
 }
 
 SinkMixer::~SinkMixer() {
@@ -100,7 +97,6 @@ auto SinkMixer::Main() -> void {
           vTaskDelay(pdMS_TO_TICKS(10));
         }
 
-        ESP_LOGI(kTag, "configuring sink");
         sink_->Configure(new_target);
       }
       target_format_ = new_target;
@@ -136,6 +132,7 @@ auto SinkMixer::Main() -> void {
       // bytes we read were half a frame. Either way, we need to calculate the
       // size of the remainder in bytes.
       size_t bytes_used = samples_used * sizeof(sample::Sample);
+      assert(bytes_used <= bytes_in_buffer);
       leftover_bytes_ = bytes_in_buffer - bytes_used;
       if (leftover_bytes_ == 0) {
         leftover_offset_ = 0;
@@ -157,20 +154,22 @@ auto SinkMixer::HandleSamples(cpp::span<sample::Sample> input, bool is_eos)
   }
 
   size_t samples_used = 0;
-  while (input.size() < samples_used) {
+  while (samples_used < input.size()) {
     cpp::span<sample::Sample> output_source;
     if (source_format_.sample_rate != target_format_.sample_rate) {
       if (resampler_ == nullptr) {
-        ESP_LOGI(kTag, "creating new resampler");
+        ESP_LOGI(kTag, "creating new resampler for %lu -> %lu",
+                 source_format_.sample_rate, target_format_.sample_rate);
         resampler_.reset(new Resampler(source_format_.sample_rate,
                                        target_format_.sample_rate,
                                        source_format_.num_channels));
       }
 
       size_t read, written;
-      std::tie(read, written) =
-          resampler_->Process(input, resampled_buffer_, is_eos);
+      std::tie(read, written) = resampler_->Process(input.subspan(samples_used),
+                                                    resampled_buffer_, is_eos);
       samples_used += read;
+
       if (read == 0 && written == 0) {
         // Zero samples used or written. We need more input.
         break;
@@ -181,20 +180,22 @@ auto SinkMixer::HandleSamples(cpp::span<sample::Sample> input, bool is_eos)
       samples_used = input.size();
     }
 
-    if (target_format_.bits_per_sample == 16) {
-      // FIXME: The source should have some kind of hint indicating whether it
-      // needs dither, since some codecs (e.g. opus) apply their own dither.
-      ApplyDither(output_source, 16);
-
-      cpp::span<int16_t> dest{reinterpret_cast<int16_t*>(output_source.data()),
-                              output_source.size()};
-      for (size_t i = 0; i < output_source.size(); i++) {
-        dest[i] = sample::ToSigned16Bit(output_source[i]);
-      }
+    /*
+  if (target_format_.bits_per_sample == 16) {
+    // FIXME: The source should have some kind of hint indicating whether it
+    // needs dither, since some codecs (e.g. opus) apply their own dither.
+    ApplyDither(output_source, 16);
 
-      output_source = output_source.first(output_source.size() / 2);
+    cpp::span<int16_t> dest{reinterpret_cast<int16_t*>(output_source.data()),
+                            output_source.size()};
+    for (size_t i = 0; i < output_source.size(); i++) {
+      dest[i] = sample::ToSigned16Bit(output_source[i]);
     }
 
+    output_source = output_source.first(output_source.size() / 2);
+  }
+    */
+
     size_t bytes_sent = 0;
     size_t bytes_to_send = output_source.size_bytes();
     while (bytes_sent < bytes_to_send) {
author	jacqueline <me@jacqueline.id.au>	2023-08-10 19:12:38 +1000
committer	jacqueline <me@jacqueline.id.au>	2023-08-10 19:12:38 +1000
commit	958160aa545e3d91b2a4f1a367817e73d298e8a9 (patch)
tree	190e6591a6dda1f0d9651c7e127666ead2a3373b /src/audio
parent	d8fc77101dcf80a3643a00b3446dca1e390ce997 (diff)
download	tangara-fw-958160aa545e3d91b2a4f1a367817e73d298e8a9.tar.gz