From f3c5eec0251ec98f90d324c88d3519de2e6ee5e0 Mon Sep 17 00:00:00 2001
From: jacqueline <me@jacqueline.id.au>
Date: Wed, 16 Aug 2023 15:11:30 +1000
Subject: Rename the main audio tasks to be more sensible

---
 src/audio/CMakeLists.txt              |   4 +-
 src/audio/audio_converter.cpp         | 201 ++++++++++++++++++++++++++++++++++
 src/audio/audio_decoder.cpp           | 166 ++++++++++++++++++++++++++++
 src/audio/audio_fsm.cpp               |  15 ++-
 src/audio/audio_task.cpp              | 161 ---------------------------
 src/audio/include/audio_converter.hpp |  65 +++++++++++
 src/audio/include/audio_decoder.hpp   |  73 ++++++++++++
 src/audio/include/audio_fsm.hpp       |   6 +-
 src/audio/include/audio_task.hpp      |  65 -----------
 src/audio/include/sink_mixer.hpp      |  63 -----------
 src/audio/sink_mixer.cpp              | 201 ----------------------------------
 11 files changed, 517 insertions(+), 503 deletions(-)
 create mode 100644 src/audio/audio_converter.cpp
 create mode 100644 src/audio/audio_decoder.cpp
 delete mode 100644 src/audio/audio_task.cpp
 create mode 100644 src/audio/include/audio_converter.hpp
 create mode 100644 src/audio/include/audio_decoder.hpp
 delete mode 100644 src/audio/include/audio_task.hpp
 delete mode 100644 src/audio/include/sink_mixer.hpp
 delete mode 100644 src/audio/sink_mixer.cpp
diff --git a/src/audio/CMakeLists.txt b/src/audio/CMakeLists.txt
index de6c9b64..df5622f5 100644
--- a/src/audio/CMakeLists.txt
+++ b/src/audio/CMakeLists.txt
@@ -3,8 +3,8 @@
 # SPDX-License-Identifier: GPL-3.0-only
 
 idf_component_register(
-  SRCS "audio_task.cpp" "fatfs_audio_input.cpp" "i2s_audio_output.cpp"
-  "track_queue.cpp" "audio_fsm.cpp" "sink_mixer.cpp" "resample.cpp"
+  SRCS "audio_decoder.cpp" "fatfs_audio_input.cpp" "i2s_audio_output.cpp"
+  "track_queue.cpp" "audio_fsm.cpp" "audio_converter.cpp" "resample.cpp"
   "fatfs_source.cpp" "bt_audio_output.cpp"
   INCLUDE_DIRS "include"
   REQUIRES "codecs" "drivers" "cbor" "result" "tasks" "span" "memory" "tinyfsm"
diff --git a/src/audio/audio_converter.cpp b/src/audio/audio_converter.cpp
new file mode 100644
index 00000000..c540d821
--- /dev/null
+++ b/src/audio/audio_converter.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include "audio_converter.hpp"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+
+#include "audio_sink.hpp"
+#include "esp_heap_caps.h"
+#include "esp_log.h"
+#include "freertos/portmacro.h"
+#include "freertos/projdefs.h"
+#include "idf_additions.h"
+
+#include "resample.hpp"
+#include "sample.hpp"
+#include "tasks.hpp"
+
+static constexpr char kTag[] = "mixer";  // Tag passed to ESP_LOGx.
+
+static constexpr std::size_t kSourceBufferLength = 8 * 1024;  // In bytes.
+static constexpr std::size_t kSampleBufferLength = 240 * 2;  // In samples.
+
+namespace audio {
+
+// Sets up the command queue, the SPIRAM-backed buffers, and the worker task.
+SampleConverter::SampleConverter()
+    : commands_(xQueueCreate(1, sizeof(Args))),
+      resampler_(nullptr),
+      source_(xStreamBufferCreateWithCaps(kSourceBufferLength, 1,
+                                          MALLOC_CAP_SPIRAM)),
+      source_format_(), target_format_(),  // Value-init; Main() compares these.
+      leftover_bytes_(0), leftover_offset_(0) {
+  input_buffer_ = {
+      reinterpret_cast<sample::Sample*>(heap_caps_calloc(
+          kSampleBufferLength, sizeof(sample::Sample), MALLOC_CAP_SPIRAM)),
+      kSampleBufferLength};
+  input_buffer_as_bytes_ = {reinterpret_cast<std::byte*>(input_buffer_.data()),
+                            input_buffer_.size_bytes()};
+  resampled_buffer_ = {
+      reinterpret_cast<sample::Sample*>(heap_caps_calloc(
+          kSampleBufferLength, sizeof(sample::Sample), MALLOC_CAP_SPIRAM)),
+      kSampleBufferLength};
+  tasks::StartPersistent<tasks::Type::kMixer>([&]() { Main(); });
+}
+
+SampleConverter::~SampleConverter() {
+  vQueueDelete(commands_);
+  vStreamBufferDeleteWithCaps(source_);  // Pairs with ...CreateWithCaps().
+}
+
+auto SampleConverter::SetOutput(std::shared_ptr<IAudioOutput> output) -> void {
+  // FIXME: Main() reads sink_ from its own task, and nothing synchronises that
+  // with this write. Any locking added here has to stay cheap, given that the
+  // output will change only very rarely (if ever).
+  sink_.swap(output);
+}
+
+auto SampleConverter::ConvertSamples(cpp::span<sample::Sample> input,
+                                     const IAudioOutput::Format& format,
+                                     bool is_eos) -> void {
+  // Announce the batch to the worker first, so it knows how much to read.
+  Args command{
+      .format = format,
+      .samples_available = input.size(),
+      .is_end_of_stream = is_eos,
+  };
+  xQueueSend(commands_, &command, portMAX_DELAY);
+
+  // Push the sample bytes, looping because a send may be partial.
+  const std::byte* cursor = reinterpret_cast<std::byte*>(input.data());
+  for (size_t remaining = input.size_bytes(); remaining > 0;) {
+    size_t sent = xStreamBufferSend(source_, cursor, remaining, portMAX_DELAY);
+    cursor += sent;
+    remaining -= sent;
+  }
+}
+
+// Task body: waits for one Args command per ConvertSamples() call, then reads
+// that many samples from source_ and passes them to HandleSamples().
+auto SampleConverter::Main() -> void {
+  for (;;) {
+    Args args;
+    while (!xQueueReceive(commands_, &args, portMAX_DELAY)) {
+    }
+    if (args.format != source_format_) {
+      resampler_.reset();
+      source_format_ = args.format;
+      leftover_bytes_ = 0;
+      leftover_offset_ = 0;
+
+      auto new_target = sink_->PrepareFormat(args.format);
+      if (new_target != target_format_) {
+        // The new format is different to the old one. Wait for the sink to
+        // drain before continuing.
+        while (!xStreamBufferIsEmpty(sink_->stream())) {
+          ESP_LOGI(kTag, "waiting for sink stream to drain...");
+          // TODO(jacqueline): Get the sink drain ISR to notify us of this
+          // via semaphore instead of busy-ish waiting.
+          vTaskDelay(pdMS_TO_TICKS(10));
+        }
+
+        sink_->Configure(new_target);
+      }
+      target_format_ = new_target;
+    }
+
+    // Loop until all the bytes indicated have been read. There may be leftovers
+    // from each iteration, and from the whole command, depending on resampling.
+    size_t bytes_read = 0;
+    size_t bytes_to_read = args.samples_available * sizeof(sample::Sample);
+    while (bytes_read < bytes_to_read) {
+      // First top up the input buffer, taking care not to overwrite anything
+      // remaining from a previous iteration.
+      size_t write_offset = leftover_offset_ + leftover_bytes_;
+      size_t bytes_read_this_it = xStreamBufferReceive(
+          source_, input_buffer_as_bytes_.subspan(write_offset).data(),
+          std::min(input_buffer_as_bytes_.size() - write_offset,
+                   bytes_to_read - bytes_read),
+          portMAX_DELAY);
+      bytes_read += bytes_read_this_it;
+
+      // Calculate the number of whole samples that are now in the input buffer.
+      size_t bytes_in_buffer = bytes_read_this_it + leftover_bytes_;
+      size_t samples_in_buffer = bytes_in_buffer / sizeof(sample::Sample);
+
+      // leftover_offset_ counts bytes, but input_buffer_ is indexed in samples.
+      size_t first_sample = leftover_offset_ / sizeof(sample::Sample);
+      size_t samples_used = HandleSamples(
+          input_buffer_.subspan(first_sample).first(samples_in_buffer),
+          args.is_end_of_stream && bytes_read == bytes_to_read);
+
+      // The resampler may not have consumed everything, or the read may have
+      // ended mid-sample. Either way, carry the remainder over (in bytes).
+      size_t bytes_used = samples_used * sizeof(sample::Sample);
+      assert(bytes_used <= bytes_in_buffer);
+      leftover_bytes_ = bytes_in_buffer - bytes_used;
+      if (leftover_bytes_ == 0) {
+        leftover_offset_ = 0;
+      } else {
+        leftover_offset_ += bytes_used;
+      }
+    }
+  }
+}
+
+auto SampleConverter::HandleSamples(cpp::span<sample::Sample> input,
+                                    bool is_eos) -> size_t {
+  // Fast path: nothing to convert, so pass the samples straight through. The
+  // caller deals with whatever part of the input was not accepted.
+  if (source_format_ == target_format_) {
+    std::size_t accepted = xStreamBufferSend(
+        sink_->stream(), input.data(), input.size_bytes(), portMAX_DELAY);
+    return accepted / sizeof(sample::Sample);
+  }
+
+  const bool needs_resample =
+      source_format_.sample_rate != target_format_.sample_rate;
+  size_t consumed = 0;
+  while (consumed < input.size()) {
+    cpp::span<sample::Sample> pending;
+    if (!needs_resample) {
+      // Same rate: forward the input unchanged.
+      pending = input;
+      consumed = input.size();
+    } else {
+      if (!resampler_) {
+        ESP_LOGI(kTag, "creating new resampler for %lu -> %lu",
+                 source_format_.sample_rate, target_format_.sample_rate);
+        resampler_.reset(new Resampler(source_format_.sample_rate,
+                                       target_format_.sample_rate,
+                                       source_format_.num_channels));
+      }
+
+      auto [read, written] = resampler_->Process(input.subspan(consumed),
+                                                 resampled_buffer_, is_eos);
+      consumed += read;
+      if (read == 0 && written == 0) {
+        break;  // No progress; the resampler needs more input.
+      }
+      pending = resampled_buffer_.first(written);
+    }
+
+    // Write everything in `pending` to the sink, however many sends it takes.
+    auto* cursor = reinterpret_cast<std::byte*>(pending.data());
+    for (size_t remaining = pending.size_bytes(); remaining > 0;) {
+      size_t sent =
+          xStreamBufferSend(sink_->stream(), cursor, remaining, portMAX_DELAY);
+      cursor += sent;
+      remaining -= sent;
+    }
+  }
+  return consumed;
+}
+
+}  // namespace audio
diff --git a/src/audio/audio_decoder.cpp b/src/audio/audio_decoder.cpp
new file mode 100644
index 00000000..03f81124
--- /dev/null
+++ b/src/audio/audio_decoder.cpp
@@ -0,0 +1,166 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include "audio_decoder.hpp"
+
+#include <cstdint>
+#include <cstdlib>
+
+#include <algorithm>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <deque>
+#include <memory>
+#include <variant>
+
+#include "cbor.h"
+#include "esp_err.h"
+#include "esp_heap_caps.h"
+#include "esp_log.h"
+#include "freertos/portmacro.h"
+#include "freertos/projdefs.h"
+#include "freertos/queue.h"
+#include "freertos/ringbuf.h"
+#include "span.hpp"
+
+#include "audio_converter.hpp"
+#include "audio_events.hpp"
+#include "audio_fsm.hpp"
+#include "audio_sink.hpp"
+#include "audio_source.hpp"
+#include "codec.hpp"
+#include "event_queue.hpp"
+#include "fatfs_audio_input.hpp"
+#include "sample.hpp"
+#include "tasks.hpp"
+#include "track.hpp"
+#include "types.hpp"
+#include "ui_fsm.hpp"
+
+namespace audio {
+
+static const char* kTag = "audio_dec";  // Tag passed to ESP_LOGx.
+
+static constexpr std::size_t kCodecBufferLength = 240 * 4;  // In samples.
+
+Timer::Timer(const codecs::ICodec::OutputFormat& format)
+    : current_seconds_(0), current_sample_in_second_(0),
+      samples_per_second_(format.sample_rate_hz * format.num_channels),
+      total_duration_seconds_(  // Zero when the rate would divide by zero.
+          samples_per_second_ == 0 ? 0
+          : format.total_samples.value_or(0) / samples_per_second_) {}
+
+// Advances the playback clock by `samples`, and dispatches a PlaybackUpdate to
+// the audio and UI queues each time at least one whole second has elapsed.
+auto Timer::AddSamples(std::size_t samples) -> void {
+  if (samples_per_second_ == 0) {
+    return;  // Degenerate format; the rollover below would divide by zero.
+  }
+  current_sample_in_second_ += samples;
+  const uint32_t elapsed = current_sample_in_second_ / samples_per_second_;
+  if (elapsed == 0) {
+    return;
+  }
+  current_seconds_ += elapsed;
+  current_sample_in_second_ %= samples_per_second_;
+  total_duration_seconds_ = std::max(total_duration_seconds_, current_seconds_);
+
+  PlaybackUpdate ev{.seconds_elapsed = current_seconds_,
+                    .seconds_total = total_duration_seconds_};
+  events::Audio().Dispatch(ev);
+  events::Ui().Dispatch(ev);
+}
+
+auto Decoder::Start(std::shared_ptr<IAudioSource> source,
+                    std::shared_ptr<SampleConverter> sink) -> Decoder* {
+  auto* decoder = new Decoder(source, sink);  // Used by the task from now on.
+  tasks::StartPersistent<tasks::Type::kAudio>([decoder]() { decoder->Main(); });
+  return decoder;
+}
+
+Decoder::Decoder(std::shared_ptr<IAudioSource> source,
+                 std::shared_ptr<SampleConverter> converter)
+    : source_(source),
+      converter_(converter),
+      codec_(),
+      timer_(),
+      current_format_() {
+  // Scratch space that the codec decodes into; allocated (zeroed) in SPIRAM.
+  auto* storage = reinterpret_cast<sample::Sample*>(heap_caps_calloc(
+      kCodecBufferLength, sizeof(sample::Sample), MALLOC_CAP_SPIRAM));
+  codec_buffer_ = {storage, kCodecBufferLength};
+}
+
+void Decoder::Main() {
+  while (true) {
+    // Switch streams when the source has a new one, or when nothing is open.
+    if (source_->HasNewStream() || !stream_) {
+      std::shared_ptr<codecs::IStream> candidate = source_->NextStream();
+      if (!candidate || !BeginDecoding(candidate)) {
+        continue;  // Nothing playable yet; ask the source again.
+      }
+      stream_ = candidate;
+    }
+
+    if (ContinueDecoding()) {
+      events::Audio().Dispatch(internal::InputFileFinished{});
+      stream_.reset();
+    }
+  }
+}
+
+auto Decoder::BeginDecoding(std::shared_ptr<codecs::IStream> stream) -> bool {
+  // Swap in a codec for this stream's type; this discards the previous codec.
+  codec_.reset(codecs::CreateCodecForType(stream->type()).value_or(nullptr));
+  if (codec_ == nullptr) {
+    ESP_LOGE(kTag, "no codec found");
+    return false;
+  }
+
+  auto opened = codec_->OpenStream(stream);
+  if (opened.has_error()) {
+    ESP_LOGE(kTag, "codec failed to start: %s",
+             codecs::ICodec::ErrorString(opened.error()).c_str());
+    return false;
+  }
+
+  // Elapsed-time tracking is only set up when the total length is known.
+  timer_.reset(opened->total_samples ? new Timer(opened.value()) : nullptr);
+
+  // Decoded output is always presented to the converter as 16 bit samples.
+  current_sink_format_ = IAudioOutput::Format{
+      .sample_rate = opened->sample_rate_hz,
+      .num_channels = opened->num_channels,
+      .bits_per_sample = 16,
+  };
+
+  ESP_LOGI(kTag, "stream started ok");
+  events::Audio().Dispatch(internal::InputFileOpened{});
+  return true;
+}
+
+auto Decoder::ContinueDecoding() -> bool {
+  if (!codec_) {
+    return true;  // A failed BeginDecoding() can leave codec_ empty.
+  }
+  auto res = codec_->DecodeTo(codec_buffer_);
+  if (res.has_error()) {
+    return true;  // Treated like end of stream, so Main() moves on.
+  }
+  if (res->samples_written > 0) {
+    converter_->ConvertSamples(codec_buffer_.first(res->samples_written),
+                               current_sink_format_.value(),
+                               res->is_stream_finished);
+  }
+  if (timer_) {
+    timer_->AddSamples(res->samples_written);
+  }
+  return res->is_stream_finished;
+}
+
+}  // namespace audio
diff --git a/src/audio/audio_fsm.cpp b/src/audio/audio_fsm.cpp
index 1ea670af..e68eedaf 100644
--- a/src/audio/audio_fsm.cpp
+++ b/src/audio/audio_fsm.cpp
@@ -15,8 +15,9 @@
 #include "freertos/portmacro.h"
 #include "freertos/projdefs.h"
 
+#include "audio_converter.hpp"
+#include "audio_decoder.hpp"
 #include "audio_events.hpp"
-#include "audio_task.hpp"
 #include "bluetooth.hpp"
 #include "bt_audio_output.hpp"
 #include "event_queue.hpp"
@@ -24,7 +25,6 @@
 #include "future_fetcher.hpp"
 #include "i2s_audio_output.hpp"
 #include "i2s_dac.hpp"
-#include "sink_mixer.hpp"
 #include "system_events.hpp"
 #include "track.hpp"
 #include "track_queue.hpp"
@@ -37,10 +37,9 @@ drivers::IGpios* AudioState::sIGpios;
 std::shared_ptr<drivers::I2SDac> AudioState::sDac;
 std::weak_ptr<database::Database> AudioState::sDatabase;
 
-std::unique_ptr<AudioTask> AudioState::sTask;
-
 std::shared_ptr<FatfsAudioInput> AudioState::sFileSource;
-std::shared_ptr<SinkMixer> AudioState::sMixer;
+std::unique_ptr<Decoder> AudioState::sDecoder;
+std::shared_ptr<SampleConverter> AudioState::sSampleConverter;
 std::shared_ptr<IAudioOutput> AudioState::sOutput;
 
 TrackQueue* AudioState::sTrackQueue;
@@ -65,10 +64,10 @@ auto AudioState::Init(drivers::IGpios* gpio_expander,
   sOutput.reset(new I2SAudioOutput(sIGpios, sDac));
   // sOutput.reset(new BluetoothAudioOutput(bluetooth));
 
-  sMixer.reset(new SinkMixer());
-  sMixer->SetOutput(sOutput);
+  sSampleConverter.reset(new SampleConverter());
+  sSampleConverter->SetOutput(sOutput);
 
-  AudioTask::Start(sFileSource, sMixer);
+  Decoder::Start(sFileSource, sSampleConverter);
 
   return true;
 }
diff --git a/src/audio/audio_task.cpp b/src/audio/audio_task.cpp
deleted file mode 100644
index 99b1c170..00000000
--- a/src/audio/audio_task.cpp
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Copyright 2023 jacqueline <me@jacqueline.id.au>
- *
- * SPDX-License-Identifier: GPL-3.0-only
- */
-
-#include "audio_task.hpp"
-
-#include <cstdint>
-#include <cstdlib>
-
-#include <algorithm>
-#include <cmath>
-#include <cstddef>
-#include <cstdint>
-#include <cstring>
-#include <deque>
-#include <memory>
-#include <variant>
-
-#include "cbor.h"
-#include "esp_err.h"
-#include "esp_heap_caps.h"
-#include "esp_log.h"
-#include "freertos/portmacro.h"
-#include "freertos/projdefs.h"
-#include "freertos/queue.h"
-#include "freertos/ringbuf.h"
-#include "span.hpp"
-
-#include "audio_events.hpp"
-#include "audio_fsm.hpp"
-#include "audio_sink.hpp"
-#include "audio_source.hpp"
-#include "codec.hpp"
-#include "event_queue.hpp"
-#include "fatfs_audio_input.hpp"
-#include "sample.hpp"
-#include "sink_mixer.hpp"
-#include "tasks.hpp"
-#include "track.hpp"
-#include "types.hpp"
-#include "ui_fsm.hpp"
-
-namespace audio {
-
-static const char* kTag = "audio_dec";
-
-static constexpr std::size_t kCodecBufferLength = 240 * 4;
-
-Timer::Timer(const codecs::ICodec::OutputFormat& format)
-    : current_seconds_(0),
-      current_sample_in_second_(0),
-      samples_per_second_(format.sample_rate_hz * format.num_channels),
-      total_duration_seconds_(format.total_samples.value_or(0) /
-                              format.num_channels / format.sample_rate_hz) {}
-
-auto Timer::AddSamples(std::size_t samples) -> void {
-  bool incremented = false;
-  current_sample_in_second_ += samples;
-  while (current_sample_in_second_ >= samples_per_second_) {
-    current_seconds_++;
-    current_sample_in_second_ -= samples_per_second_;
-    incremented = true;
-  }
-
-  if (incremented) {
-    if (total_duration_seconds_ < current_seconds_) {
-      total_duration_seconds_ = current_seconds_;
-    }
-
-    PlaybackUpdate ev{.seconds_elapsed = current_seconds_,
-                      .seconds_total = total_duration_seconds_};
-    events::Audio().Dispatch(ev);
-    events::Ui().Dispatch(ev);
-  }
-}
-
-auto AudioTask::Start(std::shared_ptr<IAudioSource> source,
-                      std::shared_ptr<SinkMixer> sink) -> AudioTask* {
-  AudioTask* task = new AudioTask(source, sink);
-  tasks::StartPersistent<tasks::Type::kAudio>([=]() { task->Main(); });
-  return task;
-}
-
-AudioTask::AudioTask(std::shared_ptr<IAudioSource> source,
-                     std::shared_ptr<SinkMixer> mixer)
-    : source_(source), mixer_(mixer), codec_(), timer_(), current_format_() {
-  codec_buffer_ = {
-      reinterpret_cast<sample::Sample*>(heap_caps_calloc(
-          kCodecBufferLength, sizeof(sample::Sample), MALLOC_CAP_SPIRAM)),
-      kCodecBufferLength};
-}
-
-void AudioTask::Main() {
-  for (;;) {
-    if (source_->HasNewStream() || !stream_) {
-      std::shared_ptr<codecs::IStream> new_stream = source_->NextStream();
-      if (new_stream && BeginDecoding(new_stream)) {
-        stream_ = new_stream;
-      } else {
-        continue;
-      }
-    }
-
-    if (ContinueDecoding()) {
-      events::Audio().Dispatch(internal::InputFileFinished{});
-      stream_.reset();
-    }
-  }
-}
-
-auto AudioTask::BeginDecoding(std::shared_ptr<codecs::IStream> stream) -> bool {
-  codec_.reset(codecs::CreateCodecForType(stream->type()).value_or(nullptr));
-  if (!codec_) {
-    ESP_LOGE(kTag, "no codec found");
-    return false;
-  }
-
-  auto open_res = codec_->OpenStream(stream);
-  if (open_res.has_error()) {
-    ESP_LOGE(kTag, "codec failed to start: %s",
-             codecs::ICodec::ErrorString(open_res.error()).c_str());
-    return false;
-  }
-
-  if (open_res->total_samples) {
-    timer_.reset(new Timer(open_res.value()));
-  } else {
-    timer_.reset();
-  }
-
-  current_sink_format_ = IAudioOutput::Format{
-      .sample_rate = open_res->sample_rate_hz,
-      .num_channels = open_res->num_channels,
-      .bits_per_sample = 16,
-  };
-  ESP_LOGI(kTag, "stream started ok");
-  events::Audio().Dispatch(internal::InputFileOpened{});
-  return true;
-}
-
-auto AudioTask::ContinueDecoding() -> bool {
-  auto res = codec_->DecodeTo(codec_buffer_);
-  if (res.has_error()) {
-    return true;
-  }
-
-  if (res->samples_written > 0) {
-    mixer_->MixAndSend(codec_buffer_.first(res->samples_written),
-                       current_sink_format_.value(), res->is_stream_finished);
-  }
-
-  if (timer_) {
-    timer_->AddSamples(res->samples_written);
-  }
-
-  return res->is_stream_finished;
-}
-
-}  // namespace audio
diff --git a/src/audio/include/audio_converter.hpp b/src/audio/include/audio_converter.hpp
new file mode 100644
index 00000000..81532969
--- /dev/null
+++ b/src/audio/include/audio_converter.hpp
@@ -0,0 +1,65 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "audio_sink.hpp"
+#include "audio_source.hpp"
+#include "codec.hpp"
+#include "resample.hpp"
+#include "sample.hpp"
+
+namespace audio {
+
+/*
+ * Handle to a persistent task that converts samples between formats (sample
+ * rate, channels, bits per sample), in order to put samples in the preferred
+ * format of the current output device. The resulting samples are forwarded
+ * to the output device's sink stream.
+ */
+class SampleConverter {
+ public:
+  SampleConverter();
+  ~SampleConverter();
+  // Not copyable: a copy would delete the queue and stream buffer twice.
+  SampleConverter(const SampleConverter&) = delete;
+  SampleConverter& operator=(const SampleConverter&) = delete;
+
+  auto SetOutput(std::shared_ptr<IAudioOutput>) -> void;
+
+  // Queues the samples (in `format`) for conversion; blocks until all are sent.
+  auto ConvertSamples(cpp::span<sample::Sample>,
+                      const IAudioOutput::Format& format,
+                      bool is_eos) -> void;
+
+ private:
+  auto Main() -> void;
+  auto HandleSamples(cpp::span<sample::Sample>, bool) -> size_t;
+
+  struct Args {
+    IAudioOutput::Format format;
+    size_t samples_available;
+    bool is_end_of_stream;
+  };
+  QueueHandle_t commands_;
+  std::unique_ptr<Resampler> resampler_;
+
+  StreamBufferHandle_t source_;
+  cpp::span<sample::Sample> input_buffer_;
+  cpp::span<std::byte> input_buffer_as_bytes_;
+  cpp::span<sample::Sample> resampled_buffer_;
+
+  std::shared_ptr<IAudioOutput> sink_;
+  IAudioOutput::Format source_format_{};
+  IAudioOutput::Format target_format_{};
+  size_t leftover_bytes_ = 0;
+  size_t leftover_offset_ = 0;
+};
+
+}  // namespace audio
diff --git a/src/audio/include/audio_decoder.hpp b/src/audio/include/audio_decoder.hpp
new file mode 100644
index 00000000..1759f6e4
--- /dev/null
+++ b/src/audio/include/audio_decoder.hpp
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <cstdint>
+#include <memory>
+
+#include "audio_converter.hpp"
+#include "audio_sink.hpp"
+#include "audio_source.hpp"
+#include "codec.hpp"
+#include "track.hpp"
+#include "types.hpp"
+
+namespace audio {
+
+/*
+ * Sample-based timer for the current elapsed playback time.
+ */
+class Timer {
+ public:
+  explicit Timer(const codecs::ICodec::OutputFormat& format);
+
+  // Adds newly decoded samples to the running total.
+  auto AddSamples(std::size_t) -> void;
+
+ private:
+  uint32_t current_seconds_;
+  uint32_t current_sample_in_second_;
+  uint32_t samples_per_second_;
+  uint32_t total_duration_seconds_;
+};
+
+/*
+ * Handle to a persistent task that takes bytes from the given source, decodes
+ * them into sample::Sample (normalised to 16 bit signed PCM), and then
+ * forwards the resulting stream to the given converter.
+ */
+class Decoder {
+ public:
+  static auto Start(std::shared_ptr<IAudioSource> source,
+                    std::shared_ptr<SampleConverter> converter) -> Decoder*;
+
+  auto Main() -> void;  // Task entry point; loops forever.
+
+  Decoder(const Decoder&) = delete;
+  Decoder& operator=(const Decoder&) = delete;
+
+ private:
+  Decoder(std::shared_ptr<IAudioSource> source,
+          std::shared_ptr<SampleConverter> converter);
+
+  auto BeginDecoding(std::shared_ptr<codecs::IStream>) -> bool;
+  auto ContinueDecoding() -> bool;  // True once the current stream is over.
+
+  std::shared_ptr<IAudioSource> source_;
+  std::shared_ptr<SampleConverter> converter_;
+
+  std::shared_ptr<codecs::IStream> stream_;  // Empty between streams.
+  std::unique_ptr<codecs::ICodec> codec_;
+  std::unique_ptr<Timer> timer_;  // Null when the total length is unknown.
+
+  std::optional<codecs::ICodec::OutputFormat> current_format_;
+  std::optional<IAudioOutput::Format> current_sink_format_;
+
+  cpp::span<sample::Sample> codec_buffer_;  // Decode target for codec_.
+};
+
+}  // namespace audio
diff --git a/src/audio/include/audio_fsm.hpp b/src/audio/include/audio_fsm.hpp
index 6c785426..430bc298 100644
--- a/src/audio/include/audio_fsm.hpp
+++ b/src/audio/include/audio_fsm.hpp
@@ -13,8 +13,8 @@
 #include "audio_sink.hpp"
 #include "tinyfsm.hpp"
 
+#include "audio_decoder.hpp"
 #include "audio_events.hpp"
-#include "audio_task.hpp"
 #include "bt_audio_output.hpp"
 #include "database.hpp"
 #include "display.hpp"
@@ -68,9 +68,9 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
   static std::shared_ptr<drivers::I2SDac> sDac;
   static std::weak_ptr<database::Database> sDatabase;
 
-  static std::unique_ptr<AudioTask> sTask;
   static std::shared_ptr<FatfsAudioInput> sFileSource;
-  static std::shared_ptr<SinkMixer> sMixer;
+  static std::unique_ptr<Decoder> sDecoder;
+  static std::shared_ptr<SampleConverter> sSampleConverter;
   static std::shared_ptr<IAudioOutput> sOutput;
 
   static TrackQueue* sTrackQueue;
diff --git a/src/audio/include/audio_task.hpp b/src/audio/include/audio_task.hpp
deleted file mode 100644
index 08c5769c..00000000
--- a/src/audio/include/audio_task.hpp
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright 2023 jacqueline <me@jacqueline.id.au>
- *
- * SPDX-License-Identifier: GPL-3.0-only
- */
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "audio_sink.hpp"
-#include "audio_source.hpp"
-#include "codec.hpp"
-#include "sink_mixer.hpp"
-#include "track.hpp"
-#include "types.hpp"
-
-namespace audio {
-
-class Timer {
- public:
-  Timer(const codecs::ICodec::OutputFormat& format);
-
-  auto AddSamples(std::size_t) -> void;
-
- private:
-  uint32_t current_seconds_;
-  uint32_t current_sample_in_second_;
-  uint32_t samples_per_second_;
-
-  uint32_t total_duration_seconds_;
-};
-
-class AudioTask {
- public:
-  static auto Start(std::shared_ptr<IAudioSource> source,
-                    std::shared_ptr<SinkMixer> mixer) -> AudioTask*;
-
-  auto Main() -> void;
-
-  AudioTask(const AudioTask&) = delete;
-  AudioTask& operator=(const AudioTask&) = delete;
-
- private:
-  AudioTask(std::shared_ptr<IAudioSource> source,
-            std::shared_ptr<SinkMixer> mixer);
-
-  auto BeginDecoding(std::shared_ptr<codecs::IStream>) -> bool;
-  auto ContinueDecoding() -> bool;
-
-  std::shared_ptr<IAudioSource> source_;
-  std::shared_ptr<SinkMixer> mixer_;
-
-  std::shared_ptr<codecs::IStream> stream_;
-  std::unique_ptr<codecs::ICodec> codec_;
-  std::unique_ptr<Timer> timer_;
-
-  std::optional<codecs::ICodec::OutputFormat> current_format_;
-  std::optional<IAudioOutput::Format> current_sink_format_;
-
-  cpp::span<sample::Sample> codec_buffer_;
-};
-
-}  // namespace audio
diff --git a/src/audio/include/sink_mixer.hpp b/src/audio/include/sink_mixer.hpp
deleted file mode 100644
index d046f835..00000000
--- a/src/audio/include/sink_mixer.hpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright 2023 jacqueline <me@jacqueline.id.au>
- *
- * SPDX-License-Identifier: GPL-3.0-only
- */
-
-#pragma once
-
-#include <cstdint>
-#include <memory>
-
-#include "audio_sink.hpp"
-#include "audio_source.hpp"
-#include "codec.hpp"
-#include "resample.hpp"
-#include "sample.hpp"
-
-namespace audio {
-
-/*
- * Handles the final downmix + resample + quantisation stage of audio,
- * generation sending the result directly to an IAudioOutput.
- */
-class SinkMixer {
- public:
-  SinkMixer();
-  ~SinkMixer();
-
-  auto SetOutput(std::shared_ptr<IAudioOutput>) -> void;
-
-  auto MixAndSend(cpp::span<sample::Sample>,
-                  const IAudioOutput::Format& format,
-                  bool is_eos) -> void;
-
- private:
-  auto Main() -> void;
-
-  auto SetTargetFormat(const IAudioOutput::Format& format) -> void;
-  auto HandleSamples(cpp::span<sample::Sample>, bool) -> size_t;
-
-  struct Args {
-    IAudioOutput::Format format;
-    size_t samples_available;
-    bool is_end_of_stream;
-  };
-  QueueHandle_t commands_;
-
-  std::unique_ptr<Resampler> resampler_;
-
-  StreamBufferHandle_t source_;
-  cpp::span<sample::Sample> input_buffer_;
-  cpp::span<std::byte> input_buffer_as_bytes_;
-
-  cpp::span<sample::Sample> resampled_buffer_;
-
-  std::shared_ptr<IAudioOutput> sink_;
-  IAudioOutput::Format source_format_;
-  IAudioOutput::Format target_format_;
-  size_t leftover_bytes_;
-  size_t leftover_offset_;
-};
-
-}  // namespace audio
diff --git a/src/audio/sink_mixer.cpp b/src/audio/sink_mixer.cpp
deleted file mode 100644
index ad7198dc..00000000
--- a/src/audio/sink_mixer.cpp
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Copyright 2023 jacqueline <me@jacqueline.id.au>
- *
- * SPDX-License-Identifier: GPL-3.0-only
- */
-
-#include "sink_mixer.hpp"
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-
-#include "audio_sink.hpp"
-#include "esp_heap_caps.h"
-#include "esp_log.h"
-#include "freertos/portmacro.h"
-#include "freertos/projdefs.h"
-#include "idf_additions.h"
-
-#include "resample.hpp"
-#include "sample.hpp"
-#include "tasks.hpp"
-
-static constexpr char kTag[] = "mixer";
-
-static constexpr std::size_t kSourceBufferLength = 8 * 1024;
-static constexpr std::size_t kSampleBufferLength = 240 * 2;
-
-namespace audio {
-
-SinkMixer::SinkMixer()
-    : commands_(xQueueCreate(1, sizeof(Args))),
-      resampler_(nullptr),
-      source_(xStreamBufferCreateWithCaps(kSourceBufferLength,
-                                          1,
-                                          MALLOC_CAP_SPIRAM)) {
-  input_buffer_ = {
-      reinterpret_cast<sample::Sample*>(heap_caps_calloc(
-          kSampleBufferLength, sizeof(sample::Sample), MALLOC_CAP_SPIRAM)),
-      kSampleBufferLength};
-  input_buffer_as_bytes_ = {reinterpret_cast<std::byte*>(input_buffer_.data()),
-                            input_buffer_.size_bytes()};
-
-  resampled_buffer_ = {
-      reinterpret_cast<sample::Sample*>(heap_caps_calloc(
-          kSampleBufferLength, sizeof(sample::Sample), MALLOC_CAP_SPIRAM)),
-      kSampleBufferLength};
-
-  tasks::StartPersistent<tasks::Type::kMixer>([&]() { Main(); });
-}
-
-SinkMixer::~SinkMixer() {
-  vQueueDelete(commands_);
-  vStreamBufferDelete(source_);
-}
-
-auto SinkMixer::SetOutput(std::shared_ptr<IAudioOutput> output) -> void {
-  // FIXME: We should add synchronisation here, but we should be careful about
-  // not impacting performance given that the output will change only very
-  // rarely (if ever).
-  sink_ = output;
-}
-
-auto SinkMixer::MixAndSend(cpp::span<sample::Sample> input,
-                           const IAudioOutput::Format& format,
-                           bool is_eos) -> void {
-  Args args{
-      .format = format,
-      .samples_available = input.size(),
-      .is_end_of_stream = is_eos,
-  };
-  xQueueSend(commands_, &args, portMAX_DELAY);
-
-  cpp::span<std::byte> input_as_bytes = {
-      reinterpret_cast<std::byte*>(input.data()), input.size_bytes()};
-  size_t bytes_sent = 0;
-  while (bytes_sent < input_as_bytes.size()) {
-    bytes_sent +=
-        xStreamBufferSend(source_, input_as_bytes.subspan(bytes_sent).data(),
-                          input_as_bytes.size() - bytes_sent, portMAX_DELAY);
-  }
-}
-
-auto SinkMixer::Main() -> void {
-  for (;;) {
-    Args args;
-    while (!xQueueReceive(commands_, &args, portMAX_DELAY)) {
-    }
-    if (args.format != source_format_) {
-      resampler_.reset();
-      source_format_ = args.format;
-      leftover_bytes_ = 0;
-      leftover_offset_ = 0;
-
-      auto new_target = sink_->PrepareFormat(args.format);
-      if (new_target != target_format_) {
-        // The new format is different to the old one. Wait for the sink to
-        // drain before continuing.
-        while (!xStreamBufferIsEmpty(sink_->stream())) {
-          ESP_LOGI(kTag, "waiting for sink stream to drain...");
-          // TODO(jacqueline): Get the sink drain ISR to notify us of this
-          // via semaphore instead of busy-ish waiting.
-          vTaskDelay(pdMS_TO_TICKS(10));
-        }
-
-        sink_->Configure(new_target);
-      }
-      target_format_ = new_target;
-    }
-
-    // Loop until we finish reading all the bytes indicated. There might be
-    // leftovers from each iteration, and from this process as a whole,
-    // depending on the resampling stage.
-    size_t bytes_read = 0;
-    size_t bytes_to_read = args.samples_available * sizeof(sample::Sample);
-    while (bytes_read < bytes_to_read) {
-      // First top up the input buffer, taking care not to overwrite anything
-      // remaining from a previous iteration.
-      size_t bytes_read_this_it = xStreamBufferReceive(
-          source_,
-          input_buffer_as_bytes_.subspan(leftover_offset_ + leftover_bytes_)
-              .data(),
-          std::min(input_buffer_as_bytes_.size() - leftover_offset_ -
-                       leftover_bytes_,
-                   bytes_to_read - bytes_read),
-          portMAX_DELAY);
-      bytes_read += bytes_read_this_it;
-
-      // Calculate the number of whole samples that are now in the input buffer.
-      size_t bytes_in_buffer = bytes_read_this_it + leftover_bytes_;
-      size_t samples_in_buffer = bytes_in_buffer / sizeof(sample::Sample);
-
-      size_t samples_used = HandleSamples(
-          input_buffer_.subspan(leftover_offset_).first(samples_in_buffer),
-          args.is_end_of_stream && bytes_read == bytes_to_read);
-
-      // Maybe the resampler didn't consume everything. Maybe the last few
-      // bytes we read were half a frame. Either way, we need to calculate the
-      // size of the remainder in bytes.
-      size_t bytes_used = samples_used * sizeof(sample::Sample);
-      assert(bytes_used <= bytes_in_buffer);
-      leftover_bytes_ = bytes_in_buffer - bytes_used;
-      if (leftover_bytes_ == 0) {
-        leftover_offset_ = 0;
-      } else {
-        leftover_offset_ += bytes_used;
-      }
-    }
-  }
-}
-
-auto SinkMixer::HandleSamples(cpp::span<sample::Sample> input, bool is_eos)
-    -> size_t {
-  if (source_format_ == target_format_) {
-    // The happiest possible case: the input format matches the output
-    // format already.
-    std::size_t bytes_sent = xStreamBufferSend(
-        sink_->stream(), input.data(), input.size_bytes(), portMAX_DELAY);
-    return bytes_sent / sizeof(sample::Sample);
-  }
-
-  size_t samples_used = 0;
-  while (samples_used < input.size()) {
-    cpp::span<sample::Sample> output_source;
-    if (source_format_.sample_rate != target_format_.sample_rate) {
-      if (resampler_ == nullptr) {
-        ESP_LOGI(kTag, "creating new resampler for %lu -> %lu",
-                 source_format_.sample_rate, target_format_.sample_rate);
-        resampler_.reset(new Resampler(source_format_.sample_rate,
-                                       target_format_.sample_rate,
-                                       source_format_.num_channels));
-      }
-
-      size_t read, written;
-      std::tie(read, written) = resampler_->Process(input.subspan(samples_used),
-                                                    resampled_buffer_, is_eos);
-      samples_used += read;
-
-      if (read == 0 && written == 0) {
-        // Zero samples used or written. We need more input.
-        break;
-      }
-      output_source = resampled_buffer_.first(written);
-    } else {
-      output_source = input;
-      samples_used = input.size();
-    }
-
-    size_t bytes_sent = 0;
-    size_t bytes_to_send = output_source.size_bytes();
-    while (bytes_sent < bytes_to_send) {
-      bytes_sent += xStreamBufferSend(
-          sink_->stream(),
-          reinterpret_cast<std::byte*>(output_source.data()) + bytes_sent,
-          bytes_to_send - bytes_sent, portMAX_DELAY);
-    }
-  }
-  return samples_used;
-}
-
-}  // namespace audio
-- 
cgit v1.2.3