13 files changed, 484 insertions, 354 deletions
diff --git a/src/codecs/CMakeLists.txt b/src/codecs/CMakeLists.txt
index 91d3f319..2d98198b 100644
--- a/src/codecs/CMakeLists.txt
+++ b/src/codecs/CMakeLists.txt
@@ -4,6 +4,7 @@
 
 idf_component_register(
   SRCS "codec.cpp" "mad.cpp" "foxenflac.cpp" "opus.cpp" "vorbis.cpp"
+       "source_buffer.cpp"
   INCLUDE_DIRS "include"
   REQUIRES "result" "span" "libmad" "libfoxenflac" "tremor" "opusfile")
 
diff --git a/src/codecs/codec.cpp b/src/codecs/codec.cpp
index 9ac20097..a4c1a5cf 100644
--- a/src/codecs/codec.cpp
+++ b/src/codecs/codec.cpp
@@ -10,10 +10,10 @@
 #include <optional>
 
 #include "foxenflac.hpp"
-#include "opus.hpp"
 #include "mad.hpp"
-#include "vorbis.hpp"
+#include "opus.hpp"
 #include "types.hpp"
+#include "vorbis.hpp"
 
 namespace codecs {
 
diff --git a/src/codecs/foxenflac.cpp b/src/codecs/foxenflac.cpp
index b676f82a..cc110920 100644
--- a/src/codecs/foxenflac.cpp
+++ b/src/codecs/foxenflac.cpp
@@ -19,23 +19,34 @@ namespace codecs {
 static const char kTag[] = "flac";
 
 FoxenFlacDecoder::FoxenFlacDecoder()
-    : flac_(FX_FLAC_ALLOC(FLAC_MAX_BLOCK_SIZE, 2)) {}
+    : input_(), buffer_(), flac_(FX_FLAC_ALLOC(FLAC_MAX_BLOCK_SIZE, 2)) {}
 
 FoxenFlacDecoder::~FoxenFlacDecoder() {
   free(flac_);
 }
 
-auto FoxenFlacDecoder::BeginStream(const cpp::span<const std::byte> input)
-    -> Result<OutputFormat> {
-  uint32_t bytes_used = input.size_bytes();
-  fx_flac_state_t state =
-      fx_flac_process(flac_, reinterpret_cast<const uint8_t*>(input.data()),
-                      &bytes_used, NULL, NULL);
+auto FoxenFlacDecoder::OpenStream(std::shared_ptr<IStream> input)
+    -> cpp::result<OutputFormat, Error> {
+  input_ = input;
+  // Feed the decoder until the metadata is parsed, it fails, or input ends.
+  bool eof = false;
+  fx_flac_state_t state = FLAC_INIT;
+  do {  // NB: a full buffer makes Refill return false, so FLAC_ERR must exit.
+    eof = buffer_.Refill(input_.get());
+    buffer_.ConsumeBytes([&](cpp::span<std::byte> buf) -> size_t {
+      uint32_t bytes_used = buf.size();  // in: bytes offered; out: bytes taken
+      state =
+          fx_flac_process(flac_, reinterpret_cast<const uint8_t*>(buf.data()),
+                          &bytes_used, NULL, NULL);
+      return bytes_used;
+    });
+  } while (state != FLAC_END_OF_METADATA && state != FLAC_ERR && !eof);
+
   if (state != FLAC_END_OF_METADATA) {
     if (state == FLAC_ERR) {
-      return {bytes_used, cpp::fail(Error::kMalformedData)};
+      return cpp::fail(Error::kMalformedData);
     } else {
-      return {bytes_used, cpp::fail(Error::kOutOfInput)};
+      return cpp::fail(Error::kOutOfInput);
     }
   }
 
@@ -43,14 +54,12 @@ auto FoxenFlacDecoder::BeginStream(const cpp::span<const std::byte> input)
   int64_t fs = fx_flac_get_streaminfo(flac_, FLAC_KEY_SAMPLE_RATE);
   if (channels == FLAC_INVALID_METADATA_KEY ||
       fs == FLAC_INVALID_METADATA_KEY) {
-    return {bytes_used, cpp::fail(Error::kMalformedData)};
+    return cpp::fail(Error::kMalformedData);
   }
 
   OutputFormat format{
       .num_channels = static_cast<uint8_t>(channels),
       .sample_rate_hz = static_cast<uint32_t>(fs),
-      .duration_seconds = {},
-      .bits_per_second = {},
   };
 
   uint64_t num_samples = fx_flac_get_streaminfo(flac_, FLAC_KEY_N_SAMPLES);
@@ -58,38 +67,32 @@ auto FoxenFlacDecoder::BeginStream(const cpp::span<const std::byte> input)
     format.duration_seconds = num_samples / fs;
   }
 
-  return {bytes_used, format};
+  return format;
 }
 
-auto FoxenFlacDecoder::ContinueStream(cpp::span<const std::byte> input,
-                                      cpp::span<sample::Sample> output)
-    -> Result<OutputInfo> {
-  cpp::span<int32_t> output_as_samples{
-      reinterpret_cast<int32_t*>(output.data()), output.size_bytes() / 4};
-  uint32_t bytes_read = input.size_bytes();
-  uint32_t samples_written = output_as_samples.size();
-
-  fx_flac_state_t state =
-      fx_flac_process(flac_, reinterpret_cast<const uint8_t*>(input.data()),
-                      &bytes_read, output_as_samples.data(), &samples_written);
-  if (state == FLAC_ERR) {
-    return {bytes_read, cpp::fail(Error::kMalformedData)};
-  }
+auto FoxenFlacDecoder::DecodeTo(cpp::span<sample::Sample> output)
+    -> cpp::result<OutputInfo, Error> {
+  bool is_eof = buffer_.Refill(input_.get());
 
-  if (samples_written > 0) {
-    return {bytes_read,
-            OutputInfo{.samples_written = samples_written,
-                       .is_finished_writing = state == FLAC_END_OF_FRAME}};
+  fx_flac_state_t state;
+  uint32_t samples_written = output.size();
+
+  buffer_.ConsumeBytes([&](cpp::span<std::byte> buf) -> size_t {
+    uint32_t bytes_read = buf.size_bytes();
+    state = fx_flac_process(flac_, reinterpret_cast<const uint8_t*>(buf.data()),
+                            &bytes_read, output.data(), &samples_written);
+    return bytes_read;
+  });
+  if (state == FLAC_ERR) {
+    return cpp::fail(Error::kMalformedData);
   }
 
-  // No error, but no samples written. We must be out of data.
-  return {bytes_read, cpp::fail(Error::kOutOfInput)};
+  return OutputInfo{.samples_written = samples_written,
+                    .is_stream_finished = samples_written == 0 && is_eof};
 }
 
-auto FoxenFlacDecoder::SeekStream(cpp::span<const std::byte> input,
-                                  std::size_t target_sample) -> Result<void> {
-  // TODO(jacqueline): Implement me.
-  return {0, {}};
+auto FoxenFlacDecoder::SeekTo(size_t target) -> cpp::result<void, Error> {
+  return {};
 }
 
 }  // namespace codecs
diff --git a/src/codecs/include/codec.hpp b/src/codecs/include/codec.hpp
index 32ebef69..ece3d4fe 100644
--- a/src/codecs/include/codec.hpp
+++ b/src/codecs/include/codec.hpp
@@ -24,6 +24,34 @@
 namespace codecs {
 
 /*
+ * Interface for an abstract source of file-like data.
+ */
+class IStream {
+ public:
+  IStream(StreamType t) : t_(t) {}
+  virtual ~IStream() {}
+
+  auto type() -> StreamType { return t_; }
+
+  virtual auto Read(cpp::span<std::byte> dest) -> ssize_t = 0;
+
+  virtual auto CanSeek() -> bool = 0;
+
+  enum class SeekFrom {
+    kStartOfStream,
+    kEndOfStream,
+    kCurrentPosition,
+  };
+
+  virtual auto SeekTo(int64_t destination, SeekFrom from) -> void = 0;
+
+  virtual auto CurrentPosition() -> int64_t = 0;
+
+ protected:
+  StreamType t_;
+};
+
+/*
  * Common interface to be implemented by all audio decoders.
  */
 class ICodec {
@@ -63,32 +91,30 @@ class ICodec {
   struct OutputFormat {
     uint8_t num_channels;
     uint32_t sample_rate_hz;
-
     std::optional<uint32_t> duration_seconds;
-    std::optional<uint32_t> bits_per_second;
+
+    bool operator==(const OutputFormat&) const = default;
   };
 
   /*
    * Decodes metadata or headers from the given input stream, and returns the
    * format for the samples that will be decoded from it.
    */
-  virtual auto BeginStream(cpp::span<const std::byte> input)
-      -> Result<OutputFormat> = 0;
+  virtual auto OpenStream(std::shared_ptr<IStream> input)
+      -> cpp::result<OutputFormat, Error> = 0;
 
   struct OutputInfo {
     std::size_t samples_written;
-    bool is_finished_writing;
+    bool is_stream_finished;
   };
 
   /*
    * Writes PCM samples to the given output buffer.
    */
-  virtual auto ContinueStream(cpp::span<const std::byte> input,
-                              cpp::span<sample::Sample> output)
-      -> Result<OutputInfo> = 0;
+  virtual auto DecodeTo(cpp::span<sample::Sample> destination)
+      -> cpp::result<OutputInfo, Error> = 0;
 
-  virtual auto SeekStream(cpp::span<const std::byte> input,
-                          std::size_t target_sample) -> Result<void> = 0;
+  virtual auto SeekTo(size_t target_sample) -> cpp::result<void, Error> = 0;
 };
 
 auto CreateCodecForType(StreamType type) -> std::optional<ICodec*>;
diff --git a/src/codecs/include/foxenflac.hpp b/src/codecs/include/foxenflac.hpp
index abfa6d80..7522d967 100644
--- a/src/codecs/include/foxenflac.hpp
+++ b/src/codecs/include/foxenflac.hpp
@@ -15,6 +15,7 @@
 
 #include "foxen/flac.h"
 #include "sample.hpp"
+#include "source_buffer.hpp"
 #include "span.hpp"
 
 #include "codec.hpp"
@@ -26,13 +27,21 @@ class FoxenFlacDecoder : public ICodec {
   FoxenFlacDecoder();
   ~FoxenFlacDecoder();
 
-  auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;
-  auto ContinueStream(cpp::span<const std::byte>, cpp::span<sample::Sample>)
-      -> Result<OutputInfo> override;
-  auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
-      -> Result<void> override;
+  auto OpenStream(std::shared_ptr<IStream> input)
+      -> cpp::result<OutputFormat, Error> override;
+
+  auto DecodeTo(cpp::span<sample::Sample> destination)
+      -> cpp::result<OutputInfo, Error> override;
+
+  auto SeekTo(std::size_t target_sample) -> cpp::result<void, Error> override;
+
+  FoxenFlacDecoder(const FoxenFlacDecoder&) = delete;
+  FoxenFlacDecoder& operator=(const FoxenFlacDecoder&) = delete;
 
  private:
+  std::shared_ptr<IStream> input_;
+  SourceBuffer buffer_;
+
   fx_flac_t* flac_;
 };
 
diff --git a/src/codecs/include/mad.hpp b/src/codecs/include/mad.hpp
index b81e4acb..2a8813e9 100644
--- a/src/codecs/include/mad.hpp
+++ b/src/codecs/include/mad.hpp
@@ -14,6 +14,7 @@
 
 #include "mad.h"
 #include "sample.hpp"
+#include "source_buffer.hpp"
 #include "span.hpp"
 
 #include "codec.hpp"
@@ -25,33 +26,31 @@ class MadMp3Decoder : public ICodec {
   MadMp3Decoder();
   ~MadMp3Decoder();
 
-  /*
-   * Returns the output format for the next frame in the stream. MP3 streams
-   * may represent multiple distinct tracks, with different bitrates, and so we
-   * handle the stream only on a frame-by-frame basis.
-   */
-  auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;
+  auto OpenStream(std::shared_ptr<IStream> input)
+      -> cpp::result<OutputFormat, Error> override;
 
-  /*
-   * Writes samples for the current frame.
-   */
-  auto ContinueStream(cpp::span<const std::byte> input,
-                      cpp::span<sample::Sample> output)
-      -> Result<OutputInfo> override;
+  auto DecodeTo(cpp::span<sample::Sample> destination)
+      -> cpp::result<OutputInfo, Error> override;
 
-  auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
-      -> Result<void> override;
+  auto SeekTo(std::size_t target_sample) -> cpp::result<void, Error> override;
+
+  MadMp3Decoder(const MadMp3Decoder&) = delete;
+  MadMp3Decoder& operator=(const MadMp3Decoder&) = delete;
 
  private:
   auto GetVbrLength(const mad_header& header) -> std::optional<uint32_t>;
+  auto GetBytesUsed() -> std::size_t;
+
+  std::shared_ptr<IStream> input_;
+  SourceBuffer buffer_;
 
   mad_stream stream_;
   mad_frame frame_;
   mad_synth synth_;
 
   int current_sample_;
-
-  auto GetBytesUsed(std::size_t) -> std::size_t;
+  bool is_eof_;
+  bool is_eos_;
 };
 
 }  // namespace codecs
diff --git a/src/codecs/include/opus.hpp b/src/codecs/include/opus.hpp
index 051cd0b9..45b1b07a 100644
--- a/src/codecs/include/opus.hpp
+++ b/src/codecs/include/opus.hpp
@@ -26,30 +26,21 @@ class XiphOpusDecoder : public ICodec {
   XiphOpusDecoder();
   ~XiphOpusDecoder();
 
-  /*
-   * Returns the output format for the next frame in the stream. MP3 streams
-   * may represent multiple distinct tracks, with different bitrates, and so we
-   * handle the stream only on a frame-by-frame basis.
-   */
-  auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;
-
-  /*
-   * Writes samples for the current frame.
-   */
-  auto ContinueStream(cpp::span<const std::byte> input,
-                      cpp::span<sample::Sample> output)
-      -> Result<OutputInfo> override;
-
-  auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
-      -> Result<void> override;
-
-  auto ReadCallback() -> cpp::span<const std::byte>;
-  auto AfterReadCallback(size_t bytes_read) -> void;
+  auto OpenStream(std::shared_ptr<IStream> input)
+      -> cpp::result<OutputFormat, Error> override;
+
+  auto DecodeTo(cpp::span<sample::Sample> destination)
+      -> cpp::result<OutputInfo, Error> override;
+
+  auto SeekTo(std::size_t target_sample) -> cpp::result<void, Error> override;
+
+  XiphOpusDecoder(const XiphOpusDecoder&) = delete;
+  XiphOpusDecoder& operator=(const XiphOpusDecoder&) = delete;
 
  private:
+  std::shared_ptr<IStream> input_;
   OggOpusFile* opus_;
-  cpp::span<const std::byte> input_;
-  size_t pos_in_input_;
+  uint8_t num_channels_;
 };
 
 }  // namespace codecs
diff --git a/src/codecs/include/source_buffer.hpp b/src/codecs/include/source_buffer.hpp
new file mode 100644
index 00000000..d0d7635a
--- /dev/null
+++ b/src/codecs/include/source_buffer.hpp
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+
+#include "span.hpp"
+
+#include "codec.hpp"
+
+namespace codecs {
+
+class SourceBuffer {  // Fixed-size byte buffer between an IStream and a codec.
+ public:
+  SourceBuffer();
+  ~SourceBuffer();
+  // The writer/reader callbacks below return how many bytes they stored/used.
+  auto Refill(IStream* src) -> bool;  // True once a read from src yields 0 bytes.
+  auto AddBytes(std::function<size_t(cpp::span<std::byte>)> writer) -> void;
+  auto ConsumeBytes(std::function<size_t(cpp::span<std::byte>)> reader) -> void;
+
+  SourceBuffer(const SourceBuffer&) = delete;
+  SourceBuffer& operator=(const SourceBuffer&) = delete;
+
+ private:
+  const cpp::span<std::byte> buffer_;  // Backing storage.
+  size_t bytes_in_buffer_;  // Number of unread bytes currently held.
+  size_t offset_of_bytes_;  // Offset into buffer_ where the unread bytes begin.
+};
+
+}  // namespace codecs
diff --git a/src/codecs/include/vorbis.hpp b/src/codecs/include/vorbis.hpp
index ab15af19..2f93c37e 100644
--- a/src/codecs/include/vorbis.hpp
+++ b/src/codecs/include/vorbis.hpp
@@ -28,30 +28,20 @@ class TremorVorbisDecoder : public ICodec {
   TremorVorbisDecoder();
   ~TremorVorbisDecoder();
 
-  /*
-   * Returns the output format for the next frame in the stream. MP3 streams
-   * may represent multiple distinct tracks, with different bitrates, and so we
-   * handle the stream only on a frame-by-frame basis.
-   */
-  auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;
-
-  /*
-   * Writes samples for the current frame.
-   */
-  auto ContinueStream(cpp::span<const std::byte> input,
-                      cpp::span<sample::Sample> output)
-      -> Result<OutputInfo> override;
-
-  auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
-      -> Result<void> override;
-
-  auto ReadCallback() -> cpp::span<const std::byte>;
-  auto AfterReadCallback(size_t bytes_read) -> void;
+  auto OpenStream(std::shared_ptr<IStream> input)
+      -> cpp::result<OutputFormat, Error> override;
+
+  auto DecodeTo(cpp::span<sample::Sample> destination)
+      -> cpp::result<OutputInfo, Error> override;
+
+  auto SeekTo(std::size_t target_sample) -> cpp::result<void, Error> override;
+
+  TremorVorbisDecoder(const TremorVorbisDecoder&) = delete;
+  TremorVorbisDecoder& operator=(const TremorVorbisDecoder&) = delete;
 
  private:
+  std::shared_ptr<IStream> input_;
   OggVorbis_File vorbis_;
-  cpp::span<const std::byte> input_;
-  size_t pos_in_input_;
 };
 
 }  // namespace codecs
diff --git a/src/codecs/mad.cpp b/src/codecs/mad.cpp
index a2739bcd..ce3a9cac 100644
--- a/src/codecs/mad.cpp
+++ b/src/codecs/mad.cpp
@@ -22,7 +22,10 @@
 
 namespace codecs {
 
-MadMp3Decoder::MadMp3Decoder() {
+static constexpr char kTag[] = "mad";
+
+MadMp3Decoder::MadMp3Decoder()
+    : input_(), buffer_(), current_sample_(-1), is_eof_(false), is_eos_(false) {
   mad_stream_init(&stream_);
   mad_frame_init(&frame_);
   mad_synth_init(&synth_);
@@ -33,185 +36,145 @@ MadMp3Decoder::~MadMp3Decoder() {
   mad_synth_finish(&synth_);
 }
 
-auto MadMp3Decoder::GetBytesUsed(std::size_t buffer_size) -> std::size_t {
+auto MadMp3Decoder::GetBytesUsed() -> std::size_t {
   if (stream_.next_frame) {
-    std::size_t remaining = stream_.bufend - stream_.next_frame;
-    return buffer_size - remaining;
+    return stream_.next_frame - stream_.buffer;
   } else {
     return stream_.bufend - stream_.buffer;
   }
 }
 
-auto MadMp3Decoder::BeginStream(const cpp::span<const std::byte> input)
-    -> Result<OutputFormat> {
-  mad_stream_buffer(&stream_,
-                    reinterpret_cast<const unsigned char*>(input.data()),
-                    input.size_bytes());
-  // Whatever was last synthesized is now invalid, so ensure we don't try to
-  // send it.
-  current_sample_ = -1;
+auto MadMp3Decoder::OpenStream(std::shared_ptr<IStream> input)
+    -> cpp::result<OutputFormat, ICodec::Error> {
+  input_ = input;
 
   // To get the output format for MP3 streams, we simply need to decode the
   // first frame header.
   mad_header header;
   mad_header_init(&header);
-  while (mad_header_decode(&header, &stream_) < 0) {
-    if (MAD_RECOVERABLE(stream_.error)) {
-      // Recoverable errors are usually malformed parts of the stream.
-      // We can recover from them by just retrying the decode.
-      continue;
-    }
-    if (stream_.error == MAD_ERROR_BUFLEN) {
-      return {GetBytesUsed(input.size_bytes()), cpp::fail(Error::kOutOfInput)};
-    }
-    return {GetBytesUsed(input.size_bytes()), cpp::fail(Error::kMalformedData)};
+  bool eof = false;
+  bool got_header = false;
+  while (!eof && !got_header) {
+    eof = buffer_.Refill(input_.get());
+
+    buffer_.ConsumeBytes([&](cpp::span<std::byte> buf) -> size_t {
+      mad_stream_buffer(&stream_,
+                        reinterpret_cast<const unsigned char*>(buf.data()),
+                        buf.size_bytes());
+
+      while (mad_header_decode(&header, &stream_) < 0) {
+        if (MAD_RECOVERABLE(stream_.error)) {
+          // Recoverable errors are usually malformed parts of the stream.
+          // We can recover from them by just retrying the decode.
+          continue;
+        }
+        if (stream_.error == MAD_ERROR_BUFLEN) {
+          return GetBytesUsed();
+        }
+        eof = true;
+        return 0;
+      }
+
+      got_header = true;
+      return GetBytesUsed();
+    });
+  }
+
+  if (!got_header) {
+    return cpp::fail(ICodec::Error::kMalformedData);
   }
 
   uint8_t channels = MAD_NCHANNELS(&header);
   OutputFormat output{
       .num_channels = channels,
       .sample_rate_hz = header.samplerate,
-      .duration_seconds = {},
-      .bits_per_second = {},
   };
 
   auto vbr_length = GetVbrLength(header);
   if (vbr_length) {
     output.duration_seconds = vbr_length;
-  } else {
-    output.bits_per_second = header.bitrate;
   }
-
-  return {GetBytesUsed(input.size_bytes()), output};
+  return output;
 }
 
-auto MadMp3Decoder::ContinueStream(cpp::span<const std::byte> input,
-                                   cpp::span<sample::Sample> output)
-    -> Result<OutputInfo> {
-  std::size_t bytes_read = 0;
-  if (current_sample_ < 0) {
-    mad_stream_buffer(&stream_,
-                      reinterpret_cast<const unsigned char*>(input.data()),
-                      input.size());
-
-    // Decode the next frame. To signal errors, this returns -1 and
-    // stashes an error code in the stream structure.
-    while (mad_frame_decode(&frame_, &stream_) < 0) {
-      if (MAD_RECOVERABLE(stream_.error)) {
-        // Recoverable errors are usually malformed parts of the stream.
-        // We can recover from them by just retrying the decode.
-        continue;
-      }
-      if (stream_.error == MAD_ERROR_BUFLEN) {
-        // The decoder ran out of bytes before it completed a frame. We
-        // need to return back to the caller to give us more data.
-        return {GetBytesUsed(input.size_bytes()),
-                cpp::fail(Error::kOutOfInput)};
+auto MadMp3Decoder::DecodeTo(cpp::span<sample::Sample> output)
+    -> cpp::result<OutputInfo, Error> {
+  if (current_sample_ < 0 && !is_eos_) {
+    if (!is_eof_) {
+      is_eof_ = buffer_.Refill(input_.get());
+      if (is_eof_) {
+        buffer_.AddBytes([&](cpp::span<std::byte> buf) -> size_t {
+          if (buf.size() < MAD_BUFFER_GUARD) {
+            is_eof_ = false;  // No room for the guard yet; retry next call.
+            return 0;
+          }
+          ESP_LOGI(kTag, "adding MAD_BUFFER_GUARD");
+          std::fill_n(buf.begin(), MAD_BUFFER_GUARD, std::byte(0));
+          return MAD_BUFFER_GUARD;  // Zero padding appended after the last frame.
+        });
       }
-      // The error is unrecoverable. Give up.
-      return {GetBytesUsed(input.size_bytes()),
-              cpp::fail(Error::kMalformedData)};
     }
 
-    // We've successfully decoded a frame! Now synthesize samples to write out.
-    mad_synth_frame(&synth_, &frame_);
-    current_sample_ = 0;
-    bytes_read = GetBytesUsed(input.size_bytes());
+    buffer_.ConsumeBytes([&](cpp::span<std::byte> buf) -> size_t {
+      mad_stream_buffer(&stream_,
+                        reinterpret_cast<const unsigned char*>(buf.data()),
+                        buf.size());
+
+      // Decode the next frame. To signal errors, this returns -1 and
+      // stashes an error code in the stream structure.
+      while (mad_frame_decode(&frame_, &stream_) < 0) {
+        if (MAD_RECOVERABLE(stream_.error)) {
+          // Recoverable errors are usually malformed parts of the stream.
+          // We can recover from them by just retrying the decode.
+          continue;
+        }
+        if (stream_.error == MAD_ERROR_BUFLEN) {
+          if (is_eof_) {
+            ESP_LOGI(kTag, "BUFLEN while eof; this is eos");
+            is_eos_ = true;
+          }
+          return GetBytesUsed();
+        }
+        // The error is unrecoverable. Give up.
+        is_eof_ = true;
+        is_eos_ = true;
+        return 0;
+      }
+
+      // We've successfully decoded a frame! Now synthesize samples to write
+      // out.
+      mad_synth_frame(&synth_, &frame_);
+      current_sample_ = 0;
+      return GetBytesUsed();
+    });
   }
 
   size_t output_sample = 0;
-  while (current_sample_ < synth_.pcm.length) {
-    if (output_sample + synth_.pcm.channels >= output.size()) {
-      // We can't fit the next full frame into the buffer.
-      return {bytes_read, OutputInfo{.samples_written = output_sample,
-                                     .is_finished_writing = false}};
-    }
+  if (current_sample_ >= 0) {
+    while (current_sample_ < synth_.pcm.length) {
+      if (output_sample + synth_.pcm.channels >= output.size()) {
+        // We can't fit the next full frame into the buffer.
+        return OutputInfo{.samples_written = output_sample,
+                          .is_stream_finished = false};
+      }
 
-    for (int channel = 0; channel < synth_.pcm.channels; channel++) {
-      output[output_sample++] =
-          sample::FromMad(synth_.pcm.samples[channel][current_sample_]);
+      for (int channel = 0; channel < synth_.pcm.channels; channel++) {
+        output[output_sample++] =
+            sample::FromMad(synth_.pcm.samples[channel][current_sample_]);
+      }
+      current_sample_++;
     }
-    current_sample_++;
   }
 
   // We wrote everything! Reset, ready for the next frame.
   current_sample_ = -1;
-  return {bytes_read, OutputInfo{.samples_written = output_sample,
-                                 .is_finished_writing = true}};
+  return OutputInfo{.samples_written = output_sample,
+                    .is_stream_finished = is_eos_};
 }
 
-auto MadMp3Decoder::SeekStream(cpp::span<const std::byte> input,
-                               std::size_t target_sample) -> Result<void> {
-  mad_stream_buffer(&stream_,
-                    reinterpret_cast<const unsigned char*>(input.data()),
-                    input.size());
-  std::size_t current_sample = 0;
-  std::size_t samples_per_frame = 0;
-  while (true) {
-    current_sample += samples_per_frame;
-
-    // First, decode the header for this frame.
-    mad_header header;
-    mad_header_init(&header);
-    while (mad_header_decode(&header, &stream_) < 0) {
-      if (MAD_RECOVERABLE(stream_.error)) {
-        // Recoverable errors are usually malformed parts of the stream.
-        // We can recover from them by just retrying the decode.
-        continue;
-      } else {
-        // Don't bother checking for other errors; if the first part of the
-        // stream doesn't even contain a header then something's gone wrong.
-        return {GetBytesUsed(input.size_bytes()),
-                cpp::fail(Error::kMalformedData)};
-      }
-    }
-
-    // Calculate samples per frame if we haven't already.
-    if (samples_per_frame == 0) {
-      samples_per_frame = 32 * MAD_NSBSAMPLES(&header);
-    }
-
-    // Work out how close we are to the target.
-    std::size_t samples_to_go = target_sample - current_sample;
-    std::size_t frames_to_go = samples_to_go / samples_per_frame;
-    if (frames_to_go > 3) {
-      // The target is far in the distance. Keep skipping through headers only.
-      continue;
-    }
-
-    // The target is within the next few frames. We should decode these, as per
-    // the LAME FAQ (https://lame.sourceforge.io/tech-FAQ.txt):
-    // > The MP3 data for frame N is not stored in frame N, but can be spread
-    // > over several frames.  In a typical case, the data for frame N will
-    // > have 20% of it stored in frame N-1 and 80% stored in frame N.
-    while (mad_frame_decode(&frame_, &stream_) < 0) {
-      if (MAD_RECOVERABLE(stream_.error)) {
-        continue;
-      }
-      if (stream_.error == MAD_ERROR_BUFLEN) {
-        return {GetBytesUsed(input.size_bytes()),
-                cpp::fail(Error::kOutOfInput)};
-      }
-      // The error is unrecoverable. Give up.
-      return {GetBytesUsed(input.size_bytes()),
-              cpp::fail(Error::kMalformedData)};
-    }
-
-    if (frames_to_go <= 1) {
-      // The target is within the next couple of frames. We should start
-      // synthesizing a frame early because this guy says so:
-      // https://lists.mars.org/hyperkitty/list/mad-dev@lists.mars.org/message/UZSHXZTIZEF7FZ4KFOR65DUCKAY2OCUT/
-      mad_synth_frame(&synth_, &frame_);
-    }
-
-    if (frames_to_go == 0) {
-      // The target is actually within this frame! Set up for the ContinueStream
-      // call.
-      current_sample_ =
-          (target_sample > current_sample) ? target_sample - current_sample : 0;
-      return {GetBytesUsed(input.size_bytes()), {}};
-    }
-  }
+auto MadMp3Decoder::SeekTo(std::size_t target_sample)
+    -> cpp::result<void, Error> {
+  return {};
 }
 
 /*
diff --git a/src/codecs/opus.cpp b/src/codecs/opus.cpp
index a71c5fc0..70ec9e45 100644
--- a/src/codecs/opus.cpp
+++ b/src/codecs/opus.cpp
@@ -8,6 +8,7 @@
 
 #include <stdint.h>
 #include <sys/_stdint.h>
+#include <sys/unistd.h>
 
 #include <cstdint>
 #include <cstring>
@@ -27,23 +28,49 @@ namespace codecs {
 
 static constexpr char kTag[] = "opus";
 
-int read_cb(void* instance, unsigned char* ptr, int nbytes) {
-  XiphOpusDecoder* dec = reinterpret_cast<XiphOpusDecoder*>(instance);
-  auto input = dec->ReadCallback();
-  size_t amount_to_read = std::min<size_t>(nbytes, input.size_bytes());
-  std::memcpy(ptr, input.data(), amount_to_read);
-  dec->AfterReadCallback(amount_to_read);
-  return amount_to_read;
+static int read_cb(void* src, unsigned char* ptr, int nbytes) {
+  IStream* source = reinterpret_cast<IStream*>(src);
+  return source->Read(
+      {reinterpret_cast<std::byte*>(ptr), static_cast<size_t>(nbytes)});
+}
+
+static int seek_cb(void* src, int64_t offset, int whence) {
+  IStream* source = reinterpret_cast<IStream*>(src);
+  if (!source->CanSeek()) {
+    return -1;
+  }
+  IStream::SeekFrom from;
+  switch (whence) {
+    case SEEK_CUR:
+      from = IStream::SeekFrom::kCurrentPosition;
+      break;
+    case SEEK_END:
+      from = IStream::SeekFrom::kEndOfStream;
+      break;
+    case SEEK_SET:
+      from = IStream::SeekFrom::kStartOfStream;
+      break;
+    default:
+      return -1;
+  }
+  source->SeekTo(offset, from);
+  return 0;
+}
+
+static int64_t tell_cb(void* src) {
+  IStream* source = reinterpret_cast<IStream*>(src);
+  return source->CurrentPosition();
 }
 
 static const OpusFileCallbacks kCallbacks{
     .read = read_cb,
-    .seek = NULL,
-    .tell = NULL,  // Not seekable
+    .seek = seek_cb,
+    .tell = tell_cb,
     .close = NULL,
 };
 
-XiphOpusDecoder::XiphOpusDecoder() : opus_(nullptr) {}
+XiphOpusDecoder::XiphOpusDecoder()
+    : input_(nullptr), opus_(nullptr), num_channels_() {}
 
 XiphOpusDecoder::~XiphOpusDecoder() {
   if (opus_ != nullptr) {
@@ -51,12 +78,12 @@ XiphOpusDecoder::~XiphOpusDecoder() {
   }
 }
 
-auto XiphOpusDecoder::BeginStream(const cpp::span<const std::byte> input)
-    -> Result<OutputFormat> {
+auto XiphOpusDecoder::OpenStream(std::shared_ptr<IStream> input)
+    -> cpp::result<OutputFormat, Error> {
+  input_ = input;
+
   int res;
-  opus_ = op_open_callbacks(
-      this, &kCallbacks, reinterpret_cast<const unsigned char*>(input.data()),
-      input.size(), &res);
+  opus_ = op_open_callbacks(input.get(), &kCallbacks, nullptr, 0, &res);
 
   if (res < 0) {
     std::string err;
@@ -64,60 +91,72 @@ auto XiphOpusDecoder::BeginStream(const cpp::span<const std::byte> input)
       case OP_EREAD:
         err = "OP_EREAD";
         break;
+      case OP_EFAULT:
+        err = "OP_EFAULT";
+        break;
+      case OP_EIMPL:
+        err = "OP_EIMPL";
+        break;
+      case OP_EINVAL:
+        err = "OP_EINVAL";
+        break;
+      case OP_ENOTFORMAT:
+        err = "OP_ENOTFORMAT";
+        break;
+      case OP_EBADHEADER:
+        err = "OP_EBADHEADER";
+        break;
+      case OP_EVERSION:
+        err = "OP_EVERSION";
+        break;
+      case OP_EBADLINK:
+        err = "OP_EBADLINK";
+        break;
+      case OP_EBADTIMESTAMP:
+        err = "OP_EBADTIMESTAMP";
+        break;
       default:
         err = "unknown";
     }
     ESP_LOGE(kTag, "error beginning stream: %s", err.c_str());
-    return {input.size(), cpp::fail(Error::kMalformedData)};
+    return cpp::fail(Error::kMalformedData);
   }
 
-  return {input.size(), OutputFormat{
-                            .num_channels = 2,
-                            .sample_rate_hz = 48000,
-                        }};
+  num_channels_ = 2;  // op_read_stereo always yields interleaved stereo.
+
+  return OutputFormat{
+      .num_channels = num_channels_,
+      .sample_rate_hz = 48000,
+  };
 }
 
-auto XiphOpusDecoder::ContinueStream(cpp::span<const std::byte> input,
-                                     cpp::span<sample::Sample> output)
-    -> Result<OutputInfo> {
+auto XiphOpusDecoder::DecodeTo(cpp::span<sample::Sample> output)
+    -> cpp::result<OutputInfo, Error> {
   cpp::span<int16_t> staging_buffer{
       reinterpret_cast<int16_t*>(output.subspan(output.size() / 2).data()),
-      output.size_bytes() / 2};
+      output.size_bytes() / 2 / sizeof(int16_t)};  // int16 slots in back half
 
-  input_ = input;
-  pos_in_input_ = 0;
-
-  int bytes_written =
+  int samples_written =
       op_read_stereo(opus_, staging_buffer.data(), staging_buffer.size());
-  if (bytes_written < 0) {
-    ESP_LOGE(kTag, "read failed %i", bytes_written);
-    return {pos_in_input_, cpp::fail(Error::kMalformedData)};
-  } else if (bytes_written == 0) {
-    return {pos_in_input_, cpp::fail(Error::kOutOfInput)};
+  // op_read_stereo returns samples *per channel*, or a negative error code.
+  if (samples_written < 0) {
+    ESP_LOGE(kTag, "read failed %i", samples_written);
+    return cpp::fail(Error::kMalformedData);
   }
 
-  for (int i = 0; i < bytes_written / 2; i++) {
+  samples_written *= 2;  // Two interleaved values per stereo sample.
+  for (int i = 0; i < samples_written; i++) {
     output[i] = sample::FromSigned(staging_buffer[i], 16);
   }
 
-  return {pos_in_input_,
-          OutputInfo{
-              .samples_written = static_cast<size_t>(bytes_written / 2),
-              .is_finished_writing = bytes_written == 0,
-          }};
+  return OutputInfo{
+      .samples_written = static_cast<size_t>(samples_written),
+      .is_stream_finished = samples_written == 0,
+  };
 }
 
-auto XiphOpusDecoder::SeekStream(cpp::span<const std::byte> input,
-                                 std::size_t target_sample) -> Result<void> {
+auto XiphOpusDecoder::SeekTo(size_t target) -> cpp::result<void, Error> {
   return {};
 }
 
-auto XiphOpusDecoder::ReadCallback() -> cpp::span<const std::byte> {
-  return input_.subspan(pos_in_input_);
-}
-
-auto XiphOpusDecoder::AfterReadCallback(size_t bytes_read) -> void {
-  pos_in_input_ += bytes_read;
-}
-
 }  // namespace codecs
diff --git a/src/codecs/source_buffer.cpp b/src/codecs/source_buffer.cpp
new file mode 100644
index 00000000..5955523e
--- /dev/null
+++ b/src/codecs/source_buffer.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include "source_buffer.hpp"
+#include <sys/_stdint.h>
+
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <functional>
+
+#include "esp_heap_caps.h"
+#include "esp_log.h"
+
+#include "codec.hpp"
+
+namespace codecs {
+
+static constexpr char kTag[] = "dec_buf";
+// Capacity, in bytes, of the backing store owned by each SourceBuffer.
+static constexpr size_t kBufferSize = 1024 * 8;
+
+// Allocates the backing store from MALLOC_CAP_SPIRAM memory; starts empty.
+SourceBuffer::SourceBuffer()
+    : buffer_(reinterpret_cast<std::byte*>(
+                  heap_caps_malloc(kBufferSize, MALLOC_CAP_SPIRAM)),
+              kBufferSize),
+      bytes_in_buffer_(0),
+      offset_of_bytes_(0) {
+  assert(buffer_.data() != nullptr);
+}
+
+// Releases the backing store through the allocator family that created it.
+SourceBuffer::~SourceBuffer() {
+  heap_caps_free(buffer_.data());
+}
+
+// Tops the buffer up with a single Read() from `src`.
+//
+// Returns true only when that read produced zero bytes, which is treated as
+// end of stream. A buffer that is already full is left untouched and reported
+// as "not at end of stream" without reading.
+auto SourceBuffer::Refill(IStream* src) -> bool {
+  if (bytes_in_buffer_ == buffer_.size_bytes()) {
+    return false;
+  }
+  bool eof = false;
+  AddBytes([&](cpp::span<std::byte> buf) -> size_t {
+    // NOTE(review): assumes Read() never signals failure with a negative
+    // value; one would wrap to a huge size_t here. Confirm against IStream.
+    size_t bytes_read = src->Read(buf);
+    eof = bytes_read == 0;
+    return bytes_read;
+  });
+  return eof;
+}
+
+// Appends data to the buffer. `writer` is handed the free space at the tail
+// and must return how many bytes it stored there.
+auto SourceBuffer::AddBytes(std::function<size_t(cpp::span<std::byte>)> writer)
+    -> void {
+  // Slide any unread bytes to the front so the free space is one span.
+  if (offset_of_bytes_ > 0) {
+    std::memmove(buffer_.data(), buffer_.data() + offset_of_bytes_,
+                 bytes_in_buffer_);
+    offset_of_bytes_ = 0;
+  }
+  size_t added_bytes = std::invoke(writer, buffer_.subspan(bytes_in_buffer_));
+  assert(bytes_in_buffer_ + added_bytes <= buffer_.size_bytes());
+  bytes_in_buffer_ += added_bytes;
+}
+
+// Hands the unread bytes to `reader`, which must return how many of them it
+// used. Unused bytes stay buffered for the next call.
+auto SourceBuffer::ConsumeBytes(
+    std::function<size_t(cpp::span<std::byte>)> reader) -> void {
+  size_t bytes_consumed = std::invoke(
+      reader, buffer_.subspan(offset_of_bytes_).first(bytes_in_buffer_));
+  assert(bytes_consumed <= bytes_in_buffer_);
+
+  bytes_in_buffer_ -= bytes_consumed;
+  if (bytes_in_buffer_ == 0) {
+    // Nothing left: rewind so the next AddBytes() needs no memmove.
+    offset_of_bytes_ = 0;
+  } else {
+    offset_of_bytes_ += bytes_consumed;
+  }
+}
+
+}  // namespace codecs
diff --git a/src/codecs/vorbis.cpp b/src/codecs/vorbis.cpp
index 88ffbec4..6fa3256a 100644
--- a/src/codecs/vorbis.cpp
+++ b/src/codecs/vorbis.cpp
@@ -34,43 +34,59 @@ namespace codecs {
 
 static constexpr char kTag[] = "vorbis";
 
-size_t read_cb(void* ptr, size_t size, size_t nmemb, void* instance) {
-  TremorVorbisDecoder* dec = reinterpret_cast<TremorVorbisDecoder*>(instance);
-  auto input = dec->ReadCallback();
-  size_t amount_to_read = std::min<size_t>(size * nmemb, input.size_bytes());
-  std::memcpy(ptr, input.data(), amount_to_read);
-  dec->AfterReadCallback(amount_to_read);
-  return amount_to_read;
+static size_t read_cb(void* ptr, size_t size, size_t nmemb, void* instance) {
+  IStream* source = reinterpret_cast<IStream*>(instance);  // Tremor datasource
+  return source->Read({reinterpret_cast<std::byte*>(ptr), size * nmemb});
 }
 
-int seek_cb(void* instance, ogg_int64_t offset, int whence) {
-  // Seeking is handled separately.
-  return -1;
+static int seek_cb(void* instance, ogg_int64_t offset, int whence) {
+  IStream* source = reinterpret_cast<IStream*>(instance);  // Tremor datasource
+  if (!source->CanSeek()) {
+    return -1;  // stream cannot seek
+  }
+  IStream::SeekFrom from;  // assigned on every path that reaches SeekTo()
+  switch (whence) {  // map the stdio-style origin onto IStream::SeekFrom
+    case SEEK_CUR:
+      from = IStream::SeekFrom::kCurrentPosition;
+      break;
+    case SEEK_END:
+      from = IStream::SeekFrom::kEndOfStream;
+      break;
+    case SEEK_SET:
+      from = IStream::SeekFrom::kStartOfStream;
+      break;
+    default:
+      return -1;  // unrecognised origin
+  }
+  source->SeekTo(offset, from);  // NOTE(review): outcome is not checked
+  return 0;
 }
 
-int close_cb(void* instance) {
+static int close_cb(void* src) {  // No-op: nothing is closed or freed here.
   return 0;
 }
 
+static long tell_cb(void* src) {  // Reports the IStream's current position.
+  IStream* source = reinterpret_cast<IStream*>(src);
+  return source->CurrentPosition();
+}
+
 static const ov_callbacks kCallbacks{
     .read_func = read_cb,
     .seek_func = seek_cb,
     .close_func = close_cb,
-    .tell_func = NULL,  // Not seekable
+    .tell_func = tell_cb,  // Reports IStream::CurrentPosition()
 };
 
-TremorVorbisDecoder::TremorVorbisDecoder()
-    : vorbis_(), input_(), pos_in_input_(0) {}
+TremorVorbisDecoder::TremorVorbisDecoder() : input_(), vorbis_() {}  // members value-initialised
 
 TremorVorbisDecoder::~TremorVorbisDecoder() {
   ov_clear(&vorbis_);
 }
 
-auto TremorVorbisDecoder::BeginStream(const cpp::span<const std::byte> input)
-    -> Result<OutputFormat> {
-  int res = ov_open_callbacks(this, &vorbis_,
-                              reinterpret_cast<const char*>(input.data()),
-                              input.size(), kCallbacks);
+auto TremorVorbisDecoder::OpenStream(std::shared_ptr<IStream> input)
+    -> cpp::result<OutputFormat, Error> {
+  int res = ov_open_callbacks(input.get(), &vorbis_, NULL, 0, kCallbacks);
   if (res < 0) {
     std::string err;
     switch (res) {
@@ -93,70 +109,51 @@ auto TremorVorbisDecoder::BeginStream(const cpp::span<const std::byte> input)
         err = "unknown";
     }
     ESP_LOGE(kTag, "error beginning stream: %s", err.c_str());
-    return {input.size(), cpp::fail(Error::kMalformedData)};
+    return cpp::fail(Error::kMalformedData);
   }
 
   vorbis_info* info = ov_info(&vorbis_, -1);
   if (info == NULL) {
     ESP_LOGE(kTag, "failed to get stream info");
-    return {input.size(), cpp::fail(Error::kMalformedData)};
+    return cpp::fail(Error::kMalformedData);
   }
 
-  return {input.size(),
-          OutputFormat{
-              .num_channels = static_cast<uint8_t>(info->channels),
-              .sample_rate_hz = static_cast<uint32_t>(info->rate),
-              .bits_per_second = info->bitrate_nominal,
-          }};
+  return OutputFormat{
+      .num_channels = static_cast<uint8_t>(info->channels),
+      .sample_rate_hz = static_cast<uint32_t>(info->rate),
+  };
 }
 
-auto TremorVorbisDecoder::ContinueStream(cpp::span<const std::byte> input,
-                                         cpp::span<sample::Sample> output)
-    -> Result<OutputInfo> {
+// Decodes the next block of 16-bit PCM from Tremor into `output`.
+auto TremorVorbisDecoder::DecodeTo(cpp::span<sample::Sample> output)
+    -> cpp::result<OutputInfo, Error> {
+  // 16-bit PCM is staged in the upper half of `output` and converted in place,
+  // so the staging span may cover no more than the bytes of that half.
   cpp::span<int16_t> staging_buffer{
       reinterpret_cast<int16_t*>(output.subspan(output.size() / 2).data()),
-      output.size_bytes() / 2};
+      output.subspan(output.size() / 2).size_bytes() / sizeof(int16_t)};
 
-  input_ = input;
-  pos_in_input_ = 0;
-
   int bitstream;
   long bytes_written =
       ov_read(&vorbis_, reinterpret_cast<char*>(staging_buffer.data()),
               staging_buffer.size_bytes(), &bitstream);
-  if (bytes_written == OV_HOLE) {
-    ESP_LOGE(kTag, "got OV_HOLE");
-    return {pos_in_input_, cpp::fail(Error::kMalformedData)};
-  } else if (bytes_written == OV_EBADLINK) {
-    ESP_LOGE(kTag, "got OV_EBADLINK");
-    return {pos_in_input_, cpp::fail(Error::kMalformedData)};
-  } else if (bytes_written == 0) {
-    return {pos_in_input_, cpp::fail(Error::kOutOfInput)};
+  if (bytes_written < 0) {
+    // Every negative ov_read() result is an error. Unchecked, it would wrap
+    // to a huge sample count in the size_t conversion below.
+    ESP_LOGE(kTag, "ov_read failed %li", bytes_written);
+    return cpp::fail(Error::kMalformedData);
   }
 
   for (int i = 0; i < bytes_written / 2; i++) {
     output[i] = sample::FromSigned(staging_buffer[i], 16);
   }
 
-  return {pos_in_input_,
-          OutputInfo{
-              .samples_written = static_cast<size_t>(bytes_written / 2),
-              .is_finished_writing = bytes_written == 0,
-          }};
+  return OutputInfo{.samples_written = static_cast<size_t>(bytes_written / 2),
+                    .is_stream_finished = bytes_written == 0};
 }
 
-auto TremorVorbisDecoder::SeekStream(cpp::span<const std::byte> input,
-                                     std::size_t target_sample)
-    -> Result<void> {
+auto TremorVorbisDecoder::SeekTo(size_t target) -> cpp::result<void, Error> {  // Stub: no seek is performed.
   return {};
 }
 
-auto TremorVorbisDecoder::ReadCallback() -> cpp::span<const std::byte> {
-  return input_.subspan(pos_in_input_);
-}
-
-auto TremorVorbisDecoder::AfterReadCallback(size_t bytes_read) -> void {
-  pos_in_input_ += bytes_read;
-}
-
 }  // namespace codecs