summaryrefslogtreecommitdiff
path: root/src/audio/audio_task.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/audio/audio_task.cpp')
-rw-r--r--src/audio/audio_task.cpp388
1 files changed, 205 insertions, 183 deletions
diff --git a/src/audio/audio_task.cpp b/src/audio/audio_task.cpp
index babe6849..dbe5d50e 100644
--- a/src/audio/audio_task.cpp
+++ b/src/audio/audio_task.cpp
@@ -9,23 +9,29 @@
#include <stdlib.h>
#include <algorithm>
+#include <cmath>
#include <cstddef>
#include <cstdint>
+#include <cstring>
#include <deque>
#include <memory>
#include <variant>
+#include "audio_decoder.hpp"
#include "audio_events.hpp"
#include "audio_fsm.hpp"
#include "audio_sink.hpp"
#include "cbor.h"
+#include "codec.hpp"
#include "esp_err.h"
#include "esp_heap_caps.h"
#include "esp_log.h"
#include "event_queue.hpp"
+#include "fatfs_audio_input.hpp"
#include "freertos/portmacro.h"
#include "freertos/projdefs.h"
#include "freertos/queue.h"
+#include "freertos/ringbuf.h"
#include "pipeline.hpp"
#include "span.hpp"
@@ -41,193 +47,209 @@
namespace audio {
-namespace task {
-
-static const char* kTag = "task";
-
-// The default amount of time to wait between pipeline iterations for a single
-// track.
-static constexpr uint_fast16_t kDefaultDelayTicks = pdMS_TO_TICKS(5);
-static constexpr uint_fast16_t kMaxDelayTicks = pdMS_TO_TICKS(10);
-static constexpr uint_fast16_t kMinDelayTicks = pdMS_TO_TICKS(1);
-
-void AudioTaskMain(std::unique_ptr<Pipeline> pipeline, IAudioSink* sink) {
- // The stream format for bytes currently in the sink buffer.
- std::optional<StreamInfo::Format> output_format;
-
- // How long to wait between pipeline iterations. This is reset for each track,
- // and readjusted on the fly to maintain a reasonable amount playback buffer.
- // Buffering too much will mean we process samples inefficiently, wasting CPU
- // time, whilst buffering too little will affect the quality of the output.
- uint_fast16_t delay_ticks = kDefaultDelayTicks;
-
- std::vector<Pipeline*> all_elements = pipeline->GetIterationOrder();
-
- float current_sample_in_second = 0;
- uint32_t previous_second = 0;
- uint32_t current_second = 0;
-
- bool previously_had_work = false;
- events::EventQueue& event_queue = events::EventQueue::GetInstance();
- while (1) {
- // First, see if we actually have any pipeline work to do in this iteration.
- bool has_work = false;
- // We always have work to do if there's still bytes to be sunk.
- has_work = all_elements.back()->OutStream().info->bytes_in_stream > 0;
- if (!has_work) {
- for (Pipeline* p : all_elements) {
- has_work = p->OutputElement()->NeedsToProcess();
- if (has_work) {
- break;
- }
- }
- }
-
- if (!has_work) {
- has_work = !xStreamBufferIsEmpty(sink->buffer());
- }
-
- if (previously_had_work && !has_work) {
- events::Dispatch<internal::AudioPipelineIdle, AudioState>({});
- }
- previously_had_work = has_work;
-
- // See if there's any new events.
- event_queue.ServiceAudio(has_work ? delay_ticks : portMAX_DELAY);
-
- if (!has_work) {
- // See if we've been given work by this event.
- for (Pipeline* p : all_elements) {
- has_work = p->OutputElement()->NeedsToProcess();
- if (has_work) {
- delay_ticks = kDefaultDelayTicks;
- break;
- }
- }
- if (!has_work) {
- continue;
- }
- }
-
- // We have work to do! Allow each element in the pipeline to process one
- // chunk. We iterate from input nodes first, so this should result in
- // samples in the output buffer.
-
- for (int i = 0; i < all_elements.size(); i++) {
- std::vector<RawStream> raw_in_streams;
- all_elements.at(i)->InStreams(&raw_in_streams);
- RawStream raw_out_stream = all_elements.at(i)->OutStream();
-
- // Crop the input and output streams to the ranges that are safe to
- // touch. For the input streams, this is the region that contains
- // data. For the output stream, this is the region that does *not*
- // already contain data.
- std::vector<InputStream> in_streams;
- std::for_each(raw_in_streams.begin(), raw_in_streams.end(),
- [&](RawStream& s) { in_streams.emplace_back(&s); });
- OutputStream out_stream(&raw_out_stream);
-
- all_elements.at(i)->OutputElement()->Process(in_streams, &out_stream);
- }
-
- RawStream raw_sink_stream = all_elements.back()->OutStream();
- InputStream sink_stream(&raw_sink_stream);
-
- if (sink_stream.info().bytes_in_stream == 0) {
- if (sink_stream.is_producer_finished()) {
- sink_stream.mark_consumer_finished();
-
- if (current_second > 0 || current_sample_in_second > 0) {
- events::Dispatch<internal::InputFileFinished, AudioState>({});
- }
-
- current_second = 0;
- previous_second = 0;
- current_sample_in_second = 0;
- } else {
- // The user is probably about to hear a skip :(
- ESP_LOGW(kTag, "!! audio sink is underbuffered !!");
- }
- // No new bytes to sink, so skip sinking completely.
- continue;
- }
-
- if (!output_format || output_format != sink_stream.info().format) {
- // The format of the stream within the sink stream has changed. We
- // need to reconfigure the sink, but shouldn't do so until we've fully
- // drained the current buffer.
- if (xStreamBufferIsEmpty(sink->buffer())) {
- ESP_LOGI(kTag, "reconfiguring dac");
- output_format = sink_stream.info().format;
- sink->Configure(*output_format);
- } else {
- ESP_LOGI(kTag, "waiting to reconfigure");
- continue;
- }
- }
-
- // We've reconfigured the sink, or it was already configured correctly.
- // Send through some data.
- std::size_t bytes_sunk =
- xStreamBufferSend(sink->buffer(), sink_stream.data().data(),
- sink_stream.data().size_bytes(), 0);
-
- if (std::holds_alternative<StreamInfo::Pcm>(*output_format)) {
- StreamInfo::Pcm pcm = std::get<StreamInfo::Pcm>(*output_format);
-
- float samples_sunk = bytes_sunk;
- samples_sunk /= pcm.channels;
-
- // Samples must be aligned to 16 bits. The number of actual bytes per
- // sample is therefore the bps divided by 16, rounded up (align to word),
- // times two (convert to bytes).
- uint8_t bytes_per_sample = ((pcm.bits_per_sample + 16 - 1) / 16) * 2;
- samples_sunk /= bytes_per_sample;
-
- current_sample_in_second += samples_sunk;
- while (current_sample_in_second >= pcm.sample_rate) {
- current_second++;
- current_sample_in_second -= pcm.sample_rate;
- }
- if (previous_second != current_second) {
- events::Dispatch<PlaybackUpdate, AudioState, ui::UiState>({
- .seconds_elapsed = current_second,
- .seconds_total =
- sink_stream.info().duration_seconds.value_or(current_second),
- });
- }
- previous_second = current_second;
- }
-
- // Adjust how long we wait for the next iteration if we're getting too far
- // ahead or behind.
- float sunk_percent = static_cast<float>(bytes_sunk) /
- static_cast<float>(sink_stream.info().bytes_in_stream);
-
- if (sunk_percent > 0.66f) {
- // We're sinking a lot of the output buffer per iteration, so we need to
- // be running faster.
- delay_ticks--;
- } else if (sunk_percent < 0.33f) {
- // We're not sinking much of the output buffer per iteration, so we can
- // slow down to save some cycles.
- delay_ticks++;
- }
- delay_ticks = std::clamp(delay_ticks, kMinDelayTicks, kMaxDelayTicks);
-
- // Finally, actually mark the bytes we sunk as consumed.
- if (bytes_sunk > 0) {
- sink_stream.consume(bytes_sunk);
- }
+static const char* kTag = "audio_dec";
+
+static constexpr std::size_t kSampleBufferSize = 16 * 1024;
+
+Timer::Timer(StreamInfo::Pcm format)
+ : format_(format),
+ last_seconds_(0),
+ total_duration_seconds_(0),
+ current_seconds_(0) {}
+
+auto Timer::SetLengthSeconds(uint32_t len) -> void {
+ total_duration_seconds_ = len;
+}
+
+auto Timer::SetLengthBytes(uint32_t len) -> void {
+ total_duration_seconds_ = 0;
+}
+
+auto Timer::AddBytes(std::size_t bytes) -> void {
+ float samples_sunk = bytes;
+ samples_sunk /= format_.channels;
+
+ // Samples must be aligned to 16 bits. The number of actual bytes per
+ // sample is therefore the bps divided by 16, rounded up (align to word),
+ // times two (convert to bytes).
+ uint8_t bytes_per_sample = ((format_.bits_per_sample + 16 - 1) / 16) * 2;
+ samples_sunk /= bytes_per_sample;
+
+ current_seconds_ += samples_sunk / format_.sample_rate;
+
+ uint32_t rounded = std::round(current_seconds_);
+ if (rounded != last_seconds_) {
+ last_seconds_ = rounded;
+ events::Dispatch<PlaybackUpdate, AudioState, ui::UiState>(PlaybackUpdate{
+ .seconds_elapsed = rounded,
+ .seconds_total =
+ total_duration_seconds_ == 0 ? rounded : total_duration_seconds_});
}
}
-auto StartPipeline(Pipeline* pipeline, IAudioSink* sink) -> void {
- ESP_LOGI(kTag, "starting audio pipeline task");
- tasks::StartPersistent<tasks::Type::kAudio>(
- [=]() { AudioTaskMain(std::unique_ptr<Pipeline>(pipeline), sink); });
+auto AudioTask::Start(IAudioSource* source, IAudioSink* sink) -> AudioTask* {
+ AudioTask* task = new AudioTask(source, sink);
+ tasks::StartPersistent<tasks::Type::kAudio>([=]() { task->Main(); });
+ return task;
}
-} // namespace task
+AudioTask::AudioTask(IAudioSource* source, IAudioSink* sink)
+ : source_(source),
+ sink_(sink),
+ codec_(),
+ timer_(),
+ is_new_stream_(false),
+ current_input_format_(),
+ current_output_format_(),
+ sample_buffer_(reinterpret_cast<std::byte*>(
+ heap_caps_malloc(kSampleBufferSize,
+ MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT))),
+ sample_buffer_len_(kSampleBufferSize) {}
+
+void AudioTask::Main() {
+ for (;;) {
+ source_->Read(
+ [this](StreamInfo::Format format) -> bool {
+ if (current_input_format_ && format == *current_input_format_) {
+ // This is the continuation of previous data. We can handle it if
+ // we are able to decode it, or if it doesn't need decoding.
+ return current_output_format_ == format || codec_ != nullptr;
+ }
+ // This must be a new stream of data. Reset everything to prepare to
+ // handle it.
+ current_input_format_ = format;
+ is_new_stream_ = true;
+ codec_.reset();
+ timer_.reset();
+
+ // What kind of data does this new stream contain?
+ if (std::holds_alternative<StreamInfo::Pcm>(format)) {
+ // It's already decoded! We can handle this immediately if it
+ // matches what we're currently sending to the sink. Otherwise, we
+ // will need to wait for the sink to drain before we can reconfigure
+ // it.
+ if (current_output_format_ && format == *current_output_format_) {
+ return true;
+ } else if (xStreamBufferIsEmpty(sink_->stream())) {
+ return true;
+ } else {
+ return false;
+ }
+ } else if (std::holds_alternative<StreamInfo::Encoded>(format)) {
+ // The stream has some kind of encoding. Whether or not we can
+ // handle it is entirely down to whether or not we have a codec for
+ // it.
+ auto encoding = std::get<StreamInfo::Encoded>(format);
+ auto codec = codecs::CreateCodecForType(encoding.type);
+ if (codec) {
+ ESP_LOGI(kTag, "successfully created codec for stream");
+ codec_.reset(*codec);
+ return true;
+ } else {
+ ESP_LOGE(kTag, "stream has unknown encoding");
+ return false;
+ }
+ } else {
+ // programmer error / skill issue :(
+ ESP_LOGE(kTag, "stream has unknown format");
+ current_input_format_ = format;
+ return false;
+ }
+ },
+ [this](cpp::span<const std::byte> bytes) -> size_t {
+ // PCM streams are simple, so handle them first.
+ if (std::holds_alternative<StreamInfo::Pcm>(*current_input_format_)) {
+ // First we need to reconfigure the sink for this sample format.
+ // TODO(jacqueline): We should verify whether or not the sink can
+ // actually deal with this format first.
+ if (current_input_format_ != current_output_format_) {
+ current_output_format_ = current_input_format_;
+ sink_->Configure(*current_output_format_);
+ timer_.reset(new Timer(
+ std::get<StreamInfo::Pcm>(*current_output_format_)));
+ }
+ // Stream the raw samples directly to the sink.
+ xStreamBufferSend(sink_->stream(), bytes.data(), bytes.size_bytes(),
+ portMAX_DELAY);
+ timer_->AddBytes(bytes.size_bytes());
+ return bytes.size_bytes();
+ }
+ // Else, assume it's an encoded stream.
+
+ size_t bytes_used = 0;
+ if (is_new_stream_) {
+ // This is a new stream! First order of business is verifying that
+ // we can indeed decode it.
+ auto res = codec_->BeginStream(bytes);
+ bytes_used += res.first;
+
+ if (res.second.has_error()) {
+ if (res.second.error() != codecs::ICodec::Error::kOutOfInput) {
+ // Decoding the header failed, so we can't actually deal with
+ // this stream after all. It could be malformed.
+ ESP_LOGE(kTag, "error beginning stream");
+ codec_.reset();
+ }
+ return bytes_used;
+ }
+ is_new_stream_ = false;
+
+ codecs::ICodec::OutputFormat format = res.second.value();
+ StreamInfo::Pcm pcm{
+ .channels = format.num_channels,
+ .bits_per_sample = format.bits_per_sample,
+ .sample_rate = format.sample_rate_hz,
+ };
+ StreamInfo::Format new_format{pcm};
+ timer_.reset(new Timer{pcm});
+ if (format.duration_seconds) {
+ timer_->SetLengthSeconds(*format.duration_seconds);
+ }
+
+ // Now that we have the output format for decoded samples from this
+ // stream, we need to see if they are compatible with what's already
+ // in the sink stream.
+ if (new_format != current_output_format_) {
+ // The new format is different to the old one. Wait for the sink
+ // to drain before continuing.
+ while (!xStreamBufferIsEmpty(sink_->stream())) {
+ ESP_LOGI(kTag, "waiting for sink stream to drain...");
+ // TODO(jacqueline): Get the sink drain ISR to notify us of this
+ // via semaphore instead of busy-ish waiting.
+ vTaskDelay(pdMS_TO_TICKS(100));
+ }
+ }
+
+ ESP_LOGI(kTag, "configuring sink");
+ current_output_format_ = new_format;
+ sink_->Configure(new_format);
+ timer_.reset(
+ new Timer(std::get<StreamInfo::Pcm>(*current_output_format_)));
+ }
+
+ // At this point the decoder has been initialised, and the sink has
+ // been correctly configured. All that remains is to throw samples
+ // into the sink as fast as possible.
+ while (bytes_used < bytes.size_bytes()) {
+ auto res =
+ codec_->ContinueStream(bytes.subspan(bytes_used),
+ {sample_buffer_, sample_buffer_len_});
+
+ bytes_used += res.first;
+
+ if (res.second.has_error()) {
+ return bytes_used;
+ } else {
+ xStreamBufferSend(sink_->stream(), sample_buffer_,
+ res.second->bytes_written, portMAX_DELAY);
+ timer_->AddBytes(res.second->bytes_written);
+ }
+ }
+
+ return bytes_used;
+ },
+ portMAX_DELAY);
+ }
+}
} // namespace audio