summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/audio/audio_decoder.cpp125
-rw-r--r--src/audio/audio_task.cpp2
-rw-r--r--src/audio/fatfs_audio_input.cpp8
-rw-r--r--src/audio/include/audio_decoder.hpp1
-rw-r--r--src/audio/include/stream_info.hpp4
-rw-r--r--src/codecs/CMakeLists.txt2
-rw-r--r--src/codecs/codec.cpp7
-rw-r--r--src/codecs/foxenflac.cpp80
-rw-r--r--src/codecs/include/codec.hpp60
-rw-r--r--src/codecs/include/foxenflac.hpp38
-rw-r--r--src/codecs/include/mad.hpp24
-rw-r--r--src/codecs/include/stbvorbis.hpp42
-rw-r--r--src/codecs/include/types.hpp2
-rw-r--r--src/codecs/mad.cpp179
-rw-r--r--src/codecs/stbvorbis.cpp128
-rw-r--r--src/database/tag_parser.cpp10
-rw-r--r--src/tasks/tasks.cpp2
17 files changed, 577 insertions, 137 deletions
diff --git a/src/audio/audio_decoder.cpp b/src/audio/audio_decoder.cpp
index eb19b75f..310f5740 100644
--- a/src/audio/audio_decoder.cpp
+++ b/src/audio/audio_decoder.cpp
@@ -14,6 +14,7 @@
#include <memory>
#include <variant>
+#include "codec.hpp"
#include "freertos/FreeRTOS.h"
#include "esp_heap_caps.h"
@@ -50,6 +51,9 @@ auto AudioDecoder::ProcessStreamInfo(const StreamInfo& info) -> bool {
// Reuse the existing codec if we can. This will help with gapless playback,
// since we can potentially just continue to decode as we were before,
// without any setup overhead.
+ // TODO(jacqueline): Reconsider this. It makes it a lot harder to smash
+ // streams together at this layer.
+ /*
if (current_codec_ != nullptr && current_input_format_) {
auto cur_encoding = std::get<StreamInfo::Encoded>(*current_input_format_);
if (cur_encoding.type == encoded.type) {
@@ -58,6 +62,7 @@ auto AudioDecoder::ProcessStreamInfo(const StreamInfo& info) -> bool {
return true;
}
}
+ */
current_input_format_ = info.format;
ESP_LOGI(kTag, "creating new decoder");
@@ -80,68 +85,94 @@ auto AudioDecoder::Process(const std::vector<InputStream>& inputs,
OutputStream* output) -> void {
auto input = inputs.begin();
const StreamInfo& info = input->info();
- if (std::holds_alternative<std::monostate>(info.format) ||
- info.bytes_in_stream == 0) {
- // TODO(jacqueline): should we clear the stream format?
- // output->prepare({});
- return;
- }
+ // Check the input stream's format has changed (or, by extension, if this is
+ // the first stream).
if (!current_input_format_ || *current_input_format_ != info.format) {
- // The input stream has changed! Immediately throw everything away and
- // start from scratch.
+ ESP_LOGI(kTag, "beginning new stream");
has_samples_to_send_ = false;
ProcessStreamInfo(info);
+ auto res = current_codec_->BeginStream(input->data());
+ input->consume(res.first);
+ if (res.second.has_error()) {
+ // TODO(jacqueline): Handle errors.
+ return;
+ }
+
+ // The stream started successfully. Record what format the samples are in.
+ codecs::ICodec::OutputFormat format = res.second.value();
+ current_output_format_ = StreamInfo::Pcm{
+ .channels = format.num_channels,
+ .bits_per_sample = format.bits_per_sample,
+ .sample_rate = format.sample_rate_hz,
+ };
+
+ if (info.seek_to_seconds) {
+ seek_to_sample_ = *info.seek_to_seconds * format.sample_rate_hz;
+ } else {
+ seek_to_sample_.reset();
+ }
}
- current_codec_->SetInput(input->data());
+ while (seek_to_sample_) {
+ ESP_LOGI(kTag, "seeking forwards...");
+ auto res = current_codec_->SeekStream(input->data(), *seek_to_sample_);
+ input->consume(res.first);
+ if (res.second.has_error()) {
+ auto err = res.second.error();
+ if (err == codecs::ICodec::Error::kOutOfInput) {
+ return;
+ } else {
+ // TODO(jacqueline): Handle errors.
+ seek_to_sample_.reset();
+ }
+ } else {
+ seek_to_sample_.reset();
+ }
+ }
+ has_input_remaining_ = true;
while (true) {
- if (has_samples_to_send_) {
- auto format = current_codec_->GetOutputFormat();
- if (format.has_value()) {
- current_output_format_ = StreamInfo::Pcm{
- .channels = format->num_channels,
- .bits_per_sample = format->bits_per_sample,
- .sample_rate = format->sample_rate_hz,
- };
-
- if (!output->prepare(*current_output_format_)) {
- break;
- }
-
- auto write_res = current_codec_->WriteOutputSamples(output->data());
- output->add(write_res.first);
- has_samples_to_send_ = !write_res.second;
-
- if (has_samples_to_send_) {
- // We weren't able to fit all the generated samples into the output
- // buffer. Stop trying; we'll finish up during the next pass.
- break;
- }
- }
+ // TODO(jacqueline): Pass through seek info here?
+ if (!output->prepare(*current_output_format_)) {
+ ESP_LOGI(kTag, "waiting for buffer to become free");
+ break;
}
- auto res = current_codec_->ProcessNextFrame();
- if (res.has_error()) {
- // TODO(jacqueline): Handle errors.
+ auto res = current_codec_->ContinueStream(input->data(), output->data());
+ input->consume(res.first);
+ if (res.second.has_error()) {
+ if (res.second.error() == codecs::ICodec::Error::kOutOfInput) {
+ ESP_LOGW(kTag, "out of input");
+ ESP_LOGW(kTag, "(%u bytes left)", input->data().size_bytes());
+ has_input_remaining_ = false;
+ // We can't be halfway through sending samples if the codec is asking
+ // for more input.
+ has_samples_to_send_ = false;
+ input->mark_incomplete();
+ } else {
+ // TODO(jacqueline): Handle errors.
+ ESP_LOGE(kTag, "codec return fatal error");
+ }
return;
}
- has_input_remaining_ = !res.value();
- if (!has_input_remaining_) {
- // We're out of useable data in this buffer. Finish immediately; there's
- // nothing to send.
- input->mark_incomplete();
- break;
- } else {
- has_samples_to_send_ = true;
+ ESP_LOGI(kTag, "enc read: %u", res.first);
+
+ codecs::ICodec::OutputInfo out_info = res.second.value();
+ output->add(out_info.bytes_written);
+ has_samples_to_send_ = !out_info.is_finished_writing;
+
+ ESP_LOGI(kTag, "enc wrote: %u", out_info.bytes_written);
+ if (out_info.is_finished_writing) {
+ ESP_LOGI(kTag, "(write finished)");
}
- }
- std::size_t pos = current_codec_->GetInputPosition();
- if (pos > 0) {
- input->consume(pos - 1);
+ if (has_samples_to_send_) {
+ // We weren't able to fit all the generated samples into the output
+ // buffer. Stop trying; we'll finish up during the next pass.
+ break;
+ }
}
}
diff --git a/src/audio/audio_task.cpp b/src/audio/audio_task.cpp
index 9dd7d994..eea84e45 100644
--- a/src/audio/audio_task.cpp
+++ b/src/audio/audio_task.cpp
@@ -126,7 +126,7 @@ void AudioTaskMain(std::unique_ptr<Pipeline> pipeline, IAudioSink* sink) {
if (sink_stream.info().bytes_in_stream == 0) {
// No new bytes to sink, so skip sinking completely.
- ESP_LOGI(kTag, "no bytes to sink");
+ ESP_LOGW(kTag, "no bytes to sink");
continue;
}
diff --git a/src/audio/fatfs_audio_input.cpp b/src/audio/fatfs_audio_input.cpp
index a89858ca..eaa62ee3 100644
--- a/src/audio/fatfs_audio_input.cpp
+++ b/src/audio/fatfs_audio_input.cpp
@@ -56,11 +56,13 @@ auto FatfsAudioInput::OpenFile(const std::string& path) -> bool {
database::SongTags tags;
if (!tag_parser.ReadAndParseTags(path, &tags)) {
ESP_LOGE(kTag, "failed to read tags");
- return false;
+ tags.encoding = database::Encoding::kFlac;
+ // return false;
}
auto stream_type = ContainerToStreamType(tags.encoding);
if (!stream_type.has_value()) {
+ ESP_LOGE(kTag, "couldn't match container to stream");
return false;
}
@@ -144,8 +146,8 @@ auto FatfsAudioInput::ContainerToStreamType(database::Encoding enc)
return codecs::StreamType::kPcm;
case database::Encoding::kFlac:
return codecs::StreamType::kFlac;
- case database::Encoding::kOgg:
- return codecs::StreamType::kOgg;
+ case database::Encoding::kOgg: // Misnamed; this is Ogg Vorbis.
+ return codecs::StreamType::kVorbis;
case database::Encoding::kUnsupported:
default:
return {};
diff --git a/src/audio/include/audio_decoder.hpp b/src/audio/include/audio_decoder.hpp
index 3cda0305..4e7e127e 100644
--- a/src/audio/include/audio_decoder.hpp
+++ b/src/audio/include/audio_decoder.hpp
@@ -42,6 +42,7 @@ class AudioDecoder : public IAudioElement {
std::unique_ptr<codecs::ICodec> current_codec_;
std::optional<StreamInfo::Format> current_input_format_;
std::optional<StreamInfo::Format> current_output_format_;
+ std::optional<std::size_t> seek_to_sample_;
bool has_samples_to_send_;
bool has_input_remaining_;
diff --git a/src/audio/include/stream_info.hpp b/src/audio/include/stream_info.hpp
index 91b2f085..54b87003 100644
--- a/src/audio/include/stream_info.hpp
+++ b/src/audio/include/stream_info.hpp
@@ -6,6 +6,7 @@
#pragma once
+#include <stdint.h>
#include <cstdint>
#include <optional>
#include <string>
@@ -30,6 +31,9 @@ struct StreamInfo {
// generated audio, etc.)
std::optional<std::size_t> length_bytes{};
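+ // If set, the offset (in seconds) that the decoder should seek to before it
+ // starts producing samples for this stream.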
+ std::optional<uint32_t> seek_to_seconds{};
+
struct Encoded {
// The codec that this stream is associated with.
codecs::StreamType type;
diff --git a/src/codecs/CMakeLists.txt b/src/codecs/CMakeLists.txt
index cdf9c99d..478d4d3f 100644
--- a/src/codecs/CMakeLists.txt
+++ b/src/codecs/CMakeLists.txt
@@ -3,7 +3,7 @@
# SPDX-License-Identifier: GPL-3.0-only
idf_component_register(
- SRCS "codec.cpp" "mad.cpp"
+ SRCS "codec.cpp" "mad.cpp" "foxenflac.cpp" "stbvorbis.cpp"
INCLUDE_DIRS "include"
REQUIRES "result" "span" "libmad" "libfoxenflac" "stb_vorbis")
diff --git a/src/codecs/codec.cpp b/src/codecs/codec.cpp
index 73bc9032..e23b8702 100644
--- a/src/codecs/codec.cpp
+++ b/src/codecs/codec.cpp
@@ -8,7 +8,10 @@
#include <memory>
#include <optional>
+
+#include "foxenflac.hpp"
#include "mad.hpp"
+#include "stbvorbis.hpp"
#include "types.hpp"
namespace codecs {
@@ -17,6 +20,10 @@ auto CreateCodecForType(StreamType type) -> std::optional<ICodec*> {
switch (type) {
case StreamType::kMp3:
return new MadMp3Decoder();
+ case StreamType::kFlac:
+ return new FoxenFlacDecoder();
+ case StreamType::kVorbis:
+ return new StbVorbisDecoder();
default:
return {};
}
diff --git a/src/codecs/foxenflac.cpp b/src/codecs/foxenflac.cpp
new file mode 100644
index 00000000..a2d6f000
--- /dev/null
+++ b/src/codecs/foxenflac.cpp
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include "foxenflac.hpp"
+#include <stdint.h>
+
+#include <cstdlib>
+
+#include "esp_log.h"
+#include "foxen/flac.h"
+
+namespace codecs {
+
+// Creates the underlying libfoxenflac decoder instance via FX_FLAC_ALLOC,
+// sized for FLAC_MAX_BLOCK_SIZE and 2 (presumably the maximum number of
+// channels -- confirm against foxen/flac.h).
+// NOTE(review): the result of FX_FLAC_ALLOC is not checked here.
+FoxenFlacDecoder::FoxenFlacDecoder()
+ : flac_(FX_FLAC_ALLOC(FLAC_MAX_BLOCK_SIZE, 2)) {}
+
+// Releases the decoder instance that was created in the constructor.
+FoxenFlacDecoder::~FoxenFlacDecoder() {
+ // presumably FX_FLAC_ALLOC allocates with malloc, making free() the matching
+ // release -- confirm against foxen/flac.h.
+ free(flac_);
+}
+
+// Feeds the start of a FLAC stream to the decoder so that it can parse the
+// stream's metadata, then reports the format of the samples that will follow.
+//
+// Returns the number of input bytes consumed, together with either the output
+// format or kMalformedData if the metadata could not be read from `input`.
+auto FoxenFlacDecoder::BeginStream(const cpp::span<const std::byte> input)
+    -> Result<OutputFormat> {
+  const auto* encoded = reinterpret_cast<const uint8_t*>(input.data());
+  // In: bytes available. Out: bytes the decoder actually consumed.
+  uint32_t consumed = input.size_bytes();
+
+  // No output buffer is supplied; at this point we only want the metadata.
+  const fx_flac_state_t result =
+      fx_flac_process(flac_, encoded, &consumed, nullptr, nullptr);
+  if (result != FLAC_END_OF_METADATA) {
+    return {consumed, cpp::fail(Error::kMalformedData)};
+  }
+
+  const int64_t num_channels =
+      fx_flac_get_streaminfo(flac_, FLAC_KEY_N_CHANNELS);
+  const int64_t sample_rate =
+      fx_flac_get_streaminfo(flac_, FLAC_KEY_SAMPLE_RATE);
+  const bool has_stream_info = num_channels != FLAC_INVALID_METADATA_KEY &&
+                               sample_rate != FLAC_INVALID_METADATA_KEY;
+  if (!has_stream_info) {
+    return {consumed, cpp::fail(Error::kMalformedData)};
+  }
+
+  OutputFormat format{
+      .num_channels = static_cast<uint8_t>(num_channels),
+      .bits_per_sample = 32,  // libfoxenflac output is fixed-size.
+      .sample_rate_hz = static_cast<uint32_t>(sample_rate),
+  };
+  return {consumed, format};
+}
+
+// Decodes as much of `input` as will fit into `output`, writing one 32 bit
+// value per sample.
+//
+// Returns the number of input bytes consumed, together with either:
+//  - how many bytes were written, and whether the decoder reached the end of
+//    a frame;
+//  - kMalformedData if the decoder reported an error; or
+//  - kOutOfInput if nothing went wrong but no samples were produced.
+auto FoxenFlacDecoder::ContinueStream(cpp::span<const std::byte> input,
+                                      cpp::span<std::byte> output)
+    -> Result<OutputInfo> {
+  constexpr uint32_t kBytesPerSample = 4;
+
+  auto* pcm_out = reinterpret_cast<int32_t*>(output.data());
+  // Both counters are in/out parameters: capacity in, amount used out.
+  uint32_t num_samples = output.size_bytes() / kBytesPerSample;
+  uint32_t num_bytes = input.size_bytes();
+
+  const fx_flac_state_t result = fx_flac_process(
+      flac_, reinterpret_cast<const uint8_t*>(input.data()), &num_bytes,
+      pcm_out, &num_samples);
+
+  if (result == FLAC_ERR) {
+    return {num_bytes, cpp::fail(Error::kMalformedData)};
+  }
+  if (num_samples == 0) {
+    // No error, but no samples written. We must be out of data.
+    return {num_bytes, cpp::fail(Error::kOutOfInput)};
+  }
+
+  return {num_bytes,
+          OutputInfo{.bytes_written = num_samples * kBytesPerSample,
+                     .is_finished_writing = result == FLAC_END_OF_FRAME}};
+}
+
+// Placeholder: seeking within FLAC streams is not implemented yet. Both
+// arguments are ignored; no input is consumed and no error is reported.
+auto FoxenFlacDecoder::SeekStream(cpp::span<const std::byte> input,
+ std::size_t target_sample) -> Result<void> {
+ // TODO(jacqueline): Implement me.
+ return {0, {}};
+}
+
+} // namespace codecs
diff --git a/src/codecs/include/codec.hpp b/src/codecs/include/codec.hpp
index 31c67e13..4b5ab47f 100644
--- a/src/codecs/include/codec.hpp
+++ b/src/codecs/include/codec.hpp
@@ -21,48 +21,58 @@
namespace codecs {
+/*
+ * Common interface to be implemented by all audio decoders.
+ */
class ICodec {
public:
virtual ~ICodec() {}
+ /* Errors that may be returned by codecs. */
+ enum class Error {
+ // Indicates that more data is required before this codec can finish its
+ // operation. E.g. the input buffer ends with a truncated frame.
+ kOutOfInput,
+ // Indicates that the data within the input buffer is fatally malformed.
+ kMalformedData,
+
+ kInternalError,
+ };
+
+ /*
+ * Alias for more readable return types. All codec methods, success or
+ * failure, should also return the number of bytes they consumed.
+ */
+ template <typename T>
+ using Result = std::pair<std::size_t, cpp::result<T, Error>>;
+
struct OutputFormat {
uint8_t num_channels;
uint8_t bits_per_sample;
uint32_t sample_rate_hz;
};
- virtual auto GetOutputFormat() -> std::optional<OutputFormat> = 0;
-
- enum ProcessingError { MALFORMED_DATA };
-
- virtual auto SetInput(cpp::span<const std::byte> input) -> void = 0;
-
/*
- * Returns the codec's next read position within the input buffer. If the
- * codec is out of usable data, but there is still some data left in the
- * stream, that data should be prepended to the next input buffer.
+ * Decodes metadata or headers from the given input stream, and returns the
+ * format for the samples that will be decoded from it.
*/
- virtual auto GetInputPosition() -> std::size_t = 0;
+ virtual auto BeginStream(cpp::span<const std::byte> input)
+ -> Result<OutputFormat> = 0;
- /*
- * Read one frame (or equivalent discrete chunk) from the input, and
- * synthesize output samples for it.
- *
- * Returns true if we are out of usable data from the input stream, or false
- * otherwise.
- */
- virtual auto ProcessNextFrame() -> cpp::result<bool, ProcessingError> = 0;
+ struct OutputInfo {
+ std::size_t bytes_written;
+ bool is_finished_writing;
+ };
/*
* Writes PCM samples to the given output buffer.
- *
- * Returns the number of bytes that were written, and true if all of the
- * samples synthesized from the last call to `ProcessNextFrame` have been
- * written. If this returns false, then this method should be called again
- * after flushing the output buffer.
*/
- virtual auto WriteOutputSamples(cpp::span<std::byte> output)
- -> std::pair<std::size_t, bool> = 0;
+ virtual auto ContinueStream(cpp::span<const std::byte> input,
+ cpp::span<std::byte> output)
+ -> Result<OutputInfo> = 0;
+
+ virtual auto SeekStream(cpp::span<const std::byte> input,
+ std::size_t target_sample) -> Result<void> = 0;
};
auto CreateCodecForType(StreamType type) -> std::optional<ICodec*>;
diff --git a/src/codecs/include/foxenflac.hpp b/src/codecs/include/foxenflac.hpp
new file mode 100644
index 00000000..cce1b762
--- /dev/null
+++ b/src/codecs/include/foxenflac.hpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "foxen/flac.h"
+#include "span.hpp"
+
+#include "codec.hpp"
+
+namespace codecs {
+
+/*
+ * FLAC decoder, implemented as a thin wrapper around libfoxenflac.
+ */
+class FoxenFlacDecoder : public ICodec {
+ public:
+  FoxenFlacDecoder();
+  ~FoxenFlacDecoder();
+
+  // This class owns the raw `flac_` allocation and frees it on destruction, so
+  // a copy would free the same decoder instance twice.
+  FoxenFlacDecoder(const FoxenFlacDecoder&) = delete;
+  auto operator=(const FoxenFlacDecoder&) -> FoxenFlacDecoder& = delete;
+
+  /*
+   * Parses the stream's metadata, and returns the format of the samples that
+   * will be decoded from it.
+   */
+  auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;
+
+  /*
+   * Decodes samples from the input into the output buffer. Each sample is
+   * written as a 32 bit value.
+   */
+  auto ContinueStream(cpp::span<const std::byte>, cpp::span<std::byte>)
+      -> Result<OutputInfo> override;
+
+  /* Not yet implemented; consumes nothing and reports success. */
+  auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
+      -> Result<void> override;
+
+ private:
+  // Decoder instance; created in the constructor, freed in the destructor.
+  fx_flac_t* flac_;
+};
+
+} // namespace codecs
diff --git a/src/codecs/include/mad.hpp b/src/codecs/include/mad.hpp
index 5ba4db84..e1c479bf 100644
--- a/src/codecs/include/mad.hpp
+++ b/src/codecs/include/mad.hpp
@@ -24,12 +24,22 @@ class MadMp3Decoder : public ICodec {
MadMp3Decoder();
~MadMp3Decoder();
- auto GetOutputFormat() -> std::optional<OutputFormat> override;
- auto SetInput(cpp::span<const std::byte> input) -> void override;
- auto GetInputPosition() -> std::size_t override;
- auto ProcessNextFrame() -> cpp::result<bool, ProcessingError> override;
- auto WriteOutputSamples(cpp::span<std::byte> output)
- -> std::pair<std::size_t, bool> override;
+ /*
+ * Returns the output format for the next frame in the stream. MP3 streams
+ * may represent multiple distinct tracks, with different bitrates, and so we
+ * handle the stream only on a frame-by-frame basis.
+ */
+ auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;
+
+ /*
+ * Writes samples for the current frame.
+ */
+ auto ContinueStream(cpp::span<const std::byte> input,
+ cpp::span<std::byte> output)
+ -> Result<OutputInfo> override;
+
+ auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
+ -> Result<void> override;
private:
mad_stream stream_;
@@ -37,6 +47,8 @@ class MadMp3Decoder : public ICodec {
mad_synth synth_;
int current_sample_;
+
+ auto GetInputPosition() -> std::size_t;
};
} // namespace codecs
diff --git a/src/codecs/include/stbvorbis.hpp b/src/codecs/include/stbvorbis.hpp
new file mode 100644
index 00000000..045e264e
--- /dev/null
+++ b/src/codecs/include/stbvorbis.hpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <optional>
+#include <string>
+#include <utility>
+
+#include "stb_vorbis.h"
+
+#include "codec.hpp"
+
+namespace codecs {
+
+/*
+ * Ogg Vorbis decoder, implemented using stb_vorbis' push-data API.
+ */
+class StbVorbisDecoder : public ICodec {
+ public:
+  StbVorbisDecoder();
+  ~StbVorbisDecoder();
+
+  // This class owns the raw `vorbis_` handle and closes it on destruction, so
+  // a copy would close the same handle twice.
+  StbVorbisDecoder(const StbVorbisDecoder&) = delete;
+  auto operator=(const StbVorbisDecoder&) -> StbVorbisDecoder& = delete;
+
+  /*
+   * Opens a new decoder from the headers at the start of the input, closing
+   * any previously opened stream, and returns the output format.
+   */
+  auto BeginStream(cpp::span<const std::byte>) -> Result<OutputFormat> override;
+
+  /*
+   * Decodes the next frame if no frame is pending, then writes as many of the
+   * pending frame's samples as will fit into the output buffer.
+   */
+  auto ContinueStream(cpp::span<const std::byte>, cpp::span<std::byte>)
+      -> Result<OutputInfo> override;
+
+  /* Not yet implemented; consumes nothing and reports success. */
+  auto SeekStream(cpp::span<const std::byte> input, std::size_t target_sample)
+      -> Result<void> override;
+
+ private:
+  // Handle for the currently open stream, or null if there isn't one.
+  stb_vorbis* vorbis_;
+
+  // Index of the next sample of the pending frame to write out, or -1 if there
+  // is no decoded frame waiting to be written.
+  int current_sample_;
+  // Channel count, sample count, and per-channel sample arrays of the most
+  // recently decoded frame, as reported by stb_vorbis.
+  int num_channels_;
+  int num_samples_;
+  float** samples_array_;
+};
+
+} // namespace codecs
diff --git a/src/codecs/include/types.hpp b/src/codecs/include/types.hpp
index 61d36a28..3dfc1da9 100644
--- a/src/codecs/include/types.hpp
+++ b/src/codecs/include/types.hpp
@@ -13,7 +13,7 @@ namespace codecs {
enum class StreamType {
kMp3,
kPcm,
- kOgg,
+ kVorbis,
kFlac,
};
diff --git a/src/codecs/mad.cpp b/src/codecs/mad.cpp
index fbe85213..8b9897eb 100644
--- a/src/codecs/mad.cpp
+++ b/src/codecs/mad.cpp
@@ -13,11 +13,12 @@
#include "mad.h"
#include "codec.hpp"
+#include "result.hpp"
#include "types.hpp"
namespace codecs {
-static uint32_t scaleToBits(mad_fixed_t sample, uint8_t bits) {
+static uint32_t mad_fixed_to_pcm(mad_fixed_t sample, uint8_t bits) {
// Round the bottom bits.
sample += (1L << (MAD_F_FRACBITS - bits));
@@ -42,93 +43,167 @@ MadMp3Decoder::~MadMp3Decoder() {
mad_synth_finish(&synth_);
}
-auto MadMp3Decoder::GetOutputFormat() -> std::optional<OutputFormat> {
- if (synth_.pcm.channels == 0 || synth_.pcm.samplerate == 0) {
- return {};
- }
- return std::optional<OutputFormat>({
- .num_channels = static_cast<uint8_t>(synth_.pcm.channels),
- .bits_per_sample = 24,
- .sample_rate_hz = synth_.pcm.samplerate,
- });
+// Returns the offset of libmad's `next_frame` pointer from the start of the
+// stream's current buffer. The codec methods report this value as the number
+// of input bytes they have consumed.
+auto MadMp3Decoder::GetInputPosition() -> std::size_t {
+ return stream_.next_frame - stream_.buffer;
}
-auto MadMp3Decoder::SetInput(cpp::span<const std::byte> input) -> void {
+auto MadMp3Decoder::BeginStream(const cpp::span<const std::byte> input)
+ -> Result<OutputFormat> {
mad_stream_buffer(&stream_,
reinterpret_cast<const unsigned char*>(input.data()),
input.size());
-}
-
-auto MadMp3Decoder::GetInputPosition() -> std::size_t {
- return stream_.next_frame - stream_.buffer;
-}
-
-auto MadMp3Decoder::ProcessNextFrame() -> cpp::result<bool, ProcessingError> {
// Whatever was last synthesized is now invalid, so ensure we don't try to
// send it.
current_sample_ = -1;
- // Decode the next frame. To signal errors, this returns -1 and
- // stashes an error code in the stream structure.
- if (mad_frame_decode(&frame_, &stream_) < 0) {
+ // To get the output format for MP3 streams, we simply need to decode the
+ // first frame header.
+ mad_header header;
+ mad_header_init(&header);
+ while (mad_header_decode(&header, &stream_) < 0) {
if (MAD_RECOVERABLE(stream_.error)) {
// Recoverable errors are usually malformed parts of the stream.
// We can recover from them by just retrying the decode.
- return false;
+ continue;
+ } else {
+ // Don't bother checking for other errors; if the first part of the stream
+ // doesn't even contain a header then something's gone wrong.
+ return {GetInputPosition(), cpp::fail(Error::kMalformedData)};
}
-
- if (stream_.error == MAD_ERROR_BUFLEN) {
- // The decoder ran out of bytes before it completed a frame. We
- // need to return back to the caller to give us more data.
- return true;
- }
-
- // The error is unrecoverable. Give up.
- return cpp::fail(MALFORMED_DATA);
}
- // We've successfully decoded a frame!
- // Now we need to synthesize PCM samples based on the frame, and send
- // them downstream.
- mad_synth_frame(&synth_, &frame_);
- current_sample_ = 0;
- return false;
+ uint8_t channels = MAD_NCHANNELS(&header);
+ return {GetInputPosition(),
+ OutputFormat{
+ .num_channels = channels,
+ .bits_per_sample = 24, // We always scale to 24 bits
+ .sample_rate_hz = header.samplerate,
+ }};
}
-auto MadMp3Decoder::WriteOutputSamples(cpp::span<std::byte> output)
- -> std::pair<std::size_t, bool> {
- size_t output_byte = 0;
- // First ensure that we actually have some samples to send off.
+auto MadMp3Decoder::ContinueStream(cpp::span<const std::byte> input,
+ cpp::span<std::byte> output)
+ -> Result<OutputInfo> {
if (current_sample_ < 0) {
- return std::make_pair(output_byte, true);
+ mad_stream_buffer(&stream_,
+ reinterpret_cast<const unsigned char*>(input.data()),
+ input.size());
+
+ // Decode the next frame. To signal errors, this returns -1 and
+ // stashes an error code in the stream structure.
+ while (mad_frame_decode(&frame_, &stream_) < 0) {
+ if (MAD_RECOVERABLE(stream_.error)) {
+ // Recoverable errors are usually malformed parts of the stream.
+ // We can recover from them by just retrying the decode.
+ continue;
+ }
+ if (stream_.error == MAD_ERROR_BUFLEN) {
+ // The decoder ran out of bytes before it completed a frame. We
+ // need to return back to the caller to give us more data.
+ return {GetInputPosition(), cpp::fail(Error::kOutOfInput)};
+ }
+ // The error is unrecoverable. Give up.
+ return {GetInputPosition(), cpp::fail(Error::kMalformedData)};
+ }
+
+ // We've successfully decoded a frame! Now synthesize samples to write out.
+ mad_synth_frame(&synth_, &frame_);
+ current_sample_ = 0;
}
+ size_t output_byte = 0;
while (current_sample_ < synth_.pcm.length) {
- if (output_byte + (2 * synth_.pcm.channels) >= output.size()) {
- return std::make_pair(output_byte, false);
+ if (output_byte + (4 * synth_.pcm.channels) >= output.size()) {
+ // We can't fit the next sample into the buffer. Stop now, and also avoid
+ // writing the sample for only half the channels.
+ return {GetInputPosition(), OutputInfo{.bytes_written = output_byte,
+ .is_finished_writing = false}};
}
for (int channel = 0; channel < synth_.pcm.channels; channel++) {
uint32_t sample_24 =
- scaleToBits(synth_.pcm.samples[channel][current_sample_], 24);
+ mad_fixed_to_pcm(synth_.pcm.samples[channel][current_sample_], 24);
output[output_byte++] = static_cast<std::byte>((sample_24 >> 16) & 0xFF);
output[output_byte++] = static_cast<std::byte>((sample_24 >> 8) & 0xFF);
output[output_byte++] = static_cast<std::byte>((sample_24)&0xFF);
// 24 bit samples must still be aligned to 32 bits. The LSB is ignored.
output[output_byte++] = static_cast<std::byte>(0);
- /*
- uint16_t sample_16 =
- scaleToBits(synth_.pcm.samples[channel][current_sample_], 16);
- output[output_byte++] = static_cast<std::byte>((sample_16 >> 8) & 0xFF);
- output[output_byte++] = static_cast<std::byte>((sample_16)&0xFF);
- */
}
current_sample_++;
}
// We wrote everything! Reset, ready for the next frame.
current_sample_ = -1;
- return std::make_pair(output_byte, true);
+ return {GetInputPosition(), OutputInfo{.bytes_written = output_byte,
+ .is_finished_writing = true}};
+}
+
+// Skips forwards through `input` until the frame containing `target_sample` is
+// reached, then leaves the decoder set up so that the next ContinueStream call
+// starts writing from that sample.
+//
+// Frames that are far from the target only have their headers decoded. The
+// last few frames before the target are fully decoded (and the last couple
+// synthesized) so that the decoder is in sync by the time we arrive.
+//
+// Returns the number of input bytes consumed, together with kOutOfInput if
+// the buffer ran out while decoding a frame near the target, or
+// kMalformedData on any other unrecoverable error.
+//
+// NOTE(review): `current_sample` starts from zero on every call, so
+// `target_sample` is effectively measured from the start of `input` rather
+// than from the start of the stream. Confirm this is what callers that retry
+// after kOutOfInput expect.
+auto MadMp3Decoder::SeekStream(cpp::span<const std::byte> input,
+ std::size_t target_sample) -> Result<void> {
+ mad_stream_buffer(&stream_,
+ reinterpret_cast<const unsigned char*>(input.data()),
+ input.size());
+ // Index of the first sample of the frame we're currently looking at.
+ std::size_t current_sample = 0;
+ // Zero until the first header has been decoded; every frame is then assumed
+ // to hold the same number of samples.
+ std::size_t samples_per_frame = 0;
+ while (true) {
+ // On the first iteration this adds zero, so we begin at sample 0.
+ current_sample += samples_per_frame;
+
+ // First, decode the header for this frame.
+ mad_header header;
+ mad_header_init(&header);
+ while (mad_header_decode(&header, &stream_) < 0) {
+ if (MAD_RECOVERABLE(stream_.error)) {
+ // Recoverable errors are usually malformed parts of the stream.
+ // We can recover from them by just retrying the decode.
+ continue;
+ } else {
+ // Every other error is reported as malformed data.
+ // NOTE(review): unlike the frame decode loop below, this does not
+ // single out MAD_ERROR_BUFLEN. Presumably running out of buffer while
+ // skipping headers lands here too -- confirm whether kOutOfInput was
+ // intended for that case.
+ return {GetInputPosition(), cpp::fail(Error::kMalformedData)};
+ }
+ }
+
+ // Calculate samples per frame if we haven't already.
+ if (samples_per_frame == 0) {
+ samples_per_frame = 32 * MAD_NSBSAMPLES(&header);
+ }
+
+ // Work out how close we are to the target.
+ std::size_t samples_to_go = target_sample - current_sample;
+ std::size_t frames_to_go = samples_to_go / samples_per_frame;
+ if (frames_to_go > 3) {
+ // The target is far in the distance. Keep skipping through headers only.
+ continue;
+ }
+
+ // The target is within the next few frames. We should decode these, to give
+ // the decoder a chance to sync with the stream.
+ // NOTE(review): a header was already decoded from the stream above; check
+ // whether mad_frame_decode then decodes the same frame or the one after it.
+ while (mad_frame_decode(&frame_, &stream_) < 0) {
+ if (MAD_RECOVERABLE(stream_.error)) {
+ continue;
+ }
+ if (stream_.error == MAD_ERROR_BUFLEN) {
+ // The buffer ended before the frame did; ask the caller for more data.
+ return {GetInputPosition(), cpp::fail(Error::kOutOfInput)};
+ }
+ // The error is unrecoverable. Give up.
+ return {GetInputPosition(), cpp::fail(Error::kMalformedData)};
+ }
+
+ if (frames_to_go <= 1) {
+ // The target is within the next couple of frames. We should start
+ // synthesizing a frame early because this guy says so:
+ // https://lists.mars.org/hyperkitty/list/mad-dev@lists.mars.org/message/UZSHXZTIZEF7FZ4KFOR65DUCKAY2OCUT/
+ mad_synth_frame(&synth_, &frame_);
+ }
+
+ if (frames_to_go == 0) {
+ // The target is actually within this frame! Set up for the ContinueStream
+ // call by pointing it at the target's offset within the synthesized frame.
+ current_sample_ =
+ (target_sample > current_sample) ? target_sample - current_sample : 0;
+ return {GetInputPosition(), {}};
+ }
+ }
}
} // namespace codecs
diff --git a/src/codecs/stbvorbis.cpp b/src/codecs/stbvorbis.cpp
new file mode 100644
index 00000000..de315416
--- /dev/null
+++ b/src/codecs/stbvorbis.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright 2023 jacqueline <me@jacqueline.id.au>
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+#include "stbvorbis.hpp"
+#include <stdint.h>
+
+#include <cstdint>
+#include <optional>
+
+#include "stb_vorbis.h"
+
+namespace codecs {
+
+StbVorbisDecoder::StbVorbisDecoder()
+ : vorbis_(nullptr),
+ current_sample_(-1),
+ num_channels_(0),
+ num_samples_(0),
+ samples_array_(NULL) {}
+
+// Closes the stb_vorbis handle, if a stream is open.
+StbVorbisDecoder::~StbVorbisDecoder() {
+  if (vorbis_ == nullptr) {
+    // No stream is open, so there is nothing to release.
+    return;
+  }
+  stb_vorbis_close(vorbis_);
+}
+
+// Converts a floating point sample, nominally within [-1.0, 1.0], into a
+// signed fixed-point sample that is `bits` wide.
+//
+// The result is returned as the two's complement bit pattern of the (sign
+// extended) 32 bit value; callers are expected to mask off the bytes they
+// need. Out-of-range samples are clamped to [-2^(bits-1), 2^(bits-1) - 1], and
+// NaN is treated as silence. `bits` must be within [1, 31].
+static uint32_t scaleToBits(float sample, uint8_t bits) {
+  // Scale to range.
+  const int32_t max_val = (1 << (bits - 1));
+  const float limit = static_cast<float>(max_val);
+  float scaled = sample * limit;
+
+  // Clamp whilst still in floating point. Converting an out-of-range float to
+  // an integer is undefined behaviour, so this must happen before the cast.
+  if (scaled != scaled) {
+    scaled = 0.0f;  // NaN
+  } else if (scaled > limit) {
+    scaled = limit;
+  } else if (scaled < -limit) {
+    scaled = -limit;
+  }
+  int32_t fixed_point = static_cast<int32_t>(scaled);
+
+  // The largest positive value that fits is one less than the magnitude of
+  // the most negative value. Without this, a full-scale positive sample would
+  // wrap around to become a full-scale negative one.
+  if (fixed_point > max_val - 1) {
+    fixed_point = max_val - 1;
+  }
+
+  // Remove sign.
+  return static_cast<uint32_t>(fixed_point);
+}
+
+// Opens a new stb_vorbis decoder from the headers at the start of `input`,
+// closing any stream that was previously open.
+//
+// Returns the number of input bytes consumed by the headers, together with
+// either the format of the samples that will be decoded, or:
+//  - kOutOfInput if `input` does not yet contain the complete headers;
+//  - kInternalError if no decoder was created but no error was reported; or
+//  - kMalformedData for any other failure.
+// No input is consumed on failure.
+auto StbVorbisDecoder::BeginStream(const cpp::span<const std::byte> input)
+    -> Result<OutputFormat> {
+  if (vorbis_ != nullptr) {
+    stb_vorbis_close(vorbis_);
+    vorbis_ = nullptr;
+  }
+  // Any frame left over from the previous stream is no longer valid.
+  current_sample_ = -1;
+
+  int bytes_read = 0;
+  int error = 0;
+  vorbis_ =
+      stb_vorbis_open_pushdata(reinterpret_cast<const uint8_t*>(input.data()),
+                               input.size_bytes(), &bytes_read, &error, NULL);
+  if (error == VORBIS_need_more_data) {
+    // The headers are truncated, not broken; the caller should retry with a
+    // larger buffer.
+    return {0, cpp::fail(Error::kOutOfInput)};
+  }
+  if (error != 0) {
+    return {0, cpp::fail(Error::kMalformedData)};
+  }
+  if (vorbis_ == nullptr) {
+    // No error code, but no decoder either. Bail out rather than querying a
+    // null handle below.
+    return {0, cpp::fail(Error::kInternalError)};
+  }
+
+  stb_vorbis_info info = stb_vorbis_get_info(vorbis_);
+  return {bytes_read,
+          OutputFormat{.num_channels = static_cast<uint8_t>(info.channels),
+                       .bits_per_sample = 24,
+                       .sample_rate_hz = info.sample_rate}};
+}
+
+// Decodes the next frame from `input` (unless a previously decoded frame still
+// has samples waiting), then writes as many of the frame's samples as will fit
+// into `output`. Samples are interleaved by channel, and each is written as a
+// big-endian 24 bit value followed by one padding byte.
+//
+// Returns the number of input bytes consumed by this call, together with
+// either:
+//  - the number of bytes written, and whether the whole frame has now been
+//    written. If it hasn't, call again with a fresh output buffer;
+//  - kOutOfInput if `input` does not contain a complete frame; or
+//  - kInternalError if there is no open stream, or if the decoder reports
+//    samples without providing anywhere to read them from.
+auto StbVorbisDecoder::ContinueStream(cpp::span<const std::byte> input,
+                                      cpp::span<std::byte> output)
+    -> Result<OutputInfo> {
+  if (vorbis_ == nullptr) {
+    // BeginStream either hasn't been called, or it failed.
+    return {0, cpp::fail(Error::kInternalError)};
+  }
+
+  std::size_t bytes_used = 0;
+  if (current_sample_ < 0) {
+    num_channels_ = 0;
+    num_samples_ = 0;
+    samples_array_ = NULL;
+
+    while (true) {
+      auto cropped = input.subspan(bytes_used);
+      int b = stb_vorbis_decode_frame_pushdata(
+          vorbis_, reinterpret_cast<const uint8_t*>(cropped.data()),
+          cropped.size_bytes(), &num_channels_, &samples_array_, &num_samples_);
+      if (b <= 0) {
+        return {bytes_used, cpp::fail(Error::kOutOfInput)};
+      }
+      bytes_used += b;
+
+      if (num_samples_ == 0) {
+        // Decoder is synchronising. Decode more bytes.
+        continue;
+      }
+      if (num_channels_ == 0 || samples_array_ == NULL) {
+        // The decoder isn't satisfying its contract.
+        return {bytes_used, cpp::fail(Error::kInternalError)};
+      }
+      current_sample_ = 0;
+      break;
+    }
+  }
+
+  // We have a decoded frame. Time to write out the samples.
+  constexpr std::size_t kBytesPerSample = 4;
+  const std::size_t bytes_per_step = kBytesPerSample * num_channels_;
+  std::size_t output_byte = 0;
+  while (current_sample_ < num_samples_) {
+    if (output_byte + bytes_per_step > output.size()) {
+      // We can't fit the next sample for every channel into the buffer. Stop
+      // now, but still report the input that was consumed to decode this
+      // frame; otherwise the same bytes would be fed to the decoder again.
+      return {bytes_used, OutputInfo{.bytes_written = output_byte,
+                                     .is_finished_writing = false}};
+    }
+
+    for (int channel = 0; channel < num_channels_; channel++) {
+      float raw_sample = samples_array_[channel][current_sample_];
+
+      // Must be wider than 16 bits, or the most significant byte is lost.
+      uint32_t sample_24 = scaleToBits(raw_sample, 24);
+      output[output_byte++] = static_cast<std::byte>((sample_24 >> 16) & 0xFF);
+      output[output_byte++] = static_cast<std::byte>((sample_24 >> 8) & 0xFF);
+      output[output_byte++] = static_cast<std::byte>((sample_24)&0xFF);
+      // Pad to 32 bits for alignment.
+      output[output_byte++] = static_cast<std::byte>(0);
+    }
+    current_sample_++;
+  }
+
+  // We wrote everything! Reset, ready for the next frame.
+  current_sample_ = -1;
+  return {bytes_used, OutputInfo{.bytes_written = output_byte,
+                                 .is_finished_writing = true}};
+}
+
+// Placeholder: seeking within Vorbis streams is not implemented yet. Both
+// arguments are ignored; no input is consumed and no error is reported.
+auto StbVorbisDecoder::SeekStream(cpp::span<const std::byte> input,
+ std::size_t target_sample) -> Result<void> {
+ // TODO(jacqueline): Implement me.
+ return {0, {}};
+}
+
+} // namespace codecs
diff --git a/src/database/tag_parser.cpp b/src/database/tag_parser.cpp
index 27d4163f..589c988f 100644
--- a/src/database/tag_parser.cpp
+++ b/src/database/tag_parser.cpp
@@ -96,6 +96,7 @@ auto TagParserImpl::ReadAndParseTags(const std::string& path, SongTags* out)
if (res != 0) {
// Parsing failed.
+ ESP_LOGE(kTag, "tag parsing failed, reason %d", res);
return false;
}
@@ -103,6 +104,15 @@ auto TagParserImpl::ReadAndParseTags(const std::string& path, SongTags* out)
case Fmp3:
out->encoding = Encoding::kMp3;
break;
+ case Fogg:
+ out->encoding = Encoding::kOgg;
+ break;
+ case Fflac:
+ out->encoding = Encoding::kFlac;
+ break;
+ case Fwav:
+ out->encoding = Encoding::kWav;
+ break;
default:
out->encoding = Encoding::kUnsupported;
}
diff --git a/src/tasks/tasks.cpp b/src/tasks/tasks.cpp
index b95d8e16..2477d8b9 100644
--- a/src/tasks/tasks.cpp
+++ b/src/tasks/tasks.cpp
@@ -39,7 +39,7 @@ auto AllocateStack() -> cpp::span<StackType_t>;
// amount of stack space.
template <>
auto AllocateStack<Type::kAudio>() -> cpp::span<StackType_t> {
- std::size_t size = 32 * 1024;
+ std::size_t size = 48 * 1024;
return {static_cast<StackType_t*>(heap_caps_malloc(size, MALLOC_CAP_DEFAULT)),
size};
}