diff options
| author | jacqueline <me@jacqueline.id.au> | 2023-08-10 19:12:38 +1000 |
|---|---|---|
| committer | jacqueline <me@jacqueline.id.au> | 2023-08-10 19:12:38 +1000 |
| commit | 958160aa545e3d91b2a4f1a367817e73d298e8a9 (patch) | |
| tree | 190e6591a6dda1f0d9651c7e127666ead2a3373b /src | |
| parent | d8fc77101dcf80a3643a00b3446dca1e390ce997 (diff) | |
| download | tangara-fw-958160aa545e3d91b2a4f1a367817e73d298e8a9.tar.gz | |
Use the libspeexdsp resampler
As far as I can tell it runs a little slower, but it's fixed-point and has
much better-understood audio characteristics.
Diffstat (limited to 'src')
| -rw-r--r-- | src/app_console/app_console.cpp | 2 | ||||
| -rw-r--r-- | src/audio/CMakeLists.txt | 3 | ||||
| -rw-r--r-- | src/audio/audio_task.cpp | 2 | ||||
| -rw-r--r-- | src/audio/fatfs_source.cpp | 4 | ||||
| -rw-r--r-- | src/audio/include/resample.hpp | 21 | ||||
| -rw-r--r-- | src/audio/resample.cpp | 181 | ||||
| -rw-r--r-- | src/audio/sink_mixer.cpp | 41 | ||||
| -rw-r--r-- | src/codecs/foxenflac.cpp | 12 | ||||
| -rw-r--r-- | src/codecs/include/sample.hpp | 35 | ||||
| -rw-r--r-- | src/codecs/opus.cpp | 19 | ||||
| -rw-r--r-- | src/codecs/vorbis.cpp | 18 |
11 files changed, 85 insertions, 253 deletions
diff --git a/src/app_console/app_console.cpp b/src/app_console/app_console.cpp index 8686ac58..30b7d2dc 100644 --- a/src/app_console/app_console.cpp +++ b/src/app_console/app_console.cpp @@ -403,7 +403,7 @@ int CmdTaskStats(int argc, char** argv) { str << "\t\t"; } - str << std::fixed << std::setprecision(1) << time_percent * 100; + str << std::fixed << std::setprecision(1) << (time_percent * 100); str << "%"; info_strings.push_back({run_time, str.str()}); diff --git a/src/audio/CMakeLists.txt b/src/audio/CMakeLists.txt index 02e84c3f..2d332a1e 100644 --- a/src/audio/CMakeLists.txt +++ b/src/audio/CMakeLists.txt @@ -8,6 +8,7 @@ idf_component_register( "stream_event.cpp" "stream_info.cpp" "audio_fsm.cpp" "sink_mixer.cpp" "resample.cpp" "fatfs_source.cpp" INCLUDE_DIRS "include" - REQUIRES "codecs" "drivers" "cbor" "result" "tasks" "span" "memory" "tinyfsm" "database" "system_fsm" "playlist") + REQUIRES "codecs" "drivers" "cbor" "result" "tasks" "span" "memory" "tinyfsm" + "database" "system_fsm" "playlist" "speexdsp") target_compile_options(${COMPONENT_LIB} PRIVATE ${EXTRA_WARNINGS}) diff --git a/src/audio/audio_task.cpp b/src/audio/audio_task.cpp index d880e6b1..797ab7f9 100644 --- a/src/audio/audio_task.cpp +++ b/src/audio/audio_task.cpp @@ -165,7 +165,7 @@ auto AudioTask::BeginDecoding(std::shared_ptr<codecs::IStream> stream) -> bool { current_sink_format_ = IAudioSink::Format{ .sample_rate = open_res->sample_rate_hz, .num_channels = open_res->num_channels, - .bits_per_sample = 32, + .bits_per_sample = 16, }; ESP_LOGI(kTag, "stream started ok"); events::Audio().Dispatch(internal::InputFileOpened{}); diff --git a/src/audio/fatfs_source.cpp b/src/audio/fatfs_source.cpp index 6a9aea47..58986648 100644 --- a/src/audio/fatfs_source.cpp +++ b/src/audio/fatfs_source.cpp @@ -31,7 +31,6 @@ FatfsSource::~FatfsSource() { auto FatfsSource::Read(cpp::span<std::byte> dest) -> ssize_t { if (f_eof(file_.get())) { - ESP_LOGI(kTag, "read from empty file"); return 0; } UINT 
bytes_read = 0; @@ -40,8 +39,6 @@ auto FatfsSource::Read(cpp::span<std::byte> dest) -> ssize_t { ESP_LOGE(kTag, "error reading from file"); return -1; } - ESP_LOGI(kTag, "read %u bytes into %p (%u)", bytes_read, dest.data(), - dest.size_bytes()); return bytes_read; } @@ -50,7 +47,6 @@ auto FatfsSource::CanSeek() -> bool { } auto FatfsSource::SeekTo(int64_t destination, SeekFrom from) -> void { - ESP_LOGI(kTag, "seeking to %llu", destination); switch (from) { case SeekFrom::kStartOfStream: f_lseek(file_.get(), destination); diff --git a/src/audio/include/resample.hpp b/src/audio/include/resample.hpp index 3855415a..7b114f59 100644 --- a/src/audio/include/resample.hpp +++ b/src/audio/include/resample.hpp @@ -4,6 +4,7 @@ #include <vector> #include "span.hpp" +#include "speex/speex_resampler.h" #include "sample.hpp" @@ -17,28 +18,14 @@ class Resampler { ~Resampler(); - auto source_sample_rate() -> uint32_t { return source_sample_rate_; } - auto target_sample_rate() -> uint32_t { return target_sample_rate_; } - auto channels() -> uint_fast8_t { return num_channels_; } - - auto Process(cpp::span<const sample::Sample> input, + auto Process(cpp::span<sample::Sample> input, cpp::span<sample::Sample> output, bool end_of_data) -> std::pair<size_t, size_t>; private: - auto Subsample(int channel) -> float; - auto ApplyFilter(cpp::span<float> filter, cpp::span<float> input) -> float; - - uint32_t source_sample_rate_; - uint32_t target_sample_rate_; - float factor_; + int err_; + SpeexResamplerState* resampler_; uint8_t num_channels_; - - std::vector<float*> channel_buffers_; - size_t channel_buffer_size_; - - float output_offset_; - int32_t input_index_; }; } // namespace audio
\ No newline at end of file diff --git a/src/audio/resample.cpp b/src/audio/resample.cpp index 430a6a26..bc2c7c51 100644 --- a/src/audio/resample.cpp +++ b/src/audio/resample.cpp @@ -23,183 +23,42 @@ #include "esp_log.h" #include "sample.hpp" +#include "speex/speex_resampler.h" #include "stream_info.hpp" namespace audio { -static constexpr double kLowPassRatio = 0.5; -static constexpr size_t kNumFilters = 64; -static constexpr size_t kFilterSize = 16; - -typedef std::array<float, kFilterSize> Filter; -static std::array<Filter, kNumFilters + 1> sFilters{}; -static bool sFiltersInitialised = false; - -auto InitFilter(int index) -> void; +static constexpr int kQuality = SPEEX_RESAMPLER_QUALITY_MIN; Resampler::Resampler(uint32_t source_sample_rate, uint32_t target_sample_rate, uint8_t num_channels) - : source_sample_rate_(source_sample_rate), - target_sample_rate_(target_sample_rate), - factor_(static_cast<double>(target_sample_rate) / - static_cast<double>(source_sample_rate)), + : err_(0), + resampler_(speex_resampler_init(num_channels, + source_sample_rate, + target_sample_rate, + kQuality, + &err_)), num_channels_(num_channels) { - channel_buffers_.resize(num_channels); - channel_buffer_size_ = kFilterSize * 16; - - for (int i = 0; i < num_channels; i++) { - channel_buffers_[i] = - static_cast<float*>(calloc(sizeof(float), channel_buffer_size_)); - } - - output_offset_ = kFilterSize / 2.0f; - input_index_ = kFilterSize; - - if (!sFiltersInitialised) { - sFiltersInitialised = true; - for (int i = 0; i < kNumFilters + 1; i++) { - InitFilter(i); - } - } + assert(err_ == 0); } -Resampler::~Resampler() {} +Resampler::~Resampler() { + speex_resampler_destroy(resampler_); +} -auto Resampler::Process(cpp::span<const sample::Sample> input, +auto Resampler::Process(cpp::span<sample::Sample> input, cpp::span<sample::Sample> output, bool end_of_data) -> std::pair<size_t, size_t> { - size_t samples_used = 0; - size_t samples_produced = 0; - - size_t input_frames = input.size() 
/ num_channels_; - size_t output_frames = output.size() / num_channels_; - - int half_taps = kFilterSize / 2; - while (output_frames > 0) { - if (output_offset_ >= input_index_ - half_taps) { - if (input_frames > 0) { - // Check whether the channel buffers will overflow with the addition of - // this sample. If so, we need to move the remaining contents back to - // the beginning of the buffer. - if (input_index_ == channel_buffer_size_) { - for (int i = 0; i < num_channels_; ++i) { - memmove(channel_buffers_[i], - channel_buffers_[i] + channel_buffer_size_ - kFilterSize, - kFilterSize * sizeof(float)); - } - - output_offset_ -= channel_buffer_size_ - kFilterSize; - input_index_ -= channel_buffer_size_ - kFilterSize; - } - - for (int i = 0; i < num_channels_; ++i) { - channel_buffers_[i][input_index_] = - sample::ToFloat(input[samples_used++]); - } - - input_index_++; - input_frames--; - } else { - break; - } - } else { - for (int i = 0; i < num_channels_; i++) { - output[samples_produced++] = sample::FromFloat(Subsample(i)); - } - - // NOTE: floating point division here is potentially slow due to FPU - // limitations. Consider explicitly bunding the xtensa libgcc divsion via - // reciprocal implementation if we care about portability between - // compilers. - output_offset_ += 1.0f / factor_; - output_frames--; - } - } - - return {samples_used, samples_produced}; -} - -/* - * Constructs the filter in-place for the given index of sFilters. This only - * needs to be done once, per-filter. 64-bit math is okay here, because filters - * will not be initialised within a performance critical path. 
- */ -auto InitFilter(int index) -> void { - Filter& filter = sFilters[index]; - std::array<double, kFilterSize> working_buffer{}; + uint32_t samples_used = input.size() / num_channels_; + uint32_t samples_produced = output.size() / num_channels_; - double fraction = index / static_cast<double>(kNumFilters); - double filter_sum = 0.0; - - for (int i = 0; i < kFilterSize; ++i) { - // "dist" is the absolute distance from the sinc maximum to the filter tap - // to be calculated, in radians. - double dist = fabs((kFilterSize / 2.0 - 1.0) + fraction - i) * M_PI; - // "ratio" is that distance divided by half the tap count such that it - // reaches π at the window extremes - double ratio = dist / (kFilterSize / 2.0); - - double value; - if (dist != 0.0) { - value = sin(dist * kLowPassRatio) / (dist * kLowPassRatio); - - // Hann window. We could alternatively use a Blackman Harris window, - // however our unusually small filter size makes the Hann window's - // steeper cutoff more important. - value *= 0.5 * (1.0 + cos(ratio)); - } else { - value = 1.0; - } - - working_buffer[i] = value; - filter_sum += value; - } - - // Filter should have unity DC gain - double scaler = 1.0 / filter_sum; - double error = 0.0; - - for (int i = kFilterSize / 2; i < kFilterSize; - i = kFilterSize - i - (i >= kFilterSize / 2)) { - working_buffer[i] *= scaler; - filter[i] = working_buffer[i] - error; - error += static_cast<double>(filter[i]) - working_buffer[i]; - } -} - -/* - * Performs sub-sampling with interpolation for the given channel. Assumes that - * the channel buffer has already been filled with samples. 
- */ -auto Resampler::Subsample(int channel) -> float { - cpp::span<float> source{channel_buffers_[channel], channel_buffer_size_}; - - int offset_integral = std::floor(output_offset_); - source = source.subspan(offset_integral); - float offset_fractional = output_offset_ - offset_integral; - - offset_fractional *= kNumFilters; - int filter_index = std::floor(offset_fractional); - - float sum1 = ApplyFilter(sFilters[filter_index], - {source.data() - kFilterSize / 2 + 1, kFilterSize}); - - offset_fractional -= filter_index; - - float sum2 = ApplyFilter(sFilters[filter_index + 1], - {source.data() - kFilterSize / 2 + 1, kFilterSize}); - - return (sum2 * offset_fractional) + (sum1 * (1.0f - offset_fractional)); -} + int err = speex_resampler_process_interleaved_int( + resampler_, input.data(), &samples_used, output.data(), + &samples_produced); + assert(err == 0); -auto Resampler::ApplyFilter(cpp::span<float> filter, cpp::span<float> input) - -> float { - float sum = 0.0; - for (int i = 0; i < kFilterSize; i++) { - sum += filter[i] * input[i]; - } - return sum; + return {samples_used * num_channels_, samples_produced * num_channels_}; } } // namespace audio diff --git a/src/audio/sink_mixer.cpp b/src/audio/sink_mixer.cpp index 9f973d4b..5e712582 100644 --- a/src/audio/sink_mixer.cpp +++ b/src/audio/sink_mixer.cpp @@ -47,10 +47,7 @@ SinkMixer::SinkMixer(IAudioSink* sink) kSampleBufferLength, sizeof(sample::Sample), MALLOC_CAP_SPIRAM)), kSampleBufferLength}; - // Pin to CORE0 because we need the FPU. - // FIXME: A fixed point implementation could run freely on either core, - // which should lead to a big performance increase. 
- tasks::StartPersistent<tasks::Type::kMixer>(0, [&]() { Main(); }); + tasks::StartPersistent<tasks::Type::kMixer>([&]() { Main(); }); } SinkMixer::~SinkMixer() { @@ -100,7 +97,6 @@ auto SinkMixer::Main() -> void { vTaskDelay(pdMS_TO_TICKS(10)); } - ESP_LOGI(kTag, "configuring sink"); sink_->Configure(new_target); } target_format_ = new_target; @@ -136,6 +132,7 @@ auto SinkMixer::Main() -> void { // bytes we read were half a frame. Either way, we need to calculate the // size of the remainder in bytes. size_t bytes_used = samples_used * sizeof(sample::Sample); + assert(bytes_used <= bytes_in_buffer); leftover_bytes_ = bytes_in_buffer - bytes_used; if (leftover_bytes_ == 0) { leftover_offset_ = 0; @@ -157,20 +154,22 @@ auto SinkMixer::HandleSamples(cpp::span<sample::Sample> input, bool is_eos) } size_t samples_used = 0; - while (input.size() < samples_used) { + while (samples_used < input.size()) { cpp::span<sample::Sample> output_source; if (source_format_.sample_rate != target_format_.sample_rate) { if (resampler_ == nullptr) { - ESP_LOGI(kTag, "creating new resampler"); + ESP_LOGI(kTag, "creating new resampler for %lu -> %lu", + source_format_.sample_rate, target_format_.sample_rate); resampler_.reset(new Resampler(source_format_.sample_rate, target_format_.sample_rate, source_format_.num_channels)); } size_t read, written; - std::tie(read, written) = - resampler_->Process(input, resampled_buffer_, is_eos); + std::tie(read, written) = resampler_->Process(input.subspan(samples_used), + resampled_buffer_, is_eos); samples_used += read; + if (read == 0 && written == 0) { // Zero samples used or written. We need more input. break; @@ -181,20 +180,22 @@ auto SinkMixer::HandleSamples(cpp::span<sample::Sample> input, bool is_eos) samples_used = input.size(); } - if (target_format_.bits_per_sample == 16) { - // FIXME: The source should have some kind of hint indicating whether it - // needs dither, since some codecs (e.g. opus) apply their own dither. 
- ApplyDither(output_source, 16); - - cpp::span<int16_t> dest{reinterpret_cast<int16_t*>(output_source.data()), - output_source.size()}; - for (size_t i = 0; i < output_source.size(); i++) { - dest[i] = sample::ToSigned16Bit(output_source[i]); - } + /* + if (target_format_.bits_per_sample == 16) { + // FIXME: The source should have some kind of hint indicating whether it + // needs dither, since some codecs (e.g. opus) apply their own dither. + ApplyDither(output_source, 16); - output_source = output_source.first(output_source.size() / 2); + cpp::span<int16_t> dest{reinterpret_cast<int16_t*>(output_source.data()), + output_source.size()}; + for (size_t i = 0; i < output_source.size(); i++) { + dest[i] = sample::ToSigned16Bit(output_source[i]); } + output_source = output_source.first(output_source.size() / 2); + } + */ + size_t bytes_sent = 0; size_t bytes_to_send = output_source.size_bytes(); while (bytes_sent < bytes_to_send) { diff --git a/src/codecs/foxenflac.cpp b/src/codecs/foxenflac.cpp index cc110920..eef8225a 100644 --- a/src/codecs/foxenflac.cpp +++ b/src/codecs/foxenflac.cpp @@ -74,19 +74,25 @@ auto FoxenFlacDecoder::DecodeTo(cpp::span<sample::Sample> output) -> cpp::result<OutputInfo, Error> { bool is_eof = buffer_.Refill(input_.get()); - fx_flac_state_t state; - uint32_t samples_written = output.size(); + cpp::span<int32_t> output32{reinterpret_cast<int32_t*>(output.data()), + output.size() / 2}; + uint32_t samples_written = output32.size(); + fx_flac_state_t state; buffer_.ConsumeBytes([&](cpp::span<std::byte> buf) -> size_t { uint32_t bytes_read = buf.size_bytes(); state = fx_flac_process(flac_, reinterpret_cast<const uint8_t*>(buf.data()), - &bytes_read, output.data(), &samples_written); + &bytes_read, output32.data(), &samples_written); return bytes_read; }); if (state == FLAC_ERR) { return cpp::fail(Error::kMalformedData); } + for (size_t i = 0; i < samples_written; i++) { + output[i] = output32[i] >> 16; + } + return OutputInfo{.samples_written = 
samples_written, .is_stream_finished = samples_written == 0 && is_eof}; } diff --git a/src/codecs/include/sample.hpp b/src/codecs/include/sample.hpp index f8e08cdc..ea3a7ffc 100644 --- a/src/codecs/include/sample.hpp +++ b/src/codecs/include/sample.hpp @@ -9,38 +9,43 @@ namespace sample { // A signed, 32-bit PCM sample. -typedef int32_t Sample; +typedef int16_t Sample; constexpr auto Clip(int64_t v) -> Sample { - if (v > INT32_MAX) - return INT32_MAX; - if (v < INT32_MIN) - return INT32_MIN; + if (v > INT16_MAX) + return INT16_MAX; + if (v < INT16_MIN) + return INT16_MIN; return v; } constexpr auto FromSigned(int32_t src, uint_fast8_t bits) -> Sample { - // Left-align samples, effectively scaling them up to 32 bits. - return src << (sizeof(Sample) * 8 - bits); + if (bits > 16) { + // Left-align samples, effectively scaling them up to 32 bits. + return src << (sizeof(Sample) * 8 - bits); + } else if (bits < 16) { + return src << (bits - (sizeof(Sample) * 8)); + } + return src; } constexpr auto FromUnsigned(uint32_t src, uint_fast8_t bits) -> Sample { // Left-align, then substract the max value / 2 to make the sample centred // around zero. - return (src << (sizeof(uint32_t) * 8 - bits)) - (~0UL >> 1); + return (src << (sizeof(uint16_t) * 8 - bits)) - (~0UL >> 1); } constexpr auto FromFloat(float src) -> Sample { - return std::clamp<float>(src, -1.0f, 1.0f) * static_cast<float>(INT32_MAX); + return std::clamp<float>(src, -1.0f, 1.0f) * static_cast<float>(INT16_MAX); } constexpr auto FromDouble(double src) -> Sample { - return std::clamp<double>(src, -1.0, 1.0) * static_cast<double>(INT32_MAX); + return std::clamp<double>(src, -1.0, 1.0) * static_cast<double>(INT16_MAX); } constexpr auto FromMad(mad_fixed_t src) -> Sample { // Round the bottom bits. - src += (1L << (MAD_F_FRACBITS - 24)); + src += (1L << (MAD_F_FRACBITS - 16)); // Clip the leftover bits to within range. 
if (src >= MAD_F_ONE) @@ -49,14 +54,10 @@ constexpr auto FromMad(mad_fixed_t src) -> Sample { src = -MAD_F_ONE; // Quantize. - return FromSigned(src >> (MAD_F_FRACBITS + 1 - 24), 24); + return FromSigned(src >> (MAD_F_FRACBITS + 1 - 16), 16); } -constexpr auto ToSigned16Bit(Sample src) -> int16_t { - return src >> 16; -} - -static constexpr float kFactor = 1.0f / static_cast<float>(INT32_MAX); +static constexpr float kFactor = 1.0f / static_cast<float>(INT16_MAX); constexpr auto ToFloat(Sample src) -> float { return src * kFactor; diff --git a/src/codecs/opus.cpp b/src/codecs/opus.cpp index 70ec9e45..c0727c6b 100644 --- a/src/codecs/opus.cpp +++ b/src/codecs/opus.cpp @@ -122,35 +122,24 @@ auto XiphOpusDecoder::OpenStream(std::shared_ptr<IStream> input) return cpp::fail(Error::kMalformedData); } - num_channels_ = std::min<uint8_t>(2, op_channel_count(opus_, -1)); - return OutputFormat{ - .num_channels = num_channels_, + .num_channels = 2, .sample_rate_hz = 48000, }; } auto XiphOpusDecoder::DecodeTo(cpp::span<sample::Sample> output) -> cpp::result<OutputInfo, Error> { - cpp::span<int16_t> staging_buffer{ - reinterpret_cast<int16_t*>(output.subspan(output.size() / 2).data()), - output.size_bytes() / 2}; - - int samples_written = - op_read_stereo(opus_, staging_buffer.data(), staging_buffer.size()); + int samples_written = op_read_stereo(opus_, output.data(), output.size()); if (samples_written < 0) { ESP_LOGE(kTag, "read failed %i", samples_written); return cpp::fail(Error::kMalformedData); } - samples_written *= num_channels_; - for (int i = 0; i < samples_written; i++) { - output[i] = sample::FromSigned(staging_buffer[i], 16); - } - + samples_written *= 2; // Fixed to stereo return OutputInfo{ - .samples_written = static_cast<size_t>(samples_written / 2), + .samples_written = static_cast<size_t>(samples_written), .is_stream_finished = samples_written == 0, }; } diff --git a/src/codecs/vorbis.cpp b/src/codecs/vorbis.cpp index 6fa3256a..aa367e02 100644 --- 
a/src/codecs/vorbis.cpp +++ b/src/codecs/vorbis.cpp @@ -126,14 +126,9 @@ auto TremorVorbisDecoder::OpenStream(std::shared_ptr<IStream> input) auto TremorVorbisDecoder::DecodeTo(cpp::span<sample::Sample> output) -> cpp::result<OutputInfo, Error> { - cpp::span<int16_t> staging_buffer{ - reinterpret_cast<int16_t*>(output.subspan(output.size() / 2).data()), - output.size_bytes() / 2}; - - int bitstream; - long bytes_written = - ov_read(&vorbis_, reinterpret_cast<char*>(staging_buffer.data()), - staging_buffer.size_bytes(), &bitstream); + int bitstream = 0; + long bytes_written = ov_read(&vorbis_, reinterpret_cast<char*>(output.data()), + output.size_bytes(), &bitstream); if (bytes_written == OV_HOLE) { ESP_LOGE(kTag, "got OV_HOLE"); return cpp::fail(Error::kMalformedData); @@ -142,12 +137,9 @@ auto TremorVorbisDecoder::DecodeTo(cpp::span<sample::Sample> output) return cpp::fail(Error::kMalformedData); } - for (int i = 0; i < bytes_written / 2; i++) { - output[i] = sample::FromSigned(staging_buffer[i], 16); - } - return OutputInfo{ - .samples_written = static_cast<size_t>(bytes_written / 2), + .samples_written = + static_cast<size_t>(bytes_written / sizeof(sample::Sample)), .is_stream_finished = bytes_written == 0, }; } |
