From 5d437513d0eec0ceddd50f1a60c5abdba5da97b9 Mon Sep 17 00:00:00 2001 From: ayumi Date: Tue, 15 Apr 2025 03:15:16 +0200 Subject: Make WavPack seeking faster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It turns out that “seeking to a first sample in a not–first block” is actually very common, because Tangara only seeks to exact seconds and the reference encoder tends to size blocks in a way that makes the first sample in a block likely be the sample that the firmware wants to seek to. --- src/codecs/wavpack.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/codecs/wavpack.cpp b/src/codecs/wavpack.cpp index 21865785..7990e4d6 100644 --- a/src/codecs/wavpack.cpp +++ b/src/codecs/wavpack.cpp @@ -73,7 +73,7 @@ auto WavPackDecoder::OpenStream(std::shared_ptr input, uint32_t offset) ); const auto rate = WavpackGetSampleRate(&wavpack_); if (offset && total && input_.get()->CanSeek()) { - const uint32_t want = offset * rate - 1; + const uint32_t want = offset * rate; if (total < want) { ESP_LOGE(kTag, "seeking: offset points beyond the end of the file"); return cpp::fail(Error::kInternalError); @@ -96,7 +96,11 @@ auto WavPackDecoder::OpenStream(std::shared_ptr input, uint32_t offset) } const uint32_t blockIndex = loadLe32(header + 16); const uint32_t blockSamples = loadLe32(header + 20); - if (want >= blockIndex && want <= blockIndex + blockSamples) { + if (want >= blockIndex && want == blockIndex + blockSamples) { + input_->SeekTo(size - 24, IStream::SeekFrom::kCurrentPosition); + target = 0; + break; + } else if (want >= blockIndex && want < blockIndex + blockSamples) { input_->SeekTo(-32, IStream::SeekFrom::kCurrentPosition); target = want - blockIndex; break; -- cgit v1.2.3 From 48556dd603cac0107143f3cdc815c765baa640a9 Mon Sep 17 00:00:00 2001 From: ayumi Date: Wed, 23 Apr 2025 22:21:25 +0200 Subject: Avoid branching up to two times per sample in the WavPack decoder. 
In my limited tests this improves decoding speed by around 3%. --- src/codecs/wavpack.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/codecs/wavpack.cpp b/src/codecs/wavpack.cpp index 7990e4d6..709302e6 100644 --- a/src/codecs/wavpack.cpp +++ b/src/codecs/wavpack.cpp @@ -154,8 +154,14 @@ auto WavPackDecoder::DecodeTo(std::span output) ESP_LOGE(kTag, "CRC error"); return cpp::fail(Error::kMalformedData); } - for (size_t i = 0; i < samples; i++) - output[i] = sample::FromSigned(buf_[i], bitdepth_); + if (bitdepth_ == 16) + for (size_t i = 0; i < samples; i++) + output[i] = buf_[i]; + else if (bitdepth_ > 16) + for (size_t i = 0; i < samples; i++) + output[i] = sample::shiftWithDither(buf_[i], bitdepth_ - 16); + else for (size_t i = 0; i < samples; i++) + output[i] = buf_[i] << (16 - bitdepth_); return OutputInfo{ .samples_written = samples, .is_stream_finished = samples == 0, -- cgit v1.2.3 From fb044d5ccae7cead4c2d2fda3f729fbfc1737005 Mon Sep 17 00:00:00 2001 From: ayumi Date: Wed, 23 Apr 2025 23:09:36 +0200 Subject: Move WavPack decoder to IRAM In my tests this improves the decoding speed by around 3%. --- lib/wavpack/CMakeLists.txt | 1 + lib/wavpack/wavpack.lf | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 lib/wavpack/wavpack.lf diff --git a/lib/wavpack/CMakeLists.txt b/lib/wavpack/CMakeLists.txt index 98fcda95..f2f24933 100644 --- a/lib/wavpack/CMakeLists.txt +++ b/lib/wavpack/CMakeLists.txt @@ -1,4 +1,5 @@ idf_component_register( SRCS bits.c float.c wputils.c metadata.c unpack.c words.c INCLUDE_DIRS . 
+ LDFRAGMENTS wavpack.lf ) diff --git a/lib/wavpack/wavpack.lf b/lib/wavpack/wavpack.lf new file mode 100644 index 00000000..686acad8 --- /dev/null +++ b/lib/wavpack/wavpack.lf @@ -0,0 +1,4 @@ +[mapping:libwavpack] +archive: libwavpack.a +entries: + * (noflash) -- cgit v1.2.3 From f656c9f5cbfa8515dab287077b10f6769c6e66bc Mon Sep 17 00:00:00 2001 From: ayumi Date: Wed, 23 Apr 2025 23:11:45 +0200 Subject: Utilise more than one bit of the entropy returned by komihash before requesting more --- src/codecs/sample.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/codecs/sample.cpp b/src/codecs/sample.cpp index 63d14203..faf8b0f9 100644 --- a/src/codecs/sample.cpp +++ b/src/codecs/sample.cpp @@ -13,13 +13,17 @@ namespace sample { -static uint64_t sSeed1{0}; -static uint64_t sSeed2{0}; - auto shiftWithDither(int64_t src, uint_fast8_t bits) -> Sample { // FIXME: Use a better dither. - int16_t noise = static_cast(komirand(&sSeed1, &sSeed2) & 1); - return (src >> bits) ^ noise; + static uint64_t sSeed1{0}; + static uint64_t sSeed2{0}; + static uint64_t noise; + static uint_fast8_t pos = 0; + if (pos++ % 64 == 0) + noise = komirand(&sSeed1, &sSeed2); + else + noise >>= 1; + return (src >> bits) ^ (noise & 1); } } // namespace sample -- cgit v1.2.3 From 2b4ed254c60c576fc6889cf6e3b2cc2068e29fbf Mon Sep 17 00:00:00 2001 From: ayumi Date: Wed, 23 Apr 2025 23:17:14 +0200 Subject: Move komihash to IRAM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Improves WavPack decoding speed by up to 50% in some cases and I’m not sure why this seems to matter so much. 
--- lib/komihash/include/komihash.h | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/lib/komihash/include/komihash.h b/lib/komihash/include/komihash.h index 7a72fda8..4f6b0f25 100644 --- a/lib/komihash/include/komihash.h +++ b/lib/komihash/include/komihash.h @@ -35,6 +35,8 @@ #include #include +#include "esp_attr.h" + // Macros that apply byte-swapping. #if defined( __GNUC__ ) || defined( __clang__ ) @@ -142,7 +144,7 @@ * @return Endianness-corrected 32-bit value from memory. */ -static inline uint32_t kh_lu32ec( const uint8_t* const p ) +static IRAM_ATTR inline uint32_t kh_lu32ec( const uint8_t* const p ) { uint32_t v; memcpy( &v, p, 4 ); @@ -160,7 +162,7 @@ static inline uint32_t kh_lu32ec( const uint8_t* const p ) * @return Endianness-corrected 64-bit value from memory. */ -static inline uint64_t kh_lu64ec( const uint8_t* const p ) +static IRAM_ATTR inline uint64_t kh_lu64ec( const uint8_t* const p ) { uint64_t v; memcpy( &v, p, 8 ); @@ -179,7 +181,7 @@ static inline uint64_t kh_lu64ec( const uint8_t* const p ) * @return Final byte-padded value from the message. */ -static inline uint64_t kh_lpu64ec_l3( const uint8_t* const Msg, +static IRAM_ATTR inline uint64_t kh_lpu64ec_l3( const uint8_t* const Msg, const size_t MsgLen ) { const int ml8 = -(int) ( MsgLen * 8 ); @@ -211,7 +213,7 @@ static inline uint64_t kh_lpu64ec_l3( const uint8_t* const Msg, * @return Final byte-padded value from the message. */ -static inline uint64_t kh_lpu64ec_nz( const uint8_t* const Msg, +static IRAM_ATTR inline uint64_t kh_lpu64ec_nz( const uint8_t* const Msg, const size_t MsgLen ) { const int ml8 = -(int) ( MsgLen * 8 ); @@ -252,7 +254,7 @@ static inline uint64_t kh_lpu64ec_nz( const uint8_t* const Msg, * @return Final byte-padded value from the message. 
*/ -static inline uint64_t kh_lpu64ec_l4( const uint8_t* const Msg, +static IRAM_ATTR inline uint64_t kh_lpu64ec_l4( const uint8_t* const Msg, const size_t MsgLen ) { const int ml8 = -(int) ( MsgLen * 8 ); @@ -280,7 +282,7 @@ static inline uint64_t kh_lpu64ec_l4( const uint8_t* const Msg, * @param[out] rh The higher half of the 128-bit result. */ - static inline void kh_m128( const uint64_t m1, const uint64_t m2, + static IRAM_ATTR inline void kh_m128( const uint64_t m1, const uint64_t m2, uint64_t* const rl, uint64_t* const rh ) { const __uint128_t r = (__uint128_t) m1 * m2; @@ -293,7 +295,7 @@ static inline uint64_t kh_lpu64ec_l4( const uint8_t* const Msg, #include - static inline void kh_m128( const uint64_t m1, const uint64_t m2, + static IRAM_ATTR inline void kh_m128( const uint64_t m1, const uint64_t m2, uint64_t* const rl, uint64_t* const rh ) { *rl = _umul128( m1, m2, rh ); @@ -305,12 +307,12 @@ static inline uint64_t kh_lpu64ec_l4( const uint8_t* const Msg, // from https://go.dev/src/runtime/softfloat64.go // Licensed under BSD-style license. - static inline uint64_t kh__emulu( const uint32_t x, const uint32_t y ) + static IRAM_ATTR inline uint64_t kh__emulu( const uint32_t x, const uint32_t y ) { return( x * (uint64_t) y ); } - static inline void kh_m128( const uint64_t u, const uint64_t v, + static IRAM_ATTR inline void kh_m128( const uint64_t u, const uint64_t v, uint64_t* const rl, uint64_t* const rh ) { *rl = u * v; @@ -406,7 +408,7 @@ static inline uint64_t kh_lpu64ec_l4( const uint8_t* const Msg, * @return 64-bit hash value. */ -static inline uint64_t komihash_epi( const uint8_t* Msg, size_t MsgLen, +static IRAM_ATTR inline uint64_t komihash_epi( const uint8_t* Msg, size_t MsgLen, uint64_t Seed1, uint64_t Seed5 ) { uint64_t r1h, r2h; @@ -463,7 +465,7 @@ static inline uint64_t komihash_epi( const uint8_t* Msg, size_t MsgLen, * @return 64-bit hash of the input data. 
*/ -static inline uint64_t komihash( const void* const Msg0, size_t MsgLen, +static IRAM_ATTR inline uint64_t komihash( const void* const Msg0, size_t MsgLen, const uint64_t UseSeed ) { const uint8_t* Msg = (const uint8_t*) Msg0; @@ -571,7 +573,7 @@ static inline uint64_t komihash( const void* const Msg0, size_t MsgLen, * @return The next uniformly-random 64-bit value. */ -static inline uint64_t komirand( uint64_t* const Seed1, uint64_t* const Seed2 ) +static IRAM_ATTR inline uint64_t komirand( uint64_t* const Seed1, uint64_t* const Seed2 ) { uint64_t rh; @@ -615,7 +617,7 @@ typedef struct { * little-endian systems. */ -static inline void komihash_stream_init( komihash_stream_t* const ctx, +static IRAM_ATTR inline void komihash_stream_init( komihash_stream_t* const ctx, const uint64_t UseSeed ) { ctx -> Seed[ 0 ] = UseSeed; @@ -633,7 +635,7 @@ static inline void komihash_stream_init( komihash_stream_t* const ctx, * @param MsgLen Message's length, in bytes, can be zero. */ -static inline void komihash_stream_update( komihash_stream_t* const ctx, +static IRAM_ATTR inline void komihash_stream_update( komihash_stream_t* const ctx, const void* const Msg0, size_t MsgLen ) { const uint8_t* Msg = (const uint8_t*) Msg0; @@ -772,7 +774,7 @@ static inline void komihash_stream_update( komihash_stream_t* const ctx, * @return 64-bit hash value. */ -static inline uint64_t komihash_stream_final( komihash_stream_t* const ctx ) +static IRAM_ATTR inline uint64_t komihash_stream_final( komihash_stream_t* const ctx ) { const uint8_t* Msg = ctx -> Buf; size_t MsgLen = ctx -> BufFill; @@ -817,7 +819,7 @@ static inline uint64_t komihash_stream_final( komihash_stream_t* const ctx ) * @return 64-bit hash value. */ -static inline uint64_t komihash_stream_oneshot( const void* const Msg, +static IRAM_ATTR inline uint64_t komihash_stream_oneshot( const void* const Msg, const size_t MsgLen, const uint64_t UseSeed ) { komihash_stream_t ctx; -- cgit v1.2.3