diff options
Diffstat (limited to 'src/codecs')
| -rw-r--r-- | src/codecs/CMakeLists.txt | 4 | ||||
| -rw-r--r-- | src/codecs/codec.cpp | 5 | ||||
| -rw-r--r-- | src/codecs/include/codec.hpp | 4 | ||||
| -rw-r--r-- | src/codecs/include/sample.hpp | 6 | ||||
| -rw-r--r-- | src/codecs/include/types.hpp | 2 | ||||
| -rw-r--r-- | src/codecs/include/wav.hpp | 57 | ||||
| -rw-r--r-- | src/codecs/sample.cpp | 7 | ||||
| -rw-r--r-- | src/codecs/source_buffer.cpp | 2 | ||||
| -rw-r--r-- | src/codecs/wav.cpp | 259 |
9 files changed, 331 insertions, 15 deletions
diff --git a/src/codecs/CMakeLists.txt b/src/codecs/CMakeLists.txt index b8e0bbca..eb1897da 100644 --- a/src/codecs/CMakeLists.txt +++ b/src/codecs/CMakeLists.txt @@ -4,9 +4,9 @@ idf_component_register( SRCS "codec.cpp" "mad.cpp" "miniflac.cpp" "opus.cpp" "vorbis.cpp" - "source_buffer.cpp" "sample.cpp" + "source_buffer.cpp" "sample.cpp" "wav.cpp" INCLUDE_DIRS "include" - REQUIRES "result" "span" "libmad" "miniflac" "tremor" "opusfile" "memory" + REQUIRES "result" "span" "libmad" "miniflac" "tremor" "opusfile" "memory" "util" "komihash") target_compile_options("${COMPONENT_LIB}" PRIVATE ${EXTRA_WARNINGS}) diff --git a/src/codecs/codec.cpp b/src/codecs/codec.cpp index d81d4b05..7bc591aa 100644 --- a/src/codecs/codec.cpp +++ b/src/codecs/codec.cpp @@ -14,6 +14,7 @@ #include "opus.hpp" #include "types.hpp" #include "vorbis.hpp" +#include "wav.hpp" namespace codecs { @@ -21,7 +22,7 @@ auto StreamTypeToString(StreamType t) -> std::string { switch (t) { case StreamType::kMp3: return "Mp3"; - case StreamType::kPcm: + case StreamType::kWav: return "Wav"; case StreamType::kVorbis: return "Vorbis"; @@ -44,6 +45,8 @@ auto CreateCodecForType(StreamType type) -> std::optional<ICodec*> { return new MiniFlacDecoder(); case StreamType::kOpus: return new XiphOpusDecoder(); + case StreamType::kWav: + return new WavDecoder(); default: return {}; } diff --git a/src/codecs/include/codec.hpp b/src/codecs/include/codec.hpp index 87f6637c..36dda8ff 100644 --- a/src/codecs/include/codec.hpp +++ b/src/codecs/include/codec.hpp @@ -76,6 +76,8 @@ class ICodec { kOutOfInput, // Indicates that the data within the input buffer is fatally malformed. kMalformedData, + // Indicated that the format is unsupported + kUnsupportedFormat, kInternalError, }; @@ -88,6 +90,8 @@ class ICodec { return "malformed data"; case Error::kInternalError: return "internal error"; + case Error::kUnsupportedFormat: + return "unsupported format"; } return "uhh"; } diff --git a/src/codecs/include/sample.hpp b/src/codecs/include/sample.hpp index 7e550680..7b3f96a3 100644 --- a/src/codecs/include/sample.hpp +++ b/src/codecs/include/sample.hpp @@ -24,10 +24,6 @@ namespace sample { // 3. Monty from Xiph.org reckons it's all you need. typedef int16_t Sample; -constexpr auto Clip(int64_t v) -> Sample { - return std::clamp<int64_t>(v, INT16_MIN, INT16_MAX); -} - auto shiftWithDither(int64_t src, uint_fast8_t bits) -> Sample; constexpr auto FromSigned(int32_t src, uint_fast8_t bits) -> Sample { @@ -42,7 +38,7 @@ constexpr auto FromSigned(int32_t src, uint_fast8_t bits) -> Sample { constexpr auto FromUnsigned(uint32_t src, uint_fast8_t bits) -> Sample { // Left-align, then substract the max value / 2 to make the sample centred // around zero. - return (src << (sizeof(uint16_t) * 8 - bits)) - (~0UL >> 1); + return (src << (sizeof(uint16_t) * 8 - bits)) - (INT16_MAX+1); } constexpr auto FromFloat(float src) -> Sample { diff --git a/src/codecs/include/types.hpp b/src/codecs/include/types.hpp index c9eefe45..c6dcb486 100644 --- a/src/codecs/include/types.hpp +++ b/src/codecs/include/types.hpp @@ -12,10 +12,10 @@ namespace codecs { enum class StreamType { kMp3, - kPcm, kVorbis, kFlac, kOpus, + kWav, }; auto StreamTypeToString(StreamType t) -> std::string; diff --git a/src/codecs/include/wav.hpp b/src/codecs/include/wav.hpp new file mode 100644 index 00000000..896976dd --- /dev/null +++ b/src/codecs/include/wav.hpp @@ -0,0 +1,57 @@ +/* + * Copyright 2023 Daniel <ailuruxx@gmail.com> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#pragma once + +#include <cstddef> +#include <cstdint> +#include <memory> +#include <optional> +#include <string> +#include <utility> + +#include "sample.hpp" +#include "source_buffer.hpp" + +#include "codec.hpp" + +namespace codecs { + +static const uint16_t kWaveFormatPCM = 0x0001; +static const uint16_t kWaveFormatIEEEFloat = 0x0003; +static const uint16_t kWaveFormatAlaw = 0x0006; +static const uint16_t kWaveFormatMulaw = 0x0007; +static const uint16_t kWaveFormatExtensible = 0xFFFE; + +class WavDecoder : public ICodec { + public: + WavDecoder(); + ~WavDecoder(); + + auto OpenStream(std::shared_ptr<IStream> input) + -> cpp::result<OutputFormat, Error> override; + + auto DecodeTo(cpp::span<sample::Sample> destination) + -> cpp::result<OutputInfo, Error> override; + + auto SeekTo(std::size_t target_sample) -> cpp::result<void, Error> override; + + WavDecoder(const WavDecoder&) = delete; + WavDecoder& operator=(const WavDecoder&) = delete; + + private: + std::shared_ptr<IStream> input_; + SourceBuffer buffer_; + uint16_t wave_format_; + uint16_t subformat_; + OutputFormat output_format_; + uint16_t bytes_per_sample_; + uint16_t num_channels_; + + auto GetFormat() const -> uint16_t; +}; + +} // namespace codecs diff --git a/src/codecs/sample.cpp b/src/codecs/sample.cpp index d4860b94..c99710f1 100644 --- a/src/codecs/sample.cpp +++ b/src/codecs/sample.cpp @@ -21,11 +21,8 @@ auto shiftWithDither(int64_t src, uint_fast8_t bits) -> Sample { uint64_t mask = 0xFFFFFFFF; mask >>= 32 - bits; int64_t noise = static_cast<int32_t>(komirand(&sSeed1, &sSeed2) & mask); - // Centre the noise around 0. - noise -= (mask >> 1); - // Apply to the sample, then clip and shift to 16 bit. - Sample clipped = Clip((src + noise) >> bits); - return clipped; + // Apply to the sample, then shift to 16 bit. + return (src + noise) >> bits; } } // namespace sample diff --git a/src/codecs/source_buffer.cpp b/src/codecs/source_buffer.cpp index bf8951f3..1db2e6c2 100644 --- a/src/codecs/source_buffer.cpp +++ b/src/codecs/source_buffer.cpp @@ -62,7 +62,7 @@ auto SourceBuffer::AddBytes(std::function<size_t(cpp::span<std::byte>)> writer) auto SourceBuffer::ConsumeBytes( std::function<size_t(cpp::span<std::byte>)> reader) -> void { size_t bytes_consumed = std::invoke( - reader, buffer_.subspan(offset_of_bytes_).first(bytes_in_buffer_)); + reader, buffer_.subspan(offset_of_bytes_, bytes_in_buffer_)); assert(bytes_consumed <= bytes_in_buffer_); bytes_in_buffer_ -= bytes_consumed; diff --git a/src/codecs/wav.cpp b/src/codecs/wav.cpp new file mode 100644 index 00000000..a67f3ff4 --- /dev/null +++ b/src/codecs/wav.cpp @@ -0,0 +1,259 @@ +/* + * Copyright 2023 Daniel <ailuruxx@gmail.com> + * + * SPDX-License-Identifier: GPL-3.0-only + */ + +#include "wav.hpp" +#include <stdint.h> +#include <sys/_stdint.h> + +#include <algorithm> +#include <cstdlib> +#include <string> + +#include "debug.hpp" +#include "esp_log.h" +#include "sample.hpp" + +namespace codecs { + +[[maybe_unused]] static const char kTag[] = "wav"; + +static inline auto bytes_to_u16(cpp::span<std::byte const, 2> bytes) + -> uint16_t { + return (uint16_t)bytes[0] | (uint16_t)bytes[1] << 8; +} + +static inline auto bytes_to_u32(cpp::span<std::byte const, 4> bytes) + -> uint32_t { + return (uint32_t)bytes[0] | (uint32_t)bytes[1] << 8 | + (uint32_t)bytes[2] << 16 | (uint32_t)bytes[3] << 24; +} + +static inline auto bytes_to_str(cpp::span<std::byte const> bytes) + -> std::string { + return std::string(reinterpret_cast<const char*>(bytes.data()), + bytes.size_bytes()); +} + +static int16_t convert_f32_to_16_bit(cpp::span<const std::byte> bytes) { + uint64_t val = 0; + val = (uint8_t)bytes[3]; + val = (val << 8) | (uint8_t)bytes[2]; + val = (val << 8) | (uint8_t)bytes[1]; + val = (val << 8) | (uint8_t)bytes[0]; + // Isolate the sign and remove from the value + uint64_t sign = val >> 31; + val -= (sign << 31); + // Isolate the exponent and remove from the value + uint64_t exp = (val >> 23); + val -= (exp << 23); + // Remove old bias and add new bias + exp = exp - 127 + 1023; + // Reconstruct the bits in the correct order and convert to double + uint64_t dval = (sign << 63) + (exp << 52) + (val << 29); + double* fval = reinterpret_cast<double*>(&dval); + return sample::FromDouble(*fval); +} + +static int16_t convert_to_16_bit(cpp::span<const std::byte> bytes) { + int depth = bytes.size(); + int32_t val = 0; + // If 8-bit Assume Unsigned + if (depth == 1) { + return sample::FromUnsigned((uint8_t)bytes[0], 8); + } + // Otherwise, build the signed int of the right depth + switch (depth) { + case 4: + val = (uint8_t)bytes[3]; + case 3: + val = (val << 8) | (uint8_t)bytes[2]; + case 2: + val = (val << 8) | (uint8_t)bytes[1]; + case 1: + val = (val << 8) | (uint8_t)bytes[0]; + } + // Convert to sample + int16_t result = sample::FromSigned(val, depth * 8); + return result; +} + +WavDecoder::WavDecoder() : input_(), buffer_() {} + +WavDecoder::~WavDecoder() {} + +auto WavDecoder::OpenStream(std::shared_ptr<IStream> input) + -> cpp::result<OutputFormat, Error> { + input_ = input; + + std::array<std::byte, 255> buf{std::byte{0}}; + auto size = input->Read(buf); + if (size < 44) { + return cpp::fail(Error::kOutOfInput); + } + + // - check the first 4 bytes = 'RIFF' + // - next 4 bytes = file size + // - check next 4 bytes = 'WAVE' + // - index of 'fmt\0' (i) marks start of fmt data + // - i + 4 = size of fmt header (16, 18 or 40) + // - i + 8 = format (should be 0x01 for pcm, 0xfffe for + // wave_format_exstensible) + // - i + 10 = num channels + // - i + 12 = sample rate + // - i + 16 = byte rate (sample rate * channels * bits per sample / 8) + // - i + 20 = sample size (bits per sample * channels / 8) + // - i + 22 = bits per sample (2 bytes) + // - end of this part, next header we care about is 'data' + // - and then the next 4 bytes = 32 bit int = size of data + + auto buffer_span = cpp::span{buf}; + + std::string riff = bytes_to_str(buffer_span.subspan(0, 4)); + if (riff != "RIFF") { + ESP_LOGW(kTag, "file is not RIFF"); + return cpp::fail(Error::kMalformedData); + } + + uint32_t file_size = bytes_to_u32(buffer_span.subspan(4, 4)) + 8; + + std::string fmt_header = bytes_to_str(buffer_span.subspan(12, 4)); + ESP_LOGI(kTag, "fmt header found? %s", + (fmt_header.starts_with("fmt")) ? "yes" : "no"); + if (!fmt_header.starts_with("fmt")) { + ESP_LOGW(kTag, "Could not find format chunk"); + return cpp::fail(Error::kMalformedData); + } + + // Size of the fmt header, should be 16, 18 or 40 + uint32_t fmt_header_size = bytes_to_u32(buffer_span.subspan(16, 4)); + + wave_format_ = bytes_to_u16(buffer_span.subspan(20, 2)); + if (wave_format_ == kWaveFormatPCM) { + ESP_LOGD(kTag, "wave format: PCM"); + } else if (wave_format_ == kWaveFormatExtensible) { + ESP_LOGD(kTag, "wave format: extensible"); + } else if (wave_format_ == kWaveFormatIEEEFloat) { + ESP_LOGD(kTag, "wave format: IEEE Float"); + } else { + ESP_LOGW(kTag, "WAVE format not supported"); + return cpp::fail(Error::kUnsupportedFormat); + } + + num_channels_ = bytes_to_u16(buffer_span.subspan(22, 2)); + + uint32_t samples_per_second = bytes_to_u32(buffer_span.subspan(24, 4)); + + uint32_t avg_bytes_per_second = bytes_to_u32(buffer_span.subspan(28, 4)); + + uint16_t block_align = bytes_to_u16(buffer_span.subspan(32, 2)); + + bytes_per_sample_ = block_align / num_channels_; + + uint16_t bits_per_sample = bytes_to_u16(buffer_span.subspan(34, 2)); + + // find the start of the data chunk + std::array<std::byte, 4> data_tag = {std::byte{0x64}, std::byte{0x61}, + std::byte{0x74}, std::byte{0x61}}; + auto data_loc = std::ranges::search(buffer_span, data_tag); + if (data_loc.begin() == buffer_span.end()) { + ESP_LOGW(kTag, "Could not find data chunk!"); + return cpp::fail(Error::kMalformedData); + } + + int data_chunk_index = std::distance(buffer_span.begin(), data_loc.begin()); + + uint32_t data_chunk_size = + bytes_to_u32(buffer_span.subspan(data_chunk_index + 4, 4)); + + // calculate number of samples + int number_of_samples = data_chunk_size / bytes_per_sample_; + + // extension to the fmt chunk size (0 or 22) + uint16_t extension_size = 0; + if (wave_format_ == kWaveFormatExtensible) { + extension_size = bytes_to_u16(buffer_span.subspan(36, 2)); + } + + // Parse extension if applicable + if (extension_size == 22) { + // Valid bits per sample + uint16_t valid_bits_per_sample = bytes_to_u16(buffer_span.subspan(38, 2)); + + uint32_t speaker_mask = bytes_to_u32(buffer_span.subspan(40, 4)); + + // Parse subformat + subformat_ = bytes_to_u16(buffer_span.subspan(44, 2)); + if (!(subformat_ == kWaveFormatPCM || + subformat_ == kWaveFormatIEEEFloat)) { + ESP_LOGW(kTag, "WAVE extensible subformat_ not supported"); + return cpp::fail(Error::kUnsupportedFormat); + } + } + + // 64 bit float is not implemented yet, make sure we're not letting it through + if (GetFormat() == kWaveFormatIEEEFloat && bytes_per_sample_ == 8) { + ESP_LOGW(kTag, "WAVE 64-Bit Float not supported"); + return cpp::fail(Error::kUnsupportedFormat); + } + + // Seek track to start of data + input->SeekTo(data_chunk_index + 8, IStream::SeekFrom::kStartOfStream); + + output_format_ = {.num_channels = (uint8_t)num_channels_, + .sample_rate_hz = samples_per_second, + .total_samples = number_of_samples}; + + return output_format_; +} + +auto WavDecoder::DecodeTo(cpp::span<sample::Sample> output) + -> cpp::result<OutputInfo, Error> { + bool is_eof = buffer_.Refill(input_.get()); + size_t samples_written = 0; + + buffer_.ConsumeBytes([&](cpp::span<std::byte> buf) -> size_t { + size_t bytes_read = buf.size_bytes(); + size_t frames_read = + bytes_read / bytes_per_sample_ / output_format_.num_channels; + + samples_written = + std::min<size_t>(frames_read, + output.size() / output_format_.num_channels) * + output_format_.num_channels; + + // For each sample that we're going to write + for (size_t i = 0; i < samples_written; i++) { + auto data = buf.subspan(i * bytes_per_sample_, bytes_per_sample_); + if (GetFormat() == kWaveFormatPCM) { + // PCM + output[i] = convert_to_16_bit(data); + } else if (GetFormat() == kWaveFormatIEEEFloat) { + // 32-Bit Float + if (bytes_per_sample_ == 4) { + output[i] = convert_f32_to_16_bit(data); + } + } + } + + return samples_written * bytes_per_sample_; + }); + + return OutputInfo{.samples_written = samples_written, + .is_stream_finished = samples_written == 0 && is_eof}; +} + +auto WavDecoder::SeekTo(size_t target) -> cpp::result<void, Error> { + return {}; +} + +auto codecs::WavDecoder::GetFormat() const -> uint16_t { + if (wave_format_ == kWaveFormatExtensible) { + return subformat_; + } + return wave_format_; +} + +} // namespace codecs |
