src/audio/audio_decoder.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

#include "audio_decoder.hpp"

#include <string.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <variant>

#include "cbor/tinycbor/src/cborinternal_p.h"
#include "freertos/FreeRTOS.h"

#include "esp_heap_caps.h"
#include "esp_log.h"
#include "freertos/message_buffer.h"
#include "freertos/portmacro.h"

#include "audio_element.hpp"
#include "chunk.hpp"
#include "fatfs_audio_input.hpp"
#include "stream_info.hpp"

namespace audio {

// Tag passed to the ESP_LOG* macros for every message emitted by this file.
static constexpr const char* kTag = "DEC";

AudioDecoder::AudioDecoder()
    : IAudioElement(),
      current_codec_(),
      current_input_format_(),
      current_output_format_(),
      has_samples_to_send_(false) {}

// Nothing to release by hand; the members clean up after themselves.
AudioDecoder::~AudioDecoder() = default;

// Prepares `current_codec_` for a stream described by `info`.
//
// Returns true when a codec is ready for the stream (either the existing one
// was reset for reuse, or a new one was created). Returns false when `info`
// does not describe an encoded stream, or when no codec could be created for
// its type; in both of those cases `current_codec_` is left untouched.
auto AudioDecoder::ProcessStreamInfo(const StreamInfo& info) -> bool {
  const auto* encoded = std::get_if<StreamInfo::Encoded>(&info.format);
  if (encoded == nullptr) {
    return false;
  }
  ESP_LOGI(kTag, "got new stream");

  // Prefer keeping the codec we already have. That avoids any setup overhead
  // between tracks, which helps with gapless playback.
  const bool can_reuse = current_codec_ != nullptr &&
                         current_codec_->CanHandleType(encoded->type);
  if (can_reuse) {
    current_codec_->ResetForNewStream();
    ESP_LOGI(kTag, "reusing existing decoder");
    return true;
  }

  // TODO: use audio type from stream
  auto created = codecs::CreateCodecForType(encoded->type);
  if (!created.has_value()) {
    ESP_LOGE(kTag, "no codec for this file");
    return false;
  }

  ESP_LOGI(kTag, "creating new decoder");
  current_codec_ = std::move(created.value());
  return true;
}

auto AudioDecoder::Process(const std::vector<InputStream>& inputs,
                           OutputStream* output) -> void {
  auto input = inputs.begin();
  const StreamInfo& info = input->info();
  if (std::holds_alternative<std::monostate>(info.format) ||
      info.bytes_in_stream == 0) {
    // TODO(jacqueline): should we clear the stream format?
    // output->prepare({});
    return;
  }

  if (!current_input_format_ || *current_input_format_ != info.format) {
    // The input stream has changed! Immediately throw everything away and
    // start from scratch.
    current_input_format_ = info.format;
    has_samples_to_send_ = false;

    ProcessStreamInfo(info);
  }

  current_codec_->SetInput(input->data());

  while (true) {
    if (has_samples_to_send_) {
      if (!current_output_format_) {
        auto format = current_codec_->GetOutputFormat();
        current_output_format_ = StreamInfo::Pcm{
            .channels = format.num_channels,
            .bits_per_sample = format.bits_per_sample,
            .sample_rate = format.sample_rate_hz,
        };
      }

      if (!output->prepare(*current_output_format_)) {
        break;
      }

      auto write_res = current_codec_->WriteOutputSamples(output->data());
      output->add(write_res.first);
      has_samples_to_send_ = !write_res.second;

      if (has_samples_to_send_) {
        // We weren't able to fit all the generated samples into the output
        // buffer. Stop trying; we'll finish up during the next pass.
        break;
      }
    }

    auto res = current_codec_->ProcessNextFrame();
    if (res.has_error()) {
      // TODO(jacqueline): Handle errors.
      return;
    }

    if (res.value()) {
      // We're out of useable data in this buffer. Finish immediately; there's
      // nothing to send.
      input->mark_incomplete();
      break;
    } else {
      has_samples_to_send_ = true;
    }
  }

  std::size_t pos = current_codec_->GetInputPosition();
  if (pos > 0) {
    input->consume(pos - 1);
  }
}

}  // namespace audio