Pause and unpause the current audio output in response to TTS

author: jacqueline <me@jacqueline.id.au> 2024-09-12 10:44:26 +1000
committer: jacqueline <me@jacqueline.id.au> 2024-09-12 10:44:26 +1000
commit: c51709f99ff5456a5863ca39ff893f823a3642d4 (patch)
tree: 4b2262b6451834dfb0e197fcc7c64fd3ea0f0569
parent: 542ebc65317ac4744a4b96c3131dace5bda10314 (diff)
download: tangara-fw-c51709f99ff5456a5863ca39ff893f823a3642d4.tar.gz
7 files changed, 119 insertions, 43 deletions
diff --git a/src/drivers/include/drivers/pcm_buffer.hpp b/src/drivers/include/drivers/pcm_buffer.hpp
index 4e5fa041..6b38be94 100644
--- a/src/drivers/include/drivers/pcm_buffer.hpp
+++ b/src/drivers/include/drivers/pcm_buffer.hpp
@@ -49,6 +49,7 @@ class PcmBuffer {
 
   auto clear() -> void;
   auto isEmpty() -> bool;
+  auto suspend(bool) -> void;
 
   /*
    * How many samples have been added to this buffer since it was created. This
@@ -75,6 +76,8 @@ class PcmBuffer {
 
   std::atomic<uint32_t> sent_;
   std::atomic<uint32_t> received_;
+  std::atomic<bool> suspended_;
+
   RingbufHandle_t ringbuf_;
 };
 
diff --git a/src/drivers/pcm_buffer.cpp b/src/drivers/pcm_buffer.cpp
index 1e416301..bc58d4b9 100644
--- a/src/drivers/pcm_buffer.cpp
+++ b/src/drivers/pcm_buffer.cpp
@@ -25,7 +25,8 @@ namespace drivers {
 
 [[maybe_unused]] static const char kTag[] = "pcmbuf";
 
-PcmBuffer::PcmBuffer(size_t size_in_samples) : sent_(0), received_(0) {
+PcmBuffer::PcmBuffer(size_t size_in_samples)
+    : sent_(0), received_(0), suspended_(false) {
   size_t size_in_bytes = size_in_samples * sizeof(int16_t);
   ESP_LOGI(kTag, "allocating pcm buffer of size %u (%uKiB)", size_in_samples,
            size_in_bytes / 1024);
@@ -51,6 +52,13 @@ auto PcmBuffer::send(std::span<const int16_t> data) -> size_t {
 
 IRAM_ATTR auto PcmBuffer::receive(std::span<int16_t> dest, bool mix, bool isr)
     -> BaseType_t {
+  if (suspended_) {
+    if (!mix) {
+      std::fill_n(dest.begin(), dest.size(), 0);
+    }
+    return false;
+  }
+
   size_t first_read = 0, second_read = 0;
   BaseType_t ret1 = false, ret2 = false;
   std::tie(first_read, ret1) = readSingle(dest, mix, isr);
@@ -86,6 +94,10 @@ auto PcmBuffer::isEmpty() -> bool {
          xRingbufferGetCurFreeSize(ringbuf_);
 }
 
+auto PcmBuffer::suspend(bool s) -> void {
+  suspended_ = s;
+}
+
 auto PcmBuffer::totalSent() -> uint32_t {
   return sent_;
 }
diff --git a/src/tangara/audio/audio_events.hpp b/src/tangara/audio/audio_events.hpp
index 91bcf48b..56d150b2 100644
--- a/src/tangara/audio/audio_events.hpp
+++ b/src/tangara/audio/audio_events.hpp
@@ -144,8 +144,11 @@ struct OutputModeChanged : tinyfsm::Event {
   std::optional<drivers::NvsStorage::Output> set_to;
 };
 
-namespace internal {
+struct TtsPlaybackChanged : tinyfsm::Event {
+  bool is_playing;
+};
 
+namespace internal {
 struct DecodingStarted : tinyfsm::Event {
   std::shared_ptr<TrackInfo> track;
 };
diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp
index dac04f75..1daf568e 100644
--- a/src/tangara/audio/audio_fsm.cpp
+++ b/src/tangara/audio/audio_fsm.cpp
@@ -76,6 +76,7 @@ std::optional<IAudioOutput::Format> AudioState::sDrainFormat;
 StreamCues AudioState::sStreamCues;
 
 bool AudioState::sIsPaused = true;
+bool AudioState::sIsTtsPlaying = false;
 
 auto AudioState::emitPlaybackUpdate(bool paused) -> void {
   std::optional<uint32_t> position;
@@ -191,6 +192,11 @@ void AudioState::react(const TogglePlayPause& ev) {
   }
 }
 
+void AudioState::react(const TtsPlaybackChanged& ev) {
+  sIsTtsPlaying = ev.is_playing;
+  updateOutputMode();
+}
+
 void AudioState::react(const internal::DecodingFinished& ev) {
   // If we just finished playing whatever's at the front of the queue, then we
   // need to advance and start playing the next one ASAP in order to continue
@@ -369,8 +375,8 @@ void AudioState::react(const OutputModeChanged& ev) {
       sOutput = sI2SOutput;
       break;
   }
-  sOutput->mode(IAudioOutput::Modes::kOnPaused);
   sSampleProcessor->SetOutput(sOutput);
+  updateOutputMode();
 
   // Bluetooth volume isn't 'changed' until we've connected to a device.
   if (new_mode == drivers::NvsStorage::Output::kHeadphones) {
@@ -381,6 +387,14 @@ void AudioState::react(const OutputModeChanged& ev) {
   }
 }
 
+auto AudioState::updateOutputMode() -> void {
+  if (is_in_state<states::Playback>() || sIsTtsPlaying) {
+    sOutput->mode(IAudioOutput::Modes::kOnPlaying);
+  } else {
+    sOutput->mode(IAudioOutput::Modes::kOnPaused);
+  }
+}
+
 auto AudioState::commitVolume() -> void {
   auto mode = sServices->nvs().OutputMode();
   auto vol = sOutput->GetVolume();
@@ -402,6 +416,7 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
 
   sDrainBuffers = std::make_unique<drivers::OutputBuffers>(
       kTrackDrainLatencySamples, kSystemDrainLatencySamples);
+  sDrainBuffers->first.suspend(true);
 
   sStreamFactory.reset(
       new FatfsStreamFactory(sServices->database(), sServices->tag_parser()));
@@ -454,6 +469,10 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
 static const char kQueueKey[] = "audio:queue";
 static const char kCurrentFileKey[] = "audio:current";
 
+auto Standby::entry() -> void {
+  updateOutputMode();
+}
+
 void Standby::react(const system_fsm::KeyLockChanged& ev) {
   if (!ev.locking) {
     return;
@@ -539,7 +558,8 @@ static void heartbeat(TimerHandle_t) {
 
 void Playback::entry() {
   ESP_LOGI(kTag, "audio output resumed");
-  sOutput->mode(IAudioOutput::Modes::kOnPlaying);
+  sDrainBuffers->first.suspend(false);
+  updateOutputMode();
   emitPlaybackUpdate(false);
 
   if (!sHeartbeatTimer) {
@@ -552,7 +572,7 @@ void Playback::entry() {
 void Playback::exit() {
   ESP_LOGI(kTag, "audio output paused");
   xTimerStop(sHeartbeatTimer, portMAX_DELAY);
-  sOutput->mode(IAudioOutput::Modes::kOnPaused);
+  sDrainBuffers->first.suspend(true);
   emitPlaybackUpdate(true);
 }
 
diff --git a/src/tangara/audio/audio_fsm.hpp b/src/tangara/audio/audio_fsm.hpp
index 134d9ffd..bc3feb55 100644
--- a/src/tangara/audio/audio_fsm.hpp
+++ b/src/tangara/audio/audio_fsm.hpp
@@ -48,6 +48,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
   void react(const PlaySineWave&);
   void react(const SetTrack&);
   void react(const TogglePlayPause&);
+  void react(const TtsPlaybackChanged&);
 
   void react(const internal::DecodingFinished&);
   void react(const internal::StreamStarted&);
@@ -70,6 +71,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
   virtual void react(const system_fsm::HasPhonesChanged&);
 
  protected:
+  auto updateOutputMode() -> void;
   auto emitPlaybackUpdate(bool paused) -> void;
   auto commitVolume() -> void;
 
@@ -88,6 +90,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
   static std::optional<IAudioOutput::Format> sDrainFormat;
 
   static bool sIsPaused;
+  static bool sIsTtsPlaying;
 };
 
 namespace states {
@@ -102,6 +105,7 @@ class Uninitialised : public AudioState {
 
 class Standby : public AudioState {
  public:
+  void entry() override;
   void react(const system_fsm::KeyLockChanged&) override;
   void react(const system_fsm::SdStateChanged&) override;
 
diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp
index b5b99b5d..a803ce57 100644
--- a/src/tangara/tts/player.cpp
+++ b/src/tangara/tts/player.cpp
@@ -5,11 +5,14 @@
  */
 
 #include "tts/player.hpp"
+#include <mutex>
 
+#include "audio/audio_events.hpp"
 #include "audio/processor.hpp"
 #include "audio/resample.hpp"
 #include "codec.hpp"
 #include "esp_log.h"
+#include "events/event_queue.hpp"
 #include "freertos/projdefs.h"
 #include "portmacro.h"
 #include "sample.hpp"
@@ -22,47 +25,70 @@ namespace tts {
 Player::Player(tasks::WorkerPool& worker,
                drivers::PcmBuffer& output,
                audio::FatfsStreamFactory& factory)
-    : bg_(worker), stream_factory_(factory), output_(output), play_count_(0) {}
+    : bg_(worker),
+      stream_factory_(factory),
+      output_(output),
+      stream_playing_(false),
+      stream_cancelled_(false) {}
 
 auto Player::playFile(const std::string& path) -> void {
   ESP_LOGI(kTag, "playing '%s'", path.c_str());
-  int this_play = ++play_count_;
 
   bg_.Dispatch<void>([=, this]() {
-    auto stream = stream_factory_.create(path);
-    if (!stream) {
-      ESP_LOGE(kTag, "creating stream failed");
-      return;
+    // Interrupt current playback
+    {
+      std::scoped_lock<std::mutex> lock{new_stream_mutex_};
+      if (stream_playing_) {
+        stream_cancelled_ = true;
+        stream_playing_.wait(true);
+      }
+      stream_cancelled_ = false;
+      stream_playing_ = true;
     }
 
-    // FIXME: Rather than hardcoding WAV support only, we should work out a
-    // proper subset of 'low memory' decoders that can all be used for TTS
-    // playback.
-    if (stream->type() != codecs::StreamType::kWav) {
-      ESP_LOGE(kTag, "stream was unsupported type");
-      return;
-    }
+    openAndDecode(path);
 
-    auto decoder = codecs::CreateCodecForType(stream->type());
-    if (!decoder) {
-      ESP_LOGE(kTag, "creating decoder failed");
-      return;
+    if (!stream_cancelled_) {
+      events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = false});
     }
+    stream_playing_ = false;
+    stream_playing_.notify_all();
+  });
+}
 
-    std::unique_ptr<codecs::ICodec> codec{*decoder};
-    auto open_res = codec->OpenStream(stream, 0);
-    if (open_res.has_error()) {
-      ESP_LOGE(kTag, "opening stream failed");
-      return;
-    }
+auto Player::openAndDecode(const std::string& path) -> void {
+  auto stream = stream_factory_.create(path);
+  if (!stream) {
+    ESP_LOGE(kTag, "creating stream failed");
+    return;
+  }
 
-    decodeToSink(*open_res, std::move(codec), this_play);
-  });
+  // FIXME: Rather than hardcoding WAV support only, we should work out a
+  // proper subset of 'low memory' decoders that can all be used for TTS
+  // playback.
+  if (stream->type() != codecs::StreamType::kWav) {
+    ESP_LOGE(kTag, "stream was unsupported type");
+    return;
+  }
+
+  auto decoder = codecs::CreateCodecForType(stream->type());
+  if (!decoder) {
+    ESP_LOGE(kTag, "creating decoder failed");
+    return;
+  }
+
+  std::unique_ptr<codecs::ICodec> codec{*decoder};
+  auto open_res = codec->OpenStream(stream, 0);
+  if (open_res.has_error()) {
+    ESP_LOGE(kTag, "opening stream failed");
+    return;
+  }
+
+  decodeToSink(*open_res, std::move(codec));
 }
 
 auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
-                          std::unique_ptr<codecs::ICodec> codec,
-                          int play_count) -> void {
+                          std::unique_ptr<codecs::ICodec> codec) -> void {
   // Set up buffers to hold samples between the intermediary parts of
   // processing. We can just use the stack for these, since this method is
   // called only from background workers, which have enormous stacks.
@@ -83,20 +109,18 @@ auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
   }
   bool double_samples = format.num_channels == 1;
 
+  // Tell the audio FSM that TTS is starting (TODO: wait for previous to end?)
+  events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = true});
+
   // FIXME: This decode-and-process loop is substantially the same as the audio
   // processor's filter loop. Ideally we should refactor both of these loops to
   // reuse code, however I'm holding off on doing this until we've implemented
   // more advanced audio processing features in the audio processor (EQ, tempo
   // shifting, etc.) as it's not clear to me yet how much the two codepaths will
   // be diverging later anyway.
-  while (codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() ||
-         !stereo_buf.isEmpty()) {
-    if (play_count != play_count_) {
-      // FIXME: This is a little unsafe and could maybe take out the first few
-      // samples of the next file.
-      output_.clear();
-      break;
-    }
+  while ((codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() ||
+          !stereo_buf.isEmpty()) &&
+         !stream_cancelled_) {
     if (codec) {
       auto decode_res = codec->DecodeTo(decode_buf.writeAcquire());
       if (decode_res.has_error()) {
@@ -156,6 +180,14 @@ auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
       stereo_buf.readCommit(sent);
     }
   }
+
+  while (!output_.isEmpty()) {
+    if (stream_cancelled_) {
+      output_.clear();
+    } else {
+      vTaskDelay(pdMS_TO_TICKS(100));
+    }
+  }
 }
 
 }  // namespace tts
diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp
index 0a3ba723..47479007 100644
--- a/src/tangara/tts/player.hpp
+++ b/src/tangara/tts/player.hpp
@@ -35,11 +35,13 @@ class Player {
   audio::FatfsStreamFactory& stream_factory_;
   drivers::PcmBuffer& output_;
 
-  std::atomic<int> play_count_;
+  std::mutex new_stream_mutex_;
+  std::atomic<bool> stream_playing_;
+  std::atomic<bool> stream_cancelled_;
 
+  auto openAndDecode(const std::string& path) -> void;
   auto decodeToSink(const codecs::ICodec::OutputFormat&,
-                    std::unique_ptr<codecs::ICodec>,
-                    int play_count) -> void;
+                    std::unique_ptr<codecs::ICodec>) -> void;
 };
 
 }  // namespace tts
author	jacqueline <me@jacqueline.id.au>	2024-09-12 10:44:26 +1000
committer	jacqueline <me@jacqueline.id.au>	2024-09-12 10:44:26 +1000
commit	c51709f99ff5456a5863ca39ff893f823a3642d4 (patch)
tree	4b2262b6451834dfb0e197fcc7c64fd3ea0f0569
parent	542ebc65317ac4744a4b96c3131dace5bda10314 (diff)
download	tangara-fw-c51709f99ff5456a5863ca39ff893f823a3642d4.tar.gz