summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorjacqueline <me@jacqueline.id.au>2024-09-12 10:44:26 +1000
committerjacqueline <me@jacqueline.id.au>2024-09-12 10:44:26 +1000
commitc51709f99ff5456a5863ca39ff893f823a3642d4 (patch)
tree4b2262b6451834dfb0e197fcc7c64fd3ea0f0569
parent542ebc65317ac4744a4b96c3131dace5bda10314 (diff)
downloadtangara-fw-c51709f99ff5456a5863ca39ff893f823a3642d4.tar.gz
Pause and unpause the current audio output in response to TTS
-rw-r--r--src/drivers/include/drivers/pcm_buffer.hpp3
-rw-r--r--src/drivers/pcm_buffer.cpp14
-rw-r--r--src/tangara/audio/audio_events.hpp5
-rw-r--r--src/tangara/audio/audio_fsm.cpp26
-rw-r--r--src/tangara/audio/audio_fsm.hpp4
-rw-r--r--src/tangara/tts/player.cpp102
-rw-r--r--src/tangara/tts/player.hpp8
7 files changed, 119 insertions, 43 deletions
diff --git a/src/drivers/include/drivers/pcm_buffer.hpp b/src/drivers/include/drivers/pcm_buffer.hpp
index 4e5fa041..6b38be94 100644
--- a/src/drivers/include/drivers/pcm_buffer.hpp
+++ b/src/drivers/include/drivers/pcm_buffer.hpp
@@ -49,6 +49,7 @@ class PcmBuffer {
auto clear() -> void;
auto isEmpty() -> bool;
+ auto suspend(bool) -> void;
/*
* How many samples have been added to this buffer since it was created. This
@@ -75,6 +76,8 @@ class PcmBuffer {
std::atomic<uint32_t> sent_;
std::atomic<uint32_t> received_;
+ std::atomic<bool> suspended_;
+
RingbufHandle_t ringbuf_;
};
diff --git a/src/drivers/pcm_buffer.cpp b/src/drivers/pcm_buffer.cpp
index 1e416301..bc58d4b9 100644
--- a/src/drivers/pcm_buffer.cpp
+++ b/src/drivers/pcm_buffer.cpp
@@ -25,7 +25,8 @@ namespace drivers {
[[maybe_unused]] static const char kTag[] = "pcmbuf";
-PcmBuffer::PcmBuffer(size_t size_in_samples) : sent_(0), received_(0) {
+PcmBuffer::PcmBuffer(size_t size_in_samples)
+ : sent_(0), received_(0), suspended_(false) {
size_t size_in_bytes = size_in_samples * sizeof(int16_t);
ESP_LOGI(kTag, "allocating pcm buffer of size %u (%uKiB)", size_in_samples,
size_in_bytes / 1024);
@@ -51,6 +52,13 @@ auto PcmBuffer::send(std::span<const int16_t> data) -> size_t {
IRAM_ATTR auto PcmBuffer::receive(std::span<int16_t> dest, bool mix, bool isr)
-> BaseType_t {
+ if (suspended_) {
+ if (!mix) {
+ std::fill_n(dest.begin(), dest.size(), 0);
+ }
+ return false;
+ }
+
size_t first_read = 0, second_read = 0;
BaseType_t ret1 = false, ret2 = false;
std::tie(first_read, ret1) = readSingle(dest, mix, isr);
@@ -86,6 +94,10 @@ auto PcmBuffer::isEmpty() -> bool {
xRingbufferGetCurFreeSize(ringbuf_);
}
+auto PcmBuffer::suspend(bool s) -> void {
+ suspended_ = s;
+}
+
auto PcmBuffer::totalSent() -> uint32_t {
return sent_;
}
diff --git a/src/tangara/audio/audio_events.hpp b/src/tangara/audio/audio_events.hpp
index 91bcf48b..56d150b2 100644
--- a/src/tangara/audio/audio_events.hpp
+++ b/src/tangara/audio/audio_events.hpp
@@ -144,8 +144,11 @@ struct OutputModeChanged : tinyfsm::Event {
std::optional<drivers::NvsStorage::Output> set_to;
};
-namespace internal {
+struct TtsPlaybackChanged : tinyfsm::Event {
+ bool is_playing;
+};
+namespace internal {
struct DecodingStarted : tinyfsm::Event {
std::shared_ptr<TrackInfo> track;
};
diff --git a/src/tangara/audio/audio_fsm.cpp b/src/tangara/audio/audio_fsm.cpp
index dac04f75..1daf568e 100644
--- a/src/tangara/audio/audio_fsm.cpp
+++ b/src/tangara/audio/audio_fsm.cpp
@@ -76,6 +76,7 @@ std::optional<IAudioOutput::Format> AudioState::sDrainFormat;
StreamCues AudioState::sStreamCues;
bool AudioState::sIsPaused = true;
+bool AudioState::sIsTtsPlaying = false;
auto AudioState::emitPlaybackUpdate(bool paused) -> void {
std::optional<uint32_t> position;
@@ -191,6 +192,11 @@ void AudioState::react(const TogglePlayPause& ev) {
}
}
+void AudioState::react(const TtsPlaybackChanged& ev) {
+ sIsTtsPlaying = ev.is_playing;
+ updateOutputMode();
+}
+
void AudioState::react(const internal::DecodingFinished& ev) {
// If we just finished playing whatever's at the front of the queue, then we
// need to advance and start playing the next one ASAP in order to continue
@@ -369,8 +375,8 @@ void AudioState::react(const OutputModeChanged& ev) {
sOutput = sI2SOutput;
break;
}
- sOutput->mode(IAudioOutput::Modes::kOnPaused);
sSampleProcessor->SetOutput(sOutput);
+ updateOutputMode();
// Bluetooth volume isn't 'changed' until we've connected to a device.
if (new_mode == drivers::NvsStorage::Output::kHeadphones) {
@@ -381,6 +387,14 @@ void AudioState::react(const OutputModeChanged& ev) {
}
}
+auto AudioState::updateOutputMode() -> void {
+ if (is_in_state<states::Playback>() || sIsTtsPlaying) {
+ sOutput->mode(IAudioOutput::Modes::kOnPlaying);
+ } else {
+ sOutput->mode(IAudioOutput::Modes::kOnPaused);
+ }
+}
+
auto AudioState::commitVolume() -> void {
auto mode = sServices->nvs().OutputMode();
auto vol = sOutput->GetVolume();
@@ -402,6 +416,7 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
sDrainBuffers = std::make_unique<drivers::OutputBuffers>(
kTrackDrainLatencySamples, kSystemDrainLatencySamples);
+ sDrainBuffers->first.suspend(true);
sStreamFactory.reset(
new FatfsStreamFactory(sServices->database(), sServices->tag_parser()));
@@ -454,6 +469,10 @@ void Uninitialised::react(const system_fsm::BootComplete& ev) {
static const char kQueueKey[] = "audio:queue";
static const char kCurrentFileKey[] = "audio:current";
+auto Standby::entry() -> void {
+ updateOutputMode();
+}
+
void Standby::react(const system_fsm::KeyLockChanged& ev) {
if (!ev.locking) {
return;
@@ -539,7 +558,8 @@ static void heartbeat(TimerHandle_t) {
void Playback::entry() {
ESP_LOGI(kTag, "audio output resumed");
- sOutput->mode(IAudioOutput::Modes::kOnPlaying);
+ sDrainBuffers->first.suspend(false);
+ updateOutputMode();
emitPlaybackUpdate(false);
if (!sHeartbeatTimer) {
@@ -552,7 +572,7 @@ void Playback::entry() {
void Playback::exit() {
ESP_LOGI(kTag, "audio output paused");
xTimerStop(sHeartbeatTimer, portMAX_DELAY);
- sOutput->mode(IAudioOutput::Modes::kOnPaused);
+ sDrainBuffers->first.suspend(true);
emitPlaybackUpdate(true);
}
diff --git a/src/tangara/audio/audio_fsm.hpp b/src/tangara/audio/audio_fsm.hpp
index 134d9ffd..bc3feb55 100644
--- a/src/tangara/audio/audio_fsm.hpp
+++ b/src/tangara/audio/audio_fsm.hpp
@@ -48,6 +48,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
void react(const PlaySineWave&);
void react(const SetTrack&);
void react(const TogglePlayPause&);
+ void react(const TtsPlaybackChanged&);
void react(const internal::DecodingFinished&);
void react(const internal::StreamStarted&);
@@ -70,6 +71,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
virtual void react(const system_fsm::HasPhonesChanged&);
protected:
+ auto updateOutputMode() -> void;
auto emitPlaybackUpdate(bool paused) -> void;
auto commitVolume() -> void;
@@ -88,6 +90,7 @@ class AudioState : public tinyfsm::Fsm<AudioState> {
static std::optional<IAudioOutput::Format> sDrainFormat;
static bool sIsPaused;
+ static bool sIsTtsPlaying;
};
namespace states {
@@ -102,6 +105,7 @@ class Uninitialised : public AudioState {
class Standby : public AudioState {
public:
+ void entry() override;
void react(const system_fsm::KeyLockChanged&) override;
void react(const system_fsm::SdStateChanged&) override;
diff --git a/src/tangara/tts/player.cpp b/src/tangara/tts/player.cpp
index b5b99b5d..a803ce57 100644
--- a/src/tangara/tts/player.cpp
+++ b/src/tangara/tts/player.cpp
@@ -5,11 +5,14 @@
*/
#include "tts/player.hpp"
+#include <mutex>
+#include "audio/audio_events.hpp"
#include "audio/processor.hpp"
#include "audio/resample.hpp"
#include "codec.hpp"
#include "esp_log.h"
+#include "events/event_queue.hpp"
#include "freertos/projdefs.h"
#include "portmacro.h"
#include "sample.hpp"
@@ -22,47 +25,70 @@ namespace tts {
Player::Player(tasks::WorkerPool& worker,
drivers::PcmBuffer& output,
audio::FatfsStreamFactory& factory)
- : bg_(worker), stream_factory_(factory), output_(output), play_count_(0) {}
+ : bg_(worker),
+ stream_factory_(factory),
+ output_(output),
+ stream_playing_(false),
+ stream_cancelled_(false) {}
auto Player::playFile(const std::string& path) -> void {
ESP_LOGI(kTag, "playing '%s'", path.c_str());
- int this_play = ++play_count_;
bg_.Dispatch<void>([=, this]() {
- auto stream = stream_factory_.create(path);
- if (!stream) {
- ESP_LOGE(kTag, "creating stream failed");
- return;
+ // Interrupt current playback
+ {
+ std::scoped_lock<std::mutex> lock{new_stream_mutex_};
+ if (stream_playing_) {
+ stream_cancelled_ = true;
+ stream_playing_.wait(true);
+ }
+ stream_cancelled_ = false;
+ stream_playing_ = true;
}
- // FIXME: Rather than hardcoding WAV support only, we should work out a
- // proper subset of 'low memory' decoders that can all be used for TTS
- // playback.
- if (stream->type() != codecs::StreamType::kWav) {
- ESP_LOGE(kTag, "stream was unsupported type");
- return;
- }
+ openAndDecode(path);
- auto decoder = codecs::CreateCodecForType(stream->type());
- if (!decoder) {
- ESP_LOGE(kTag, "creating decoder failed");
- return;
+ if (!stream_cancelled_) {
+ events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = false});
}
+ stream_playing_ = false;
+ stream_playing_.notify_all();
+ });
+}
- std::unique_ptr<codecs::ICodec> codec{*decoder};
- auto open_res = codec->OpenStream(stream, 0);
- if (open_res.has_error()) {
- ESP_LOGE(kTag, "opening stream failed");
- return;
- }
+auto Player::openAndDecode(const std::string& path) -> void {
+ auto stream = stream_factory_.create(path);
+ if (!stream) {
+ ESP_LOGE(kTag, "creating stream failed");
+ return;
+ }
- decodeToSink(*open_res, std::move(codec), this_play);
- });
+ // FIXME: Rather than hardcoding WAV support only, we should work out a
+ // proper subset of 'low memory' decoders that can all be used for TTS
+ // playback.
+ if (stream->type() != codecs::StreamType::kWav) {
+ ESP_LOGE(kTag, "stream was unsupported type");
+ return;
+ }
+
+ auto decoder = codecs::CreateCodecForType(stream->type());
+ if (!decoder) {
+ ESP_LOGE(kTag, "creating decoder failed");
+ return;
+ }
+
+ std::unique_ptr<codecs::ICodec> codec{*decoder};
+ auto open_res = codec->OpenStream(stream, 0);
+ if (open_res.has_error()) {
+ ESP_LOGE(kTag, "opening stream failed");
+ return;
+ }
+
+ decodeToSink(*open_res, std::move(codec));
}
auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
- std::unique_ptr<codecs::ICodec> codec,
- int play_count) -> void {
+ std::unique_ptr<codecs::ICodec> codec) -> void {
// Set up buffers to hold samples between the intermediary parts of
// processing. We can just use the stack for these, since this method is
// called only from background workers, which have enormous stacks.
@@ -83,20 +109,18 @@ auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
}
bool double_samples = format.num_channels == 1;
+ // Start our playback; playFile() has already waited for any previous stream.
+ events::Audio().Dispatch(audio::TtsPlaybackChanged{.is_playing = true});
+
// FIXME: This decode-and-process loop is substantially the same as the audio
// processor's filter loop. Ideally we should refactor both of these loops to
// reuse code, however I'm holding off on doing this until we've implemented
// more advanced audio processing features in the audio processor (EQ, tempo
// shifting, etc.) as it's not clear to me yet how much the two codepaths will
// be diverging later anyway.
- while (codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() ||
- !stereo_buf.isEmpty()) {
- if (play_count != play_count_) {
- // FIXME: This is a little unsafe and could maybe take out the first few
- // samples of the next file.
- output_.clear();
- break;
- }
+ while ((codec || !decode_buf.isEmpty() || !resample_buf.isEmpty() ||
+ !stereo_buf.isEmpty()) &&
+ !stream_cancelled_) {
if (codec) {
auto decode_res = codec->DecodeTo(decode_buf.writeAcquire());
if (decode_res.has_error()) {
@@ -156,6 +180,14 @@ auto Player::decodeToSink(const codecs::ICodec::OutputFormat& format,
stereo_buf.readCommit(sent);
}
}
+
+ while (!output_.isEmpty()) {
+ if (stream_cancelled_) {
+ output_.clear();
+ } else {
+ vTaskDelay(pdMS_TO_TICKS(100));
+ }
+ }
}
} // namespace tts
diff --git a/src/tangara/tts/player.hpp b/src/tangara/tts/player.hpp
index 0a3ba723..47479007 100644
--- a/src/tangara/tts/player.hpp
+++ b/src/tangara/tts/player.hpp
@@ -35,11 +35,13 @@ class Player {
audio::FatfsStreamFactory& stream_factory_;
drivers::PcmBuffer& output_;
- std::atomic<int> play_count_;
+ std::mutex new_stream_mutex_;
+ std::atomic<bool> stream_playing_;
+ std::atomic<bool> stream_cancelled_;
+ auto openAndDecode(const std::string& path) -> void;
auto decodeToSink(const codecs::ICodec::OutputFormat&,
- std::unique_ptr<codecs::ICodec>,
- int play_count) -> void;
+ std::unique_ptr<codecs::ICodec>) -> void;
};
} // namespace tts