| // Copyright 2017 The Fuchsia Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "src/speech/tts/tts_speaker.h" |
| |
| #include <lib/async/cpp/task.h> |
| |
| namespace tts { |
| |
| static constexpr uint64_t kSharedBufSize = 64 << 10; |
| static constexpr uint32_t kLowWaterMsec = 100; |
| |
| static constexpr uint32_t kFliteChannelCount = 1; |
| static constexpr uint32_t kFliteFrameRate = 16000; |
| static constexpr auto kFliteSampleFormat = fuchsia::media::AudioSampleFormat::SIGNED_16; |
| static constexpr uint32_t kFliteBytesPerFrame = 2; |
| static constexpr uint32_t kLowWaterBytes = |
| (kFliteFrameRate * kLowWaterMsec * kFliteBytesPerFrame) / 1000; |
| |
| TtsSpeaker::TtsSpeaker(async_dispatcher_t* dispatcher) |
| : engine_loop_(&kAsyncLoopConfigNoAttachToCurrentThread), |
| master_dispatcher_(dispatcher), |
| abort_playback_(false), |
| synthesis_complete_(false) { |
| engine_loop_.StartThread(); |
| } |
| |
| zx_status_t TtsSpeaker::Speak(fidl::StringPtr words, fit::closure speak_complete_cbk) { |
| words_ = std::move(words); |
| speak_complete_cbk_ = std::move(speak_complete_cbk); |
| |
| async::PostTask(engine_loop_.dispatcher(), [thiz = shared_from_this()]() { thiz->DoSpeak(); }); |
| |
| return ZX_OK; |
| } |
| |
| zx_status_t TtsSpeaker::Init(const std::unique_ptr<sys::ComponentContext>& startup_context) { |
| zx_status_t res; |
| |
| if (wakeup_event_.is_valid()) { |
| FX_LOGS(ERROR) << "Attempted to initialize TtsSpeaker twice!"; |
| return ZX_ERR_BAD_STATE; |
| } |
| |
| res = zx::event::create(0, &wakeup_event_); |
| if (res != ZX_OK) { |
| FX_LOGS(ERROR) << "Failed to create wakeup event! (res " << res << ")"; |
| return res; |
| } |
| |
| zx::vmo shared_vmo; |
| res = shared_buf_.CreateAndMap(kSharedBufSize, ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, nullptr, |
| &shared_vmo, ZX_RIGHT_READ | ZX_RIGHT_MAP | ZX_RIGHT_TRANSFER); |
| |
| if (res != ZX_OK) { |
| FX_LOGS(ERROR) << "VmoMapper:::CreateAndMap failed - " << res; |
| return res; |
| } |
| |
| FX_DCHECK(startup_context != nullptr); |
| auto audio = startup_context->svc()->Connect<fuchsia::media::Audio>(); |
| |
| audio->CreateAudioRenderer(audio_renderer_.NewRequest()); |
| |
| fuchsia::media::AudioStreamType format; |
| format.sample_format = kFliteSampleFormat; |
| format.channels = kFliteChannelCount; |
| format.frames_per_second = kFliteFrameRate; |
| |
| audio_renderer_->SetPcmStreamType(format); |
| audio_renderer_->AddPayloadBuffer(0, std::move(shared_vmo)); |
| |
| return ZX_OK; |
| } |
| |
| void TtsSpeaker::Shutdown() { |
| abort_playback_.store(true); |
| { |
| std::lock_guard<std::mutex> lock(ring_buffer_lock_); |
| wakeup_event_.signal(0, ZX_USER_SIGNAL_0); |
| } |
| engine_loop_.Shutdown(); |
| } |
| |
| void TtsSpeaker::SendPendingAudio() { |
| if (abort_playback_.load()) |
| return; |
| |
| // Figure out how much audio we have synthesized, but not given to the audio |
| // renderer yet, and hand it off to the renderer. There are three (slightly) |
| // special cases we need to consider. |
| // |
| // 1) We may not permit our payloads to span the ring wrap point. All |
| // payloads must be contiugous in our VMO. |
| // 2) We need to make sure that we break our payloads such that when we hit |
| // our low water mark, we receive a callback which wakes up the engine |
| // thread to produce some more audio. |
| // 3) We need to make sure that we send our final payload that its callback |
| // calls our completion handler. |
| // |
| uint64_t bytes_to_send; |
| { |
| std::lock_guard<std::mutex> lock(ring_buffer_lock_); |
| bytes_to_send = ComputeTxPending(); |
| } |
| |
| bool eos = synthesis_complete_.load(); |
| uint64_t bytes_till_low_water = eos ? 0 : bytes_to_send - kLowWaterBytes; |
| uint64_t bytes_till_ring_wrap = shared_buf_.size() - tx_ptr_; |
| |
| FX_DCHECK(eos || bytes_to_send > kLowWaterBytes); |
| |
| while (bytes_to_send) { |
| uint64_t todo = bytes_to_send; |
| |
| if (bytes_till_ring_wrap && (todo > bytes_till_ring_wrap)) { |
| todo = bytes_till_ring_wrap; |
| } |
| |
| if (bytes_till_low_water && (todo > bytes_till_low_water)) { |
| todo = bytes_till_low_water; |
| } |
| |
| fuchsia::media::StreamPacket pkt; |
| pkt.payload_offset = tx_ptr_; |
| pkt.payload_size = todo; |
| |
| tx_ptr_ += todo; |
| if (tx_ptr_ >= shared_buf_.size()) { |
| FX_DCHECK(tx_ptr_ == shared_buf_.size()); |
| tx_ptr_ = 0; |
| } |
| |
| if (eos && (todo == bytes_to_send)) { |
| audio_renderer_->SendPacket( |
| pkt, [speak_complete_cbk = std::move(speak_complete_cbk_)]() { speak_complete_cbk(); }); |
| } else if (todo == bytes_till_low_water) { |
| audio_renderer_->SendPacket(pkt, [thiz = shared_from_this(), new_rd_pos = tx_ptr_]() { |
| thiz->UpdateRdPtr(new_rd_pos); |
| }); |
| } else { |
| audio_renderer_->SendPacketNoReply(pkt); |
| } |
| |
| FX_DCHECK(todo <= bytes_to_send); |
| bytes_to_send -= todo; |
| if (bytes_till_ring_wrap) |
| bytes_till_ring_wrap -= todo; |
| if (bytes_till_low_water) |
| bytes_till_low_water -= todo; |
| } |
| |
| if (!clock_started_) { |
| audio_renderer_->PlayNoReply(fuchsia::media::NO_TIMESTAMP, fuchsia::media::NO_TIMESTAMP); |
| clock_started_ = true; |
| } |
| } |
| |
| void TtsSpeaker::UpdateRdPtr(uint64_t new_pos) { |
| if (!abort_playback_.load()) { |
| std::lock_guard<std::mutex> lock(ring_buffer_lock_); |
| rd_ptr_ = new_pos; |
| wakeup_event_.signal(0, ZX_USER_SIGNAL_0); |
| } |
| } |
| |
| int TtsSpeaker::ProduceAudioCbk(const cst_wave* wave, int start, int sz, int last) { |
| if (abort_playback_.load()) { |
| return CST_AUDIO_STREAM_STOP; |
| } |
| |
| FX_DCHECK(sz >= 0); |
| |
| const void* payload; |
| int16_t junk = 0; |
| |
| if (sz == 0) { |
| FX_DCHECK(last); |
| payload = &junk; |
| sz = 1; |
| } else { |
| payload = wave->samples + start; |
| } |
| |
| uint64_t size = static_cast<uint64_t>(sz) * kFliteBytesPerFrame; |
| |
| while (true) { |
| { // explicit scope for ring buffer lock. |
| std::lock_guard<std::mutex> lock(ring_buffer_lock_); |
| uint64_t space = ComputeWriteSpace(); |
| |
| if (size < space) { |
| while (size > 0) { |
| uint64_t todo; |
| todo = std::min<uint64_t>(shared_buf_.size() - wr_ptr_, size); |
| |
| ::memcpy(reinterpret_cast<uint8_t*>(shared_buf_.start()) + wr_ptr_, payload, todo); |
| |
| size -= todo; |
| wr_ptr_ += todo; |
| payload = reinterpret_cast<const void*>(reinterpret_cast<uintptr_t>(payload) + todo); |
| |
| if (wr_ptr_ >= shared_buf_.size()) { |
| FX_DCHECK(wr_ptr_ == shared_buf_.size()); |
| wr_ptr_ = 0; |
| } |
| } |
| |
| break; |
| } |
| |
| wakeup_event_.signal(ZX_USER_SIGNAL_0, 0); |
| } |
| |
| // Looks like we need to wait for there to be some space. Before we do so, |
| // let the master thread know it needs to send the data we just produced. |
| async::PostTask(master_dispatcher_, |
| [thiz = shared_from_this()]() { thiz->SendPendingAudio(); }); |
| |
| zx_signals_t pending; |
| zx_status_t res; |
| |
| res = wakeup_event_.wait_one(ZX_USER_SIGNAL_0, zx::time::infinite(), &pending); |
| if ((res != ZX_OK) || abort_playback_.load()) { |
| return CST_AUDIO_STREAM_STOP; |
| } |
| } |
| |
| // If this is the last chunk of audio, tell the master thread to send the rest |
| // of our synthesized audio right now. |
| if (last) { |
| synthesis_complete_.store(true); |
| async::PostTask(master_dispatcher_, |
| [thiz = shared_from_this()]() { thiz->SendPendingAudio(); }); |
| } |
| |
| return CST_AUDIO_STREAM_CONT; |
| } |
| |
| void TtsSpeaker::DoSpeak() { |
| cst_voice* vox = flite_fuchsia_create_voice( |
| [](const cst_wave* w, int start, int size, int last, |
| struct cst_audio_streaming_info_struct* asi) -> int { |
| auto thiz = reinterpret_cast<TtsSpeaker*>(asi->userdata); |
| return thiz->ProduceAudioCbk(w, start, size, last); |
| }, |
| this); |
| |
| flite_text_to_speech(words_->data(), vox, "play"); |
| delete_voice(vox); |
| |
| if (abort_playback_.load()) { |
| async::PostTask(master_dispatcher_, [speak_complete_cbk = std::move(speak_complete_cbk_)]() { |
| speak_complete_cbk(); |
| }); |
| } |
| } |
| |
| } // namespace tts |