日期:
来源:郭霖收集编辑:小虾米君
https://juejin.cn/post/7199472291586441275
对于需要使用 TTS 的请求 App 而言,无需关心 TTS 的具体实现,通过 TextToSpeech API 即用即有 对于需要对外提供 TTS 能力的实现 Engine 而言,无需维护复杂的 TTS 时序和逻辑,按照 TextToSpeechService 框架的定义对接即可,无需关心系统如何将实现和请求进行衔接
// TTSTest.kt
// Demo client for the TextToSpeech API: constructs a TTS instance, registers
// per-utterance progress callbacks, then issues speak()/playEarcon() requests.
// "..." marks code elided by the article excerpt.
class TTSTest(context: Context) {
// The TTS handle; the trailing lambda receives the engine-initialization result.
private val tts: TextToSpeech = TextToSpeech(context) { initResult -> ... }
init {
// Callbacks delivered by the framework as each queued utterance progresses.
tts.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
override fun onStart(utteranceId: String?) { ... }
override fun onDone(utteranceId: String?) { ... }
override fun onStop(utteranceId: String?, interrupted: Boolean) { ... }
override fun onError(utteranceId: String?) { ... }
})
}
fun testTextToSpeech(context: Context) {
// Queue a text utterance; the last argument is the utterance id that is
// echoed back through the UtteranceProgressListener callbacks above.
tts.speak(
"你好,汽车",
TextToSpeech.QUEUE_ADD,
Bundle(),
"xxdtgfsf"
)
// Queue an earcon (a named short audio cue) identified by EARCON_DONE.
tts.playEarcon(
EARCON_DONE,
TextToSpeech.QUEUE_ADD,
Bundle(),
"yydtgfsf"
)
}
companion object {
// Earcon name; presumably registered elsewhere via addEarcon() — not shown here.
const val EARCON_DONE = "earCon_done"
}
}
init 绑定
如果构造 TTS 接口的实例时指定了目标 Engine 的 package,那么首选连接到该 Engine;反之,获取设备设置的 default Engine 并连接,该设置由 TtsEngines 从系统设置数据 SettingsProvider 中读取 TTS_DEFAULT_SYNTH 而来。如果 default 不存在或者没有安装的话,从 TtsEngines 获取第一位的系统 Engine 并连接。第一位指的是从所有 TTS Service 实现 Engine 列表里获得第一个属于 system image 的 Engine。
具体是直接获取名为 texttospeech 、管理 TTS Service 的系统服务 TextToSpeechManagerService 的接口代理并直接调用它的 createSession() 创建一个 session,同时暂存其指向的 ITextToSpeechSession 代理接口。
其 connect() 实现较为简单,封装 Action 为 INTENT_ACTION_TTS_SERVICE 的 Intent 进行 bindService(),后续由 AMS 执行和 Engine 的绑定,这里不再展开。
// TextToSpeech.java (framework-side excerpt)
// TextToSpeech.java
public class TextToSpeech {
// Public two-arg constructor delegates with a null engine package,
// meaning "use the default engine selection" below.
public TextToSpeech(Context context, OnInitListener listener) {
this(context, listener, null);
}
private TextToSpeech( ... ) {
...
// Kick off engine selection + binding as part of construction.
initTts();
}
// Tries up to three engine candidates in priority order:
// requested package -> user default -> highest-ranked system engine.
private int initTts() {
// Step 1: Try connecting to the engine that was requested.
if (mRequestedEngine != null) {
if (mEnginesHelper.isEngineInstalled(mRequestedEngine)) {
if (connectToEngine(mRequestedEngine)) {
mCurrentEngine = mRequestedEngine;
return SUCCESS;
}
...
} else if (!mUseFallback) {
// Requested engine missing and fallback disallowed: fail the init.
...
dispatchOnInit(ERROR);
return ERROR;
}
}
// Step 2: Try connecting to the user's default engine.
final String defaultEngine = getDefaultEngine();
...
// Step 3: Try connecting to the highest ranked engine in the system.
final String highestRanked = mEnginesHelper.getHighestRankedEngineName();
...
// All candidates failed: report ERROR to the OnInitListener.
dispatchOnInit(ERROR);
return ERROR;
}
// Picks the connection strategy: SystemConnection (via the TTS manager
// service) when mIsSystem, otherwise a direct bindService() connection.
private boolean connectToEngine(String engine) {
Connection connection;
if (mIsSystem) {
connection = new SystemConnection();
} else {
connection = new DirectConnection();
}
boolean bound = connection.connect(engine);
if (!bound) {
return false;
} else {
// Binding is asynchronous; remember the in-flight connection.
mConnectingServiceConnection = connection;
return true;
}
}
}
// TextToSpeech.java
// Connection hierarchy: how the client half binds to an engine and receives
// engine-side callbacks over the ITextToSpeechCallback AIDL interface.
public class TextToSpeech {
...
private abstract class Connection implements ServiceConnection {
// Proxy to the remote engine once onServiceConnected() fires.
private ITextToSpeechService mService;
...
// Binder callback the engine invokes; each method forwards to the
// app-registered UtteranceProgressListener if one is set.
private final ITextToSpeechCallback.Stub mCallback =
new ITextToSpeechCallback.Stub() {
public void onStop(String utteranceId, boolean isStarted)
throws RemoteException {
UtteranceProgressListener listener = mUtteranceProgressListener;
if (listener != null) {
listener.onStop(utteranceId, isStarted);
}
};
@Override
public void onSuccess(String utteranceId) { ... }
@Override
public void onError(String utteranceId, int errorCode) { ... }
@Override
public void onStart(String utteranceId) { ... }
...
};
@Override
public void onServiceConnected(ComponentName componentName, IBinder service) {
synchronized(mStartLock) {
// Binding completed: promote the in-flight connection to active.
mConnectingServiceConnection = null;
mService = ITextToSpeechService.Stub.asInterface(service);
mServiceConnection = Connection.this;
mEstablished = false;
// Finish handshake off the main thread before the connection is usable.
mOnSetupConnectionAsyncTask = new SetupConnectionAsyncTask();
mOnSetupConnectionAsyncTask.execute();
}
}
...
}
// Direct binding: bindService() with the well-known TTS intent action,
// restricted to the chosen engine package.
private class DirectConnection extends Connection {
@Override
boolean connect(String engine) {
Intent intent = new Intent(Engine.INTENT_ACTION_TTS_SERVICE);
intent.setPackage(engine);
return mContext.bindService(intent, this, Context.BIND_AUTO_CREATE);
}
...
}
// Indirect binding: asks the system TextToSpeechManagerService to create
// a session on the caller's behalf.
private class SystemConnection extends Connection {
...
boolean connect(String engine) {
IBinder binder = ServiceManager.getService(Context.TEXT_TO_SPEECH_MANAGER_SERVICE);
...
try {
manager.createSession(engine, new ITextToSpeechSessionCallback.Stub() {
...
});
return true;
} ...
}
...
}
}
speak 播报
// TextToSpeech.java
// speak() is a thin wrapper: every remote call funnels through runAction(),
// which holds mStartLock and executes the lambda against the bound service.
public class TextToSpeech {
...
private Connection mServiceConnection;
public int speak(final CharSequence text, ... ) {
return runAction((ITextToSpeechService service) -> {
...
}, ERROR, "speak");
}
// Convenience overload; the two trailing booleans are presumably
// reconnect / onlyEstablishedConnection — matches the forwarded args below.
private <R> R runAction(Action<R> action, R errorResult, String method) {
return runAction(action, errorResult, method, true, true);
}
private <R> R runAction( ... ) {
synchronized (mStartLock) {
...
// Delegate to the active connection, which owns the service proxy.
return mServiceConnection.runAction(action, errorResult, method, reconnect,
onlyEstablishedConnection);
}
}
private abstract class Connection implements ServiceConnection {
public <R> R runAction( ... ) {
synchronized (mStartLock) {
try {
...
// Invoke the caller-supplied lambda with the AIDL proxy.
return action.run(mService);
}
...
}
}
}
}
如有设置的话,调用 TTS Engine 的 playAudio() 直接播放;反之,调用 text 转 audio 的接口 speak()。
// TextToSpeech.java
// Inside the speak() lambda: if the exact text was registered via addSpeech()
// (mUtterances maps text -> audio Uri), play that audio directly; otherwise
// ask the engine to synthesize the text.
public class TextToSpeech {
...
public int speak(final CharSequence text, ... ) {
return runAction((ITextToSpeechService service) -> {
Uri utteranceUri = mUtterances.get(text);
if (utteranceUri != null) {
// Pre-registered audio exists: skip synthesis entirely.
return service.playAudio(getCallerIdentity(), utteranceUri, queueMode,
getParams(params), utteranceId);
} else {
// No canned audio: request text-to-audio synthesis from the engine.
return service.speak(getCallerIdentity(), text, queueMode, getParams(params),
utteranceId);
}
}, ERROR, "speak");
}
...
}
speak 请求封装给 Handler 的是 SynthesisSpeechItem playAudio 请求封装的是 AudioSpeechItem
// TextToSpeechService.java
// Engine-side binder: wraps each incoming AIDL request in a SpeechItem
// (SynthesisSpeechItem for speak, AudioSpeechItem for playAudio) and hands
// it to the SynthHandler queue.
public abstract class TextToSpeechService extends Service {
private final ITextToSpeechService.Stub mBinder =
new ITextToSpeechService.Stub() {
@Override
public int speak(
IBinder caller,
CharSequence text,
int queueMode,
Bundle params,
String utteranceId) {
// Capture the Binder caller identity (uid/pid) for per-app
// bookkeeping and later stopForApp()-style filtering.
SpeechItem item =
new SynthesisSpeechItem(
caller,
Binder.getCallingUid(),
Binder.getCallingPid(),
params,
utteranceId,
text);
return mSynthHandler.enqueueSpeechItem(queueMode, item);
}
@Override
public int playAudio( ... ) {
SpeechItem item =
new AudioSpeechItem( ... );
...
}
...
};
...
}
// TextToSpeechService.java
// Serializes speech requests: validates the item, applies queue-mode
// semantics (flush/destroy), then posts a Runnable that plays the item on
// the handler's thread.
private class SynthHandler extends Handler {
...
public int enqueueSpeechItem(int queueMode, final SpeechItem speechItem) {
// Items that can report progress expose UtteranceProgressDispatcher;
// keep a reference so validation/queue failures can be reported back.
UtteranceProgressDispatcher utterenceProgress = null;
if (speechItem instanceof UtteranceProgressDispatcher) {
utterenceProgress = (UtteranceProgressDispatcher) speechItem;
}
if (!speechItem.isValid()) {
if (utterenceProgress != null) {
utterenceProgress.dispatchOnError(
TextToSpeech.ERROR_INVALID_REQUEST);
}
return TextToSpeech.ERROR;
}
if (queueMode == TextToSpeech.QUEUE_FLUSH) {
// Flush only this caller's pending items.
stopForApp(speechItem.getCallerIdentity());
} else if (queueMode == TextToSpeech.QUEUE_DESTROY) {
// Drop everything, regardless of caller.
stopAll();
}
Runnable runnable = new Runnable() {
@Override
public void run() {
// Only play if this item successfully becomes the current one;
// otherwise it was superseded and is stopped instead.
if (setCurrentSpeechItem(speechItem)) {
speechItem.play();
removeCurrentSpeechItem();
} else {
speechItem.stop();
}
}
};
Message msg = Message.obtain(this, runnable);
// Tag the message with the caller identity so stopForApp() can
// remove this caller's queued messages.
msg.obj = speechItem.getCallerIdentity();
if (sendMessage(msg)) {
return TextToSpeech.SUCCESS;
} else {
// Handler rejected the message (e.g. looper quitting) — report it.
if (utterenceProgress != null) {
utterenceProgress.dispatchOnError(TextToSpeech.ERROR_SERVICE);
}
return TextToSpeech.ERROR;
}
}
...
}
// TextToSpeechService.java
// SpeechItem.play() guards against double-start, then delegates to the
// subclass playImpl().
private abstract class SpeechItem {
...
public void play() {
synchronized (this) {
if (mStarted) {
throw new IllegalStateException("play() called twice");
}
mStarted = true;
}
playImpl();
}
}
// A queued speak() request: bundles text+params into a SynthesisRequest and
// drives the engine's onSynthesizeText() with a SynthesisCallback.
class SynthesisSpeechItem extends UtteranceSpeechItemWithParams {
public SynthesisSpeechItem(
...
String utteranceId,
CharSequence text) {
mSynthesisRequest = new SynthesisRequest(mText, mParams);
...
}
...
@Override
protected void playImpl() {
AbstractSynthesisCallback synthesisCallback;
mEventLogger.onRequestProcessingStart();
synchronized (this) {
...
mSynthesisCallback = createSynthesisCallback();
synthesisCallback = mSynthesisCallback;
}
// Core engine entry point: the engine implementation synthesizes the
// request and reports back via the callback.
TextToSpeechService.this.onSynthesizeText(mSynthesisRequest, synthesisCallback);
// Safety net: if the engine called start() but forgot done(), close
// out the synthesis on its behalf.
if (synthesisCallback.hasStarted() && !synthesisCallback.hasFinished()) {
synthesisCallback.done();
}
}
...
}
Engine 需要从 SynthesisRequest 中提取 speak 的目标文本、参数等信息,针对不同信息进行区别处理。并通过 SynthesisCallback 的各接口将数据和时机带回: 在数据合成前,通过 start() 告诉系统生成音频的采样频率,多少位 pcm 格式音频,几通道等等。PlaybackSynthesisCallback 的实现将会创建播放的 SynthesisPlaybackQueueItem 交由 AudioPlaybackHandler 去排队调度 之后,通过 audioAvailable() 接口将合成的数据以 byte[] 形式传递回来,会取出 start() 时创建的 QueueItem put 该 audio 数据开始播放 最后,通过 done() 告知合成完毕
// PlaybackSynthesisCallback.java
// SynthesisCallback implementation that streams engine-produced PCM into an
// audio playback queue: start() creates the queue item, audioAvailable()
// feeds it data, done() finalizes it.
class PlaybackSynthesisCallback extends AbstractSynthesisCallback {
...
@Override
public int start(int sampleRateInHz, int audioFormat, int channelCount) {
// Tell the client synthesis has begun (with the audio format).
mDispatcher.dispatchOnBeginSynthesis(sampleRateInHz, audioFormat, channelCount);
int channelConfig = BlockingAudioTrack.getChannelConfig(channelCount);
synchronized (mStateLock) {
...
// Create the playback item and enqueue it for the audio handler
// thread; audioAvailable() will push data into it.
SynthesisPlaybackQueueItem item = new SynthesisPlaybackQueueItem(
mAudioParams, sampleRateInHz, audioFormat, channelCount,
mDispatcher, mCallerIdentity, mLogger);
mAudioTrackHandler.enqueue(item);
mItem = item;
}
return TextToSpeech.SUCCESS;
}
@Override
public int audioAvailable(byte[] buffer, int offset, int length) {
SynthesisPlaybackQueueItem item = null;
synchronized (mStateLock) {
...
item = mItem;
}
// Copy the engine's buffer: the engine may reuse it after this call.
final byte[] bufferCopy = new byte[length];
System.arraycopy(buffer, offset, bufferCopy, 0, length);
mDispatcher.dispatchOnAudioAvailable(bufferCopy);
try {
// May block until the playback queue has room (back-pressure).
item.put(bufferCopy);
}
...
return TextToSpeech.SUCCESS;
}
@Override
public int done() {
int statusCode = 0;
SynthesisPlaybackQueueItem item = null;
synchronized (mStateLock) {
...
mDone = true;
if (mItem == null) {
// done() without a preceding start(): report final status
// directly since no playback item exists.
if (mStatusCode == TextToSpeech.SUCCESS) {
mDispatcher.dispatchOnSuccess();
} else {
mDispatcher.dispatchOnError(mStatusCode);
}
return TextToSpeech.ERROR;
}
item = mItem;
statusCode = mStatusCode;
}
// Finalize outside the lock: mark the queue item finished or stopped.
if (statusCode == TextToSpeech.SUCCESS) {
item.done();
} else {
item.stop(statusCode);
}
return TextToSpeech.SUCCESS;
}
...
}
// SynthesisPlaybackQueueItem.java
// Producer/consumer buffer between the synthesis thread (put/done) and the
// playback thread (run/take), coordinated with a lock + condition variables
// (mReadReady signals data/EOF; mNotFull signals freed capacity).
final class SynthesisPlaybackQueueItem ... {
// Producer side: append a PCM buffer and wake a waiting reader.
void put(byte[] buffer) throws InterruptedException {
try {
mListLock.lock();
long unconsumedAudioMs = 0;
...
mDataBufferList.add(new ListEntry(buffer));
mUnconsumedBytes += buffer.length;
mReadReady.signal();
} finally {
mListLock.unlock();
}
}
// Consumer side: block until data arrives, or stop/done ends the stream.
private byte[] take() throws InterruptedException {
try {
mListLock.lock();
while (mDataBufferList.size() == 0 && !mStopped && !mDone) {
mReadReady.await();
}
...
ListEntry entry = mDataBufferList.poll();
mUnconsumedBytes -= entry.mBytes.length;
// Capacity freed: let a blocked put() proceed.
mNotFull.signal();
return entry.mBytes;
} finally {
mListLock.unlock();
}
}
// Playback loop: init the AudioTrack, drain buffers until the stream
// ends, then release and report the end status to the client.
public void run() {
...
final UtteranceProgressDispatcher dispatcher = getDispatcher();
dispatcher.dispatchOnStart();
if (!mAudioTrack.init()) {
dispatcher.dispatchOnError(TextToSpeech.ERROR_OUTPUT);
return;
}
try {
byte[] buffer = null;
while ((buffer = take()) != null) {
mAudioTrack.write(buffer);
}
} ...
mAudioTrack.waitAndRelease();
dispatchEndStatus();
}
// Producer signals end-of-stream; wake both waiters so they can exit.
void done() {
try {
mListLock.lock();
mDone = true;
mReadReady.signal();
mNotFull.signal();
} finally {
mListLock.unlock();
}
}
}
// TextToSpeechService.java
// Internal progress-reporting contract: mirrors the client-visible
// UtteranceProgressListener events, implemented by speech items to forward
// engine-side progress back to the requesting app.
interface UtteranceProgressDispatcher {
void dispatchOnStop();
void dispatchOnSuccess();
void dispatchOnStart();
void dispatchOnError(int errorCode);
void dispatchOnBeginSynthesis(int sampleRateInHz, int audioFormat, int channelCount);
void dispatchOnAudioAvailable(byte[] audio);
public void dispatchOnRangeStart(int start, int end, int frame);
}
// Speech item that can report progress: each dispatch forwards to the
// per-client CallbackMap, keyed by caller identity. Events are only sent
// when the request carries an utterance id — without one the client has no
// way to correlate the callback.
private abstract class UtteranceSpeechItem extends SpeechItem
implements UtteranceProgressDispatcher {
...
@Override
public void dispatchOnStart() {
final String utteranceId = getUtteranceId();
if (utteranceId != null) {
mCallbacks.dispatchOnStart(getCallerIdentity(), utteranceId);
}
}
@Override
public void dispatchOnAudioAvailable(byte[] audio) {
final String utteranceId = getUtteranceId();
if (utteranceId != null) {
mCallbacks.dispatchOnAudioAvailable(getCallerIdentity(), utteranceId, audio);
}
}
@Override
public void dispatchOnSuccess() {
final String utteranceId = getUtteranceId();
if (utteranceId != null) {
mCallbacks.dispatchOnSuccess(getCallerIdentity(), utteranceId);
}
}
@Override
public void dispatchOnStop() { ... }
@Override
public void dispatchOnError(int errorCode) { ... }
@Override
public void dispatchOnBeginSynthesis(int sampleRateInHz, int audioFormat, int channelCount) { ... }
@Override
public void dispatchOnRangeStart(int start, int end, int frame) { ... }
}
// Registry of client callback binders: looks up the ITextToSpeechCallback
// for a caller identity and invokes the matching remote method, crossing
// back into the client process over AIDL.
private class CallbackMap extends RemoteCallbackList<ITextToSpeechCallback> {
...
public void dispatchOnStart(Object callerIdentity, String utteranceId) {
ITextToSpeechCallback cb = getCallbackFor(callerIdentity);
if (cb == null) return;
try {
cb.onStart(utteranceId);
} ...
}
public void dispatchOnAudioAvailable(Object callerIdentity, String utteranceId, byte[] buffer) {
ITextToSpeechCallback cb = getCallbackFor(callerIdentity);
if (cb == null) return;
try {
cb.onAudioAvailable(utteranceId, buffer);
} ...
}
public void dispatchOnSuccess(Object callerIdentity, String utteranceId) {
ITextToSpeechCallback cb = getCallbackFor(callerIdentity);
if (cb == null) return;
try {
cb.onSuccess(utteranceId);
} ...
}
...
}
// TextToSpeech.java
// Back in the client process: the Connection's binder callback receives the
// engine's events and forwards them to the app's UtteranceProgressListener.
public class TextToSpeech {
...
private abstract class Connection implements ServiceConnection {
...
private final ITextToSpeechCallback.Stub mCallback =
new ITextToSpeechCallback.Stub() {
@Override
public void onStart(String utteranceId) {
// Snapshot the listener reference; skip silently if the app
// never registered one.
UtteranceProgressListener listener = mUtteranceProgressListener;
if (listener != null) {
listener.onStart(utteranceId);
}
}
...
};
}
}
// TTSTest.kt
// End of the round trip: the app's UtteranceProgressListener callbacks are
// the final destination of the engine-side dispatch chain shown above.
class TTSTest(context: Context) {
init {
tts.setOnUtteranceProgressListener(object : UtteranceProgressListener() {
override fun onStart(utteranceId: String?) { ... }
override fun onDone(utteranceId: String?) { ... }
override fun onStop(utteranceId: String?, interrupted: Boolean) { ... }
override fun onError(utteranceId: String?) { ... }
})
}
....
}
TTS 播报前记得申请对应 type 的音频焦点 TTS Request App 的 Activity 或 Service 生命周期销毁的时候,比如 onDestroy() 等时候,需要调用 TextToSpeech 的 shutdown() 释放连接、资源 可以通过 addSpeech() 指定固定文本的对应 audio 资源(比如说语音里常用的几套唤醒后的欢迎词 audio),在后续的文本请求时直接播放该 audio,免去文本转语音的过程、提高效率
常见的调用错误包括:"Duplicate call to done()"(重复调用 done())以及 "done() was called before start() call"(在 start() 之前调用了 done())。
TTS Request App 调用 TextToSpeech 构造函数,由系统完成播报前的准备工作,比如通过 Connection 绑定和初始化目标的 TTS Engine;Request App 提供目标 text 并调用 speak() 请求;TextToSpeech 会检查目标 text 是否设置过本地的 audio 资源,没有的话会通过 Connection 调用 ITextToSpeechService AIDL 的 speak() 继续;TextToSpeechService 收到后封装请求 SynthesisRequest 和用于回调结果的 SynthesisCallback 实例;之后将两者作为参数调用核心实现 onSynthesizeText(),其将解析 Request 并进行 Speech 音频数据合成;此后通过 SynthesisCallback 将合成前后的关键回调告知系统,尤其是 AudioTrack 播放;同时需要将 speak 请求的结果告知 Request App,即通过 UtteranceProgressDispatcher 中转,实际上是调用 ITextToSpeechCallback AIDL;最后通过 UtteranceProgressListener 告知 TextToSpeech 初始化时设置的各回调。