科大讯飞 IFLYTEK TTS
官网给出demo了
语音合成(流式版)WebAPI 文档 | 讯飞开放平台文档中心 (xfyun.cn)
class Ws_Param(object):
    """Request parameters and signed-URL builder for the iFLYTEK streaming TTS WebSocket API.

    The instance stores the credentials and the three request sections
    (``CommonArgs``, ``BusinessArgs``, ``Data``) that are later serialized
    into the JSON message sent over the WebSocket.
    """

    def __init__(self, APPID, APIKey, APISecret, Text, _vcn="xiaoyan"):
        """Store the credentials and build the request sections.

        :param APPID: Application id, sent as ``app_id`` in the common section.
        :param APIKey: API key placed in the authorization string.
        :param APISecret: Secret used as the HMAC-SHA256 signing key.
        :param Text: Text to synthesize; sent as base64-encoded UTF-8.
        :param _vcn: Voice name placed in the ``vcn`` business parameter.
        """
        self.APPID = APPID
        self.APIKey = APIKey
        self.APISecret = APISecret
        self.Text = Text

        # Common parameters ("common" section of the request).
        self.CommonArgs = {"app_id": self.APPID}
        # Business parameters ("business" section); more options are listed
        # on the vendor's site.
        self.BusinessArgs = {
            "aue": "raw",
            "auf": "audio/L16;rate=16000",
            "tte": "utf8",
            "vcn": _vcn,
        }
        encoded_text = base64.b64encode(self.Text.encode("utf-8")).decode("utf-8")
        self.Data = {"status": 2, "text": encoded_text}
        # Minority languages must be sent differently: the API's "unicode"
        # means UTF-16 little-endian ("UTF-16LE"), i.e. the text would be
        # encoded with self.Text.encode('utf-16-le') before base64-encoding.
        # NOTE(review): Python's plain 'utf-16' codec prepends a BOM - confirm
        # which form the service expects before enabling this.

    def create_url(self):
        """Return the WebSocket URL with the signed authentication query string.

        The signature is an HMAC-SHA256 (keyed with ``APISecret``) over the
        ``host``, ``date`` and request-line, base64-encoded and embedded in a
        base64-encoded ``authorization`` parameter.
        """
        url = 'wss://tts-api.xfyun.cn/v2/tts'
        # NOTE(review): the signed host differs from the host in `url`; kept
        # exactly as in the original - confirm against the vendor docs.
        host = "ws-api.xfyun.cn"

        # RFC 1123 timestamp (GMT). datetime.timestamp() honours the `fold`
        # attribute set by datetime.now(), so the value stays correct during
        # the repeated hour at the end of DST, unlike mktime(timetuple()).
        date = format_date_time(datetime.now().timestamp())

        # String to sign: host, date and request-line, newline separated.
        signature_origin = f"host: {host}\ndate: {date}\nGET /v2/tts HTTP/1.1"

        # Sign with HMAC-SHA256 and base64-encode the digest.
        digest = hmac.new(
            self.APISecret.encode('utf-8'),
            signature_origin.encode('utf-8'),
            digestmod=hashlib.sha256,
        ).digest()
        signature_sha = base64.b64encode(digest).decode(encoding='utf-8')

        authorization_origin = (
            f'api_key="{self.APIKey}", algorithm="hmac-sha256", '
            f'headers="host date request-line", signature="{signature_sha}"'
        )
        authorization = base64.b64encode(
            authorization_origin.encode('utf-8')
        ).decode(encoding='utf-8')

        # Authentication parameters appended to the URL as a query string.
        query = {
            "authorization": authorization,
            "date": date,
            "host": host,
        }
        return url + '?' + urlencode(query)
def on_message(ws, message, output_filename='output.pcm'):
    """Handle one WebSocket frame: append its audio and finish on the last frame.

    :param ws: WebSocket app object; closed once the final frame is processed.
    :param message: Raw JSON text received from the server.
    :param output_filename: PCM file the decoded audio is appended to. After
        the final frame it is converted to MP3 next to it (``.pcm`` replaced
        by ``.mp3`` in the name).

    Any exception raised while handling the frame is caught and printed so a
    single bad frame does not tear down the WebSocket loop.
    """
    try:
        payload = json.loads(message)
        code = payload["code"]
        sid = payload["sid"]
        print(payload)

        # Check the status code first: an error frame is reported with the
        # server's own message instead of failing on a missing "data" field.
        if code != 0:
            errMsg = payload["message"]
            print("sid:%s call error:%s code is:%s" % (sid, errMsg, code))
            return

        data = payload["data"]
        status = data["status"]
        encoded_audio = data.get("audio")
        if encoded_audio:
            # Append this chunk to the output file.
            with open(output_filename, 'ab') as f:
                f.write(base64.b64decode(encoded_audio))

        # status == 2 marks the last frame. Close and convert only AFTER the
        # final chunk has been written, otherwise the MP3 misses its tail.
        if status == 2:
            print("ws is closed")
            ws.close()
            pcm_to_mp3(output_filename, output_filename.replace('.pcm', '.mp3'))
    except Exception as e:
        print("receive msg,but parse exception:", e)
# Handler for errors reported by the WebSocket.
def on_error(ws, error):
    """Print the error reported for the WebSocket connection.

    :param ws: WebSocket app object (unused).
    :param error: Error object or message passed to this callback.
    """
    print("### error:", error)
# Handler for the WebSocket being closed.
def on_close(ws, *args):
    """Print a marker when the WebSocket connection is closed.

    :param ws: WebSocket app object (unused).
    :param args: Extra positional arguments are accepted and ignored so the
        handler works both with callers that pass only ``ws`` and with
        websocket-client versions that also pass the close status code and
        close message.
    """
    print("### closed ###")
# Handler invoked once the WebSocket connection is established.
def on_open(ws):
    """Send the synthesis request from a background thread.

    The request is built from the module-level ``wsParam`` object, so that
    name must be defined before the connection opens.

    :param ws: WebSocket app object used to send the request.
    """
    def run(*args):
        # Serialize the three request sections into a single JSON message.
        request = json.dumps({
            "common": wsParam.CommonArgs,
            "business": wsParam.BusinessArgs,
            "data": wsParam.Data,
        })
        print("------>开始发送文本数据")
        ws.send(request)
        # NOTE(review): this removes 'demo.pcm', which is not the default
        # output file of on_message ('output.pcm') - confirm this cleanup is
        # still intended.
        leftover = 'demo.pcm'
        if os.path.exists(leftover):
            os.remove(leftover)

    thread.start_new_thread(run, ())
但是输出是`.pcm`的音频格式,而我需要`.mp3`的音频格式,所以需要多写一个音频格式转换的函数:
def pcm_to_mp3(pcm_file, mp3_file, channels=1, sample_rate=16000, bit_depth=16):
    """
    Convert a raw PCM file to MP3 format.

    :param pcm_file: Path to the input PCM file.
    :param mp3_file: Path where the output MP3 file will be saved.
    :param channels: Number of audio channels. Default is 1 (mono).
    :param sample_rate: Sample rate in Hz. Default is 16000.
    :param bit_depth: Bit depth. Default is 16.
    """
    # Read the raw samples; the context manager closes the file handle
    # promptly instead of leaving it to the garbage collector.
    with open(pcm_file, 'rb') as pcm:
        raw_samples = pcm.read()

    # Wrap the headerless samples with the format information they lack.
    audio = AudioSegment(
        data=raw_samples,
        sample_width=bit_depth // 8,  # bytes per sample
        frame_rate=sample_rate,
        channels=channels,
    )
    # Export to MP3
    audio.export(mp3_file, format="mp3")
然后就可以免费试用科大讯飞的 API 进行语音合成了。它支持的音色挺多,但免费版本应该不支持英文:就我的试用结果来看,英文文本会被逐个字母念出来。
if __name__ == "__main__":
    # Fill in the values below to run a test synthesis:
    #   APPID, APISecret, APIKey - credentials
    #   Text                     - text to synthesize
    #   _vcn                     - voice name (https://console.xfyun.cn/services/tts)
    #       iFLYTEK Xiaoyan     Mandarin    xiaoyan
    #       iFLYTEK Xu Jiu      Mandarin    aisjiuxu
    #       iFLYTEK Xiaoping    Mandarin    aisxping
    #       iFLYTEK Xiaojing    Mandarin    aisjinger
    #       iFLYTEK Xu Xiaobao  Mandarin    aisbabyxu
    wsParam = Ws_Param(APPID='', APISecret='',
                       APIKey='',
                       Text="我们的AI视频真好看真精彩", _vcn='aisxping')
    # Output file name
    output_filename = 'test_aisxping.pcm'
    # on_message appends to the output file, so a file left over from a
    # previous run would be prepended to the new audio; start from scratch.
    if os.path.exists(output_filename):
        os.remove(output_filename)

    websocket.enableTrace(False)
    wsUrl = wsParam.create_url()
    ws = websocket.WebSocketApp(wsUrl,
                                on_message=lambda ws, msg: on_message(ws, msg, output_filename),
                                on_error=on_error,
                                on_close=on_close)
    ws.on_open = on_open
    # NOTE(review): ssl.CERT_NONE disables TLS certificate verification, so
    # the signed URL could be sent to an impostor server. Kept as in the
    # original; consider dropping `sslopt` to use the default verification.
    ws.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})
OpenAI TTS
[OpenAI TTS 官网](https://platform.openai.com/docs/guides/text-to-speech)
# voice_list = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
def generate_speech(input_text, voice="onyx", folder="src/audio", file_name="default.mp3"):
    """Synthesize ``input_text`` with the OpenAI TTS API and save it to a file.

    :param input_text: Text to convert to speech.
    :param voice: Voice name passed to the API (see ``voice_list`` above).
    :param folder: Directory the audio file is written to; created if it
        does not exist. An empty string means the current directory.
    :param file_name: Name of the output file inside ``folder``.

    Errors are logged rather than raised; the function returns ``None`` in
    every case.
    """
    import os  # local import: the file's import block is not visible here

    try:
        # Make sure the target directory exists before doing any network
        # work; otherwise writing the streamed response fails.
        if folder:
            os.makedirs(folder, exist_ok=True)
        # Construct file path for output
        speech_file_path = os.path.join(folder, file_name)

        # Initialize OpenAI client
        client = OpenAI(api_key="", base_url="")

        # Generate speech, streaming the response body straight to disk.
        with client.audio.speech.with_streaming_response.create(
            model="tts-1",
            voice=voice,
            input=input_text
        ) as response:
            response.stream_to_file(speech_file_path)
        logging.info(f"Speech file saved to {speech_file_path}")
    except Exception as e:
        logging.error(f"Error occurred: {e}")
PlayHT TTS
中英文都支持,能选的声音也很多,也不算贵,是自用最好的一个,还可以自定义音色。
import requests
# Request headers: fill in the PlayHT credentials before running.
headers = {
    "Authorization": "",
    "accept": "text/event-stream",
    "content-type": "application/json",
    "X-USER-ID": ""
}
url = "https://api.play.ht/api/v2/tts"
# Synthesis request: text, voice manifest, output format and engine.
payload = {
    "text": "Hello from a realistic voice.",
    "voice": "s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
    "output_format": "mp3",
    "voice_engine": "PlayHT2.0"
}
# Without a timeout, requests waits indefinitely on an unresponsive server.
response = requests.post(url, json=payload, headers=headers, timeout=60)
if not response.ok:
    # Make failures visible; the body printed below carries the details.
    print(f"PlayHT request failed with HTTP status {response.status_code}")
print(response.text)
参与讨论
(Participate in the discussion)
参与讨论