Python语音识别API实现文字转语音的几种方法|江阴雨辰互联

2023年7月10日发(作者：)

# Several ways to convert between speech and text with Python speech APIs.
#
# NOTE(review): this listing was recovered from a web page whose extraction
# collapsed the code onto a few lines and stripped most "name.attr" prefixes
# (e.g. "requests.post", "os.path", "self."), URL hosts, and backslashes.
# Everything marked NOTE(review) below is a reconstruction that must be
# confirmed against the original article / vendor documentation before use.

import base64
import json
import os
import shutil
import subprocess
import time

# `requests` (third-party) is imported inside the functions that perform HTTP
# calls so that the pure helpers (e.g. BaiduVoiceToTxt.format_time) stay usable
# when the package is not installed.

# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 30


# ---------------------------------------------------------------------------
# Sogou (worked well and was free at the time the article was written)
# ---------------------------------------------------------------------------

# NOTE(review): the listing used an undefined `url` and the host was stripped
# from every URL; this endpoint is taken from the Sogou Zhiyin TTS REST
# documentation -- confirm it is still valid.
SOGOU_TTS_URL = "https://api.zhiyin.sogou.com/apis/tts/v1/synthesize"


def textToAudio_Sougou(message, filePath):
    """Synthesize *message* with the Sogou TTS REST API and save it to *filePath*.

    Args:
        message: Text to be spoken.
        filePath: Destination file; the raw response body (LINEAR16 audio as
            requested in ``audio_config``) is written to it in binary mode.

    Raises:
        requests.HTTPError: If the service answers with an error status, so
            an error payload is never silently saved as an audio file.
    """
    import requests

    # API reference path (host stripped in the listing):
    #   /doc/?url=/docs/content/tts/references/rest/
    # A bearer token is obtained beforehand with:
    #   curl -X POST -H "Content-Type: application/json" \
    #        --data '{ "appid": "xxx", "appkey": "xxx", "exp": "3600s" }' \
    #        <auth host>/apis/auth/v1/create_token
    token = 'xxx'  # placeholder: replace with the token returned by create_token
    headers = {
        'Authorization': 'Bearer ' + token,
        'Appid': 'xxx',
        'Content-Type': 'application/json',
        'appkey': 'xxx',
        'secretkey': 'xxx',
    }
    data = {
        'input': {
            'text': message,
        },
        'config': {
            'audio_config': {
                'audio_encoding': 'LINEAR16',
                'pitch': 1.0,
                'volume': 1.0,
                'speaking_rate': 1.0,
            },
            'voice_config': {
                'language_code': 'zh-cmn-Hans-CN',
                'speaker': 'female',
            },
        },
    }

    # ensure_ascii=False keeps the Chinese text readable in the payload; the
    # body is then sent as explicit UTF-8 bytes.
    response = requests.post(
        url=SOGOU_TTS_URL,
        headers=headers,
        data=json.dumps(data, ensure_ascii=False).encode('utf-8'),
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    with open(filePath, 'wb') as f:
        f.write(response.content)


# ---------------------------------------------------------------------------
# Baidu (now a paid service that comes with some free quota)
# ---------------------------------------------------------------------------

class BaiduVoiceToTxt():
    """Turn mp3 files into subtitles via ffmpeg, speech-vad-demo and Baidu ASR.

    Pipeline: mp3 -> 16 kHz mono PCM (ffmpeg) -> chunks (speech-vad-demo) ->
    one Baidu speech-recognition request per chunk.
    """

    # NOTE(review): both executable paths were stripped from the listing
    # (only "." survived); these values are reconstructions -- confirm.
    FFMPEG_EXE = os.path.join(".", "ffmpeg", "bin", "ffmpeg.exe")
    VAD_DEMO_EXE = os.path.join("build", "vad-demo.exe")  # relative to vad_demo_dir

    # NOTE(review): hosts were stripped from the listing; these are Baidu's
    # publicly documented endpoints -- confirm.
    AUTH_URL = "https://openapi.baidu.com/oauth/2.0/token"
    ASR_URL = "http://vop.baidu.com/server_api"

    def __init__(self):
        # Root of the speech-vad-demo checkout; the splitter is run from here
        # because it uses paths relative to its own directory.
        self.vad_demo_dir = os.path.join(".", "speech-vad-demo")
        # PCM file that speech-vad-demo splits. The location is fixed by
        # speech-vad-demo and cannot be chosen.
        # NOTE(review): file name reconstructed as "16k_1.pcm"; it matches the
        # 10-character prefix that main() slices off chunk names -- confirm.
        self.pcm_path = os.path.join(self.vad_demo_dir, "pcm", "16k_1.pcm")
        # Directory that receives the chunks; also fixed by speech-vad-demo.
        self.output_pcm_path = os.path.join(self.vad_demo_dir, "output_pcm")

    def change_file_format(self, filepath):
        """Convert the audio file *filepath* to the PCM file at ``self.pcm_path``.

        The Baidu interface only accepts PCM here, so the input is transcoded
        to signed 16-bit little-endian, mono, 16 kHz.

        Raises:
            subprocess.CalledProcessError: If ffmpeg exits with an error.
        """
        # Remove a stale PCM file from a previous run, if any.
        try:
            os.remove(self.pcm_path)
        except FileNotFoundError:
            pass

        # An argument list (no shell) keeps file names with spaces or shell
        # metacharacters from breaking or hijacking the command.
        subprocess.run(
            [
                self.FFMPEG_EXE, "-y", "-i", filepath,
                "-acodec", "pcm_s16le", "-f", "s16le",
                "-ac", "1", "-ar", "16000",
                self.pcm_path,
            ],
            check=True,
        )

    def devide_video(self):
        """Split ``self.pcm_path`` into chunks inside ``self.output_pcm_path``.

        Baidu accepts at most 60 seconds of audio per request, hence the
        split. The output directory is emptied first by deleting and
        re-creating it.

        Raises:
            subprocess.CalledProcessError: If the splitter exits with an error.
        """
        if os.path.isdir(self.output_pcm_path):
            shutil.rmtree(self.output_pcm_path)
            # Pause kept from the original between delete and re-create;
            # presumably lets the OS finish the removal -- TODO confirm.
            time.sleep(1)
        os.makedirs(self.output_pcm_path, exist_ok=True)

        # Run the splitter with its own directory as cwd instead of calling
        # os.chdir(), so the working directory of this process never changes
        # (and is not left wrong if the splitter fails). The executable path
        # is made absolute because relative lookup under `cwd` is
        # platform-dependent.
        demo_dir = os.path.abspath(self.vad_demo_dir)
        subprocess.run(
            [os.path.join(demo_dir, self.VAD_DEMO_EXE)],
            cwd=demo_dir,
            check=True,
        )

    def format_time(self, msecs):
        """Format integer milliseconds *msecs* as ``HH:MM:SS,mmm``.

        Hours, minutes and seconds are zero-padded to two digits and the
        milliseconds to three, e.g. ``3661001 -> "01:01:01,001"``.
        """
        hours, remainder = divmod(msecs, 60 * 60 * 1000)
        minutes, remainder = divmod(remainder, 60 * 1000)
        seconds, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"

    def get_access_token(self):
        """Request and return an ``access_token`` for the Baidu AI interface.

        Raises:
            requests.HTTPError: If the token endpoint answers with an error status.
            KeyError: If the JSON response has no ``access_token`` field.
        """
        import requests

        # Set these to your own API Key / Secret Key. The environment
        # variables take precedence; the literals are the sample values from
        # the listing, kept only as a backward-compatible fallback.
        client_id = os.environ.get(
            "BAIDU_API_KEY", 'f3wT23Otc8jXlDZ4HGtS4jfT')
        client_secret = os.environ.get(
            "BAIDU_SECRET_KEY", 'YPPjW3E0VGPUOfZwhjNGVn7LTu3hwssj')

        response_at = requests.get(
            self.AUTH_URL,
            params={
                'grant_type': 'client_credentials',
                'client_id': client_id,
                'client_secret': client_secret,
            },
            timeout=REQUEST_TIMEOUT,
        )
        response_at.raise_for_status()
        return response_at.json()['access_token']

    def transfer_voice_to_srt(self, access_token, filepath):
        """Send one PCM chunk to Baidu speech recognition.

        Args:
            access_token: Token returned by :meth:`get_access_token`.
            filepath: Path of the PCM chunk to recognize.

        Returns:
            The response body as text (JSON produced by the service).
        """
        import requests

        # The interface expects a JSON POST body.
        headers = {
            'Content-Type': 'application/json'
        }
        with open(filepath, 'rb') as pcm_obj:
            pcm_bytes = pcm_obj.read()

        # "len" is the size of the raw audio in bytes (what the original got
        # from the file size); "speech" is the base64 text of those bytes.
        post_data = {
            "format": "pcm",
            "rate": 16000,
            "dev_pid": 1737,
            "channel": 1,
            "token": access_token,
            "cuid": "1111111111",
            "len": len(pcm_bytes),
            "speech": base64.b64encode(pcm_bytes).decode(),
        }
        response = requests.post(
            self.ASR_URL,
            headers=headers,
            data=json.dumps(post_data),
            timeout=REQUEST_TIMEOUT,
        )
        return response.text


def main():
    """Convert every .mp3 in the video folder into a same-named .srt file."""
    baidu_voice_to_srt_obj = BaiduVoiceToTxt()
    # Folder holding the audio files to be converted.
    video_dir = os.path.join(".", "video")
    # NOTE(review): backslashes were stripped from this literal in the
    # listing; reconstructed as ASS override tags (75 % x/y scale) -- confirm.
    subtitle_format = r"{\fscx75\fscy75}"

    # Only .mp3 is accepted; converting other formats to a PCM that satisfies
    # the interface was not investigated by the author.
    all_video_file = sorted(
        name for name in os.listdir(video_dir)
        if name.lower().endswith(".mp3")
    )
    video_file_num = len(all_video_file)
    print(f"当前共有{video_file_num}个音频文件需要转换，即将进行处理请稍等...")
    if not all_video_file:
        return

    # One token serves the whole run, so request it once instead of per file.
    access_token = baidu_voice_to_srt_obj.get_access_token()

    # Outer loop: one mp3 file at a time.
    for i, video_file_name in enumerate(all_video_file, start=1):
        print(f"当前转换{video_file_name}({i}/{video_file_num})")
        # The transcript goes to a same-named .srt next to the audio file.
        stem = os.path.splitext(video_file_name)[0]
        video_file_srt_path = os.path.join(video_dir, f"{stem}.srt")

        # mp3 -> pcm, then split into chunks shorter than 60 seconds.
        baidu_voice_to_srt_obj.change_file_format(
            os.path.join(video_dir, video_file_name))
        baidu_voice_to_srt_obj.devide_video()

        file_dir = baidu_voice_to_srt_obj.output_pcm_path
        all_pcm_file = sorted(os.listdir(file_dir))
        pcm_file_num = len(all_pcm_file)
        print(f"当前所转文件{video_file_name}({i}/{video_file_num})被切分成{pcm_file_num}块，即将逐块进行音文转换请稍等...")

        # Overwrite any previous .srt. Lines are written with "\n" and
        # newline="\r\n" emits them as CRLF exactly once on every platform
        # (the listing's "rn" was a "\r\n" whose backslashes were lost).
        with open(video_file_srt_path, 'w', encoding='utf-8',
                  newline='\r\n') as video_file_srt_obj:
            # Inner loop: recognize every chunk of the current file.
            for j, filename in enumerate(all_pcm_file, start=1):
                filepath = os.path.join(file_dir, filename)
                if not os.path.isfile(filepath):
                    continue

                # Chunk names carry "<start>-<end>" in milliseconds between a
                # 10-character prefix and a 6-character suffix.
                # NOTE(review): presumably "16k_1.pcm_<start>-<end>_X.pcm" -- confirm.
                start_ms, end_ms = filename[10:-6].split("-")[:2]
                time_start_str = baidu_voice_to_srt_obj.format_time(int(start_ms))
                time_end_str = baidu_voice_to_srt_obj.format_time(int(end_ms))
                print(f"当前转换{video_file_name}({i}/{video_file_num})-{time_start_str}-{time_end_str}({j}/{pcm_file_num})")

                response_text = baidu_voice_to_srt_obj.transfer_voice_to_srt(
                    access_token, filepath)
                json_result = json.loads(response_text)

                # SRT cue: index, time range, text, blank line.
                video_file_srt_obj.write(f"{j}\n")
                video_file_srt_obj.write(f"{time_start_str} --> {time_end_str}\n")
                if json_result['err_no'] == 0:
                    print(f"{time_start_str}-{time_end_str}({j}/{pcm_file_num})转换成功：{json_result['result'][0]}")
                    video_file_srt_obj.write(f"{subtitle_format}{json_result['result'][0]}\n")
                elif json_result['err_no'] == 3301:
                    # 3301: audio quality too poor to recognize.
                    print(f"{time_start_str}-{time_end_str}({j}/{pcm_file_num})音频质量过差无法识别")
                    video_file_srt_obj.write(f"{subtitle_format}音频质量过差无法识别\n")
                else:
                    print(f"{time_start_str}-{time_end_str}({j}/{pcm_file_num})转换过程遇到其他错误")
                    video_file_srt_obj.write(f"{subtitle_format}转换过程遇到其他错误\n")
                video_file_srt_obj.write("\n")


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# Tencent (paid) -- listed by the article without any code.
# ---------------------------------------------------------------------------
# This concludes the article on several ways to implement text-to-speech with
# Python speech-recognition APIs. For more on Python text-to-speech, search the
# site's earlier articles or browse the related articles below. Thank you for
# your continued support!

发布者：admin，转载请注明出处：http://www.yc00.com/web/1688931342a184798.html