forked from SociallyIneptWeeb/LanguageLeapAI
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvoice_translator.py
More file actions
139 lines (109 loc) · 3.96 KB
/
voice_translator.py
File metadata and controls
139 lines (109 loc) · 3.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import speech_recognition as sr
import wave
from os import getenv
from pathlib import Path
from time import sleep
import deepl
import googletrans
import keyboard
import pyaudio
import requests
from dotenv import load_dotenv
from modules.asr import speech_to_text
from modules.tts import speak
load_dotenv()
USE_DEEPL = getenv('USE_DEEPL', 'False').lower() in ('true', '1', 't')
DEEPL_AUTH_KEY = getenv('DEEPL_AUTH_KEY')
TARGET_LANGUAGE = getenv('TARGET_LANGUAGE_CODE')
MIC_NAME = getenv('MICROPHONE_INPUT_NAME')
def get_mic_index(starting_name):
for i, microphone_name in enumerate(sr.Microphone.list_microphone_names()):
if microphone_name.startswith(starting_name):
print(f'MICROPHONE_INPUT found: {microphone_name} at index {i}')
return i
raise ValueError(f'MICROPHONE_INPUT not found starting with: {starting_name}')
MIC_ID = get_mic_index(MIC_NAME)
#just to check if it's found i have no clue what i'm doing
APP_OUTPUT_NAME = getenv('AUX_OUTPUT_NAME')
def get_mic_index(starting_name):
for i, microphone_name in enumerate(sr.Microphone.list_microphone_names()):
if microphone_name.startswith(starting_name):
print(f'AUX_OUTPUT found: {microphone_name} at index {i}')
return i
raise ValueError(f'AUX_OUTPUT not found starting with: {starting_name}')
APP_OUTPUT_ID = get_mic_index(APP_OUTPUT_NAME)
RECORD_KEY = getenv('MIC_RECORD_KEY')
LOGGING = getenv('LOGGING', 'False').lower() in ('true', '1', 't')
MIC_AUDIO_PATH = Path(__file__).resolve().parent / r'audio/mic.wav'
CHUNK = 1024
FORMAT = pyaudio.paInt16
def on_press_key(_):
global frames, recording, stream
if not recording:
frames = []
recording = True
stream = p.open(format=FORMAT,
channels=MIC_CHANNELS,
rate=MIC_SAMPLING_RATE,
input=True,
frames_per_buffer=CHUNK,
input_device_index=MIC_ID)
def on_release_key(_):
global recording, stream
recording = False
stream.stop_stream()
stream.close()
stream = None
# if empty audio file
if not frames:
print('No audio file to transcribe detected.')
return
# write microphone audio to file
wf = wave.open(str(MIC_AUDIO_PATH), 'wb')
wf.setnchannels(MIC_CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(MIC_SAMPLING_RATE)
wf.writeframes(b''.join(frames))
wf.close()
# transcribe audio
try:
eng_speech = speech_to_text(MIC_AUDIO_PATH, 'transcribe', 'en')
except requests.exceptions.JSONDecodeError:
print('Too many requests to process at once')
return
if eng_speech:
if USE_DEEPL:
translated_speech = translator.translate_text(eng_speech, target_lang=TARGET_LANGUAGE)
else:
translated_speech = translator.translate(eng_speech, dest=TARGET_LANGUAGE).text
if LOGGING:
print(f'English: {eng_speech}')
print(f'Translated: {translated_speech}')
speak(translated_speech, TARGET_LANGUAGE)
else:
print('No speech detected.')
if __name__ == '__main__':
p = pyaudio.PyAudio()
# get channels and sampling rate of mic
mic_info = p.get_device_info_by_index(MIC_ID)
MIC_CHANNELS = mic_info['maxInputChannels']
MIC_SAMPLING_RATE = int(mic_info['defaultSampleRate'])
frames = []
recording = False
stream = None
# Set DeepL or Google Translator
if USE_DEEPL:
translator = deepl.Translator(DEEPL_AUTH_KEY)
else:
translator = googletrans.Translator()
keyboard.on_press_key(RECORD_KEY, on_press_key)
keyboard.on_release_key(RECORD_KEY, on_release_key)
try:
while True:
if recording and stream:
data = stream.read(CHUNK)
frames.append(data)
else:
sleep(0.5)
except KeyboardInterrupt:
print('Closing voice translator.')