diff --git a/client/lib/src/AudioStreamer.ts b/client/lib/src/AudioStreamer.ts index fd492529..252af791 100644 --- a/client/lib/src/AudioStreamer.ts +++ b/client/lib/src/AudioStreamer.ts @@ -58,12 +58,9 @@ export class AudioStreamer extends Duplex { } // Option Change - setServerUrl = (serverUrl: string, mode: Protocol, openTab: boolean = false) => { + setServerUrl = (serverUrl: string, mode: Protocol) => { this.serverUrl = serverUrl this.protocol = mode - if (openTab) { - window.open(serverUrl, '_blank') - } console.log(`[AudioStreamer] Server Setting:${this.serverUrl} ${this.protocol}`) this.createSocketIO()// mode check is done in the method. diff --git a/client/lib/src/VoiceChangerClient.ts b/client/lib/src/VoiceChangerClient.ts index fd961c8b..aba247ee 100644 --- a/client/lib/src/VoiceChangerClient.ts +++ b/client/lib/src/VoiceChangerClient.ts @@ -2,7 +2,7 @@ import { VoiceChangerWorkletNode, VolumeListener } from "./VoiceChangerWorkletNo // @ts-ignore import workerjs from "raw-loader!../worklet/dist/index.js"; import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js"; -import { createDummyMediaStream } from "./util"; +import { createDummyMediaStream, validateUrl } from "./util"; import { BufferSize, DefaultVoiceChangerOptions, DefaultVoiceChangerRequestParamas, Protocol, VoiceChangerMode, VoiceChangerRequestParamas, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const"; import MicrophoneStream from "microphone-stream"; import { AudioStreamer, Callbacks, AudioStreamerListeners } from "./AudioStreamer"; @@ -169,7 +169,21 @@ export class VoiceChnagerClient { } // Audio Streamer Settingg setServerUrl = (serverUrl: string, mode: Protocol, openTab: boolean = false) => { - this.audioStreamer.setServerUrl(serverUrl, mode, openTab) + const url = validateUrl(serverUrl) + const pageUrl = `${location.protocol}//${location.host}` + console.log("SERVER CHECK", url, pageUrl) + + if (url != pageUrl && location.protocol == "https:") { + if (openTab) { + const value = window.confirm("MMVC Server is different from this page's origin. Open tab to open ssl connection. OK? (You can close the opened tab after ssl connection succeed.)"); + if (value) { + window.open(url, '_blank') + } else { + alert("Your voice conversion may fail...") + } + } + } + this.audioStreamer.setServerUrl(validateUrl(serverUrl), mode) } setRequestParams = (val: VoiceChangerRequestParamas) => { diff --git a/client/lib/src/uploader.ts b/client/lib/src/uploader.ts index 183441ea..e7676def 100644 --- a/client/lib/src/uploader.ts +++ b/client/lib/src/uploader.ts @@ -1,7 +1,6 @@ import { OnnxExecutionProvider } from "./const" +import { validateUrl } from "./util" -const DEBUG = false -const DEBUG_BASE_URL = "http://localhost:18888" type FileChunk = { hash: number, @@ -15,12 +14,10 @@ export type ServerInfo = { providers: string[] } - export const getInfo = async (baseUrl: string) => { - const getInfoURL = DEBUG ? `${DEBUG_BASE_URL}/info` : `${baseUrl}/info` - + const url = validateUrl(baseUrl) + "/info" const info = await new Promise((resolve) => { - const request = new Request(getInfoURL, { + const request = new Request(url, { method: 'GET', }); fetch(request).then(async (response) => { @@ -33,7 +30,7 @@ export const getInfo = async (baseUrl: string) => { export const uploadLargeFile = async (baseUrl: string, file: File, onprogress: (progress: number, end: boolean) => void) => { - const uploadURL = DEBUG ? `${DEBUG_BASE_URL}/upload_file` : `${baseUrl}/upload_file` + const url = validateUrl(baseUrl) + "/upload_file" onprogress(0, false) const size = 1024 * 1024; const fileChunks: FileChunk[] = []; @@ -60,7 +57,7 @@ export const uploadLargeFile = async (baseUrl: string, file: File, onprogress: ( const formData = new FormData(); formData.append("file", chunk.chunk); formData.append("filename", `${file.name}_${chunk.hash}`); - const request = new Request(uploadURL, { + const request = new Request(url, { method: 'POST', body: formData, }); @@ -82,13 +79,12 @@ export const uploadLargeFile = async (baseUrl: string, file: File, onprogress: ( } export const concatUploadedFile = async (baseUrl: string, file: File, chunkNum: number) => { - const loadModelURL = DEBUG ? `${DEBUG_BASE_URL}/concat_uploaded_file` : `${baseUrl}/concat_uploaded_file` - + const url = validateUrl(baseUrl) + "/concat_uploaded_file" new Promise((resolve) => { const formData = new FormData(); formData.append("filename", file.name); formData.append("filenameChunkNum", "" + chunkNum); - const request = new Request(loadModelURL, { + const request = new Request(url, { method: 'POST', body: formData, }); @@ -100,13 +96,13 @@ export const concatUploadedFile = async (baseUrl: string, file: File, chunkNum: } export const loadModel = async (baseUrl: string, configFile: File, pyTorchModelFile: File | null, onnxModelFile: File | null) => { - const loadModelURL = DEBUG ? `${DEBUG_BASE_URL}/load_model` : `${baseUrl}/load_model` + const url = validateUrl(baseUrl) + "/load_model" const loadP = new Promise((resolve) => { const formData = new FormData(); formData.append("pyTorchModelFilename", pyTorchModelFile?.name || "-"); formData.append("onnxModelFilename", onnxModelFile?.name || "-"); formData.append("configFilename", configFile.name); - const request = new Request(loadModelURL, { + const request = new Request(url, { method: 'POST', body: formData, }); @@ -119,7 +115,7 @@ export const loadModel = async (baseUrl: string, configFile: File, pyTorchModelF } export const setOnnxExecutionProvider = async (baseUrl: string, provider: OnnxExecutionProvider) => { - const url = DEBUG ? `${DEBUG_BASE_URL}/set_onnx_provider` : `${baseUrl}/set_onnx_provider` + const url = validateUrl(baseUrl) + "/set_onnx_provider" const loadP = new Promise((resolve) => { const formData = new FormData(); formData.append("provider", provider); diff --git a/client/lib/src/util.ts b/client/lib/src/util.ts index 9d9e04b6..862af256 100644 --- a/client/lib/src/util.ts +++ b/client/lib/src/util.ts @@ -57,3 +57,10 @@ export const fileSelectorAsDataURL = async (regex: string) => { } +export const validateUrl = (url: string) => { + + if (url.endsWith("/")) { + return url.substring(0, url.length - 1) + } + return url +} diff --git a/server/voice_changer/VoiceChanger.py b/server/voice_changer/VoiceChanger.py index 7f5c271b..d2315130 100755 --- a/server/voice_changer/VoiceChanger.py +++ b/server/voice_changer/VoiceChanger.py @@ -88,16 +88,11 @@ class VoiceChanger(): def set_onnx_provider(self, provider:str): if hasattr(self, "onnx_session"): self.onnx_session.set_providers(providers=[provider]) - print("ONNX_MDEOL!1", self.onnx_session.get_providers()) + print("ONNX_MDEOL: ", self.onnx_session.get_providers()) return {"provider":self.onnx_session.get_providers()} - def on_request(self, gpu, srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate, unpackedData): - convertSize = convertChunkNum * 128 # 128sample/1chunk - if unpackedData.shape[0] * 2 > convertSize: - convertSize = unpackedData.shape[0] * 2 - - print("convert Size", convertChunkNum, convertSize) + def _generate_strength(self, crossFadeOffsetRate, crossFadeEndRate, unpackedData): if self.crossFadeOffsetRate != crossFadeOffsetRate or self.crossFadeEndRate != crossFadeEndRate or self.unpackedData_length != unpackedData.shape[0]: self.crossFadeOffsetRate = crossFadeOffsetRate @@ -128,25 +123,37 @@ class VoiceChanger(): if hasattr(self, 'prev_audio1') == True: delattr(self,"prev_audio1") + def _generate_input(self, unpackedData, convertSize, srcId): + # 今回変換するデータをテンソルとして整形する + audio = torch.FloatTensor(unpackedData.astype(np.float32)) # float32でtensorfを作成 + audio_norm = audio / self.hps.data.max_wav_value # normalize + audio_norm = audio_norm.unsqueeze(0) # unsqueeze + self.audio_buffer = torch.cat([self.audio_buffer, audio_norm], axis=1) # 過去のデータに連結 + audio_norm = self.audio_buffer[:, -convertSize:] # 変換対象の部分だけ抽出 + self.audio_buffer = audio_norm + + spec = spectrogram_torch(audio_norm, self.hps.data.filter_length, + self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length, + center=False) + spec = torch.squeeze(spec, 0) + sid = torch.LongTensor([int(srcId)]) + + data = (self.text_norm, spec, audio_norm, sid) + data = TextAudioSpeakerCollate()([data]) + return data + + + def on_request(self, gpu, srcId, dstId, timestamp, convertChunkNum, crossFadeLowerValue, crossFadeOffsetRate, crossFadeEndRate, unpackedData): + convertSize = convertChunkNum * 128 # 128sample/1chunk + if unpackedData.shape[0] * 2 > convertSize: + convertSize = unpackedData.shape[0] * 2 + + print("convert Size", convertChunkNum, convertSize) + + self._generate_strength(crossFadeOffsetRate, crossFadeEndRate, unpackedData) + data = self. _generate_input(unpackedData, convertSize, srcId) try: - # 今回変換するデータをテンソルとして整形する - audio = torch.FloatTensor(unpackedData.astype(np.float32)) # float32でtensorfを作成 - audio_norm = audio / self.hps.data.max_wav_value # normalize - audio_norm = audio_norm.unsqueeze(0) # unsqueeze - self.audio_buffer = torch.cat([self.audio_buffer, audio_norm], axis=1) # 過去のデータに連結 - audio_norm = self.audio_buffer[:, -convertSize:] # 変換対象の部分だけ抽出 - self.audio_buffer = audio_norm - - spec = spectrogram_torch(audio_norm, self.hps.data.filter_length, - self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length, - center=False) - spec = torch.squeeze(spec, 0) - sid = torch.LongTensor([int(srcId)]) - - data = (self.text_norm, spec, audio_norm, sid) - data = TextAudioSpeakerCollate()([data]) - # if gpu < 0 or (self.gpu_num == 0 and not self.mps_enabled): if gpu == -2 and hasattr(self, 'onnx_session') == True: x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]