Merge pull request #139 from w-okada/v.1.5.0

V.1.5.0
w-okada 2023-02-22 14:38:32 +09:00 committed by GitHub
commit 7d81ecbbff
92 changed files with 7885 additions and 11060 deletions


@@ -71,7 +71,7 @@ Windows and Mac versions are provided. No advance preparation is required.
 ### Alpha version (for v.1.5.x)
 - [MMVCServerSIO_mac_onnxcpu_v.1.5.0.8a.zip](https://drive.google.com/file/d/1HhgrPMQwgjVgJJngsyZ4JJiieQQAI-zC/view?usp=sharing) 509MB
-- [MMVCServerSIO_win_onnxgpu_cuda_v.1.5.0.8a.zip](https://drive.google.com/file/d/182q30PeI7ULgdtn-wg5VEGb0mUfHsCi5/view?usp=sharing) 2.55GB
+- [MMVCServerSIO_win_onnxgpu_cuda_v.1.5.0.8a.zip](https://drive.google.com/file/d/182q30PeI7ULgdtn-wg5VEGb0mUfHsCi5/view?usp=sharing)2.55GB
 ### Latest version (for v.1.3.x)
 - [MMVCServerSIO_mac_onnxcpu_v.1.3.7.2.zip](https://drive.google.com/file/d/1AcJaQXH8ZtlCSrifvRBWdat19HD_A2fr/view?usp=sharing) 365MB
 - [MMVCServerSIO_win_onnxdirectML_cuda_v.1.3.7.2.zip](https://drive.google.com/file/d/1WKW3uqmIi9D13Jzao8jWVqx2KANmmQji/view?usp=sharing) 2050MB

client/demo/.vscode/settings.json (new file)

@@ -0,0 +1,8 @@
{
"files.associations": {
"*.css": "postcss"
},
"workbench.colorCustomizations": {
"tab.activeBackground": "#65952acc"
}
}

client/demo/dist/assets/buymeacoffee.png (new binary file, 12 KiB)

(new binary file, 3.9 KiB)

(new binary file, 122 KiB)

File diff suppressed because one or more lines are too long


@@ -1,3 +1,10 @@
+/*!
+    localForage -- Offline Storage, Improved
+    Version 1.10.0
+    https://localforage.github.io/localForage
+    (c) 2013-2017 Mozilla, Apache License 2.0
+*/
 /*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
 /**

File diff suppressed because it is too large


@@ -10,6 +10,7 @@
         "build:prod": "npm-run-all clean webpack:prod",
         "build:dev": "npm-run-all clean webpack:dev",
         "start": "webpack-dev-server --config webpack.dev.js",
+        "build:mod": "cd ../lib && npm run build:dev && cd - && cp -r ../lib/dist/* node_modules/@dannadori/voice-changer-client-js/dist/",
         "test": "echo \"Error: no test specified\" && exit 1"
     },
     "keywords": [
@@ -18,39 +19,44 @@
     "author": "wataru.okada@flect.co.jp",
     "license": "ISC",
     "devDependencies": {
-        "@babel/plugin-transform-runtime": "^7.19.6",
+        "@babel/plugin-transform-runtime": "^7.21.0",
         "@babel/preset-env": "^7.20.2",
         "@babel/preset-react": "^7.18.6",
-        "@babel/preset-typescript": "^7.18.6",
+        "@babel/preset-typescript": "^7.21.0",
-        "@types/node": "^18.11.18",
+        "@types/node": "^18.14.0",
-        "@types/react": "^18.0.27",
+        "@types/react": "^18.0.28",
-        "@types/react-dom": "^18.0.10",
+        "@types/react-dom": "^18.0.11",
         "autoprefixer": "^10.4.13",
         "babel-loader": "^9.1.2",
         "copy-webpack-plugin": "^11.0.0",
         "css-loader": "^6.7.3",
-        "eslint": "^8.33.0",
+        "eslint": "^8.34.0",
         "eslint-config-prettier": "^8.6.0",
         "eslint-plugin-prettier": "^4.2.1",
-        "eslint-plugin-react": "^7.32.1",
+        "eslint-plugin-react": "^7.32.2",
-        "eslint-webpack-plugin": "^3.2.0",
+        "eslint-webpack-plugin": "^4.0.0",
         "html-loader": "^4.2.0",
         "html-webpack-plugin": "^5.5.0",
         "npm-run-all": "^4.1.5",
         "postcss-loader": "^7.0.2",
-        "postcss-nested": "^6.0.0",
+        "postcss-nested": "^6.0.1",
-        "prettier": "^2.8.3",
+        "prettier": "^2.8.4",
         "rimraf": "^4.1.2",
         "style-loader": "^3.3.1",
         "ts-loader": "^9.4.2",
         "tsconfig-paths": "^4.1.2",
-        "typescript": "^4.9.4",
+        "typescript": "^4.9.5",
         "webpack": "^5.75.0",
         "webpack-cli": "^5.0.1",
         "webpack-dev-server": "^4.11.1"
     },
     "dependencies": {
-        "@dannadori/voice-changer-client-js": "^1.0.66",
+        "@dannadori/voice-changer-client-js": "^1.0.79",
+        "@fortawesome/fontawesome-svg-core": "^6.3.0",
+        "@fortawesome/free-brands-svg-icons": "^6.3.0",
+        "@fortawesome/free-regular-svg-icons": "^6.3.0",
+        "@fortawesome/free-solid-svg-icons": "^6.3.0",
+        "@fortawesome/react-fontawesome": "^0.2.0",
         "react": "^18.2.0",
         "react-dom": "^18.2.0"
     }

(new binary file, 12 KiB)
(new binary file, 3.9 KiB)
(new binary file, 122 KiB)


@@ -1,22 +1,60 @@
 import * as React from "react";
 import { createRoot } from "react-dom/client";
 import "./css/App.css"
-import { useMemo, } from "react";
+import { ErrorInfo, useMemo, useState, } from "react";
 import { useMicrophoneOptions } from "./100_options_microphone";
+import { AppStateProvider, useAppState } from "./001_provider/001_AppStateProvider";
+import localForage from "localforage";
+import { library } from "@fortawesome/fontawesome-svg-core";
+import { fas } from "@fortawesome/free-solid-svg-icons";
+import { far } from "@fortawesome/free-regular-svg-icons";
+import { fab } from "@fortawesome/free-brands-svg-icons";
+import { AppRootProvider } from "./001_provider/001_AppRootProvider";
+import ErrorBoundary from "./001_provider/900_ErrorBoundary";
+import { INDEXEDDB_KEY_CLIENT, INDEXEDDB_KEY_MODEL_DATA, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_WORKLET, INDEXEDDB_KEY_WORKLETNODE } from "@dannadori/voice-changer-client-js";
+library.add(fas, far, fab);
 const container = document.getElementById("app")!;
 const root = createRoot(container);
 const App = () => {
-    const { voiceChangerSetting, clearSetting } = useMicrophoneOptions()
-    const onClearSettingClicked = async () => {
-        clearSetting()
-        location.reload()
-    }
+    const appState = useAppState()
+    const { voiceChangerSetting } = useMicrophoneOptions()
+    const titleRow = useMemo(() => {
+        return (
+            <div className="top-title">
+                <span className="title">Voice Changer Setting</span>
+                <span className="top-title-version">for v.1.5.x</span>
+                <span className="belongings">
+                    <a className="link" href="https://github.com/w-okada/voice-changer" target="_blank" rel="noopener noreferrer">
+                        <img src="./assets/icons/github.svg" />
+                        <span>github</span>
+                    </a>
+                    <a className="link" href="https://zenn.dev/wok/books/0003_vc-helper-v_1_5" target="_blank" rel="noopener noreferrer">
+                        <img src="./assets/icons/help-circle.svg" />
+                        <span>manual</span>
+                    </a>
+                    <a className="link" href="https://www.buymeacoffee.com/wokad" target="_blank" rel="noopener noreferrer">
+                        <img className="donate-img" src="./assets/buymeacoffee.png" />
+                        <span></span>
+                    </a>
+                </span>
+                <span className="belongings">
+                </span>
+            </div>
+        )
+    }, [])
     const clearRow = useMemo(() => {
+        const onClearSettingClicked = async () => {
+            await appState.clearSetting()
+            location.reload()
+        }
         return (
             <>
                 <div className="body-row split-3-3-4 left-padding-1">
@@ -30,38 +68,99 @@ const App = () => {
         )
     }, [])
+    const mainSetting = useMemo(() => {
+        return (
+            <>
+                <div className="main-body">
+                    {titleRow}
+                    {clearRow}
+                    {voiceChangerSetting}
+                </div>
+            </>
+        )
+    }, [voiceChangerSetting])
+    return (
+        <>
+            {mainSetting}
+        </>
+    )
+}
+const AppStateWrapper = () => {
+    // error boundary setup
+    const [error, setError] = useState<{ error: Error, errorInfo: ErrorInfo }>()
+    // localForage.config({
+    //     driver: localForage.INDEXEDDB,
+    //     name: INDEXEDDB_DB_APP_NAME,
+    //     version: 1.0,
+    //     storeName: INDEXEDDB_DB_NAME,
+    //     description: 'appStorage'
+    // })
+    const errorComponent = useMemo(() => {
+        const errorName = error?.error.name || "no error name"
+        const errorMessage = error?.error.message || "no error message"
+        const errorInfos = (error?.errorInfo.componentStack || "no error stack").split("\n")
+        const onClearCacheClicked = () => {
+            [
+                INDEXEDDB_KEY_CLIENT,
+                INDEXEDDB_KEY_SERVER,
+                INDEXEDDB_KEY_WORKLETNODE,
+                INDEXEDDB_KEY_MODEL_DATA,
+                INDEXEDDB_KEY_WORKLET
+            ].forEach((x) => {
+                localForage.removeItem(x)
+            })
+            location.reload();
+        }
+        return (
+            <div className="error-container">
+                <div className="top-error-message">
+                </div>
+                <div className="top-error-description">
+                    <p></p>
+                    <p></p>
+                    <p><button onClick={onClearCacheClicked}></button></p>
+                </div>
+                <div className="error-detail">
+                    <div className="error-name">
+                        {errorName}
+                    </div>
+                    <div className="error-message">
+                        {errorMessage}
+                    </div>
+                    <div className="error-info-container">
+                        {errorInfos.map(x => {
+                            return <div className="error-info-line" key={x}>{x}</div>
+                        })}
+                    </div>
+                </div>
+            </div>
+        )
+    }, [error])
+    const updateError = (error: Error, errorInfo: React.ErrorInfo) => {
+        console.log("error compo", error, errorInfo)
+        setError({ error, errorInfo })
+    }
     return (
-        <div className="main-body">
-            <div className="body-row split-6-4">
-                <div className="body-top-title">
-                    Voice Changer Setting
-                </div>
-                <div className="body-top-title-belongings">
-                    <div className="belonging-item">
-                        <a className="link" href="https://github.com/w-okada/voice-changer" target="_blank" rel="noopener noreferrer">
-                            <img src="./assets/icons/github.svg" />
-                            <span>github</span>
-                        </a>
-                    </div>
-                    <div className="belonging-item">
-                        <a className="link" href="https://zenn.dev/wok/articles/s01_vc001_top" target="_blank" rel="noopener noreferrer">
-                            <img src="./assets/icons/help-circle.svg" />
-                            <span>manual</span>
-                        </a>
-                    </div>
-                </div>
-            </div>
-            {clearRow}
-            {voiceChangerSetting}
-            <div>
-                <audio id="audio-output"></audio>
-            </div>
-        </div>
+        <ErrorBoundary fallback={errorComponent} onError={updateError}>
+            <AppStateProvider>
+                <App></App>
+            </AppStateProvider>
+        </ErrorBoundary>
     )
 }
 root.render(
-    <App></App>
+    <AppRootProvider>
+        <AppStateWrapper></AppStateWrapper>
+    </AppRootProvider>
 );


@@ -0,0 +1,26 @@
import { useEffect, useState } from "react"
export type AudioConfigState = {
audioContext: AudioContext | null
}
export const useAudioConfig = (): AudioConfigState => {
const [audioContext, setAudioContext] = useState<AudioContext | null>(null)
useEffect(() => {
const createAudioContext = () => {
const ctx = new AudioContext()
document.removeEventListener('touchstart', createAudioContext);
document.removeEventListener('mousedown', createAudioContext);
setAudioContext(ctx)
}
document.addEventListener('touchstart', createAudioContext, false);
document.addEventListener('mousedown', createAudioContext, false);
}, [])
const ret: AudioConfigState = {
audioContext
}
return ret
}
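Usage note: this hook deliberately defers AudioContext creation until the first touchstart/mousedown, because browsers refuse to run audio before a user gesture. A minimal consumer might gate its UI on that state; the AudioGate component below is a hypothetical sketch for illustration (component name and import path assumed), not part of this commit.

import * as React from "react";
import { ReactNode } from "react";
import { useAudioConfig } from "./001_globalHooks/001_useAudioConfig";

// Hypothetical gate: show a prompt until the first user gesture has
// produced an AudioContext, then render the real UI.
export const AudioGate = ({ children }: { children: ReactNode }) => {
    const { audioContext } = useAudioConfig()
    if (!audioContext) {
        return <div>Click or tap anywhere to enable audio</div>
    }
    return <>{children}</>
}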


@@ -0,0 +1,25 @@
import { ClientState, useClient } from "@dannadori/voice-changer-client-js"
import { AUDIO_ELEMENT_FOR_PLAY_RESULT } from "../const"
export type UseVCClientProps = {
audioContext: AudioContext | null
}
export type VCClientState = {
clientState: ClientState
}
export const useVCClient = (props: UseVCClientProps) => {
const clientState = useClient({
audioContext: props.audioContext,
audioOutputElementId: AUDIO_ELEMENT_FOR_PLAY_RESULT
})
const ret: VCClientState = {
clientState
}
return ret
}


@@ -0,0 +1,67 @@
import { useEffect, useState } from "react"
import { StateControlCheckbox, useStateControlCheckbox } from "../hooks/useStateControlCheckbox";
import { OpenAdvancedSettingCheckbox, OpenConverterSettingCheckbox, OpenDeviceSettingCheckbox, OpenModelSettingCheckbox, OpenQualityControlCheckbox, OpenServerControlCheckbox, OpenSpeakerSettingCheckbox } from "../const"
export type StateControls = {
openServerControlCheckbox: StateControlCheckbox
openModelSettingCheckbox: StateControlCheckbox
openDeviceSettingCheckbox: StateControlCheckbox
openQualityControlCheckbox: StateControlCheckbox
openSpeakerSettingCheckbox: StateControlCheckbox
openConverterSettingCheckbox: StateControlCheckbox
openAdvancedSettingCheckbox: StateControlCheckbox
}
type FrontendManagerState = {
stateControls: StateControls
isConverting: boolean,
isAnalyzing: boolean
};
export type FrontendManagerStateAndMethod = FrontendManagerState & {
setIsConverting: (val: boolean) => void
setIsAnalyzing: (val: boolean) => void
}
export const useFrontendManager = (): FrontendManagerStateAndMethod => {
const [isConverting, setIsConverting] = useState<boolean>(false)
const [isAnalyzing, setIsAnalyzing] = useState<boolean>(false)
// (1) Controller Switch
const openServerControlCheckbox = useStateControlCheckbox(OpenServerControlCheckbox);
const openModelSettingCheckbox = useStateControlCheckbox(OpenModelSettingCheckbox);
const openDeviceSettingCheckbox = useStateControlCheckbox(OpenDeviceSettingCheckbox);
const openQualityControlCheckbox = useStateControlCheckbox(OpenQualityControlCheckbox);
const openSpeakerSettingCheckbox = useStateControlCheckbox(OpenSpeakerSettingCheckbox);
const openConverterSettingCheckbox = useStateControlCheckbox(OpenConverterSettingCheckbox);
const openAdvancedSettingCheckbox = useStateControlCheckbox(OpenAdvancedSettingCheckbox);
useEffect(() => {
openServerControlCheckbox.updateState(true)
openModelSettingCheckbox.updateState(true)
openDeviceSettingCheckbox.updateState(true)
openSpeakerSettingCheckbox.updateState(true)
openConverterSettingCheckbox.updateState(true)
// openQualityControlCheckbox.updateState(true)
}, [])
const returnValue = {
stateControls: {
openServerControlCheckbox,
openModelSettingCheckbox,
openDeviceSettingCheckbox,
openQualityControlCheckbox,
openSpeakerSettingCheckbox,
openConverterSettingCheckbox,
openAdvancedSettingCheckbox
},
isConverting,
setIsConverting,
isAnalyzing,
setIsAnalyzing
};
return returnValue;
};
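For orientation: each StateControlCheckbox returned here drives one accordion section of the GUI. Judging from the usages later in this diff, trigger is the hidden checkbox JSX, updateState(bool) opens or closes the section, and checked() reads the current state. A hypothetical section wired the same way, as a sketch only and assuming exactly that API:

import * as React from "react";
import { useAppState } from "../001_provider/001_AppStateProvider";

// Hypothetical accordion section using the StateControlCheckbox API
// (trigger / updateState / checked) observed elsewhere in this PR.
export const ExampleSection = () => {
    const appState = useAppState()
    const checkbox = appState.frontendManagerState.stateControls.openServerControlCheckbox
    return (
        <>
            {checkbox.trigger}
            <div className="partition">
                <div className="partition-header">
                    <span className="title" onClick={() => { checkbox.updateState(!checkbox.checked()) }}>
                        Example Section
                    </span>
                </div>
                <div className="partition-content">{/* body rows go here */}</div>
            </div>
        </>
    )
}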


@@ -0,0 +1,30 @@
import React, { useContext, useRef } from "react";
import { ReactNode } from "react";
import { AudioConfigState, useAudioConfig } from "../001_globalHooks/001_useAudioConfig";
type Props = {
children: ReactNode;
};
type AppRootValue = {
audioContextState: AudioConfigState
}
const AppRootContext = React.createContext<AppRootValue | null>(null);
export const useAppRoot = (): AppRootValue => {
const state = useContext(AppRootContext);
if (!state) {
throw new Error("useAppState must be used within AppStateProvider");
}
return state;
};
export const AppRootProvider = ({ children }: Props) => {
const audioContextState = useAudioConfig()
const providerValue: AppRootValue = {
audioContextState,
};
return <AppRootContext.Provider value={providerValue}>{children}</AppRootContext.Provider>;
};


@@ -0,0 +1,50 @@
import { ClientState } from "@dannadori/voice-changer-client-js";
import React, { useContext, useEffect, useRef } from "react";
import { ReactNode } from "react";
import { useVCClient, VCClientState } from "../001_globalHooks/001_useVCClient";
import { FrontendManagerStateAndMethod, useFrontendManager } from "../001_globalHooks/010_useFrontendManager";
import { useAppRoot } from "./001_AppRootProvider";
type Props = {
children: ReactNode;
};
type AppStateValue = ClientState & {
audioContext: AudioContext
frontendManagerState: FrontendManagerStateAndMethod;
initializedRef: React.MutableRefObject<boolean>
}
const AppStateContext = React.createContext<AppStateValue | null>(null);
export const useAppState = (): AppStateValue => {
const state = useContext(AppStateContext);
if (!state) {
throw new Error("useAppState must be used within AppStateProvider");
}
return state;
};
export const AppStateProvider = ({ children }: Props) => {
const appRoot = useAppRoot()
const clientState = useVCClient({ audioContext: appRoot.audioContextState.audioContext })
const frontendManagerState = useFrontendManager();
const initializedRef = useRef<boolean>(false)
useEffect(() => {
if (clientState.clientState.initialized) {
initializedRef.current = true
}
}, [clientState.clientState.initialized])
const providerValue: AppStateValue = {
audioContext: appRoot.audioContextState.audioContext!,
...clientState.clientState,
frontendManagerState,
initializedRef
};
return <AppStateContext.Provider value={providerValue}>{children}</AppStateContext.Provider>;
};
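Consumer sketch (hypothetical, not in this commit): any component below AppStateProvider can call useAppState(); outside the provider the hook throws, which surfaces wiring mistakes immediately. initializedRef additionally exposes readiness without forcing a re-render:

import * as React from "react";
import { useAppState } from "./001_AppStateProvider";

// Hypothetical status line; must be rendered inside <AppStateProvider>.
// Note: reading a ref during render shows its value at render time only,
// so this will not re-render by itself when the flag flips.
export const InitStatus = () => {
    const appState = useAppState()
    return <div>{appState.initializedRef.current ? "client ready" : "initializing..."}</div>
}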


@@ -0,0 +1,56 @@
import React, { createRef, ErrorInfo } from 'react';
type ErrorBoundaryProps = {
children: React.ReactNode;
fallback: React.ReactNode;
onError?: (error: Error, errorInfo: React.ErrorInfo) => void;
}
type ErrorBoundaryState = {
hasError: boolean;
}
class ErrorBoundary extends React.Component<ErrorBoundaryProps, ErrorBoundaryState> {
private eventHandler: () => void
constructor(props: ErrorBoundaryProps) {
super(props);
this.state = { hasError: false };
this.eventHandler = this.updateError.bind(this);
}
static getDerivedStateFromError(_error: Error) {
// console.warn("React Error Boundary Catch", error)
return { hasError: true };
}
componentDidCatch(error: Error, errorInfo: ErrorInfo) {
// For logging
console.warn("React Error Boundary Catch", error, errorInfo)
const { onError } = this.props;
if (onError) {
onError(error, errorInfo);
}
}
// handle exceptions from async code
updateError() {
this.setState({ hasError: true });
}
componentDidMount() {
window.addEventListener('unhandledrejection', this.eventHandler)
}
componentWillUnmount() {
window.removeEventListener('unhandledrejection', this.eventHandler)
}
render() {
if (this.state.hasError) {
return this.props.fallback;
}
return this.props.children;
}
}
export default ErrorBoundary;
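Usage sketch (hypothetical, not in this commit): unlike a stock React error boundary, this one also flips to its fallback on unhandled promise rejections, thanks to the window "unhandledrejection" listener registered in componentDidMount:

import * as React from "react";
import ErrorBoundary from "./900_ErrorBoundary";

// Hypothetical demo: both a render-time throw and an un-awaited rejected
// promise will switch the boundary to its fallback.
export const Guarded = () => (
    <ErrorBoundary fallback={<div>something went wrong</div>} onError={(e, info) => console.warn(e, info)}>
        <button onClick={() => { void Promise.reject(new Error("async failure")) }}>boom</button>
    </ErrorBoundary>
)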


@@ -1,69 +1,46 @@
 import * as React from "react";
-import { useEffect, useMemo, useState } from "react";
+import { useMemo } from "react";
-import { AUDIO_ELEMENT_FOR_PLAY_RESULT } from "./const";
-import { useServerSettingArea } from "./101_server_setting";
-import { useDeviceSetting } from "./102_device_setting";
-import { useConvertSetting } from "./104_convert_setting";
-import { useAdvancedSetting } from "./105_advanced_setting";
-import { useSpeakerSetting } from "./103_speaker_setting";
-import { useServerControl } from "./106_server_control";
-import { useClient } from "@dannadori/voice-changer-client-js";
+import { useModelSettingArea } from "./102_model_setting";
+import { useDeviceSetting } from "./103_device_setting";
+import { useConvertSetting } from "./106_convert_setting";
+import { useAdvancedSetting } from "./107_advanced_setting";
+import { useSpeakerSetting } from "./105_speaker_setting";
+import { useServerControl } from "./101_server_control";
+import { useQualityControl } from "./104_qulity_control";
 export const useMicrophoneOptions = () => {
-    const [audioContext, setAudioContext] = useState<AudioContext | null>(null)
-    const clientState = useClient({
-        audioContext: audioContext,
-        audioOutputElementId: AUDIO_ELEMENT_FOR_PLAY_RESULT
-    })
-    const serverSetting = useServerSettingArea({ clientState })
-    const deviceSetting = useDeviceSetting(audioContext, { clientState })
-    const speakerSetting = useSpeakerSetting({ clientState })
-    const convertSetting = useConvertSetting({ clientState })
-    const advancedSetting = useAdvancedSetting({ clientState })
-    const serverControl = useServerControl({ clientState })
-    const clearSetting = async () => {
-        await clientState.clearSetting()
-    }
-    useEffect(() => {
-        const createAudioContext = () => {
-            const ctx = new AudioContext()
-            setAudioContext(ctx)
-            document.removeEventListener('touchstart', createAudioContext);
-            document.removeEventListener('mousedown', createAudioContext);
-        }
-        document.addEventListener('touchstart', createAudioContext);
-        document.addEventListener('mousedown', createAudioContext);
-    }, [])
+    const serverControl = useServerControl()
+    const modelSetting = useModelSettingArea()
+    const deviceSetting = useDeviceSetting()
+    const speakerSetting = useSpeakerSetting()
+    const convertSetting = useConvertSetting()
+    const advancedSetting = useAdvancedSetting()
+    const qualityControl = useQualityControl()
     const voiceChangerSetting = useMemo(() => {
         return (
             <>
-                <div className="body-row left-padding-1">
-                    <div className="body-section-title">Virtual Microphone</div>
-                </div>
                 {serverControl.serverControl}
-                {serverSetting.serverSetting}
+                {modelSetting.modelSetting}
                 {deviceSetting.deviceSetting}
+                {qualityControl.qualityControl}
                 {speakerSetting.speakerSetting}
                 {convertSetting.convertSetting}
                 {advancedSetting.advancedSetting}
             </>
         )
     }, [serverControl.serverControl,
-        serverSetting.serverSetting,
+        modelSetting.modelSetting,
         deviceSetting.deviceSetting,
         speakerSetting.speakerSetting,
         convertSetting.convertSetting,
-        advancedSetting.advancedSetting])
+        advancedSetting.advancedSetting,
+        qualityControl.qualityControl])
     return {
-        voiceChangerSetting,
-        clearSetting
+        voiceChangerSetting
     }
 }


@@ -0,0 +1,149 @@
import React, { useEffect, useMemo, useState } from "react"
import { useAppState } from "./001_provider/001_AppStateProvider";
import { AnimationTypes, HeaderButton, HeaderButtonProps } from "./components/101_HeaderButton";
export const useServerControl = () => {
const appState = useAppState()
const [startWithAudioContextCreate, setStartWithAudioContextCreate] = useState<boolean>(false)
const [showPerformanceDetail, setShowPerformanceDetail] = useState<boolean>(false)
const accodionButton = useMemo(() => {
const accodionButtonProps: HeaderButtonProps = {
stateControlCheckbox: appState.frontendManagerState.stateControls.openServerControlCheckbox,
tooltip: "Open/Close",
onIcon: ["fas", "caret-up"],
offIcon: ["fas", "caret-up"],
animation: AnimationTypes.spinner,
tooltipClass: "tooltip-right",
};
return <HeaderButton {...accodionButtonProps}></HeaderButton>;
}, []);
useEffect(() => {
if (!startWithAudioContextCreate) {
return
}
appState.frontendManagerState.setIsConverting(true)
appState.clientSetting.start()
}, [startWithAudioContextCreate])
const startButtonRow = useMemo(() => {
const onStartClicked = async () => {
if (!appState.initializedRef.current) {
while (true) {
// console.log("wait 500ms")
await new Promise<void>((resolve) => {
setTimeout(resolve, 500)
})
// console.log("initiliazed", appState.initializedRef.current)
if (appState.initializedRef.current) {
break
}
}
setStartWithAudioContextCreate(true)
} else {
appState.frontendManagerState.setIsConverting(true)
await appState.clientSetting.start()
}
}
const onStopClicked = async () => {
appState.frontendManagerState.setIsConverting(false)
await appState.clientSetting.stop()
}
const startClassName = appState.frontendManagerState.isConverting ? "body-button-active" : "body-button-stanby"
const stopClassName = appState.frontendManagerState.isConverting ? "body-button-stanby" : "body-button-active"
return (
<div className="body-row split-3-2-2-3 left-padding-1 guided">
<div className="body-item-title left-padding-1">Start</div>
<div className="body-button-container">
<div onClick={onStartClicked} className={startClassName}>start</div>
<div onClick={onStopClicked} className={stopClassName}>stop</div>
</div>
<div>
</div>
<div className="body-input-container">
</div>
</div>
)
}, [appState.frontendManagerState.isConverting, appState.clientSetting.start, appState.clientSetting.stop])
const performanceRow = useMemo(() => {
const performanceDetailLabel = showPerformanceDetail ? "[pre, main, post] <<" : "more >>"
const performanceData = showPerformanceDetail ? `[${appState.performance.preprocessTime}, ${appState.performance.mainprocessTime},${appState.performance.postprocessTime}]` : ""
return (
<>
<div className="body-row split-3-1-1-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">monitor:</div>
<div className="body-item-text">vol<span className="body-item-text-small">(rms)</span></div>
<div className="body-item-text">buf<span className="body-item-text-small">(ms)</span></div>
<div className="body-item-text">res<span className="body-item-text-small">(ms)</span></div>
<div className="body-item-text">
<span onClick={() => { setShowPerformanceDetail(!showPerformanceDetail) }} >{performanceDetailLabel}</span>
</div>
</div>
<div className="body-row split-3-1-1-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1"></div>
<div className="body-item-text">{appState.volume.toFixed(4)}</div>
<div className="body-item-text">{appState.bufferingTime}</div>
<div className="body-item-text">{appState.performance.responseTime}</div>
<div className="body-item-text">{performanceData}</div>
</div>
</>
)
}, [appState.volume, appState.bufferingTime, appState.performance, showPerformanceDetail])
const infoRow = useMemo(() => {
const onReloadClicked = async () => {
const info = await appState.getInfo()
console.log("info", info)
}
return (
<>
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Model Info:</div>
<div className="body-item-text">
<span className="body-item-text-item">{appState.serverSetting.serverSetting.configFile || ""}</span>
<span className="body-item-text-item">{appState.serverSetting.serverSetting.pyTorchModelFile || ""}</span>
<span className="body-item-text-item">{appState.serverSetting.serverSetting.onnxModelFile || ""}</span>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onReloadClicked}>reload</div>
</div>
</div>
</>
)
}, [appState.getInfo, appState.serverSetting.serverSetting])
const serverControl = useMemo(() => {
return (
<>
{appState.frontendManagerState.stateControls.openServerControlCheckbox.trigger}
<div className="partition">
<div className="partition-header">
<span className="caret">
{accodionButton}
</span>
<span className="title" onClick={() => { appState.frontendManagerState.stateControls.openServerControlCheckbox.updateState(!appState.frontendManagerState.stateControls.openServerControlCheckbox.checked()) }}>
Server Control
</span>
</div>
<div className="partition-content">
{startButtonRow}
{performanceRow}
{infoRow}
</div>
</div>
</>
)
}, [startButtonRow, performanceRow, infoRow])
return {
serverControl,
}
}
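Aside: onStartClicked above polls initializedRef every 500 ms before starting, because the flag is set in a ref outside React's render cycle. A hypothetical generic helper expressing the same wait (sketch only, not part of this commit):

// Hypothetical equivalent of the polling loop in onStartClicked:
// resolves once cond() returns true, checking every intervalMs.
export const waitUntil = async (cond: () => boolean, intervalMs = 500): Promise<void> => {
    while (!cond()) {
        await new Promise<void>((resolve) => setTimeout(resolve, intervalMs))
    }
}
// usage: await waitUntil(() => appState.initializedRef.current)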


@@ -1,226 +0,0 @@
import { OnnxExecutionProvider, Framework, fileSelector } from "@dannadori/voice-changer-client-js"
import React, { useState } from "react"
import { useMemo } from "react"
import { ClientState } from "@dannadori/voice-changer-client-js";
export type UseServerSettingProps = {
clientState: ClientState
}
export type ServerSettingState = {
serverSetting: JSX.Element;
}
export const useServerSettingArea = (props: UseServerSettingProps): ServerSettingState => {
const [showPyTorch, setShowPyTorch] = useState<boolean>(false)
const uploadeModelRow = useMemo(() => {
const onPyTorchFileLoadClicked = async () => {
const file = await fileSelector("")
if (file.name.endsWith(".pth") == false) {
alert("モデルファイルの拡張子はpthである必要があります。")
return
}
props.clientState.serverSetting.setFileUploadSetting({
...props.clientState.serverSetting.fileUploadSetting,
pyTorchModel: {
file: file
}
})
}
const onPyTorchFileClearClicked = () => {
props.clientState.serverSetting.setFileUploadSetting({
...props.clientState.serverSetting.fileUploadSetting,
pyTorchModel: null
})
}
const onConfigFileLoadClicked = async () => {
const file = await fileSelector("")
if (file.name.endsWith(".json") == false) {
alert("モデルファイルの拡張子はjsonである必要があります。")
return
}
props.clientState.serverSetting.setFileUploadSetting({
...props.clientState.serverSetting.fileUploadSetting,
configFile: {
file: file
}
})
}
const onConfigFileClearClicked = () => {
props.clientState.serverSetting.setFileUploadSetting({
...props.clientState.serverSetting.fileUploadSetting,
configFile: null
})
}
const onOnnxFileLoadClicked = async () => {
const file = await fileSelector("")
if (file.name.endsWith(".onnx") == false) {
alert("モデルファイルの拡張子はonnxである必要があります。")
return
}
props.clientState.serverSetting.setFileUploadSetting({
...props.clientState.serverSetting.fileUploadSetting,
onnxModel: {
file: file
}
})
}
const onOnnxFileClearClicked = () => {
props.clientState.serverSetting.setFileUploadSetting({
...props.clientState.serverSetting.fileUploadSetting,
onnxModel: null
})
}
const onModelUploadClicked = async () => {
props.clientState.serverSetting.loadModel()
}
const uploadButtonClassName = props.clientState.serverSetting.isUploading ? "body-button-disabled" : "body-button"
const uploadButtonAction = props.clientState.serverSetting.isUploading ? () => { } : onModelUploadClicked
const uploadButtonLabel = props.clientState.serverSetting.isUploading ? "wait..." : "upload"
const configFilenameText = props.clientState.serverSetting.fileUploadSetting.configFile?.filename || props.clientState.serverSetting.fileUploadSetting.configFile?.file?.name || ""
const onnxModelFilenameText = props.clientState.serverSetting.fileUploadSetting.onnxModel?.filename || props.clientState.serverSetting.fileUploadSetting.onnxModel?.file?.name || ""
const pyTorchFilenameText = props.clientState.serverSetting.fileUploadSetting.pyTorchModel?.filename || props.clientState.serverSetting.fileUploadSetting.pyTorchModel?.file?.name || ""
return (
<>
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Model Uploader</div>
<div className="body-item-text">
<div></div>
</div>
<div className="body-item-text">
<div>
<input type="checkbox" checked={showPyTorch} onChange={(e) => {
setShowPyTorch(e.target.checked)
}} /> enable PyTorch
</div>
</div>
</div>
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-2">Config(.json)</div>
<div className="body-item-text">
<div>{configFilenameText}</div>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onConfigFileLoadClicked}>select</div>
<div className="body-button left-margin-1" onClick={onConfigFileClearClicked}>clear</div>
</div>
</div>
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-2">Onnx(.onnx)</div>
<div className="body-item-text">
<div>{onnxModelFilenameText}</div>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onOnnxFileLoadClicked}>select</div>
<div className="body-button left-margin-1" onClick={onOnnxFileClearClicked}>clear</div>
</div>
</div>
{showPyTorch ?
(
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-2">PyTorch(.pth)</div>
<div className="body-item-text">
<div>{pyTorchFilenameText}</div>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onPyTorchFileLoadClicked}>select</div>
<div className="body-button left-margin-1" onClick={onPyTorchFileClearClicked}>clear</div>
</div>
</div>
)
:
(
<></>
)
}
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-2"></div>
<div className="body-item-text">
{props.clientState.serverSetting.isUploading ? `uploading.... ${props.clientState.serverSetting.uploadProgress}%` : ""}
</div>
<div className="body-button-container">
<div className={uploadButtonClassName} onClick={uploadButtonAction}>{uploadButtonLabel}</div>
</div>
</div>
</>
)
}, [
props.clientState.serverSetting.fileUploadSetting,
props.clientState.serverSetting.loadModel,
props.clientState.serverSetting.isUploading,
props.clientState.serverSetting.uploadProgress,
showPyTorch])
const frameworkRow = useMemo(() => {
const onFrameworkChanged = async (val: Framework) => {
props.clientState.serverSetting.setFramework(val)
}
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Framework</div>
<div className="body-select-container">
<select className="body-select" value={props.clientState.serverSetting.setting.framework} onChange={(e) => {
onFrameworkChanged(e.target.value as
Framework)
}}>
{
Object.values(Framework).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [props.clientState.serverSetting.setting.framework, props.clientState.serverSetting.setFramework])
const onnxExecutionProviderRow = useMemo(() => {
if (props.clientState.serverSetting.setting.framework != "ONNX") {
return
}
const onOnnxExecutionProviderChanged = async (val: OnnxExecutionProvider) => {
props.clientState.serverSetting.setOnnxExecutionProvider(val)
}
return (
<div className="body-row split-3-7 left-padding-1">
<div className="body-item-title left-padding-2">OnnxExecutionProvider</div>
<div className="body-select-container">
<select className="body-select" value={props.clientState.serverSetting.setting.onnxExecutionProvider} onChange={(e) => {
onOnnxExecutionProviderChanged(e.target.value as
OnnxExecutionProvider)
}}>
{
Object.values(OnnxExecutionProvider).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [props.clientState.serverSetting.setting.framework, props.clientState.serverSetting.setting.onnxExecutionProvider, props.clientState.serverSetting.setOnnxExecutionProvider])
const serverSetting = useMemo(() => {
return (
<>
<div className="body-row split-3-7 left-padding-1">
<div className="body-sub-section-title">Server Setting</div>
<div className="body-select-container">
</div>
</div>
{uploadeModelRow}
{frameworkRow}
{onnxExecutionProviderRow}
</>
)
}, [uploadeModelRow, frameworkRow, onnxExecutionProviderRow])
return {
serverSetting,
}
}


@@ -1,236 +0,0 @@
import { fileSelectorAsDataURL, useIndexedDB } from "@dannadori/voice-changer-client-js"
import React, { useEffect, useMemo, useState } from "react"
import { AUDIO_ELEMENT_FOR_PLAY_RESULT, AUDIO_ELEMENT_FOR_TEST_CONVERTED, AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK, AUDIO_ELEMENT_FOR_TEST_ORIGINAL, INDEXEDDB_KEY_AUDIO_OUTPUT } from "./const"
import { ClientState } from "@dannadori/voice-changer-client-js";
const reloadDevices = async () => {
try {
const ms = await navigator.mediaDevices.getUserMedia({ video: false, audio: true });
ms.getTracks().forEach(x => { x.stop() })
} catch (e) {
console.warn("Enumerate device error::", e)
}
const mediaDeviceInfos = await navigator.mediaDevices.enumerateDevices();
const audioInputs = mediaDeviceInfos.filter(x => { return x.kind == "audioinput" })
audioInputs.push({
deviceId: "none",
groupId: "none",
kind: "audioinput",
label: "none",
toJSON: () => { }
})
audioInputs.push({
deviceId: "file",
groupId: "file",
kind: "audioinput",
label: "file",
toJSON: () => { }
})
const audioOutputs = mediaDeviceInfos.filter(x => { return x.kind == "audiooutput" })
return [audioInputs, audioOutputs]
}
export type UseDeviceSettingProps = {
clientState: ClientState
}
export type DeviceSettingState = {
deviceSetting: JSX.Element;
}
export const useDeviceSetting = (audioContext: AudioContext | null, props: UseDeviceSettingProps): DeviceSettingState => {
const [inputAudioDeviceInfo, setInputAudioDeviceInfo] = useState<MediaDeviceInfo[]>([])
const [outputAudioDeviceInfo, setOutputAudioDeviceInfo] = useState<MediaDeviceInfo[]>([])
const [audioInputForGUI, setAudioInputForGUI] = useState<string>("none")
const [audioOutputForGUI, setAudioOutputForGUI] = useState<string>("none")
const [fileInputEchoback, setFileInputEchoback] = useState<boolean>() // starts as undefined so the initial mute takes effect
const { getItem, setItem } = useIndexedDB()
useEffect(() => {
const initialize = async () => {
const audioInfo = await reloadDevices()
setInputAudioDeviceInfo(audioInfo[0])
setOutputAudioDeviceInfo(audioInfo[1])
}
initialize()
}, [])
useEffect(() => {
if (typeof props.clientState.clientSetting.setting.audioInput == "string") {
if (inputAudioDeviceInfo.find(x => {
// console.log("COMPARE:", x.deviceId, props.clientState.clientSetting.setting.audioInput)
return x.deviceId == props.clientState.clientSetting.setting.audioInput
})) {
setAudioInputForGUI(props.clientState.clientSetting.setting.audioInput)
}
}
}, [inputAudioDeviceInfo, props.clientState.clientSetting.setting.audioInput])
const audioInputRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">AudioInput</div>
<div className="body-select-container">
<select className="body-select" value={audioInputForGUI} onChange={(e) => {
setAudioInputForGUI(e.target.value)
}}>
{
inputAudioDeviceInfo.map(x => {
return <option key={x.deviceId} value={x.deviceId}>{x.label}</option>
})
}
</select>
</div>
</div>
)
}, [inputAudioDeviceInfo, audioInputForGUI, props.clientState.clientSetting.setting.audioInput])
useEffect(() => {
if (!audioContext) {
return
}
if (audioInputForGUI == "file") {
// file selector (audioMediaInputRow)
} else {
props.clientState.clientSetting.setAudioInput(audioInputForGUI)
}
}, [audioContext, audioInputForGUI, props.clientState.clientSetting.setAudioInput])
const audioMediaInputRow = useMemo(() => {
if (audioInputForGUI != "file") {
return <></>
}
const onFileLoadClicked = async () => {
const url = await fileSelectorAsDataURL("")
// input stream for client.
const audio = document.getElementById(AUDIO_ELEMENT_FOR_TEST_CONVERTED) as HTMLAudioElement
audio.src = url
await audio.play()
const src = audioContext!.createMediaElementSource(audio);
const dst = audioContext!.createMediaStreamDestination()
src.connect(dst)
props.clientState.clientSetting.setAudioInput(dst.stream)
const audio_echo = document.getElementById(AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK) as HTMLAudioElement
audio_echo.srcObject = dst.stream
audio_echo.play()
setFileInputEchoback(false)
// original stream to play.
const audio_org = document.getElementById(AUDIO_ELEMENT_FOR_TEST_ORIGINAL) as HTMLAudioElement
audio_org.src = url
audio_org.pause()
// audio_org.onplay = () => {
// console.log(audioOutputRef.current)
// // @ts-ignore
// audio_org.setSinkId(audioOutputRef.current)
// }
}
return (
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title"></div>
<div className="body-item-text">
<div style={{ display: "none" }}>
org:<audio id={AUDIO_ELEMENT_FOR_TEST_ORIGINAL} controls></audio>
</div>
<div>
<audio id={AUDIO_ELEMENT_FOR_TEST_CONVERTED} controls></audio>
<audio id={AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK} controls hidden></audio>
</div>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onFileLoadClicked}>load</div>
<input type="checkbox" checked={fileInputEchoback} onChange={(e) => { setFileInputEchoback(e.target.checked) }} /> echoback
</div>
</div>
)
}, [audioInputForGUI, props.clientState.clientSetting.setAudioInput, fileInputEchoback])
const audioOutputRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">AudioOutput</div>
<div className="body-select-container">
<select className="body-select" value={audioOutputForGUI} onChange={(e) => {
setAudioOutputForGUI(e.target.value)
setItem(INDEXEDDB_KEY_AUDIO_OUTPUT, e.target.value)
}}>
{
outputAudioDeviceInfo.map(x => {
return <option key={x.deviceId} value={x.deviceId}>{x.label}</option>
})
}
</select>
<audio hidden id={AUDIO_ELEMENT_FOR_PLAY_RESULT}></audio>
</div>
</div>
)
}, [outputAudioDeviceInfo, audioOutputForGUI])
useEffect(() => {
[AUDIO_ELEMENT_FOR_PLAY_RESULT, AUDIO_ELEMENT_FOR_TEST_ORIGINAL, AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK].forEach(x => {
const audio = document.getElementById(x) as HTMLAudioElement
if (audio) {
if (audioOutputForGUI == "none") {
// @ts-ignore
audio.setSinkId("")
} else {
// @ts-ignore
audio.setSinkId(audioOutputForGUI)
}
}
})
}, [audioOutputForGUI])
useEffect(() => {
const loadCache = async () => {
const key = await getItem(INDEXEDDB_KEY_AUDIO_OUTPUT)
if (key) {
setAudioOutputForGUI(key as string)
}
}
loadCache()
}, [])
useEffect(() => {
[AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK].forEach(x => {
const audio = document.getElementById(x) as HTMLAudioElement
if (audio) {
audio.volume = fileInputEchoback ? 1 : 0
}
})
}, [fileInputEchoback])
const deviceSetting = useMemo(() => {
return (
<>
<div className="body-row split-3-7 left-padding-1">
<div className="body-sub-section-title">Device Setting</div>
<div className="body-select-container">
</div>
</div>
{audioInputRow}
{audioMediaInputRow}
{audioOutputRow}
</>
)
}, [audioInputRow, audioMediaInputRow, audioOutputRow])
return {
deviceSetting,
}
}


@@ -0,0 +1,293 @@
import { OnnxExecutionProvider, Framework, fileSelector, Correspondence } from "@dannadori/voice-changer-client-js"
import React, { useState } from "react"
import { useMemo } from "react"
import { useAppState } from "./001_provider/001_AppStateProvider";
import { AnimationTypes, HeaderButton, HeaderButtonProps } from "./components/101_HeaderButton";
export type ServerSettingState = {
modelSetting: JSX.Element;
}
export const useModelSettingArea = (): ServerSettingState => {
const appState = useAppState()
const [showPyTorch, setShowPyTorch] = useState<boolean>(true)
const accodionButton = useMemo(() => {
const accodionButtonProps: HeaderButtonProps = {
stateControlCheckbox: appState.frontendManagerState.stateControls.openModelSettingCheckbox,
tooltip: "Open/Close",
onIcon: ["fas", "caret-up"],
offIcon: ["fas", "caret-up"],
animation: AnimationTypes.spinner,
tooltipClass: "tooltip-right",
};
return <HeaderButton {...accodionButtonProps}></HeaderButton>;
}, []);
const uploadeModelRow = useMemo(() => {
const onPyTorchFileLoadClicked = async () => {
const file = await fileSelector("")
if (file.name.endsWith(".pth") == false) {
alert("モデルファイルの拡張子はpthである必要があります。")
return
}
appState.serverSetting.setFileUploadSetting({
...appState.serverSetting.fileUploadSetting,
pyTorchModel: {
file: file
}
})
}
const onPyTorchFileClearClicked = () => {
appState.serverSetting.setFileUploadSetting({
...appState.serverSetting.fileUploadSetting,
pyTorchModel: null
})
}
const onConfigFileLoadClicked = async () => {
const file = await fileSelector("")
if (file.name.endsWith(".json") == false) {
alert("モデルファイルの拡張子はjsonである必要があります。")
return
}
appState.serverSetting.setFileUploadSetting({
...appState.serverSetting.fileUploadSetting,
configFile: {
file: file
}
})
}
const onConfigFileClearClicked = () => {
appState.serverSetting.setFileUploadSetting({
...appState.serverSetting.fileUploadSetting,
configFile: null
})
}
const onOnnxFileLoadClicked = async () => {
const file = await fileSelector("")
if (file.name.endsWith(".onnx") == false) {
alert("モデルファイルの拡張子はonnxである必要があります。")
return
}
appState.serverSetting.setFileUploadSetting({
...appState.serverSetting.fileUploadSetting,
onnxModel: {
file: file
}
})
}
const onOnnxFileClearClicked = () => {
appState.serverSetting.setFileUploadSetting({
...appState.serverSetting.fileUploadSetting,
onnxModel: null
})
}
const onCorrespondenceFileLoadClicked = async () => {
const file = await fileSelector("")
const correspondenceText = await file.text()
const cors = correspondenceText.split("\n").map(line => {
const items = line.split("|")
if (items.length != 3) {
console.warn("Invalid Correspondence Line:", line)
return null
} else {
const cor: Correspondence = {
sid: Number(items[0]),
correspondence: Number(items[1]),
dirname: items[2]
}
return cor
}
}).filter(x => { return x != null }) as Correspondence[]
console.log(cors)
appState.clientSetting.updateClientSetting({ ...appState.clientSetting.clientSetting, correspondences: cors })
}
const onCorrespondenceFileClearClicked = () => {
appState.clientSetting.updateClientSetting({ ...appState.clientSetting.clientSetting, correspondences: [] })
}
const onModelUploadClicked = async () => {
appState.serverSetting.loadModel()
}
const uploadButtonClassName = appState.serverSetting.isUploading ? "body-button-disabled" : "body-button"
const uploadButtonAction = appState.serverSetting.isUploading ? () => { } : onModelUploadClicked
const uploadButtonLabel = appState.serverSetting.isUploading ? "wait..." : "upload"
const configFilenameText = appState.serverSetting.fileUploadSetting.configFile?.filename || appState.serverSetting.fileUploadSetting.configFile?.file?.name || ""
const onnxModelFilenameText = appState.serverSetting.fileUploadSetting.onnxModel?.filename || appState.serverSetting.fileUploadSetting.onnxModel?.file?.name || ""
const pyTorchFilenameText = appState.serverSetting.fileUploadSetting.pyTorchModel?.filename || appState.serverSetting.fileUploadSetting.pyTorchModel?.file?.name || ""
const correspondenceFileText = appState.clientSetting.clientSetting.correspondences ? JSON.stringify(appState.clientSetting.clientSetting.correspondences.map(x => { return x.dirname })) : ""
const uploadingStatus = appState.serverSetting.isUploading ?
appState.serverSetting.uploadProgress == 0 ? `loading model...(wait about 20sec)` : `uploading.... ${appState.serverSetting.uploadProgress}%` : ""
return (
<>
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Model Uploader</div>
<div className="body-item-text">
<div></div>
</div>
<div className="body-item-text">
<div>
<input type="checkbox" checked={showPyTorch} onChange={(e) => {
setShowPyTorch(e.target.checked)
}} /> enable PyTorch
</div>
</div>
</div>
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-2">Config(.json)</div>
<div className="body-item-text">
<div>{configFilenameText}</div>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onConfigFileLoadClicked}>select</div>
<div className="body-button left-margin-1" onClick={onConfigFileClearClicked}>clear</div>
</div>
</div>
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-2">Correspondence</div>
<div className="body-item-text">
<div>{correspondenceFileText}</div>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onCorrespondenceFileLoadClicked}>select</div>
<div className="body-button left-margin-1" onClick={onCorrespondenceFileClearClicked}>clear</div>
</div>
</div>
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-2">Onnx(.onnx)</div>
<div className="body-item-text">
<div>{onnxModelFilenameText}</div>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onOnnxFileLoadClicked}>select</div>
<div className="body-button left-margin-1" onClick={onOnnxFileClearClicked}>clear</div>
</div>
</div>
{showPyTorch ?
(
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-2">PyTorch(.pth)</div>
<div className="body-item-text">
<div>{pyTorchFilenameText}</div>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onPyTorchFileLoadClicked}>select</div>
<div className="body-button left-margin-1" onClick={onPyTorchFileClearClicked}>clear</div>
</div>
</div>
)
:
(
<></>
)
}
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-2"></div>
<div className="body-item-text">
{uploadingStatus}
</div>
<div className="body-button-container">
<div className={uploadButtonClassName} onClick={uploadButtonAction}>{uploadButtonLabel}</div>
</div>
</div>
</>
)
}, [
appState.serverSetting.fileUploadSetting,
appState.serverSetting.loadModel,
appState.serverSetting.isUploading,
appState.serverSetting.uploadProgress,
appState.clientSetting.clientSetting.correspondences,
appState.serverSetting.updateServerSettings,
appState.serverSetting.setFileUploadSetting,
showPyTorch])
const frameworkRow = useMemo(() => {
const onFrameworkChanged = async (val: Framework) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, framework: val })
}
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Framework</div>
<div className="body-select-container">
<select className="body-select" value={appState.serverSetting.serverSetting.framework} onChange={(e) => {
onFrameworkChanged(e.target.value as
Framework)
}}>
{
Object.values(Framework).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [appState.serverSetting.serverSetting.framework, appState.serverSetting.updateServerSettings])
const onnxExecutionProviderRow = useMemo(() => {
if (appState.serverSetting.serverSetting.framework != "ONNX") {
return
}
const onOnnxExecutionProviderChanged = async (val: OnnxExecutionProvider) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, onnxExecutionProvider: val })
}
console.log("setting", appState.serverSetting.serverSetting)
return (
<div className="body-row split-3-7 left-padding-1">
<div className="body-item-title left-padding-2">OnnxExecutionProvider</div>
<div className="body-select-container">
<select className="body-select" value={appState.serverSetting.serverSetting.onnxExecutionProvider} onChange={(e) => {
onOnnxExecutionProviderChanged(e.target.value as
OnnxExecutionProvider)
}}>
{
Object.values(OnnxExecutionProvider).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [appState.serverSetting.serverSetting.framework, appState.serverSetting.serverSetting.onnxExecutionProvider, appState.serverSetting.updateServerSettings])
const modelSetting = useMemo(() => {
return (
<>
{appState.frontendManagerState.stateControls.openModelSettingCheckbox.trigger}
<div className="partition">
<div className="partition-header">
<span className="caret">
{accodionButton}
</span>
<span className="title" onClick={() => { appState.frontendManagerState.stateControls.openModelSettingCheckbox.updateState(!appState.frontendManagerState.stateControls.openModelSettingCheckbox.checked()) }}>
Model Setting
</span>
<span></span>
</div>
<div className="partition-content">
{uploadeModelRow}
{frameworkRow}
{onnxExecutionProviderRow}
</div>
</div>
</>
)
}, [uploadeModelRow, frameworkRow, onnxExecutionProviderRow])
return {
modelSetting,
}
}
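For reference, onCorrespondenceFileLoadClicked above expects a plain-text file with one pipe-separated triple per line, sid|correspondence|dirname, where the first two fields must parse via Number(). A hypothetical example input (values illustrative only, not from this repository):

// Hypothetical correspondence file content; lines that do not split into
// exactly three fields are logged with console.warn and skipped.
const exampleCorrespondenceText = [
    "100|1.0|speaker_a",
    "107|0.9|speaker_b",
].join("\n")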


@@ -0,0 +1,390 @@
import { fileSelectorAsDataURL, useIndexedDB } from "@dannadori/voice-changer-client-js"
import React, { useEffect, useMemo, useRef, useState } from "react"
import { AUDIO_ELEMENT_FOR_PLAY_RESULT, AUDIO_ELEMENT_FOR_TEST_CONVERTED, AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK, AUDIO_ELEMENT_FOR_TEST_ORIGINAL, INDEXEDDB_KEY_AUDIO_OUTPUT } from "./const"
import { useAppState } from "./001_provider/001_AppStateProvider";
import { AnimationTypes, HeaderButton, HeaderButtonProps } from "./components/101_HeaderButton";
const reloadDevices = async () => {
try {
const ms = await navigator.mediaDevices.getUserMedia({ video: false, audio: true });
ms.getTracks().forEach(x => { x.stop() })
} catch (e) {
console.warn("Enumerate device error::", e)
}
const mediaDeviceInfos = await navigator.mediaDevices.enumerateDevices();
const audioInputs = mediaDeviceInfos.filter(x => { return x.kind == "audioinput" })
audioInputs.push({
deviceId: "none",
groupId: "none",
kind: "audioinput",
label: "none",
toJSON: () => { }
})
audioInputs.push({
deviceId: "file",
groupId: "file",
kind: "audioinput",
label: "file",
toJSON: () => { }
})
const audioOutputs = mediaDeviceInfos.filter(x => { return x.kind == "audiooutput" })
audioOutputs.push({
deviceId: "none",
groupId: "none",
kind: "audiooutput",
label: "none",
toJSON: () => { }
})
// audioOutputs.push({
// deviceId: "record",
// groupId: "record",
// kind: "audiooutput",
// label: "record",
// toJSON: () => { }
// })
return [audioInputs, audioOutputs]
}
export type DeviceSettingState = {
deviceSetting: JSX.Element;
}
export const useDeviceSetting = (): DeviceSettingState => {
const appState = useAppState()
const accodionButton = useMemo(() => {
const accodionButtonProps: HeaderButtonProps = {
stateControlCheckbox: appState.frontendManagerState.stateControls.openDeviceSettingCheckbox,
tooltip: "Open/Close",
onIcon: ["fas", "caret-up"],
offIcon: ["fas", "caret-up"],
animation: AnimationTypes.spinner,
tooltipClass: "tooltip-right",
};
return <HeaderButton {...accodionButtonProps}></HeaderButton>;
}, []);
const [inputAudioDeviceInfo, setInputAudioDeviceInfo] = useState<MediaDeviceInfo[]>([])
const [outputAudioDeviceInfo, setOutputAudioDeviceInfo] = useState<MediaDeviceInfo[]>([])
const [audioInputForGUI, setAudioInputForGUI] = useState<string>("none")
const [audioOutputForGUI, setAudioOutputForGUI] = useState<string>("none")
const [fileInputEchoback, setFileInputEchoback] = useState<boolean>() // starts as undefined so the initial mute takes effect
const { getItem, setItem } = useIndexedDB()
const audioSrcNode = useRef<MediaElementAudioSourceNode>()
const [outputRecordingStarted, setOutputRecordingStarted] = useState<boolean>(false)
const [useServerMicrophone, setUseServerMicrophone] = useState<boolean>(false)
// devices in the list
useEffect(() => {
const initialize = async () => {
const audioInfo = await reloadDevices()
setInputAudioDeviceInfo(audioInfo[0])
setOutputAudioDeviceInfo(audioInfo[1])
// if (useServerMicrophone) {
// try {
// const serverDevices = await appState.serverSetting.getServerDevices()
// setServerInputAudioDeviceInfo(serverDevices.audio_input_devices)
// } catch (e) {
// console.warn(e)
// }
// }
}
initialize()
}, [useServerMicrophone])
// reflect cached settings (this probably also fires when the user edits the setting, but that should not cause problems)
useEffect(() => {
if (typeof appState.clientSetting.clientSetting.audioInput == "string") {
if (inputAudioDeviceInfo.find(x => {
// console.log("COMPARE:", x.deviceId, appState.clientSetting.setting.audioInput)
return x.deviceId == appState.clientSetting.clientSetting.audioInput
})) {
setAudioInputForGUI(appState.clientSetting.clientSetting.audioInput)
}
}
}, [inputAudioDeviceInfo, appState.clientSetting.clientSetting.audioInput])
const audioInputRow = useMemo(() => {
if (useServerMicrophone) {
return <></>
}
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">AudioInput</div>
<div className="body-select-container">
<select className="body-select" value={audioInputForGUI} onChange={(e) => {
setAudioInputForGUI(e.target.value)
}}>
{
inputAudioDeviceInfo.map(x => {
return <option key={x.deviceId} value={x.deviceId}>{x.label}</option>
})
}
</select>
</div>
</div>
)
}, [inputAudioDeviceInfo, audioInputForGUI, useServerMicrophone])
useEffect(() => {
if (audioInputForGUI == "file") {
// file selector (audioMediaInputRow)
} else {
if (!useServerMicrophone) {
appState.clientSetting.updateClientSetting({ ...appState.clientSetting.clientSetting, audioInput: audioInputForGUI })
} else {
console.log("server mic")
appState.clientSetting.updateClientSetting({ ...appState.clientSetting.clientSetting, audioInput: null })
}
}
}, [appState.audioContext, audioInputForGUI, appState.clientSetting.updateClientSetting])
const audioMediaInputRow = useMemo(() => {
if (audioInputForGUI != "file") {
return <></>
}
const onFileLoadClicked = async () => {
const url = await fileSelectorAsDataURL("")
// input stream for client.
const audio = document.getElementById(AUDIO_ELEMENT_FOR_TEST_CONVERTED) as HTMLAudioElement
audio.pause()
audio.srcObject = null
audio.src = url
await audio.play()
if (!audioSrcNode.current) {
audioSrcNode.current = appState.audioContext!.createMediaElementSource(audio);
}
if (audioSrcNode.current.mediaElement != audio) {
audioSrcNode.current = appState.audioContext!.createMediaElementSource(audio);
}
const dst = appState.audioContext.createMediaStreamDestination()
audioSrcNode.current.connect(dst)
appState.clientSetting.updateClientSetting({ ...appState.clientSetting.clientSetting, audioInput: dst.stream })
const audio_echo = document.getElementById(AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK) as HTMLAudioElement
audio_echo.srcObject = dst.stream
audio_echo.play()
audio_echo.volume = 0
setFileInputEchoback(false)
// original stream to play.
const audio_org = document.getElementById(AUDIO_ELEMENT_FOR_TEST_ORIGINAL) as HTMLAudioElement
audio_org.src = url
audio_org.pause()
// audio_org.onplay = () => {
// console.log(audioOutputRef.current)
// // @ts-ignore
// audio_org.setSinkId(audioOutputRef.current)
// }
}
return (
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title"></div>
<div className="body-item-text">
<div style={{ display: "none" }}>
org:<audio id={AUDIO_ELEMENT_FOR_TEST_ORIGINAL} controls></audio>
</div>
<div>
<audio id={AUDIO_ELEMENT_FOR_TEST_CONVERTED} controls></audio>
<audio id={AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK} controls hidden></audio>
</div>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onFileLoadClicked}>load</div>
<input type="checkbox" checked={fileInputEchoback} onChange={(e) => { setFileInputEchoback(e.target.checked) }} /> echoback
</div>
</div>
)
}, [audioInputForGUI, appState.clientSetting.updateClientSetting, fileInputEchoback])
const audioOutputRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">AudioOutput</div>
<div className="body-select-container">
<select className="body-select" value={audioOutputForGUI} onChange={(e) => {
setAudioOutputForGUI(e.target.value)
setItem(INDEXEDDB_KEY_AUDIO_OUTPUT, e.target.value)
}}>
{
outputAudioDeviceInfo.map(x => {
return <option key={x.deviceId} value={x.deviceId}>{x.label}</option>
})
}
</select>
<audio hidden id={AUDIO_ELEMENT_FOR_PLAY_RESULT}></audio>
</div>
</div>
)
}, [outputAudioDeviceInfo, audioOutputForGUI])
const audioOutputRecordingRow = useMemo(() => {
// if (audioOutputForGUI != "record") {
// return <></>
// }
const onOutputRecordStartClicked = async () => {
setOutputRecordingStarted(true)
await appState.workletNodeSetting.startOutputRecording()
}
const onOutputRecordStopClicked = async () => {
setOutputRecordingStarted(false)
const record = await appState.workletNodeSetting.stopOutputRecording()
downloadRecord(record)
}
const startClassName = outputRecordingStarted ? "body-button-active" : "body-button-stanby"
const stopClassName = outputRecordingStarted ? "body-button-stanby" : "body-button-active"
return (
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-2">output record</div>
<div className="body-button-container">
<div onClick={onOutputRecordStartClicked} className={startClassName}>start</div>
<div onClick={onOutputRecordStopClicked} className={stopClassName}>stop</div>
</div>
<div className="body-input-container">
</div>
</div>
)
}, [audioOutputForGUI, outputRecordingStarted, appState.workletNodeSetting.startOutputRecording, appState.workletNodeSetting.stopOutputRecording])
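// Route each player element to the selected output device; everything stays muted while no
// device is selected, and the echoback element follows the echoback toggle.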
useEffect(() => {
[AUDIO_ELEMENT_FOR_PLAY_RESULT, AUDIO_ELEMENT_FOR_TEST_ORIGINAL, AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK].forEach(x => {
const audio = document.getElementById(x) as HTMLAudioElement
if (audio) {
if (audioOutputForGUI == "none") {
// @ts-ignore
audio.setSinkId("")
audio.volume = 0 // muted for every element while no output is selected
} else {
// @ts-ignore
audio.setSinkId(audioOutputForGUI)
if (x == AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK) {
audio.volume = fileInputEchoback ? 1 : 0
} else {
audio.volume = 1
}
}
}
})
}, [audioOutputForGUI])
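// Restore the previously selected output device from IndexedDB on mount.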
useEffect(() => {
const loadCache = async () => {
const key = await getItem(INDEXEDDB_KEY_AUDIO_OUTPUT)
if (key) {
setAudioOutputForGUI(key as string)
}
}
loadCache()
}, [])
useEffect(() => {
[AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK].forEach(x => {
const audio = document.getElementById(x) as HTMLAudioElement
if (audio) {
audio.volume = fileInputEchoback ? 1 : 0
}
})
}, [fileInputEchoback])
const deviceSetting = useMemo(() => {
return (
<>
{appState.frontendManagerState.stateControls.openDeviceSettingCheckbox.trigger}
<div className="partition">
<div className="partition-header">
<span className="caret">
{accodionButton}
</span>
<span className="title" onClick={() => { appState.frontendManagerState.stateControls.openDeviceSettingCheckbox.updateState(!appState.frontendManagerState.stateControls.openDeviceSettingCheckbox.checked()) }}>
Device Setting
</span>
<span className="belongings">
<input className="belongings-checkbox" type="checkbox" checked={useServerMicrophone} onChange={(e) => {
setUseServerMicrophone(e.target.checked)
}} /> use server mic (Experimental)
</span>
</div>
<div className="partition-content">
{audioInputRow}
{audioMediaInputRow}
{audioOutputRow}
{audioOutputRecordingRow}
</div>
</div>
</>
)
}, [audioInputRow, audioMediaInputRow, audioOutputRow, audioOutputRecordingRow, useServerMicrophone])
const downloadRecord = (data: Float32Array) => {
const writeString = (view: DataView, offset: number, string: string) => {
for (let i = 0; i < string.length; i++) {
view.setUint8(offset + i, string.charCodeAt(i));
}
};
const floatTo16BitPCM = (output: DataView, offset: number, input: Float32Array) => {
for (let i = 0; i < input.length; i++, offset += 2) {
const s = Math.max(-1, Math.min(1, input[i])); // clamp to [-1, 1]
output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true); // scale to signed 16-bit
}
};
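// Assemble a 44-byte RIFF/WAVE header for mono 16-bit PCM at 48kHz, followed by the samples.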
const buffer = new ArrayBuffer(44 + data.length * 2);
const view = new DataView(buffer);
// https://www.youfit.co.jp/archives/1418
writeString(view, 0, 'RIFF'); // RIFF header
view.setUint32(4, 32 + data.length * 2, true); // remaining file size after this field
writeString(view, 8, 'WAVE'); // WAVE header
writeString(view, 12, 'fmt '); // fmt chunk
view.setUint32(16, 16, true); // byte count of the fmt chunk
view.setUint16(20, 1, true); // format ID (1 = linear PCM)
view.setUint16(22, 1, true); // number of channels (mono)
view.setUint32(24, 48000, true); // sampling rate
view.setUint32(28, 48000 * 2, true); // data rate (bytes per second)
view.setUint16(32, 2, true); // block size (bytes per sample frame)
view.setUint16(34, 16, true); // bits per sample
writeString(view, 36, 'data'); // data chunk
view.setUint32(40, data.length * 2, true); // byte count of the waveform data
floatTo16BitPCM(view, 44, data); // waveform data
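// Wrap the bytes in a Blob and trigger a download through a temporary <a> element.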
const audioBlob = new Blob([view], { type: 'audio/wav' });
const url = URL.createObjectURL(audioBlob);
const a = document.createElement("a");
a.href = url;
a.download = `output.wav`;
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
URL.revokeObjectURL(url);
}
return {
deviceSetting,
}
}

View File

@ -1,118 +0,0 @@
import React, { useMemo, useState } from "react"
import { ClientState } from "@dannadori/voice-changer-client-js";
export type UseSpeakerSettingProps = {
clientState: ClientState
}
export const useSpeakerSetting = (props: UseSpeakerSettingProps) => {
const [editSpeakerTargetId, setEditSpeakerTargetId] = useState<number>(0)
const [editSpeakerTargetName, setEditSpeakerTargetName] = useState<string>("")
const srcIdRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Source Speaker Id</div>
<div className="body-select-container">
<select className="body-select" value={props.clientState.serverSetting.setting.srcId} onChange={(e) => {
props.clientState.serverSetting.setSrcId(Number(e.target.value))
}}>
{
props.clientState.clientSetting.setting.speakers.map(x => {
return <option key={x.id} value={x.id}>{x.name}({x.id})</option>
})
}
</select>
</div>
</div>
)
}, [props.clientState.clientSetting.setting.speakers, props.clientState.serverSetting.setting.srcId, props.clientState.serverSetting.setSrcId])
const dstIdRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Destination Speaker Id</div>
<div className="body-select-container">
<select className="body-select" value={props.clientState.serverSetting.setting.dstId} onChange={(e) => {
props.clientState.serverSetting.setDstId(Number(e.target.value))
}}>
{
props.clientState.clientSetting.setting.speakers.map(x => {
return <option key={x.id} value={x.id}>{x.name}({x.id})</option>
})
}
</select>
</div>
</div>
)
}, [props.clientState.clientSetting.setting.speakers, props.clientState.serverSetting.setting.dstId, props.clientState.serverSetting.setDstId])
const editSpeakerIdMappingRow = useMemo(() => {
const onSetSpeakerMappingClicked = async () => {
const targetId = editSpeakerTargetId
const targetName = editSpeakerTargetName
const targetSpeaker = props.clientState.clientSetting.setting.speakers.find(x => { return x.id == targetId })
if (targetSpeaker) {
if (targetName.length == 0) { // Delete
const newSpeakers = props.clientState.clientSetting.setting.speakers.filter(x => { return x.id != targetId })
props.clientState.clientSetting.setSpeakers(newSpeakers)
} else { // Update
targetSpeaker.name = targetName
props.clientState.clientSetting.setSpeakers([...props.clientState.clientSetting.setting.speakers])
}
} else {
if (targetName.length == 0) { // Noop
} else {// add
props.clientState.clientSetting.setting.speakers.push({
id: targetId,
name: targetName
})
props.clientState.clientSetting.setSpeakers([...props.clientState.clientSetting.setting.speakers])
}
}
}
return (
<div className="body-row split-3-1-2-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Edit Speaker Mapping</div>
<div className="body-input-container">
<input type="number" min={1} max={256} step={1} value={editSpeakerTargetId} onChange={(e) => {
const id = Number(e.target.value)
setEditSpeakerTargetId(id)
setEditSpeakerTargetName(props.clientState.clientSetting.setting.speakers.find(x => { return x.id == id })?.name || "")
}} />
</div>
<div className="body-input-container">
<input type="text" value={editSpeakerTargetName} onChange={(e) => {
setEditSpeakerTargetName(e.target.value)
}} />
</div>
<div className="body-button-container">
<div className="body-button" onClick={onSetSpeakerMappingClicked}>set</div>
</div>
</div>
)
}, [props.clientState.clientSetting.setting.speakers, editSpeakerTargetId, editSpeakerTargetName])
const speakerSetting = useMemo(() => {
return (
<>
<div className="body-row split-3-7 left-padding-1">
<div className="body-sub-section-title">Speaker Setting</div>
<div className="body-select-container">
</div>
</div>
{srcIdRow}
{dstIdRow}
{editSpeakerIdMappingRow}
</>
)
}, [srcIdRow, dstIdRow, editSpeakerIdMappingRow])
return {
speakerSetting,
}
}

View File

@ -1,61 +0,0 @@
import React, { useMemo } from "react"
import { ClientState } from "@dannadori/voice-changer-client-js";
export type UseConvertSettingProps = {
clientState: ClientState
}
export type ConvertSettingState = {
convertSetting: JSX.Element;
}
export const useConvertSetting = (props: UseConvertSettingProps): ConvertSettingState => {
const inputChunkNumRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Input Chunk Num(128sample/chunk)</div>
<div className="body-input-container">
<input type="number" min={1} max={256} step={1} value={props.clientState.clientSetting.setting.inputChunkNum} onChange={(e) => {
props.clientState.clientSetting.setInputChunkNum(Number(e.target.value))
}} />
</div>
</div>
)
}, [props.clientState.clientSetting.setting.inputChunkNum, props.clientState.clientSetting.setInputChunkNum])
const gpuRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">GPU</div>
<div className="body-input-container">
<input type="number" min={-2} max={5} step={1} value={props.clientState.serverSetting.setting.gpu} onChange={(e) => {
props.clientState.serverSetting.setGpu(Number(e.target.value))
}} />
</div>
</div>
)
}, [props.clientState.serverSetting.setting.gpu, props.clientState.serverSetting.setGpu])
const convertSetting = useMemo(() => {
return (
<>
<div className="body-row split-3-7 left-padding-1">
<div className="body-sub-section-title">Converter Setting</div>
<div className="body-select-container">
</div>
</div>
{inputChunkNumRow}
{gpuRow}
</>
)
}, [inputChunkNumRow, gpuRow])
return {
convertSetting,
}
}

View File

@ -0,0 +1,313 @@
import { F0Detector } from "@dannadori/voice-changer-client-js"
import React, { useEffect, useMemo, useState } from "react"
import { useAppState } from "./001_provider/001_AppStateProvider";
import { AnimationTypes, HeaderButton, HeaderButtonProps } from "./components/101_HeaderButton";
export type QualityControlState = {
qualityControl: JSX.Element;
}
const reloadDevices = async () => {
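// Request microphone permission first; without it, enumerateDevices() returns devices with empty labels.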
try {
const ms = await navigator.mediaDevices.getUserMedia({ video: false, audio: true });
ms.getTracks().forEach(x => { x.stop() })
} catch (e) {
console.warn("Enumerate device error::", e)
}
const mediaDeviceInfos = await navigator.mediaDevices.enumerateDevices();
const audioOutputs = mediaDeviceInfos.filter(x => { return x.kind == "audiooutput" })
return audioOutputs
}
export const useQualityControl = (): QualityControlState => {
const appState = useAppState()
const accodionButton = useMemo(() => {
const accodionButtonProps: HeaderButtonProps = {
stateControlCheckbox: appState.frontendManagerState.stateControls.openQualityControlCheckbox,
tooltip: "Open/Close",
onIcon: ["fas", "caret-up"],
offIcon: ["fas", "caret-up"],
animation: AnimationTypes.spinner,
tooltipClass: "tooltip-right",
};
return <HeaderButton {...accodionButtonProps}></HeaderButton>;
}, []);
const [recording, setRecording] = useState<boolean>(false)
const [outputAudioDeviceInfo, setOutputAudioDeviceInfo] = useState<MediaDeviceInfo[]>([])
const [audioOutputForGUI, setAudioOutputForGUI] = useState<string>("none")
useEffect(() => {
const initialize = async () => {
const audioInfo = await reloadDevices()
setOutputAudioDeviceInfo(audioInfo)
}
initialize()
}, [])
const noiseControlRow = useMemo(() => {
return (
<div className="body-row split-3-2-2-2-1 left-padding-1 guided">
<div className="body-item-title left-padding-1 ">Noise Suppression</div>
<div>
<input type="checkbox" checked={appState.clientSetting.clientSetting.echoCancel} onChange={(e) => {
appState.clientSetting.updateClientSetting({ ...appState.clientSetting.clientSetting, echoCancel: e.target.checked })
}} /> echo cancel
</div>
<div>
<input type="checkbox" checked={appState.clientSetting.clientSetting.noiseSuppression} onChange={(e) => {
appState.clientSetting.updateClientSetting({ ...appState.clientSetting.clientSetting, noiseSuppression: e.target.checked })
}} /> suppression1
</div>
<div>
<input type="checkbox" checked={appState.clientSetting.clientSetting.noiseSuppression2} onChange={(e) => {
appState.clientSetting.updateClientSetting({ ...appState.clientSetting.clientSetting, noiseSuppression2: e.target.checked })
}} /> suppression2
</div>
<div className="body-button-container">
</div>
</div>
)
}, [
appState.clientSetting.clientSetting.echoCancel,
appState.clientSetting.clientSetting.noiseSuppression,
appState.clientSetting.clientSetting.noiseSuppression2,
appState.clientSetting.updateClientSetting
])
const gainControlRow = useMemo(() => {
return (
<div className="body-row split-3-2-2-3 left-padding-1 guided">
<div className="body-item-title left-padding-1 ">Gain Control</div>
<div>
<span className="body-item-input-slider-label">in</span>
<input type="range" className="body-item-input-slider" min="0.0" max="1.0" step="0.1" value={appState.clientSetting.clientSetting.inputGain} onChange={(e) => {
appState.clientSetting.updateClientSetting({ ...appState.clientSetting.clientSetting, inputGain: Number(e.target.value) })
}}></input>
<span className="body-item-input-slider-val">{appState.clientSetting.clientSetting.inputGain}</span>
</div>
<div>
<span className="body-item-input-slider-label">out</span>
<input type="range" className="body-item-input-slider" min="0.0" max="1.0" step="0.1" value={appState.clientSetting.clientSetting.outputGain} onChange={(e) => {
appState.clientSetting.updateClientSetting({ ...appState.clientSetting.clientSetting, outputGain: Number(e.target.value) })
}}></input>
<span className="body-item-input-slider-val">{appState.clientSetting.clientSetting.outputGain}</span>
</div>
<div className="body-button-container">
</div>
</div>
)
}, [
appState.clientSetting.clientSetting.inputGain,
appState.clientSetting.clientSetting.outputGain,
appState.clientSetting.updateClientSetting
])
const f0DetectorRow = useMemo(() => {
const desc = { "harvest": "High Quality", "dio": "Light Weight" }
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1 ">F0 Detector</div>
<div className="body-select-container">
<select className="body-select" value={appState.serverSetting.serverSetting.f0Detector} onChange={(e) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, f0Detector: e.target.value as F0Detector })
}}>
{
Object.values(F0Detector).map(x => {
//@ts-ignore
return <option key={x} value={x}>{x}({desc[x]})</option>
})
}
</select>
</div>
</div>
)
}, [appState.serverSetting.serverSetting.f0Detector, appState.serverSetting.updateServerSettings])
const recordIORow = useMemo(() => {
const onRecordStartClicked = async () => {
setRecording(true)
await appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, recordIO: 1 })
}
const onRecordStopClicked = async () => {
setRecording(false)
await appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, recordIO: 0 })
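// Append a timestamp query so the browser fetches the freshly recorded files instead of serving cached ones.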
// set wav (input)
const wavInput = document.getElementById("body-wav-container-wav-input") as HTMLAudioElement
wavInput.src = "/tmp/in.wav?" + new Date().getTime()
wavInput.controls = true
// @ts-ignore
wavInput.setSinkId(audioOutputForGUI)
// set wav (output)
const wavOutput = document.getElementById("body-wav-container-wav-output") as HTMLAudioElement
wavOutput.src = "/tmp/out.wav?" + new Date().getTime()
wavOutput.controls = true
// @ts-ignore
wavOutput.setSinkId(audioOutputForGUI)
}
const onRecordAnalyzeClicked = async () => {
if (appState.frontendManagerState.isConverting) {
alert("please stop voice conversion. 解析処理と音声変換を同時に行うことはできません。音声変化をストップしてください。")
return
}
appState.frontendManagerState.setIsAnalyzing(true)
await appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, recordIO: 2 })
// set spectrogram (dio)
const imageDio = document.getElementById("body-image-container-img-dio") as HTMLImageElement
imageDio.src = "/tmp/analyze-dio.png?" + new Date().getTime()
imageDio.style.width = "100%"
// set spectrogram (harvest)
const imageHarvest = document.getElementById("body-image-container-img-harvest") as HTMLImageElement
imageHarvest.src = "/tmp/analyze-harvest.png?" + new Date().getTime()
imageHarvest.style.width = "100%"
appState.frontendManagerState.setIsAnalyzing(false)
}
const startClassName = recording ? "body-button-active" : "body-button-stanby"
const stopClassName = recording ? "body-button-stanby" : "body-button-active"
const analyzeClassName = appState.frontendManagerState.isAnalyzing ? "body-button-active" : "body-button-stanby"
const analyzeLabel = appState.frontendManagerState.isAnalyzing ? "wait..." : "Analyze"
return (
<>
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1 ">Analyzer(Experimental)</div>
<div className="body-button-container">
</div>
</div>
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-2 ">
Sampling
</div>
<div className="body-button-container">
<div onClick={onRecordStartClicked} className={startClassName}>Start</div>
<div onClick={onRecordStopClicked} className={stopClassName}>Stop</div>
<div onClick={onRecordAnalyzeClicked} className={analyzeClassName}>{analyzeLabel}</div>
</div>
</div>
<div className="body-row split-3-2-2-3 left-padding-1 guided">
<div className="body-item-title left-padding-2 ">
<div>
Play
</div>
<select className="body-select-50 left-margin-2" value={audioOutputForGUI} onChange={(e) => {
setAudioOutputForGUI(e.target.value)
const wavInput = document.getElementById("body-wav-container-wav-input") as HTMLAudioElement
const wavOutput = document.getElementById("body-wav-container-wav-output") as HTMLAudioElement
//@ts-ignore
wavInput.setSinkId(e.target.value)
//@ts-ignore
wavOutput.setSinkId(e.target.value)
}}>
{
outputAudioDeviceInfo.map(x => {
return <option key={x.deviceId} value={x.deviceId}>{x.label}</option>
})
}
</select>
</div>
{/* <div>
<div className="body-wav-container">
<div className="body-wav-container-title">Input</div>
<div className="body-wav-container-title">Output</div>
</div>
<div className="body-wav-container">
<div className="body-wav-container-wav">
<audio src="" id="body-wav-container-wav-input"></audio>
</div>
<div className="body-wav-container-wav" >
<audio src="" id="body-wav-container-wav-output"></audio>
</div>
</div>
</div> */}
<div>
<div className="body-wav-container-title">Input</div>
<div className="body-wav-container-wav">
<audio src="" id="body-wav-container-wav-input"></audio>
</div>
</div>
<div >
<div className="body-wav-container-title">Output</div>
<div className="body-wav-container-wav" >
<audio src="" id="body-wav-container-wav-output"></audio>
</div>
</div>
<div></div>
</div>
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-2 ">
Spectrogram
</div>
<div>
<div className="body-image-container">
<div className="body-image-container-title">PyWorld Dio</div>
<div className="body-image-container-title">PyWorld Harvest</div>
</div>
<div className="body-image-container">
<div className="body-image-container-img" >
<img src="" alt="" id="body-image-container-img-dio" />
</div>
<div className="body-image-container-img">
<img src="" alt="" id="body-image-container-img-harvest" />
</div>
</div>
</div>
</div>
</>
)
}, [appState.serverSetting.serverSetting.recordIO, appState.serverSetting.updateServerSettings, outputAudioDeviceInfo, audioOutputForGUI, appState.frontendManagerState.isAnalyzing, appState.frontendManagerState.isConverting])
const QualityControlContent = useMemo(() => {
return (
<>
{noiseControlRow}
{gainControlRow}
{f0DetectorRow}
<div className="body-row divider"></div>
{recordIORow}
</>
)
}, [gainControlRow, noiseControlRow, f0DetectorRow, recordIORow])
const qualityControl = useMemo(() => {
return (
<>
{appState.frontendManagerState.stateControls.openQualityControlCheckbox.trigger}
<div className="partition">
<div className="partition-header">
<span className="caret">
{accodionButton}
</span>
<span className="title" onClick={() => { appState.frontendManagerState.stateControls.openQualityControlCheckbox.updateState(!appState.frontendManagerState.stateControls.openQualityControlCheckbox.checked()) }}>
Quality Control
</span>
</div>
<div className="partition-content">
{QualityControlContent}
</div>
</div>
</>
)
}, [QualityControlContent])
return {
qualityControl,
}
}

View File

@ -1,294 +0,0 @@
import { BufferSize, Protocol, SampleRate, VoiceChangerMode } from "@dannadori/voice-changer-client-js"
import React, { useMemo, useState } from "react"
import { ClientState } from "@dannadori/voice-changer-client-js";
export type UseAdvancedSettingProps = {
clientState: ClientState
}
export type AdvancedSettingState = {
advancedSetting: JSX.Element;
}
export const useAdvancedSetting = (props: UseAdvancedSettingProps): AdvancedSettingState => {
const [showAdvancedSetting, setShowAdvancedSetting] = useState<boolean>(false)
const mmvcServerUrlRow = useMemo(() => {
const onSetServerClicked = async () => {
const input = document.getElementById("mmvc-server-url") as HTMLInputElement
props.clientState.clientSetting.setServerUrl(input.value)
}
return (
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">MMVC Server</div>
<div className="body-input-container">
<input type="text" defaultValue={props.clientState.clientSetting.setting.mmvcServerUrl} id="mmvc-server-url" className="body-item-input" />
</div>
<div className="body-button-container">
<div className="body-button" onClick={onSetServerClicked}>set</div>
</div>
</div>
)
}, [props.clientState.clientSetting.setting.mmvcServerUrl, props.clientState.clientSetting.setServerUrl])
const protocolRow = useMemo(() => {
const onProtocolChanged = async (val: Protocol) => {
props.clientState.clientSetting.setProtocol(val)
}
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Protocol</div>
<div className="body-select-container">
<select className="body-select" value={props.clientState.clientSetting.setting.protocol} onChange={(e) => {
onProtocolChanged(e.target.value as Protocol)
}}>
{
Object.values(Protocol).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [props.clientState.clientSetting.setting.protocol, props.clientState.clientSetting.setProtocol])
const sampleRateRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Sample Rate</div>
<div className="body-select-container">
<select className="body-select" value={props.clientState.clientSetting.setting.sampleRate} onChange={(e) => {
props.clientState.clientSetting.setSampleRate(Number(e.target.value) as SampleRate)
}}>
{
Object.values(SampleRate).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [props.clientState.clientSetting.setting.sampleRate, props.clientState.clientSetting.setSampleRate])
const bufferSizeRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Buffer Size</div>
<div className="body-select-container">
<select className="body-select" value={props.clientState.clientSetting.setting.bufferSize} onChange={(e) => {
props.clientState.clientSetting.setBufferSize(Number(e.target.value) as BufferSize)
}}>
{
Object.values(BufferSize).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [props.clientState.clientSetting.setting.bufferSize, props.clientState.clientSetting.setBufferSize])
const convertChunkNumRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Convert Chunk Num(128sample/chunk)</div>
<div className="body-input-container">
<input type="number" min={1} max={256} step={1} value={props.clientState.serverSetting.setting.convertChunkNum} onChange={(e) => {
props.clientState.serverSetting.setConvertChunkNum(Number(e.target.value))
}} />
</div>
</div>
)
}, [props.clientState.serverSetting.setting.convertChunkNum, props.clientState.serverSetting.setConvertChunkNum])
const minConvertSizeRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Min Convert Size(byte)</div>
<div className="body-input-container">
<input type="number" min={0} max={8196} step={8196} value={props.clientState.serverSetting.setting.minConvertSize} onChange={(e) => {
props.clientState.serverSetting.setMinConvertSize(Number(e.target.value))
}} />
</div>
</div>
)
}, [props.clientState.serverSetting.setting.minConvertSize, props.clientState.serverSetting.setMinConvertSize])
const crossFadeOverlapRateRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Cross Fade Overlap Rate</div>
<div className="body-input-container">
<input type="number" min={0.1} max={1} step={0.1} value={props.clientState.serverSetting.setting.crossFadeOverlapRate} onChange={(e) => {
props.clientState.serverSetting.setCrossFadeOverlapRate(Number(e.target.value))
}} />
</div>
</div>
)
}, [props.clientState.serverSetting.setting.crossFadeOverlapRate, props.clientState.serverSetting.setCrossFadeOverlapRate])
const crossFadeOffsetRateRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Cross Fade Offset Rate</div>
<div className="body-input-container">
<input type="number" min={0} max={1} step={0.1} value={props.clientState.serverSetting.setting.crossFadeOffsetRate} onChange={(e) => {
props.clientState.serverSetting.setCrossFadeOffsetRate(Number(e.target.value))
}} />
</div>
</div>
)
}, [props.clientState.serverSetting.setting.crossFadeOffsetRate, props.clientState.serverSetting.setCrossFadeOffsetRate])
const crossFadeEndRateRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Cross Fade End Rate</div>
<div className="body-input-container">
<input type="number" min={0} max={1} step={0.1} value={props.clientState.serverSetting.setting.crossFadeEndRate} onChange={(e) => {
props.clientState.serverSetting.setCrossFadeEndRate(Number(e.target.value))
}} />
</div>
</div>
)
}, [props.clientState.serverSetting.setting.crossFadeEndRate, props.clientState.serverSetting.setCrossFadeEndRate])
const vfForceDisableRow = useMemo(() => {
return (
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-1 ">VF Disabled</div>
<div>
<input type="checkbox" checked={props.clientState.clientSetting.setting.forceVfDisable} onChange={(e) => {
props.clientState.clientSetting.setVfForceDisabled(e.target.checked)
}} />
</div>
<div className="body-button-container">
</div>
</div>
)
}, [props.clientState.clientSetting.setting.forceVfDisable, props.clientState.clientSetting.setVfForceDisabled])
const voiceChangeModeRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1 ">Voice Change Mode</div>
<div className="body-select-container">
<select className="body-select" value={props.clientState.clientSetting.setting.voiceChangerMode} onChange={(e) => {
props.clientState.clientSetting.setVoiceChangerMode(e.target.value as VoiceChangerMode)
}}>
{
Object.values(VoiceChangerMode).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [props.clientState.clientSetting.setting.voiceChangerMode, props.clientState.clientSetting.setVoiceChangerMode])
const workletSettingRow = useMemo(() => {
return (
<>
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Trancate Num</div>
<div className="body-input-container">
<input type="number" min={50} max={300} step={1} value={props.clientState.workletSetting.setting.numTrancateTreshold} onChange={(e) => {
props.clientState.workletSetting.setSetting({
...props.clientState.workletSetting.setting,
numTrancateTreshold: Number(e.target.value)
})
}} />
</div>
</div>
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Trancate Vol</div>
<div className="body-input-container">
<input type="number" min={0.0001} max={0.0009} step={0.0001} value={props.clientState.workletSetting.setting.volTrancateThreshold} onChange={(e) => {
props.clientState.workletSetting.setSetting({
...props.clientState.workletSetting.setting,
volTrancateThreshold: Number(e.target.value)
})
}} />
</div>
</div>
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Trancate Vol Length</div>
<div className="body-input-container">
<input type="number" min={16} max={128} step={1} value={props.clientState.workletSetting.setting.volTrancateLength} onChange={(e) => {
props.clientState.workletSetting.setSetting({
...props.clientState.workletSetting.setting,
volTrancateLength: Number(e.target.value)
})
}} />
</div>
</div>
</>
)
}, [props.clientState.workletSetting.setting, props.clientState.workletSetting.setSetting])
const advanceSettingContent = useMemo(() => {
if (!showAdvancedSetting) return <></>
return (
<>
<div className="body-row divider"></div>
{mmvcServerUrlRow}
{protocolRow}
<div className="body-row divider"></div>
{sampleRateRow}
{bufferSizeRow}
<div className="body-row divider"></div>
{convertChunkNumRow}
{minConvertSizeRow}
{crossFadeOverlapRateRow}
{crossFadeOffsetRateRow}
{crossFadeEndRateRow}
<div className="body-row divider"></div>
{vfForceDisableRow}
{voiceChangeModeRow}
<div className="body-row divider"></div>
{workletSettingRow}
<div className="body-row divider"></div>
</>
)
}, [showAdvancedSetting, mmvcServerUrlRow, protocolRow, sampleRateRow, bufferSizeRow, convertChunkNumRow, minConvertSizeRow, crossFadeOverlapRateRow, crossFadeOffsetRateRow, crossFadeEndRateRow, vfForceDisableRow, voiceChangeModeRow, workletSettingRow])
const advancedSetting = useMemo(() => {
return (
<>
<div className="body-row split-3-7 left-padding-1">
<div className="body-sub-section-title">Advanced Setting</div>
<div>
<input type="checkbox" checked={showAdvancedSetting} onChange={(e) => {
setShowAdvancedSetting(e.target.checked)
}} /> show
</div>
</div>
{advanceSettingContent}
</>
)
}, [showAdvancedSetting, advanceSettingContent])
return {
advancedSetting,
}
}

View File

@ -0,0 +1,197 @@
import React, { useEffect, useMemo } from "react"
import { useAppState } from "./001_provider/001_AppStateProvider";
import { AnimationTypes, HeaderButton, HeaderButtonProps } from "./components/101_HeaderButton";
export const useSpeakerSetting = () => {
const appState = useAppState()
const accodionButton = useMemo(() => {
const accodionButtonProps: HeaderButtonProps = {
stateControlCheckbox: appState.frontendManagerState.stateControls.openSpeakerSettingCheckbox,
tooltip: "Open/Close",
onIcon: ["fas", "caret-up"],
offIcon: ["fas", "caret-up"],
animation: AnimationTypes.spinner,
tooltipClass: "tooltip-right",
};
return <HeaderButton {...accodionButtonProps}></HeaderButton>;
}, []);
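// The recommended F0 factor is the ratio of the destination speaker's average F0 to the
// source speaker's, looked up from the loaded correspondence list.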
const calcDefaultF0Factor = (srcId: number, dstId: number) => {
const src = appState.clientSetting.clientSetting.correspondences?.find(x => {
return x.sid == srcId
})
const dst = appState.clientSetting.clientSetting.correspondences?.find(x => {
return x.sid == dstId
})
const recommendedF0Factor = dst && src ? dst.correspondence / src.correspondence : 0
return recommendedF0Factor
}
useEffect(() => {
const recF0 = calcDefaultF0Factor(appState.serverSetting.serverSetting.srcId, appState.serverSetting.serverSetting.dstId)
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, f0Factor: recF0 })
}, [appState.clientSetting.clientSetting.correspondences])
const srcIdRow = useMemo(() => {
const selected = appState.clientSetting.clientSetting.correspondences?.find(x => {
return x.sid == appState.serverSetting.serverSetting.srcId
})
return (
<div className="body-row split-3-2-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Source Speaker Id</div>
<div className="body-select-container">
<select className="body-select" value={appState.serverSetting.serverSetting.srcId} onChange={(e) => {
const recF0 = calcDefaultF0Factor(Number(e.target.value), appState.serverSetting.serverSetting.dstId)
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, srcId: Number(e.target.value), f0Factor: recF0 })
}}>
{
appState.clientSetting.clientSetting.correspondences?.map(x => {
return <option key={x.sid} value={x.sid}>{x.dirname}({x.sid})</option>
})
}
</select>
</div>
<div className="body-item-text">
<div>F0: {selected?.correspondence.toFixed(1) || ""}</div>
</div>
<div className="body-item-text"></div>
</div>
)
}, [appState.serverSetting.serverSetting.srcId, appState.serverSetting.serverSetting.dstId, appState.clientSetting.clientSetting.correspondences, appState.serverSetting.updateServerSettings])
const dstIdRow = useMemo(() => {
const selected = appState.clientSetting.clientSetting.correspondences?.find(x => {
return x.sid == appState.serverSetting.serverSetting.dstId
})
return (
<div className="body-row split-3-2-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Destination Speaker Id</div>
<div className="body-select-container">
<select className="body-select" value={appState.serverSetting.serverSetting.dstId} onChange={(e) => {
const recF0 = calcDefaultF0Factor(appState.serverSetting.serverSetting.srcId, Number(e.target.value))
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, dstId: Number(e.target.value), f0Factor: recF0 })
}}>
{
// appState.clientSetting.setting.speakers.map(x => {
// return <option key={x.id} value={x.id}>{x.name}({x.id})</option>
// })
appState.clientSetting.clientSetting.correspondences?.map(x => {
return <option key={x.sid} value={x.sid}>{x.dirname}({x.sid})</option>
})
}
</select>
</div>
<div className="body-item-text">
<div>F0: {selected?.correspondence.toFixed(1) || ""}</div>
</div>
<div className="body-item-text"></div>
</div>
)
}, [appState.serverSetting.serverSetting.srcId, appState.serverSetting.serverSetting.dstId, appState.clientSetting.clientSetting.correspondences, appState.serverSetting.updateServerSettings])
// const editSpeakerIdMappingRow = useMemo(() => {
// const onSetSpeakerMappingClicked = async () => {
// const targetId = editSpeakerTargetId
// const targetName = editSpeakerTargetName
// const targetSpeaker = appState.clientSetting.setting.speakers.find(x => { return x.id == targetId })
// if (targetSpeaker) {
// if (targetName.length == 0) { // Delete
// const newSpeakers = appState.clientSetting.setting.speakers.filter(x => { return x.id != targetId })
// appState.clientSetting.setSpeakers(newSpeakers)
// } else { // Update
// targetSpeaker.name = targetName
// appState.clientSetting.setSpeakers([...appState.clientSetting.setting.speakers])
// }
// } else {
// if (targetName.length == 0) { // Noop
// } else {// add
// appState.clientSetting.setting.speakers.push({
// id: targetId,
// name: targetName
// })
// appState.clientSetting.setSpeakers([...appState.clientSetting.setting.speakers])
// }
// }
// }
// return (
// <div className="body-row split-3-1-2-4 left-padding-1 guided">
// <div className="body-item-title left-padding-1">Edit Speaker Mapping</div>
// <div className="body-input-container">
// <input type="number" min={1} max={256} step={1} value={editSpeakerTargetId} onChange={(e) => {
// const id = Number(e.target.value)
// setEditSpeakerTargetId(id)
// setEditSpeakerTargetName(appState.clientSetting.setting.speakers.find(x => { return x.id == id })?.name || "")
// }} />
// </div>
// <div className="body-input-container">
// <input type="text" value={editSpeakerTargetName} onChange={(e) => {
// setEditSpeakerTargetName(e.target.value)
// }} />
// </div>
// <div className="body-button-container">
// <div className="body-button" onClick={onSetSpeakerMappingClicked}>set</div>
// </div>
// </div>
// )
// }, [appState.clientSetting.setting.speakers, editSpeakerTargetId, editSpeakerTargetName])
const f0FactorRow = useMemo(() => {
const recommendedF0Factor = calcDefaultF0Factor(appState.serverSetting.serverSetting.srcId, appState.serverSetting.serverSetting.dstId) // reuse the helper instead of duplicating the lookup
return (
<div className="body-row split-3-2-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">F0 Factor</div>
<div className="body-input-container">
<input type="range" className="body-item-input-slider" min="0.1" max="5.0" step="0.1" value={appState.serverSetting.serverSetting.f0Factor || 0} onChange={(e) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, f0Factor: Number(e.target.value) })
}}></input>
<span className="body-item-input-slider-val">{appState.serverSetting.serverSetting.f0Factor?.toFixed(1) || 0}</span>
</div>
<div className="body-item-text"></div>
<div className="body-item-text">recommend: {recommendedF0Factor.toFixed(1)}</div>
</div>
)
}, [appState.serverSetting.serverSetting.f0Factor, appState.serverSetting.serverSetting.srcId, appState.serverSetting.serverSetting.dstId, appState.clientSetting.clientSetting.correspondences, appState.serverSetting.updateServerSettings])
const speakerSetting = useMemo(() => {
return (
<>
{appState.frontendManagerState.stateControls.openSpeakerSettingCheckbox.trigger}
<div className="partition">
<div className="partition-header">
<span className="caret">
{accodionButton}
</span>
<span className="title" onClick={() => { appState.frontendManagerState.stateControls.openSpeakerSettingCheckbox.updateState(!appState.frontendManagerState.stateControls.openSpeakerSettingCheckbox.checked()) }}>
Speaker Setting
</span>
</div>
<div className="partition-content">
{srcIdRow}
{dstIdRow}
{f0FactorRow}
</div>
</div>
</>
)
}, [srcIdRow, dstIdRow, f0FactorRow])
return {
speakerSetting,
}
}

View File

@ -0,0 +1,84 @@
import React, { useMemo } from "react"
import { useAppState } from "./001_provider/001_AppStateProvider";
import { AnimationTypes, HeaderButton, HeaderButtonProps } from "./components/101_HeaderButton";
export type ConvertSettingState = {
convertSetting: JSX.Element;
}
export const useConvertSetting = (): ConvertSettingState => {
const appState = useAppState()
const accodionButton = useMemo(() => {
const accodionButtonProps: HeaderButtonProps = {
stateControlCheckbox: appState.frontendManagerState.stateControls.openConverterSettingCheckbox,
tooltip: "Open/Close",
onIcon: ["fas", "caret-up"],
offIcon: ["fas", "caret-up"],
animation: AnimationTypes.spinner,
tooltipClass: "tooltip-right",
};
return <HeaderButton {...accodionButtonProps}></HeaderButton>;
}, []);
const inputChunkNumRow = useMemo(() => {
return (
<div className="body-row split-3-2-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Input Chunk Num(128sample/chunk)</div>
<div className="body-input-container">
<input type="number" min={1} max={256} step={1} value={appState.workletNodeSetting.workletNodeSetting.inputChunkNum} onChange={(e) => {
appState.workletNodeSetting.updateWorkletNodeSetting({ ...appState.workletNodeSetting.workletNodeSetting, inputChunkNum: Number(e.target.value) })
}} />
</div>
<div className="body-item-text">
<div>buff: {(appState.workletNodeSetting.workletNodeSetting.inputChunkNum * 128 * 1000 / 48000).toFixed(1)}ms</div>
</div>
<div className="body-item-text"></div>
</div>
)
}, [appState.workletNodeSetting.workletNodeSetting.inputChunkNum, appState.workletNodeSetting.updateWorkletNodeSetting])
const gpuRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">GPU</div>
<div className="body-input-container">
<input type="number" min={-2} max={5} step={1} value={appState.serverSetting.serverSetting.gpu} onChange={(e) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, gpu: Number(e.target.value) })
}} />
</div>
</div>
)
}, [appState.serverSetting.serverSetting.gpu, appState.serverSetting.updateServerSettings])
const convertSetting = useMemo(() => {
return (
<>
{appState.frontendManagerState.stateControls.openConverterSettingCheckbox.trigger}
<div className="partition">
<div className="partition-header">
<span className="caret">
{accodionButton}
</span>
<span className="title" onClick={() => { appState.frontendManagerState.stateControls.openConverterSettingCheckbox.updateState(!appState.frontendManagerState.stateControls.openConverterSettingCheckbox.checked()) }}>
Converter Setting
</span>
</div>
<div className="partition-content">
{inputChunkNumRow}
{gpuRow}
</div>
</div>
</>
)
}, [inputChunkNumRow, gpuRow])
return {
convertSetting,
}
}

View File

@ -1,109 +0,0 @@
import React, { useMemo, useState } from "react"
import { ClientState } from "@dannadori/voice-changer-client-js";
export type UseServerControlProps = {
clientState: ClientState
}
export const useServerControl = (props: UseServerControlProps) => {
const [isStarted, setIsStarted] = useState<boolean>(false)
const startButtonRow = useMemo(() => {
const onStartClicked = async () => {
setIsStarted(true)
await props.clientState.clientSetting.start()
}
const onStopClicked = async () => {
setIsStarted(false)
console.log("stop click1")
await props.clientState.clientSetting.stop()
console.log("stop click2")
}
const startClassName = isStarted ? "body-button-active" : "body-button-stanby"
const stopClassName = isStarted ? "body-button-stanby" : "body-button-active"
return (
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Start</div>
<div className="body-button-container">
<div onClick={onStartClicked} className={startClassName}>start</div>
<div onClick={onStopClicked} className={stopClassName}>stop</div>
</div>
<div className="body-input-container">
</div>
</div>
)
}, [isStarted, props.clientState.clientSetting.start, props.clientState.clientSetting.stop])
const performanceRow = useMemo(() => {
return (
<>
<div className="body-row split-3-1-1-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">monitor:</div>
<div className="body-item-text">vol<span className="body-item-text-small">(rms)</span></div>
<div className="body-item-text">buf<span className="body-item-text-small">(ms)</span></div>
<div className="body-item-text">res<span className="body-item-text-small">(ms)</span></div>
<div className="body-item-text"></div>
</div>
<div className="body-row split-3-1-1-1-4 left-padding-1 guided">
<div className="body-item-title left-padding-1"></div>
<div className="body-item-text">{props.clientState.volume.toFixed(4)}</div>
<div className="body-item-text">{props.clientState.bufferingTime}</div>
<div className="body-item-text">{props.clientState.responseTime}</div>
<div className="body-item-text"></div>
</div>
</>
)
}, [props.clientState.volume, props.clientState.bufferingTime, props.clientState.responseTime])
const infoRow = useMemo(() => {
const onReloadClicked = async () => {
const info = await props.clientState.getInfo()
console.log("info", info)
}
return (
<>
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">Model Info:</div>
<div className="body-item-text">
<span className="body-item-text-item">{props.clientState.serverSetting.serverInfo?.configFile || ""}</span>
<span className="body-item-text-item">{props.clientState.serverSetting.serverInfo?.pyTorchModelFile || ""}</span>
<span className="body-item-text-item">{props.clientState.serverSetting.serverInfo?.onnxModelFile || ""}</span>
</div>
<div className="body-button-container">
<div className="body-button" onClick={onReloadClicked}>reload</div>
</div>
</div>
</>
)
}, [props.clientState.getInfo, props.clientState.serverSetting.serverInfo])
const serverControl = useMemo(() => {
return (
<>
<div className="body-row split-3-7 left-padding-1">
<div className="body-sub-section-title">Server Control</div>
<div className="body-select-container">
</div>
</div>
{startButtonRow}
{performanceRow}
{infoRow}
</>
)
}, [startButtonRow, performanceRow, infoRow])
return {
serverControl,
}
}

View File

@ -0,0 +1,266 @@
import { CrossFadeOverlapSize, DownSamplingMode, InputSampleRate, Protocol, SampleRate } from "@dannadori/voice-changer-client-js"
import React, { useMemo } from "react"
import { useAppState } from "./001_provider/001_AppStateProvider";
import { AnimationTypes, HeaderButton, HeaderButtonProps } from "./components/101_HeaderButton";
export type AdvancedSettingState = {
advancedSetting: JSX.Element;
}
export const useAdvancedSetting = (): AdvancedSettingState => {
const appState = useAppState()
const accodionButton = useMemo(() => {
const accodionButtonProps: HeaderButtonProps = {
stateControlCheckbox: appState.frontendManagerState.stateControls.openAdvancedSettingCheckbox,
tooltip: "Open/Close",
onIcon: ["fas", "caret-up"],
offIcon: ["fas", "caret-up"],
animation: AnimationTypes.spinner,
tooltipClass: "tooltip-right",
};
return <HeaderButton {...accodionButtonProps}></HeaderButton>;
}, []);
const mmvcServerUrlRow = useMemo(() => {
const onSetServerClicked = async () => {
const input = document.getElementById("mmvc-server-url") as HTMLInputElement
appState.clientSetting.setServerUrl(input.value)
}
return (
<div className="body-row split-3-3-4 left-padding-1 guided">
<div className="body-item-title left-padding-1">MMVC Server</div>
<div className="body-input-container">
<input type="text" defaultValue={appState.workletNodeSetting.workletNodeSetting.serverUrl} id="mmvc-server-url" className="body-item-input" />
</div>
<div className="body-button-container">
<div className="body-button" onClick={onSetServerClicked}>set</div>
</div>
</div>
)
}, [appState.workletNodeSetting.workletNodeSetting.serverUrl, appState.clientSetting.setServerUrl])
const protocolRow = useMemo(() => {
const onProtocolChanged = async (val: Protocol) => {
appState.workletNodeSetting.updateWorkletNodeSetting({ ...appState.workletNodeSetting.workletNodeSetting, protocol: val })
}
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Protocol</div>
<div className="body-select-container">
<select className="body-select" value={appState.workletNodeSetting.workletNodeSetting.protocol} onChange={(e) => {
onProtocolChanged(e.target.value as Protocol)
}}>
{
Object.values(Protocol).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [appState.workletNodeSetting.workletNodeSetting.protocol, appState.workletNodeSetting.updateWorkletNodeSetting])
const sampleRateRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Sample Rate</div>
<div className="body-select-container">
<select className="body-select" value={appState.clientSetting.clientSetting.sampleRate} onChange={(e) => {
appState.clientSetting.updateClientSetting({ ...appState.clientSetting.clientSetting, sampleRate: Number(e.target.value) as SampleRate })
}}>
{
Object.values(SampleRate).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [appState.clientSetting.clientSetting.sampleRate, appState.clientSetting.updateClientSetting])
const sendingSampleRateRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Sending Sample Rate</div>
<div className="body-select-container">
<select className="body-select" value={appState.workletNodeSetting.workletNodeSetting.sendingSampleRate} onChange={(e) => {
appState.workletNodeSetting.updateWorkletNodeSetting({ ...appState.workletNodeSetting.workletNodeSetting, sendingSampleRate: Number(e.target.value) as InputSampleRate })
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, inputSampleRate: Number(e.target.value) as InputSampleRate })
}}>
{
Object.values(InputSampleRate).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [appState.workletNodeSetting.workletNodeSetting.sendingSampleRate, appState.workletNodeSetting.updateWorkletNodeSetting, appState.serverSetting.updateServerSettings])
const crossFadeOverlapSizeRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Cross Fade Overlap Size</div>
<div className="body-select-container">
<select className="body-select" value={appState.serverSetting.serverSetting.crossFadeOverlapSize} onChange={(e) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, crossFadeOverlapSize: Number(e.target.value) as CrossFadeOverlapSize })
}}>
{
Object.values(CrossFadeOverlapSize).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [appState.serverSetting.serverSetting.crossFadeOverlapSize, appState.serverSetting.updateServerSettings])
const crossFadeOffsetRateRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Cross Fade Offset Rate</div>
<div className="body-input-container">
<input type="number" min={0} max={1} step={0.1} value={appState.serverSetting.serverSetting.crossFadeOffsetRate} onChange={(e) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, crossFadeOffsetRate: Number(e.target.value) })
}} />
</div>
</div>
)
}, [appState.serverSetting.serverSetting.crossFadeOffsetRate, appState.serverSetting.updateServerSettings])
const crossFadeEndRateRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Cross Fade End Rate</div>
<div className="body-input-container">
<input type="number" min={0} max={1} step={0.1} value={appState.serverSetting.serverSetting.crossFadeEndRate} onChange={(e) => {
appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, crossFadeEndRate: Number(e.target.value) })
}} />
</div>
</div>
)
}, [appState.serverSetting.serverSetting.crossFadeEndRate, appState.serverSetting.updateServerSettings])
const downSamplingModeRow = useMemo(() => {
return (
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1 ">DownSamplingMode</div>
<div className="body-select-container">
<select className="body-select" value={appState.workletNodeSetting.workletNodeSetting.downSamplingMode} onChange={(e) => {
appState.workletNodeSetting.updateWorkletNodeSetting({ ...appState.workletNodeSetting.workletNodeSetting, downSamplingMode: e.target.value as DownSamplingMode })
}}>
{
Object.values(DownSamplingMode).map(x => {
return <option key={x} value={x}>{x}</option>
})
}
</select>
</div>
</div>
)
}, [appState.workletNodeSetting.workletNodeSetting.downSamplingMode, appState.workletNodeSetting.updateWorkletNodeSetting])
const workletSettingRow = useMemo(() => {
return (
<>
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Trancate Num</div>
<div className="body-input-container">
<input type="number" min={5} max={300} step={1} value={appState.workletSetting.setting.numTrancateTreshold} onChange={(e) => {
appState.workletSetting.setSetting({
...appState.workletSetting.setting,
numTrancateTreshold: Number(e.target.value)
})
}} />
</div>
</div>
{/* Silent skip was removed as of v.1.5.x */}
{/* <div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Trancate Vol</div>
<div className="body-input-container">
<input type="number" min={0.0001} max={0.0009} step={0.0001} value={appState.workletSetting.setting.volTrancateThreshold} onChange={(e) => {
appState.workletSetting.setSetting({
...appState.workletSetting.setting,
volTrancateThreshold: Number(e.target.value)
})
}} />
</div>
</div>
<div className="body-row split-3-7 left-padding-1 guided">
<div className="body-item-title left-padding-1">Trancate Vol Length</div>
<div className="body-input-container">
<input type="number" min={16} max={128} step={1} value={appState.workletSetting.setting.volTrancateLength} onChange={(e) => {
appState.workletSetting.setSetting({
...appState.workletSetting.setting,
volTrancateLength: Number(e.target.value)
})
}} />
</div>
</div> */}
</>
)
}, [appState.workletSetting.setting, appState.workletSetting.setSetting])
const advanceSettingContent = useMemo(() => {
return (
<>
<div className="body-row divider"></div>
{mmvcServerUrlRow}
{protocolRow}
<div className="body-row divider"></div>
{sampleRateRow}
{sendingSampleRateRow}
<div className="body-row divider"></div>
{crossFadeOverlapSizeRow}
{crossFadeOffsetRateRow}
{crossFadeEndRateRow}
<div className="body-row divider"></div>
{workletSettingRow}
<div className="body-row divider"></div>
{downSamplingModeRow}
</>
)
}, [mmvcServerUrlRow, protocolRow, sampleRateRow, sendingSampleRateRow, crossFadeOverlapSizeRow, crossFadeOffsetRateRow, crossFadeEndRateRow, workletSettingRow, downSamplingModeRow])
const advancedSetting = useMemo(() => {
return (
<>
{appState.frontendManagerState.stateControls.openAdvancedSettingCheckbox.trigger}
<div className="partition">
<div className="partition-header">
<span className="caret">
{accodionButton}
</span>
<span className="title" onClick={() => { appState.frontendManagerState.stateControls.openAdvancedSettingCheckbox.updateState(!appState.frontendManagerState.stateControls.openAdvancedSettingCheckbox.checked()) }}>
Advanced Setting
</span>
</div>
<div className="partition-content">
{advanceSettingContent}
</div>
</div>
</>
)
}, [advanceSettingContent])
return {
advancedSetting,
}
}

View File

@ -0,0 +1,37 @@
import { IconName, IconPrefix } from "@fortawesome/free-regular-svg-icons";
import { FontAwesomeIcon } from "@fortawesome/react-fontawesome";
import React, { useMemo } from "react";
import { StateControlCheckbox } from "../hooks/useStateControlCheckbox";
export const AnimationTypes = {
colored: "colored",
spinner: "spinner",
} as const;
export type AnimationTypes = typeof AnimationTypes[keyof typeof AnimationTypes];
export type HeaderButtonProps = {
stateControlCheckbox: StateControlCheckbox;
tooltip: string;
onIcon: [IconPrefix, IconName];
offIcon: [IconPrefix, IconName];
animation: AnimationTypes;
tooltipClass?: string;
};
export const HeaderButton = (props: HeaderButtonProps) => {
const headerButton = useMemo(() => {
const tooltipClass = props.tooltipClass || "tooltip-bottom";
return (
<div className={`rotate-button-container ${tooltipClass}`} data-tooltip={props.tooltip}>
{props.stateControlCheckbox.trigger}
<label htmlFor={props.stateControlCheckbox.className} className="rotate-lable">
<div className={props.animation}>
<FontAwesomeIcon icon={props.onIcon} className="spin-on" />
<FontAwesomeIcon icon={props.offIcon} className="spin-off" />
</div>
</label>
</div>
);
}, []);
return headerButton;
};
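For orientation, a minimal usage sketch (hypothetical names: the stateControlCheckbox comes from the useStateControlCheckbox hook introduced later in this commit, and the icon tuple is only illustrative):

// Sketch: rendering a HeaderButton inside a component (names/icons are illustrative).
const openExampleCheckbox = useStateControlCheckbox("open-example-checkbox");
const button = (
    <HeaderButton
        stateControlCheckbox={openExampleCheckbox}
        tooltip="Open Example"
        onIcon={["fas", "gear"] as [IconPrefix, IconName]}
        offIcon={["fas", "gear"] as [IconPrefix, IconName]}
        animation={AnimationTypes.colored}
    />
);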

View File

@ -6,4 +6,19 @@ export const AUDIO_ELEMENT_FOR_TEST_CONVERTED = "audio-test-converted"
export const AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK = "audio-test-converted-echoback" export const AUDIO_ELEMENT_FOR_TEST_CONVERTED_ECHOBACK = "audio-test-converted-echoback"
export const INDEXEDDB_KEY_AUDIO_OUTPUT = "INDEXEDDB_KEY_AUDIO_OUTPUT" export const INDEXEDDB_KEY_AUDIO_OUTPUT = "INDEXEDDB_KEY_AUDIO_OUTPUT"
// State Control Checkbox
export const OpenServerControlCheckbox = "open-server-control-checkbox"
export const OpenModelSettingCheckbox = "open-model-setting-checkbox"
export const OpenDeviceSettingCheckbox = "open-device-setting-checkbox"
export const OpenQualityControlCheckbox = "open-quality-control-checkbox"
export const OpenSpeakerSettingCheckbox = "open-speaker-setting-checkbox"
export const OpenConverterSettingCheckbox = "open-converter-setting-checkbox"
export const OpenAdvancedSettingCheckbox = "open-advanced-setting-checkbox"

View File

@ -0,0 +1,70 @@
/* Prerequisites */
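/* Note: these rules use nested selectors and so assume a nesting-capable CSS pipeline (e.g. a PostCSS nesting transform); plain CSS would ignore the inner blocks. */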
.rotate-button-container {
height: var(--header-height);
width: var(--header-height);
position: relative;
}
.rotate-button {
display: none;
}
.rotate-button ~ .rotate-lable {
padding: 2px;
position: absolute;
transition: all 0.3s;
cursor: pointer;
height: var(--header-height);
width: var(--header-height);
}
.rotate-button ~ .rotate-lable > * {
width: 100%;
height: 100%;
float: left;
transition: all 0.3s;
.spin-on {
width: 100%;
height: 100%;
display: none;
}
.spin-off {
width: 100%;
height: 100%;
display: block;
}
}
.rotate-button ~ .rotate-lable > .colored {
color: rgba(200, 200, 200, 0.8);
background: rgba(0, 0, 0, 1);
transition: all 0.3s;
.spin-on {
display: none;
}
.spin-off {
display: block;
}
}
.rotate-button:checked ~ .rotate-lable > .colored {
color: rgba(50, 240, 50, 0.8);
background: rgba(60, 60, 60, 1);
transition: all 0.3s;
.spin-on {
display: block;
}
.spin-off {
display: none;
}
}
.rotate-button:checked ~ .rotate-lable > .spinner {
width: 100%;
height: 100%;
transform: rotate(180deg);
transition: all 0.3s;
box-sizing: border-box;
.spin-on {
display: block;
}
.spin-off {
display: none;
}
}

View File

@ -1,6 +1,8 @@
@import url("https://fonts.googleapis.com/css2?family=Chicle&family=Poppins:ital,wght@0,200;0,400;0,600;1,200;1,400;1,600&display=swap"); @import url("https://fonts.googleapis.com/css2?family=Chicle&family=Poppins:ital,wght@0,200;0,400;0,600;1,200;1,400;1,600&display=swap");
@import url("https://fonts.googleapis.com/css2?family=Yusei+Magic&display=swap"); @import url("https://fonts.googleapis.com/css2?family=Yusei+Magic&display=swap");
@import "./101_RotatedButton.css";
@import "./Error.css";
:root { :root {
--text-color: #333; --text-color: #333;
--company-color1: rgba(64, 119, 187, 1); --company-color1: rgba(64, 119, 187, 1);
@ -11,7 +13,7 @@
--company-color3-alpha: rgba(255, 255, 255, 0.3); --company-color3-alpha: rgba(255, 255, 255, 0.3);
--global-shadow-color: rgba(0, 0, 0, 0.4); --global-shadow-color: rgba(0, 0, 0, 0.4);
--sidebar-transition-time: 0.3s; --sidebar-transition-time: 0.2s;
--sidebar-transition-time-quick: 0.1s; --sidebar-transition-time-quick: 0.1s;
--sidebar-transition-animation: ease-in-out; --sidebar-transition-animation: ease-in-out;
@ -52,6 +54,14 @@ body {
height: 100%; height: 100%;
width: 100%; width: 100%;
} }
.first-gesture {
background: rgba(200, 0, 0, 0.2);
width: 100%;
height: 100%;
position: absolute;
}
/* Main + Section Partition*/
.main-body { .main-body {
height: 100%; height: 100%;
width: 100%; width: 100%;
@ -60,9 +70,72 @@ body {
display: flex; display: flex;
flex-direction: column; flex-direction: column;
font-size: 1rem; font-size: 1rem;
user-select: none;
/* Title */
.top-title {
.title {
font-size: 3rem;
}
.top-title-version {
margin-left: 2rem;
font-size: 1.2rem;
background: linear-gradient(transparent 60%, yellow 30%);
}
.belongings {
margin-left: 1rem;
margin-right: 1rem;
.link {
margin-left: 1rem;
}
}
}
/* Partition */
.partition {
width: 100%;
.partition-header {
font-weight: 700;
color: rgb(71, 69, 69);
display: flex;
.caret {
width: 2rem;
}
.title {
font-size: 1.1rem;
}
.belongings {
font-weight: 400;
font-size: 0.8rem;
display: flex;
flex-direction: row;
align-items: flex-end;
margin-left: 10px;
.belongings-checkbox {
margin-bottom: 3px;
}
}
}
.partition-content {
position: static;
overflow-y: hidden;
}
.row-split {
}
}
} }
.body-row { .state-control-checkbox:checked + .partition .partition-content {
max-height: 700px;
background: rgba(255, 255, 255, 0.3);
transition: all var(--sidebar-transition-time) var(--sidebar-transition-animation);
} }
.state-control-checkbox + .partition .partition-content {
max-height: 0px;
background: rgba(233, 233, 255, 0.3);
transition: all var(--sidebar-transition-time) var(--sidebar-transition-animation);
}
/* ROW */
.split-6-4 { .split-6-4 {
display: flex; display: flex;
width: 100%; width: 100%;
@ -107,6 +180,21 @@ body {
} }
} }
.split-2-8 {
display: flex;
width: 100%;
justify-content: center;
margin: 1px 0px 1px 0px;
& > div:nth-child(1) {
left: 0px;
width: 20%;
}
& > div:nth-child(2) {
left: 20%;
width: 80%;
}
}
.split-3-3-4 { .split-3-3-4 {
display: flex; display: flex;
width: 100%; width: 100%;
@ -125,6 +213,87 @@ body {
width: 40%; width: 40%;
} }
} }
.split-2-5-3 {
display: flex;
width: 100%;
justify-content: center;
margin: 1px 0px 1px 0px;
& > div:nth-child(1) {
left: 0px;
width: 20%;
}
& > div:nth-child(2) {
left: 20%;
width: 50%;
}
& > div:nth-child(3) {
left: 70%;
width: 30%;
}
}
.split-4-4-2 {
display: flex;
width: 100%;
justify-content: center;
margin: 1px 0px 1px 0px;
& > div:nth-child(1) {
left: 0px;
width: 40%;
}
& > div:nth-child(2) {
left: 40%;
width: 40%;
}
& > div:nth-child(3) {
left: 80%;
width: 20%;
}
}
.split-3-2-2-3 {
display: flex;
width: 100%;
justify-content: center;
margin: 1px 0px 1px 0px;
& > div:nth-child(1) {
left: 0px;
width: 30%;
}
& > div:nth-child(2) {
left: 30%;
width: 20%;
}
& > div:nth-child(3) {
left: 50%;
width: 20%;
}
& > div:nth-child(4) {
left: 70%;
width: 30%;
}
}
.split-3-2-3-2 {
display: flex;
width: 100%;
justify-content: center;
margin: 1px 0px 1px 0px;
& > div:nth-child(1) {
left: 0px;
width: 30%;
}
& > div:nth-child(2) {
left: 30%;
width: 20%;
}
& > div:nth-child(3) {
left: 50%;
width: 30%;
}
& > div:nth-child(4) {
left: 80%;
width: 20%;
}
}
.split-3-1-2-4 { .split-3-1-2-4 {
display: flex; display: flex;
width: 100%; width: 100%;
@ -147,6 +316,28 @@ body {
width: 40%; width: 40%;
} }
} }
.split-3-2-1-4 {
display: flex;
width: 100%;
justify-content: center;
margin: 1px 0px 1px 0px;
& > div:nth-child(1) {
left: 0px;
width: 30%;
}
& > div:nth-child(2) {
left: 30%;
width: 20%;
}
& > div:nth-child(3) {
left: 50%;
width: 10%;
}
& > div:nth-child(4) {
left: 60%;
width: 40%;
}
}
.split-3-2-2-2-1 { .split-3-2-2-2-1 {
display: flex; display: flex;
width: 100%; width: 100%;
@ -224,32 +415,11 @@ body {
} }
.divider { .divider {
height: 4px; height: 0.8rem;
/* background-color: rgba(16, 210, 113, 0.1); */ /* background-color: rgba(16, 210, 113, 0.1); */
background-color: rgba(31, 42, 36, 0.1); background-color: rgba(31, 42, 36, 0.1);
} }
.body-top-title {
font-size: 3rem;
}
.body-top-title-belongings {
display: flex;
align-items: flex-end;
justify-content: flex-start;
& > div {
margin-left: 10px;
margin-right: 10px;
}
& > .belonging-item {
& > .link {
text-decoration: none;
& > span {
font-size: small;
}
}
}
}
.body-section-title { .body-section-title {
font-size: 1.5rem; font-size: 1.5rem;
color: rgb(51, 49, 49); color: rgb(51, 49, 49);
@ -262,9 +432,13 @@ body {
.body-item-title { .body-item-title {
color: rgb(51, 99, 49); color: rgb(51, 99, 49);
display: flex;
} }
.body-item-text { .body-item-text {
color: rgb(30, 30, 30); color: rgb(30, 30, 30);
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
.body-item-text-item { .body-item-text-item {
padding-left: 1rem; padding-left: 1rem;
} }
@ -273,8 +447,20 @@ body {
color: rgb(30, 30, 30); color: rgb(30, 30, 30);
font-size: 0.7rem; font-size: 0.7rem;
} }
.body-input-container {
display: flex;
}
.body-item-input { .body-item-input {
width: 90%; width: 60%;
}
.body-item-input-slider {
width: 60%;
}
.body-item-input-slider-label {
margin-right: 1rem;
}
.body-item-input-slider-val {
margin-left: 1rem;
} }
.body-button-container { .body-button-container {
@ -321,8 +507,32 @@ body {
} }
} }
} }
.body-select-container { .body-select {
.body-select { color: rgb(30, 30, 30);
color: rgb(30, 30, 30); max-width: 100%;
}
.body-select-50 {
color: rgb(30, 30, 30);
max-width: 50%;
height: 1.5rem;
}
.body-image-container,
.body-wav-container {
display: flex;
width: 100%;
.body-image-container-title,
.body-wav-container-title {
width: 20%;
}
.body-image-container-img,
.body-wav-container-wav {
width: 80%;
} }
} }
.donate-img {
border-radius: 35px;
height: 1.5rem;
}
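
As a sketch of how these split helpers are consumed (hypothetical markup; only the class names defined above are assumed):

{/* Sketch: a 20%/80% row using the split-2-8 helper defined above. */}
<div className="body-row split-2-8 left-padding-1">
    <div className="body-item-title">Example</div>
    <div className="body-input-container">
        <input className="body-item-input" type="text" />
    </div>
</div>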

View File

@ -0,0 +1,30 @@
.error-container {
margin: 2rem;
.top-error-message {
color: #44a;
font-size: 2rem;
font-weight: 100;
}
.top-error-description {
color: #444;
font-size: 1rem;
font-weight: 100;
}
.error-detail {
margin-top: 2rem;
padding: 1rem;
border: 1px solid;
.error-name {
font-weight: 700;
}
.error-message {
margin-top: 0.5rem;
}
.error-info-container {
margin-top: 0.5rem;
font-size: 0.8rem;
.error-info-line {
}
}
}
}

View File

@ -0,0 +1,100 @@
import React, { useMemo, useRef } from "react";
import { useEffect } from "react";
export type StateControlCheckbox = {
trigger: JSX.Element;
updateState: (newVal: boolean) => void;
checked: () => boolean
className: string;
};
export const useStateControlCheckbox = (className: string, changeCallback?: (newVal: boolean) => void): StateControlCheckbox => {
const currentValForTriggerCallbackRef = useRef<boolean>(false);
// (4) Trigger checkbox
const callback = useMemo(() => {
// console.log("generate callback function", className);
return (newVal: boolean) => {
if (!changeCallback) {
return;
}
// Skip when the value is unchanged (fire only for the initial value (undefined) or when the value differs)
if (currentValForTriggerCallbackRef.current === newVal) {
return;
}
// Fire only for the initial value (undefined) or when the value differs
currentValForTriggerCallbackRef.current = newVal;
changeCallback(currentValForTriggerCallbackRef.current);
};
}, []);
const trigger = useMemo(() => {
if (changeCallback) {
return (
<input
type="checkbox"
className={`${className} state-control-checkbox rotate-button`}
id={`${className}`}
onChange={(e) => {
callback(e.target.checked);
}}
/>
);
} else {
return <input type="checkbox" className={`${className} state-control-checkbox rotate-button`} id={`${className}`} />;
}
}, []);
const checked = useMemo(() => {
return () => {
const checkboxes = document.querySelectorAll(`.${className}`);
if (checkboxes.length == 0) {
return false
}
const box = checkboxes[0] as HTMLInputElement
return box.checked
}
}, []);
useEffect(() => {
const checkboxes = document.querySelectorAll(`.${className}`);
// (1) Sync on/off state across duplicated checkboxes
checkboxes.forEach((x) => {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
x.onchange = (ev) => {
updateState(ev.target.checked);
};
});
// (2) Turn all elements off
const removers = document.querySelectorAll(`.${className}-remover`);
removers.forEach((x) => {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
x.onclick = (ev) => {
if (ev.target.className.indexOf(`${className}-remover`) > 0) {
updateState(false);
}
};
});
}, []);
// (3) Update state
const updateState = useMemo(() => {
return (newVal: boolean) => {
const currentCheckboxes = document.querySelectorAll(`.${className}`);
currentCheckboxes.forEach((y) => {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
y.checked = newVal;
});
if (changeCallback) {
callback(newVal);
}
};
}, []);
return {
trigger,
updateState,
checked,
className,
};
};
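A minimal sketch of how the hook pairs with the partition CSS, mirroring the Advanced Setting accordion earlier in this commit (the class name is illustrative):

// Sketch: the hidden trigger checkbox plus the CSS sibling selector
// (.state-control-checkbox:checked + .partition .partition-content)
// yield an accordion without React state.
const openCheckbox = useStateControlCheckbox("open-example-checkbox");
const section = (
    <>
        {openCheckbox.trigger}
        <div className="partition">
            <div className="partition-header" onClick={() => openCheckbox.updateState(!openCheckbox.checked())}>
                Example Section
            </div>
            <div className="partition-content">{/* shown/hidden via CSS */}</div>
        </div>
    </>
);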

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
{ {
"name": "@dannadori/voice-changer-client-js", "name": "@dannadori/voice-changer-client-js",
"version": "1.0.66", "version": "1.0.79",
"description": "", "description": "",
"main": "dist/index.js", "main": "dist/index.js",
"directories": { "directories": {
@ -27,32 +27,30 @@
"license": "ISC", "license": "ISC",
"devDependencies": { "devDependencies": {
"@types/audioworklet": "^0.0.36", "@types/audioworklet": "^0.0.36",
"@types/node": "^18.11.18", "@types/node": "^18.14.0",
"@types/react": "18.0.27", "@types/react": "18.0.28",
"@types/react-dom": "18.0.10", "@types/react-dom": "18.0.11",
"eslint": "^8.32.0", "eslint": "^8.34.0",
"eslint-config-prettier": "^8.6.0", "eslint-config-prettier": "^8.6.0",
"eslint-plugin-prettier": "^4.2.1", "eslint-plugin-prettier": "^4.2.1",
"eslint-plugin-react": "^7.32.1", "eslint-plugin-react": "^7.32.2",
"eslint-webpack-plugin": "^3.2.0", "eslint-webpack-plugin": "^4.0.0",
"npm-run-all": "^4.1.5", "npm-run-all": "^4.1.5",
"prettier": "^2.8.3", "prettier": "^2.8.4",
"raw-loader": "^4.0.2", "raw-loader": "^4.0.2",
"rimraf": "^4.1.2", "rimraf": "^4.1.2",
"ts-loader": "^9.4.2", "ts-loader": "^9.4.2",
"typescript": "^4.9.4", "typescript": "^4.9.5",
"webpack": "^5.75.0", "webpack": "^5.75.0",
"webpack-cli": "^5.0.1", "webpack-cli": "^5.0.1",
"webpack-dev-server": "^4.11.1" "webpack-dev-server": "^4.11.1"
}, },
"dependencies": { "dependencies": {
"@types/readable-stream": "^2.3.15", "@types/readable-stream": "^2.3.15",
"amazon-chime-sdk-js": "^3.10.0", "amazon-chime-sdk-js": "^3.11.0",
"localforage": "^1.10.0", "localforage": "^1.10.0",
"microphone-stream": "^6.0.1",
"react": "^18.2.0", "react": "^18.2.0",
"react-dom": "^18.2.0", "react-dom": "^18.2.0",
"readable-stream": "^4.3.0", "socket.io-client": "^4.6.0"
"socket.io-client": "^4.5.4"
} }
} }

View File

@ -1,8 +1,15 @@
export declare const RequestType: { export declare const RequestType: {
readonly voice: "voice"; readonly voice: "voice";
readonly config: "config"; readonly config: "config";
readonly start: "start";
readonly stop: "stop";
}; };
export type RequestType = typeof RequestType[keyof typeof RequestType]; export type RequestType = typeof RequestType[keyof typeof RequestType];
export declare const ResponseType: {
readonly volume: "volume";
readonly inputData: "inputData";
};
export type ResponseType = typeof ResponseType[keyof typeof ResponseType];
export type VoiceChangerWorkletProcessorRequest = { export type VoiceChangerWorkletProcessorRequest = {
requestType: RequestType; requestType: RequestType;
voice: ArrayBuffer; voice: ArrayBuffer;
@ -10,3 +17,9 @@ export type VoiceChangerWorkletProcessorRequest = {
volTrancateThreshold: number; volTrancateThreshold: number;
volTrancateLength: number; volTrancateLength: number;
}; };
export type VoiceChangerWorkletProcessorResponse = {
responseType: ResponseType;
volume?: number;
recordData?: Float32Array[];
inputData?: Float32Array;
};
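A sketch of how the extended request type is used; this mirrors the start()/stop() helpers added to VoiceChangerWorkletNode later in this commit, and `node` is assumed to be an instantiated worklet node:

// Sketch: a "start" control message; non-voice requests carry a placeholder buffer.
const startReq: VoiceChangerWorkletProcessorRequest = {
    requestType: "start",
    voice: new ArrayBuffer(1), // placeholder; only "voice" requests carry audio
    numTrancateTreshold: 0,
    volTrancateThreshold: 0,
    volTrancateLength: 0,
};
node.port.postMessage(startReq); // `node` is an assumption for this sketch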

View File

@ -1,292 +0,0 @@
import { io, Socket } from "socket.io-client";
import { DefaultEventsMap } from "@socket.io/component-emitter";
import { Duplex, DuplexOptions } from "readable-stream";
import { Protocol, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION } from "./const";
export type Callbacks = {
onVoiceReceived: (voiceChangerMode: VoiceChangerMode, data: ArrayBuffer) => void
}
export type AudioStreamerListeners = {
notifySendBufferingTime: (time: number) => void
notifyResponseTime: (time: number) => void
notifyException: (code: VOICE_CHANGER_CLIENT_EXCEPTION, message: string) => void
}
export type AudioStreamerSettings = {
serverUrl: string;
protocol: Protocol;
inputChunkNum: number;
voiceChangerMode: VoiceChangerMode;
}
export class AudioStreamer extends Duplex {
private callbacks: Callbacks
private audioStreamerListeners: AudioStreamerListeners
private protocol: Protocol = "sio"
private serverUrl = ""
private socket: Socket<DefaultEventsMap, DefaultEventsMap> | null = null
private voiceChangerMode: VoiceChangerMode = "realtime"
private inputChunkNum = 128
private requestChunks: ArrayBuffer[] = []
private recordChunks: ArrayBuffer[] = []
private isRecording = false
// performance monitor
private bufferStart = 0;
constructor(callbacks: Callbacks, audioStreamerListeners: AudioStreamerListeners, options?: DuplexOptions) {
super(options);
this.callbacks = callbacks
this.audioStreamerListeners = audioStreamerListeners
}
private createSocketIO = () => {
if (this.socket) {
this.socket.close()
}
if (this.protocol === "sio") {
this.socket = io(this.serverUrl + "/test");
this.socket.on('connect_error', (err) => {
this.audioStreamerListeners.notifyException(VOICE_CHANGER_CLIENT_EXCEPTION.ERR_SIO_CONNECT_FAILED, `[SIO] connection failed ${err}`)
})
this.socket.on('connect', () => console.log(`[SIO] connect to ${this.serverUrl}`));
this.socket.on('response', (response: any[]) => {
const cur = Date.now()
const responseTime = cur - response[0]
const result = response[1] as ArrayBuffer
if (result.byteLength < 128 * 2) {
this.audioStreamerListeners.notifyException(VOICE_CHANGER_CLIENT_EXCEPTION.ERR_SIO_INVALID_RESPONSE, `[SIO] received data is too short ${result.byteLength}`)
} else {
this.callbacks.onVoiceReceived(this.voiceChangerMode, response[1])
this.audioStreamerListeners.notifyResponseTime(responseTime)
}
});
}
}
// Option Change
setServerUrl = (serverUrl: string) => {
this.serverUrl = serverUrl
console.log(`[AudioStreamer] Server Setting:${this.serverUrl} ${this.protocol}`)
this.createSocketIO()// mode check is done in the method.
}
setProtocol = (mode: Protocol) => {
this.protocol = mode
console.log(`[AudioStreamer] Server Setting:${this.serverUrl} ${this.protocol}`)
this.createSocketIO()// mode check is done in the method.
}
setInputChunkNum = (num: number) => {
this.inputChunkNum = num
}
setVoiceChangerMode = (val: VoiceChangerMode) => {
this.voiceChangerMode = val
}
getSettings = (): AudioStreamerSettings => {
return {
serverUrl: this.serverUrl,
protocol: this.protocol,
inputChunkNum: this.inputChunkNum,
voiceChangerMode: this.voiceChangerMode
}
}
// Main Process
//// Pipe from mic stream
_write = (chunk: AudioBuffer, _encoding: any, callback: any) => {
const buffer = chunk.getChannelData(0);
// console.log("SAMPLERATE:", chunk.sampleRate, chunk.numberOfChannels, chunk.length, buffer)
if (this.voiceChangerMode === "realtime") {
this._write_realtime(buffer)
} else {
this._write_record(buffer)
}
callback();
}
private _write_realtime = (buffer: Float32Array) => {
// bufferSize samples arrive here at 48kHz.
//// The input is 48000Hz, so decimate it down to 24000Hz.
//// Byte size: halved by the rate conversion (x1/2), doubled by 16bit (2 byte) samples (x2).
const arrayBuffer = new ArrayBuffer((buffer.length / 2) * 2)
const dataView = new DataView(arrayBuffer);
for (let i = 0; i < buffer.length; i++) {
if (i % 2 == 0) {
let s = Math.max(-1, Math.min(1, buffer[i]));
s = s < 0 ? s * 0x8000 : s * 0x7FFF
// Every second sample advances 2 bytes, hence ((i/2)*2)
dataView.setInt16((i / 2) * 2, s, true);
}
}
// Manage the data as 256-byte chunks (minimum buffer size 256: decimated sample count x 2 bytes)
const chunkByteSize = 256 // (const.ts ★1)
for (let i = 0; i < arrayBuffer.byteLength / chunkByteSize; i++) {
const ab = arrayBuffer.slice(i * chunkByteSize, (i + 1) * chunkByteSize)
this.requestChunks.push(ab)
}
//// Stop here if the request buffer holds fewer chunks than the configured send count.
if (this.requestChunks.length < this.inputChunkNum) {
return
}
// Create the container for the request
const windowByteLength = this.requestChunks.reduce((prev, cur) => {
return prev + cur.byteLength
}, 0)
const newBuffer = new Uint8Array(windowByteLength);
// Copy the request data into it
this.requestChunks.reduce((prev, cur) => {
newBuffer.set(new Uint8Array(cur), prev)
return prev + cur.byteLength
}, 0)
// console.log("send buff length", newBuffer.length)
this.sendBuffer(newBuffer)
this.requestChunks = []
this.audioStreamerListeners.notifySendBufferingTime(Date.now() - this.bufferStart)
this.bufferStart = Date.now()
}
private _write_record = (buffer: Float32Array) => {
if (!this.isRecording) { return }
// buffer(for48Khz)x16bit * chunksize / 2(for24Khz)
const sendBuffer = new ArrayBuffer(buffer.length * 2 / 2);
const sendDataView = new DataView(sendBuffer);
for (var i = 0; i < buffer.length; i++) {
if (i % 2 == 0) {
let s = Math.max(-1, Math.min(1, buffer[i]));
s = s < 0 ? s * 0x8000 : s * 0x7FFF
sendDataView.setInt16(i, s, true);
// if (i % 3000 === 0) {
// console.log("buffer_converting", s, buffer[i])
// }
}
}
this.recordChunks.push(sendBuffer)
}
// Trigger for near-realtime mode
sendRecordedData = () => {
const length = this.recordChunks.reduce((prev, cur) => {
return prev + cur.byteLength
}, 0)
const newBuffer = new Uint8Array(length);
this.recordChunks.reduce((prev, cur) => {
newBuffer.set(new Uint8Array(cur), prev)
return prev + cur.byteLength
}, 0)
this.sendBuffer(newBuffer)
}
startRecord = () => {
this.recordChunks = []
this.isRecording = true
}
stopRecord = () => {
this.isRecording = false
}
private sendBuffer = async (newBuffer: Uint8Array) => {
// if (this.serverUrl.length == 0) {
// // console.warn("no server url")
// // return
// // throw "no server url"
// }
const timestamp = Date.now()
// console.log("REQUEST_MESSAGE:", [this.gpu, this.srcId, this.dstId, timestamp, newBuffer.buffer])
// console.log("SERVER_URL", this.serverUrl, this.protocol)
// const convertChunkNum = this.voiceChangerMode === "realtime" ? this.requestParamas.convertChunkNum : 0
if (this.protocol === "sio") {
if (!this.socket) {
console.warn(`sio is not initialized`)
return
}
// console.log("emit!")
this.socket.emit('request_message', [
// this.requestParamas.gpu,
// this.requestParamas.srcId,
// this.requestParamas.dstId,
timestamp,
// convertChunkNum,
// this.requestParamas.crossFadeLowerValue,
// this.requestParamas.crossFadeOffsetRate,
// this.requestParamas.crossFadeEndRate,
newBuffer.buffer]);
} else {
const res = await postVoice(
this.serverUrl + "/test",
// this.requestParamas.gpu,
// this.requestParamas.srcId,
// this.requestParamas.dstId,
timestamp,
// convertChunkNum,
// this.requestParamas.crossFadeLowerValue,
// this.requestParamas.crossFadeOffsetRate,
// this.requestParamas.crossFadeEndRate,
newBuffer.buffer)
if (res.byteLength < 128 * 2) {
this.audioStreamerListeners.notifyException(VOICE_CHANGER_CLIENT_EXCEPTION.ERR_REST_INVALID_RESPONSE, `[REST] received data is too short ${res.byteLength}`)
} else {
this.callbacks.onVoiceReceived(this.voiceChangerMode, res)
this.audioStreamerListeners.notifyResponseTime(Date.now() - timestamp)
}
}
}
}
export const postVoice = async (
url: string,
// gpu: number,
// srcId: number,
// dstId: number,
timestamp: number,
// convertChunkNum: number,
// crossFadeLowerValue: number,
// crossFadeOffsetRate: number,
// crossFadeEndRate: number,
buffer: ArrayBuffer) => {
const obj = {
// gpu,
// srcId,
// dstId,
timestamp,
// convertChunkNum,
// crossFadeLowerValue,
// crossFadeOffsetRate,
// crossFadeEndRate,
buffer: Buffer.from(buffer).toString('base64')
};
const body = JSON.stringify(obj);
const res = await fetch(`${url}`, {
method: "POST",
headers: {
'Accept': 'application/json',
'Content-Type': 'application/json'
},
body: body
})
const receivedJson = await res.json()
const changedVoiceBase64 = receivedJson["changedVoiceBase64"]
const buf = Buffer.from(changedVoiceBase64, "base64")
const ab = new ArrayBuffer(buf.length);
// console.log("RECIV", buf.length)
const view = new Uint8Array(ab);
for (let i = 0; i < buf.length; ++i) {
view[i] = buf[i];
}
return ab
}

View File

@ -7,6 +7,12 @@ type FileChunk = {
} }
export class ServerConfigurator { export class ServerConfigurator {
private serverUrl = "" private serverUrl = ""
setServerUrl = (serverUrl: string) => {
this.serverUrl = serverUrl
console.log(`[ServerConfigurator] Server URL: ${this.serverUrl}`)
}
getSettings = async () => { getSettings = async () => {
const url = this.serverUrl + "/info" const url = this.serverUrl + "/info"
const info = await new Promise<ServerInfo>((resolve) => { const info = await new Promise<ServerInfo>((resolve) => {
@ -21,11 +27,6 @@ export class ServerConfigurator {
return info return info
} }
setServerUrl = (serverUrl: string) => {
this.serverUrl = serverUrl
console.log(`[ServerConfigurator] Server URL: ${this.serverUrl}`)
}
updateSettings = async (key: ServerSettingKey, val: string) => { updateSettings = async (key: ServerSettingKey, val: string) => {
const url = this.serverUrl + "/update_setteings" const url = this.serverUrl + "/update_setteings"
const info = await new Promise<ServerInfo>(async (resolve) => { const info = await new Promise<ServerInfo>(async (resolve) => {
@ -124,4 +125,5 @@ export class ServerConfigurator {
}) })
return await info return await info
} }
} }

View File

@ -1,17 +1,14 @@
import { VoiceChangerWorkletNode, VolumeListener } from "./VoiceChangerWorkletNode"; import { VoiceChangerWorkletNode, VoiceChangerWorkletListener } from "./VoiceChangerWorkletNode";
// @ts-ignore // @ts-ignore
import workerjs from "raw-loader!../worklet/dist/index.js"; import workerjs from "raw-loader!../worklet/dist/index.js";
import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js"; import { VoiceFocusDeviceTransformer, VoiceFocusTransformDevice } from "amazon-chime-sdk-js";
import { createDummyMediaStream, validateUrl } from "./util"; import { createDummyMediaStream, validateUrl } from "./util";
import { BufferSize, DefaultVoiceChangerClientSetting, Protocol, ServerSettingKey, VoiceChangerMode, VOICE_CHANGER_CLIENT_EXCEPTION, WorkletSetting } from "./const"; import { DefaultVoiceChangerClientSetting, ServerSettingKey, VoiceChangerClientSetting, WorkletNodeSetting, WorkletSetting } from "./const";
import MicrophoneStream from "microphone-stream";
import { AudioStreamer, Callbacks, AudioStreamerListeners } from "./AudioStreamer";
import { ServerConfigurator } from "./ServerConfigurator"; import { ServerConfigurator } from "./ServerConfigurator";
import { VoiceChangerWorkletProcessorRequest } from "./@types/voice-changer-worklet-processor";
// オーディオデータの流れ // オーディオデータの流れ
// input node(mic or MediaStream) -> [vf node] -> microphne stream -> audio streamer -> // input node(mic or MediaStream) -> [vf node] -> [vc node] ->
// sio/rest server -> audio streamer-> vc node -> output node // sio/rest server -> [vc node] -> output node
import { BlockingQueue } from "./utils/BlockingQueue"; import { BlockingQueue } from "./utils/BlockingQueue";
@ -24,57 +21,22 @@ export class VoiceChangerClient {
private currentMediaStream: MediaStream | null = null private currentMediaStream: MediaStream | null = null
private currentMediaStreamAudioSourceNode: MediaStreamAudioSourceNode | null = null private currentMediaStreamAudioSourceNode: MediaStreamAudioSourceNode | null = null
private outputNodeFromVF: MediaStreamAudioDestinationNode | null = null private inputGainNode: GainNode | null = null
private micStream: MicrophoneStream | null = null private outputGainNode: GainNode | null = null
private audioStreamer!: AudioStreamer
private vcNode!: VoiceChangerWorkletNode private vcNode!: VoiceChangerWorkletNode
private currentMediaStreamAudioDestinationNode!: MediaStreamAudioDestinationNode private currentMediaStreamAudioDestinationNode!: MediaStreamAudioDestinationNode
private promiseForInitialize: Promise<void> private promiseForInitialize: Promise<void>
private _isVoiceChanging = false private _isVoiceChanging = false
private setting: VoiceChangerClientSetting = DefaultVoiceChangerClientSetting
private sslCertified: string[] = [] private sslCertified: string[] = []
private sem = new BlockingQueue<number>(); private sem = new BlockingQueue<number>();
private callbacks: Callbacks = { constructor(ctx: AudioContext, vfEnable: boolean, voiceChangerWorkletListener: VoiceChangerWorkletListener) {
onVoiceReceived: (voiceChangerMode: VoiceChangerMode, data: ArrayBuffer): void => {
// console.log(voiceChangerMode, data)
if (voiceChangerMode === "realtime") {
const req: VoiceChangerWorkletProcessorRequest = {
requestType: "voice",
voice: data,
numTrancateTreshold: 0,
volTrancateThreshold: 0,
volTrancateLength: 0
}
this.vcNode.postReceivedVoice(req)
return
}
// For Near Realtime Mode
console.log("near realtime mode")
const i16Data = new Int16Array(data)
const f32Data = new Float32Array(i16Data.length)
// https://stackoverflow.com/questions/35234551/javascript-converting-from-int16-to-float32
i16Data.forEach((x, i) => {
const float = (x >= 0x8000) ? -(0x10000 - x) / 0x8000 : x / 0x7FFF;
f32Data[i] = float
})
const source = this.ctx.createBufferSource();
const buffer = this.ctx.createBuffer(1, f32Data.length, 24000);
buffer.getChannelData(0).set(f32Data);
source.buffer = buffer;
source.start();
source.connect(this.currentMediaStreamAudioDestinationNode)
}
}
constructor(ctx: AudioContext, vfEnable: boolean, audioStreamerListeners: AudioStreamerListeners, volumeListener: VolumeListener) {
this.sem.enqueue(0); this.sem.enqueue(0);
this.configurator = new ServerConfigurator() this.configurator = new ServerConfigurator()
this.ctx = ctx this.ctx = ctx
@ -83,19 +45,17 @@ export class VoiceChangerClient {
const scriptUrl = URL.createObjectURL(new Blob([workerjs], { type: "text/javascript" })); const scriptUrl = URL.createObjectURL(new Blob([workerjs], { type: "text/javascript" }));
await this.ctx.audioWorklet.addModule(scriptUrl) await this.ctx.audioWorklet.addModule(scriptUrl)
this.vcNode = new VoiceChangerWorkletNode(this.ctx, volumeListener); // vc node this.vcNode = new VoiceChangerWorkletNode(this.ctx, voiceChangerWorkletListener); // vc node
this.currentMediaStreamAudioDestinationNode = this.ctx.createMediaStreamDestination() // output node this.currentMediaStreamAudioDestinationNode = this.ctx.createMediaStreamDestination() // output node
this.vcNode.connect(this.currentMediaStreamAudioDestinationNode) // vc node -> output node this.outputGainNode = this.ctx.createGain()
// (vc nodeにはaudio streamerのcallbackでデータが投げ込まれる) this.outputGainNode.gain.value = this.setting.outputGain
this.audioStreamer = new AudioStreamer(this.callbacks, audioStreamerListeners, { objectMode: true, }) this.vcNode.connect(this.outputGainNode) // vc node -> output node
this.audioStreamer.setInputChunkNum(DefaultVoiceChangerClientSetting.inputChunkNum) this.outputGainNode.connect(this.currentMediaStreamAudioDestinationNode)
this.audioStreamer.setVoiceChangerMode(DefaultVoiceChangerClientSetting.voiceChangerMode)
if (this.vfEnable) { if (this.vfEnable) {
this.vf = await VoiceFocusDeviceTransformer.create({ variant: 'c20' }) this.vf = await VoiceFocusDeviceTransformer.create({ variant: 'c20' })
const dummyMediaStream = createDummyMediaStream(this.ctx) const dummyMediaStream = createDummyMediaStream(this.ctx)
this.currentDevice = (await this.vf.createTransformDevice(dummyMediaStream)) || null; this.currentDevice = (await this.vf.createTransformDevice(dummyMediaStream)) || null;
this.outputNodeFromVF = this.ctx.createMediaStreamDestination();
} }
resolve() resolve()
}) })
@ -109,7 +69,6 @@ export class VoiceChangerClient {
this.sem.enqueue(num + 1); this.sem.enqueue(num + 1);
}; };
isInitialized = async () => { isInitialized = async () => {
if (this.promiseForInitialize) { if (this.promiseForInitialize) {
await this.promiseForInitialize await this.promiseForInitialize
@ -117,9 +76,14 @@ export class VoiceChangerClient {
return true return true
} }
// forceVfDisable is for the condition that vf is enabled in constructor. /////////////////////////////////////////////////////
setup = async (input: string | MediaStream, bufferSize: BufferSize, forceVfDisable: boolean = false) => { // Operations
/////////////////////////////////////////////////////
/// Operations ///
setup = async () => {
const lockNum = await this.lock() const lockNum = await this.lock()
console.log(`Input Setup=> echo: ${this.setting.echoCancel}, noise1: ${this.setting.noiseSuppression}, noise2: ${this.setting.noiseSuppression2}`)
// condition check // condition check
if (!this.vcNode) { if (!this.vcNode) {
console.warn("vc node is not initialized.") console.warn("vc node is not initialized.")
@ -132,43 +96,58 @@ export class VoiceChangerClient {
this.currentMediaStream.getTracks().forEach(x => { x.stop() }) this.currentMediaStream.getTracks().forEach(x => { x.stop() })
this.currentMediaStream = null this.currentMediaStream = null
} }
if (typeof input == "string") {
this.currentMediaStream = await navigator.mediaDevices.getUserMedia({ //// When the input device is null, stop the mic stream and return
audio: { deviceId: input } if (!this.setting.audioInput) {
}) console.log(`Input Setup=> client mic is disabled.`)
} else { this.vcNode.stop()
this.currentMediaStream = input await this.unlock(lockNum)
return
} }
// create mic stream if (typeof this.setting.audioInput == "string") {
if (this.micStream) { this.currentMediaStream = await navigator.mediaDevices.getUserMedia({
this.micStream.unpipe() audio: {
this.micStream.destroy() deviceId: this.setting.audioInput,
this.micStream = null channelCount: 1,
sampleRate: this.setting.sampleRate,
sampleSize: 16,
autoGainControl: false,
echoCancellation: this.setting.echoCancel,
noiseSuppression: this.setting.noiseSuppression
}
})
// this.currentMediaStream.getAudioTracks().forEach((x) => {
// console.log("MIC Setting(cap)", x.getCapabilities())
// console.log("MIC Setting(const)", x.getConstraints())
// console.log("MIC Setting(setting)", x.getSettings())
// })
} else {
this.currentMediaStream = this.setting.audioInput
} }
this.micStream = new MicrophoneStream({
objectMode: true,
bufferSize: bufferSize,
context: this.ctx
})
// connect nodes. // connect nodes.
if (this.currentDevice && forceVfDisable == false) { this.currentMediaStreamAudioSourceNode = this.ctx.createMediaStreamSource(this.currentMediaStream)
this.currentMediaStreamAudioSourceNode = this.ctx.createMediaStreamSource(this.currentMediaStream) // input node this.inputGainNode = this.ctx.createGain()
this.inputGainNode.gain.value = this.setting.inputGain
this.currentMediaStreamAudioSourceNode.connect(this.inputGainNode)
if (this.currentDevice && this.setting.noiseSuppression2) {
this.currentDevice.chooseNewInnerDevice(this.currentMediaStream) this.currentDevice.chooseNewInnerDevice(this.currentMediaStream)
const voiceFocusNode = await this.currentDevice.createAudioNode(this.ctx); // vf node const voiceFocusNode = await this.currentDevice.createAudioNode(this.ctx); // vf node
this.currentMediaStreamAudioSourceNode.connect(voiceFocusNode.start) // input node -> vf node this.inputGainNode.connect(voiceFocusNode.start) // input node -> vf node
voiceFocusNode.end.connect(this.outputNodeFromVF!) voiceFocusNode.end.connect(this.vcNode)
this.micStream.setStream(this.outputNodeFromVF!.stream) // vf node -> mic stream
} else { } else {
console.log("VF disabled") // console.log("input___ media stream", this.currentMediaStream)
this.micStream.setStream(this.currentMediaStream) // input device -> mic stream // this.currentMediaStream.getTracks().forEach(x => {
} // console.log("input___ media stream set", x.getSettings())
this.micStream.pipe(this.audioStreamer) // mic stream -> audio streamer // console.log("input___ media stream con", x.getConstraints())
if (!this._isVoiceChanging) { // console.log("input___ media stream cap", x.getCapabilities())
this.micStream.pauseRecording() // })
} else { // console.log("input___ media node", this.currentMediaStreamAudioSourceNode)
this.micStream.playRecording() // console.log("input___ gain node", this.inputGainNode.channelCount, this.inputGainNode)
this.inputGainNode.connect(this.vcNode)
} }
console.log("Input Setup=> success")
await this.unlock(lockNum) await this.unlock(lockNum)
} }
get stream(): MediaStream { get stream(): MediaStream {
@ -176,22 +155,21 @@ export class VoiceChangerClient {
} }
start = () => { start = () => {
if (!this.micStream) { this.vcNode.start()
throw `Exception:${VOICE_CHANGER_CLIENT_EXCEPTION.ERR_MIC_STREAM_NOT_INITIALIZED}`
return
}
this.micStream.playRecording()
this._isVoiceChanging = true this._isVoiceChanging = true
} }
stop = () => { stop = () => {
if (!this.micStream) { return } this.vcNode.stop()
this.micStream.pauseRecording()
this._isVoiceChanging = false this._isVoiceChanging = false
} }
get isVoiceChanging(): boolean { get isVoiceChanging(): boolean {
return this._isVoiceChanging return this._isVoiceChanging
} }
// Audio Streamer Setting
////////////////////////
/// Settings
//////////////////////////////
setServerUrl = (serverUrl: string, openTab: boolean = false) => { setServerUrl = (serverUrl: string, openTab: boolean = false) => {
const url = validateUrl(serverUrl) const url = validateUrl(serverUrl)
const pageUrl = `${location.protocol}//${location.host}` const pageUrl = `${location.protocol}//${location.host}`
@ -207,35 +185,58 @@ export class VoiceChangerClient {
} }
} }
} }
this.audioStreamer.setServerUrl(url) this.vcNode.updateSetting({ ...this.vcNode.getSettings(), serverUrl: url })
this.configurator.setServerUrl(url) this.configurator.setServerUrl(url)
} }
setProtocol = (mode: Protocol) => { updateClientSetting = (setting: VoiceChangerClientSetting) => {
this.audioStreamer.setProtocol(mode) console.log(`[VoiceChangerClient] Updating Client Setting,`, this.setting, setting)
} let reconstructInputRequired = false
if (
setInputChunkNum = (num: number) => { this.setting.audioInput != setting.audioInput ||
this.audioStreamer.setInputChunkNum(num) this.setting.echoCancel != setting.echoCancel ||
} this.setting.noiseSuppression != setting.noiseSuppression ||
this.setting.noiseSuppression2 != setting.noiseSuppression2 ||
setVoiceChangerMode = (val: VoiceChangerMode) => { this.setting.sampleRate != setting.sampleRate
this.audioStreamer.setVoiceChangerMode(val) ) {
} reconstructInputRequired = true
}
// configure worklet
configureWorklet = (setting: WorkletSetting) => { if (this.setting.inputGain != setting.inputGain) {
const req: VoiceChangerWorkletProcessorRequest = { this.setInputGain(setting.inputGain)
requestType: "config", }
voice: new ArrayBuffer(1), if (this.setting.outputGain != setting.outputGain) {
numTrancateTreshold: setting.numTrancateTreshold, this.setOutputGain(setting.outputGain)
volTrancateThreshold: setting.volTrancateThreshold, }
volTrancateLength: setting.volTrancateLength
this.setting = setting
if (reconstructInputRequired) {
this.setup()
} }
this.vcNode.postReceivedVoice(req)
} }
// Configurator Method setInputGain = (val: number) => {
this.setting.inputGain = val
if (!this.inputGainNode) {
return
}
this.inputGainNode.gain.value = val
}
setOutputGain = (val: number) => {
if (!this.outputGainNode) {
return
}
this.outputGainNode.gain.value = val
}
/////////////////////////////////////////////////////
// Component settings and operations
/////////////////////////////////////////////////////
//## Server ##//
updateServerSettings = (key: ServerSettingKey, val: string) => {
return this.configurator.updateSettings(key, val)
}
uploadFile = (buf: ArrayBuffer, filename: string, onprogress: (progress: number, end: boolean) => void) => { uploadFile = (buf: ArrayBuffer, filename: string, onprogress: (progress: number, end: boolean) => void) => {
return this.configurator.uploadFile(buf, filename, onprogress) return this.configurator.uploadFile(buf, filename, onprogress)
} }
@ -245,18 +246,38 @@ export class VoiceChangerClient {
loadModel = (configFilename: string, pyTorchModelFilename: string | null, onnxModelFilename: string | null) => { loadModel = (configFilename: string, pyTorchModelFilename: string | null, onnxModelFilename: string | null) => {
return this.configurator.loadModel(configFilename, pyTorchModelFilename, onnxModelFilename) return this.configurator.loadModel(configFilename, pyTorchModelFilename, onnxModelFilename)
} }
updateServerSettings = (key: ServerSettingKey, val: string) => {
return this.configurator.updateSettings(key, val) //## Worklet ##//
configureWorklet = (setting: WorkletSetting) => {
this.vcNode.configure(setting)
}
startOutputRecording = () => {
this.vcNode.startOutputRecording()
}
stopOutputRecording = () => {
return this.vcNode.stopOutputRecording()
} }
//## Worklet Node ##//
updateWorkletNodeSetting = (setting: WorkletNodeSetting) => {
this.vcNode.updateSetting(setting)
}
/////////////////////////////////////////////////////
// Information getters
/////////////////////////////////////////////////////
// Information // Information
getClientSettings = () => { getClientSettings = () => {
return this.audioStreamer.getSettings() return this.vcNode.getSettings()
} }
getServerSettings = () => { getServerSettings = () => {
return this.configurator.getSettings() return this.configurator.getSettings()
} }
getSocketId = () => {
return this.vcNode.getSocketId()
}
} }
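
Taken together, a hedged sketch of the new construction flow (listener bodies and the server URL below are illustrative, and the calls are assumed to run inside an async init function):

// Sketch: constructing the client with the listener-based API from this commit.
const ctx = new AudioContext({ sampleRate: 48000 });
const client = new VoiceChangerClient(ctx, true, {
    notifyVolume: (vol) => console.log("volume", vol),
    notifySendBufferingTime: (time) => console.log("send buffering (ms)", time),
    notifyResponseTime: (time, perf) => console.log("response (ms)", time, perf),
    notifyException: (code, message) => console.error(code, message),
});
await client.isInitialized();
client.setServerUrl("http://localhost:18888"); // hypothetical URL
// setup() reads setting.audioInput; with the default (null) it stops the node and
// returns, so an input device would first be supplied via updateClientSetting().
await client.setup();
client.start();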

View File

@ -1,26 +1,332 @@
import { VoiceChangerWorkletProcessorRequest } from "./@types/voice-changer-worklet-processor"; import { VoiceChangerWorkletProcessorRequest } from "./@types/voice-changer-worklet-processor";
import { DefaultWorkletNodeSetting, DownSamplingMode, VOICE_CHANGER_CLIENT_EXCEPTION, WorkletNodeSetting, WorkletSetting } from "./const";
import { io, Socket } from "socket.io-client";
import { DefaultEventsMap } from "@socket.io/component-emitter";
export type VolumeListener = { export type VoiceChangerWorkletListener = {
notifyVolume: (vol: number) => void notifyVolume: (vol: number) => void
notifySendBufferingTime: (time: number) => void
notifyResponseTime: (time: number, perf?: number[]) => void
notifyException: (code: VOICE_CHANGER_CLIENT_EXCEPTION, message: string) => void
} }
export class VoiceChangerWorkletNode extends AudioWorkletNode { export class VoiceChangerWorkletNode extends AudioWorkletNode {
private listener: VolumeListener private listener: VoiceChangerWorkletListener
constructor(context: AudioContext, listener: VolumeListener) {
private setting: WorkletNodeSetting = DefaultWorkletNodeSetting
private requestChunks: ArrayBuffer[] = []
private socket: Socket<DefaultEventsMap, DefaultEventsMap> | null = null
// performance monitor
private bufferStart = 0;
private isOutputRecording = false;
private recordingOutputChunk: Float32Array[] = []
constructor(context: AudioContext, listener: VoiceChangerWorkletListener) {
super(context, "voice-changer-worklet-processor"); super(context, "voice-changer-worklet-processor");
this.port.onmessage = this.handleMessage.bind(this); this.port.onmessage = this.handleMessage.bind(this);
this.listener = listener this.listener = listener
this.createSocketIO()
console.log(`[worklet_node][voice-changer-worklet-processor] created.`); console.log(`[worklet_node][voice-changer-worklet-processor] created.`);
} }
postReceivedVoice = (req: VoiceChangerWorkletProcessorRequest) => { // Settings
this.port.postMessage({ updateSetting = (setting: WorkletNodeSetting) => {
request: req console.log(`[WorkletNode] Updating WorkletNode Setting,`, this.setting, setting)
}, [req.voice]); let recreateSocketIoRequired = false
if (this.setting.serverUrl != setting.serverUrl || this.setting.protocol != setting.protocol) {
recreateSocketIoRequired = true
}
this.setting = setting
if (recreateSocketIoRequired) {
this.createSocketIO()
}
}
getSettings = (): WorkletNodeSetting => {
return this.setting
}
getSocketId = () => {
return this.socket?.id
}
// Processing
private createSocketIO = () => {
if (this.socket) {
this.socket.close()
}
if (this.setting.protocol === "sio") {
this.socket = io(this.setting.serverUrl + "/test");
this.socket.on('connect_error', (err) => {
this.listener.notifyException(VOICE_CHANGER_CLIENT_EXCEPTION.ERR_SIO_CONNECT_FAILED, `[SIO] connection failed ${err}`)
})
this.socket.on('connect', () => {
console.log(`[SIO] connect to ${this.setting.serverUrl}`)
console.log(`[SIO] ${this.socket?.id}`)
});
this.socket.on('response', (response: any[]) => {
const cur = Date.now()
const responseTime = cur - response[0]
const result = response[1] as ArrayBuffer
const perf = response[2]
if (result.byteLength < 128 * 2) {
this.listener.notifyException(VOICE_CHANGER_CLIENT_EXCEPTION.ERR_SIO_INVALID_RESPONSE, `[SIO] received data is too short ${result.byteLength}`)
} else {
this.postReceivedVoice(response[1])
this.listener.notifyResponseTime(responseTime, perf)
}
});
}
}
private postReceivedVoice = (data: ArrayBuffer) => {
// Int16 to Float
const i16Data = new Int16Array(data)
const f32Data = new Float32Array(i16Data.length)
// console.log(`[worklet] f32DataLength${f32Data.length} i16DataLength${i16Data.length}`)
i16Data.forEach((x, i) => {
const float = (x >= 0x8000) ? -(0x10000 - x) / 0x8000 : x / 0x7FFF;
f32Data[i] = float
})
// Upsampling
let upSampledBuffer: Float32Array | null = null
if (this.setting.sendingSampleRate == 48000) {
upSampledBuffer = f32Data
} else {
upSampledBuffer = new Float32Array(f32Data.length * 2)
for (let i = 0; i < f32Data.length; i++) {
const currentFrame = f32Data[i]
const nextFrame = i + 1 < f32Data.length ? f32Data[i + 1] : f32Data[i]
upSampledBuffer[i * 2] = currentFrame
upSampledBuffer[i * 2 + 1] = (currentFrame + nextFrame) / 2
}
}
const req: VoiceChangerWorkletProcessorRequest = {
requestType: "voice",
voice: upSampledBuffer,
numTrancateTreshold: 0,
volTrancateThreshold: 0,
volTrancateLength: 0
}
this.port.postMessage(req)
if (this.isOutputRecording) {
this.recordingOutputChunk.push(upSampledBuffer)
}
}
private _averageDownsampleBuffer(buffer: Float32Array, originalSampleRate: number, destinationSamplerate: number) {
if (originalSampleRate == destinationSamplerate) {
return buffer;
}
if (destinationSamplerate > originalSampleRate) {
throw "downsampling rate show be smaller than original sample rate";
}
const sampleRateRatio = originalSampleRate / destinationSamplerate;
const newLength = Math.round(buffer.length / sampleRateRatio);
const result = new Float32Array(newLength);
let offsetResult = 0;
let offsetBuffer = 0;
while (offsetResult < result.length) {
var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
// Use average value of skipped samples
var accum = 0, count = 0;
for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
accum += buffer[i];
count++;
}
result[offsetResult] = accum / count;
// Or you can simply get rid of the skipped samples:
// result[offsetResult] = buffer[nextOffsetBuffer];
offsetResult++;
offsetBuffer = nextOffsetBuffer;
}
return result;
} }
handleMessage(event: any) { handleMessage(event: any) {
// console.log(`[Node:handleMessage_] `, event.data.volume); // console.log(`[Node:handleMessage_] `, event.data.volume);
this.listener.notifyVolume(event.data.volume as number) if (event.data.responseType === "volume") {
this.listener.notifyVolume(event.data.volume as number)
} else if (event.data.responseType === "inputData") {
const inputData = event.data.inputData as Float32Array
// console.log("receive input data", inputData)
// Downsampling
let downsampledBuffer: Float32Array | null = null
if (this.setting.sendingSampleRate == 48000) {
downsampledBuffer = inputData
} else if (this.setting.downSamplingMode == DownSamplingMode.decimate) {
//////// (Kind 1) Decimation //////////
//// The input is 48000Hz, so decimate it down to 24000Hz.
downsampledBuffer = new Float32Array(inputData.length / 2);
for (let i = 0; i < inputData.length; i++) {
if (i % 2 == 0) {
downsampledBuffer[i / 2] = inputData[i]
}
}
} else {
//////// (Kind 2) Averaging //////////
// downsampledBuffer = this._averageDownsampleBuffer(buffer, 48000, 24000)
downsampledBuffer = this._averageDownsampleBuffer(inputData, 48000, this.setting.sendingSampleRate)
}
// Float to Int16
const arrayBuffer = new ArrayBuffer(downsampledBuffer.length * 2)
const dataView = new DataView(arrayBuffer);
for (let i = 0; i < downsampledBuffer.length; i++) {
let s = Math.max(-1, Math.min(1, downsampledBuffer[i]));
s = s < 0 ? s * 0x8000 : s * 0x7FFF
dataView.setInt16(i * 2, s, true);
}
// Buffering
this.requestChunks.push(arrayBuffer)
//// Stop here if the request buffer holds fewer chunks than the configured send count.
if (this.requestChunks.length < this.setting.inputChunkNum) {
return
}
// Create the container for the request
const windowByteLength = this.requestChunks.reduce((prev, cur) => {
return prev + cur.byteLength
}, 0)
const newBuffer = new Uint8Array(windowByteLength);
// Copy the request data into it
this.requestChunks.reduce((prev, cur) => {
newBuffer.set(new Uint8Array(cur), prev)
return prev + cur.byteLength
}, 0)
this.sendBuffer(newBuffer)
this.requestChunks = []
this.listener.notifySendBufferingTime(Date.now() - this.bufferStart)
this.bufferStart = Date.now()
} else {
console.warn(`[worklet_node][voice-changer-worklet-processor] unknown response ${event.data.responseType}`, event.data)
}
} }
private sendBuffer = async (newBuffer: Uint8Array) => {
const timestamp = Date.now()
if (this.setting.protocol === "sio") {
if (!this.socket) {
console.warn(`sio is not initialized`)
return
}
// console.log("emit!")
this.socket.emit('request_message', [
timestamp,
newBuffer.buffer]);
} else {
const res = await postVoice(
this.setting.serverUrl + "/test",
timestamp,
newBuffer.buffer)
if (res.byteLength < 128 * 2) {
this.listener.notifyException(VOICE_CHANGER_CLIENT_EXCEPTION.ERR_REST_INVALID_RESPONSE, `[REST] received data is too short ${res.byteLength}`)
} else {
this.postReceivedVoice(res)
this.listener.notifyResponseTime(Date.now() - timestamp)
}
}
}
configure = (setting: WorkletSetting) => {
const req: VoiceChangerWorkletProcessorRequest = {
requestType: "config",
voice: new ArrayBuffer(1),
numTrancateTreshold: setting.numTrancateTreshold,
volTrancateThreshold: setting.volTrancateThreshold,
volTrancateLength: setting.volTrancateLength
}
this.port.postMessage(req)
}
start = () => {
const req: VoiceChangerWorkletProcessorRequest = {
requestType: "start",
voice: new ArrayBuffer(1),
numTrancateTreshold: 0,
volTrancateThreshold: 0,
volTrancateLength: 0
}
this.port.postMessage(req)
}
stop = () => {
const req: VoiceChangerWorkletProcessorRequest = {
requestType: "stop",
voice: new ArrayBuffer(1),
numTrancateTreshold: 0,
volTrancateThreshold: 0,
volTrancateLength: 0
}
this.port.postMessage(req)
}
startOutputRecording = () => {
this.recordingOutputChunk = []
this.isOutputRecording = true
}
stopOutputRecording = () => {
this.isOutputRecording = false
const dataSize = this.recordingOutputChunk.reduce((prev, cur) => {
return prev + cur.length
}, 0)
const samples = new Float32Array(dataSize);
let sampleIndex = 0
for (let i = 0; i < this.recordingOutputChunk.length; i++) {
for (let j = 0; j < this.recordingOutputChunk[i].length; j++) {
samples[sampleIndex] = this.recordingOutputChunk[i][j];
sampleIndex++;
}
}
return samples
}
}
export const postVoice = async (
url: string,
timestamp: number,
buffer: ArrayBuffer) => {
const obj = {
timestamp,
buffer: Buffer.from(buffer).toString('base64')
};
const body = JSON.stringify(obj);
const res = await fetch(`${url}`, {
method: "POST",
headers: {
'Accept': 'application/json',
'Content-Type': 'application/json'
},
body: body
})
const receivedJson = await res.json()
const changedVoiceBase64 = receivedJson["changedVoiceBase64"]
const buf = Buffer.from(changedVoiceBase64, "base64")
const ab = new ArrayBuffer(buf.length);
const view = new Uint8Array(ab);
for (let i = 0; i < buf.length; ++i) {
view[i] = buf[i];
}
return ab
} }
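
The two DownSamplingMode strategies in handleMessage reduce 48kHz audio to the sending rate. A standalone sketch of the specific 2:1 case (the class method _averageDownsampleBuffer handles arbitrary ratios):

// Sketch: 48kHz -> 24kHz, the 2:1 case of the two strategies above.
const decimate = (input: Float32Array): Float32Array => {
    const out = new Float32Array(input.length / 2);
    for (let i = 0; i < input.length; i += 2) {
        out[i / 2] = input[i]; // keep every other sample
    }
    return out;
};
const average = (input: Float32Array): Float32Array => {
    const out = new Float32Array(input.length / 2);
    for (let i = 0; i < input.length; i += 2) {
        const next = i + 1 < input.length ? input[i + 1] : input[i];
        out[i / 2] = (input[i] + next) / 2; // average each adjacent pair
    }
    return out;
};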

View File

@ -4,97 +4,22 @@
// 24000sample -> 1sec, 128sample(1chunk) -> 5.333msec // 24000sample -> 1sec, 128sample(1chunk) -> 5.333msec
// 187.5chunk -> 1sec // 187.5chunk -> 1sec
// types ///////////////////////
export type VoiceChangerServerSetting = { // Server settings
convertChunkNum: number, // Conversion size fed into VITS. (Specify at least twice the input size; anything smaller is automatically raised to twice the input size on the server side.) ///////////////////////
minConvertSize: number, // Inputs smaller than this value are padded up to it. export const InputSampleRate = {
srcId: number,
dstId: number,
gpu: number,
crossFadeLowerValue: number,
crossFadeOffsetRate: number,
crossFadeEndRate: number,
crossFadeOverlapRate: number,
framework: Framework
onnxExecutionProvider: OnnxExecutionProvider,
}
export type VoiceChangerClientSetting = {
audioInput: string | MediaStream | null,
mmvcServerUrl: string,
protocol: Protocol,
sampleRate: SampleRate, // 48000Hz
bufferSize: BufferSize, // 256, 512, 1024, 2048, 4096, 8192, 16384 (for mic stream)
inputChunkNum: number, // n of (256 x n) for send buffer
speakers: Speaker[],
forceVfDisable: boolean,
voiceChangerMode: VoiceChangerMode,
}
export type WorkletSetting = {
numTrancateTreshold: number,
volTrancateThreshold: number,
volTrancateLength: number
}
export type Speaker = {
"id": number,
"name": string,
}
export type ServerInfo = {
status: string
configFile: string,
pyTorchModelFile: string,
onnxModelFile: string,
convertChunkNum: number,
minConvertSize: number,
crossFadeOffsetRate: number,
crossFadeEndRate: number,
crossFadeOverlapRate: number,
gpu: number,
srcId: number,
dstId: number,
framework: Framework,
onnxExecutionProvider: string[]
}
// Consts
export const Protocol = {
"sio": "sio",
"rest": "rest",
} as const
export type Protocol = typeof Protocol[keyof typeof Protocol]
export const VoiceChangerMode = {
"realtime": "realtime",
"near-realtime": "near-realtime",
} as const
export type VoiceChangerMode = typeof VoiceChangerMode[keyof typeof VoiceChangerMode]
export const SampleRate = {
"48000": 48000, "48000": 48000,
"24000": 24000
} as const } as const
export type SampleRate = typeof SampleRate[keyof typeof SampleRate] export type InputSampleRate = typeof InputSampleRate[keyof typeof InputSampleRate]
export const BufferSize = { export const CrossFadeOverlapSize = {
"256": 256,
"512": 512,
"1024": 1024, "1024": 1024,
"2048": 2048, "2048": 2048,
"4096": 4096, "4096": 4096,
"8192": 8192,
"16384": 16384
} as const } as const
export type BufferSize = typeof BufferSize[keyof typeof BufferSize] export type CrossFadeOverlapSize = typeof CrossFadeOverlapSize[keyof typeof CrossFadeOverlapSize]
export const OnnxExecutionProvider = { export const OnnxExecutionProvider = {
"CPUExecutionProvider": "CPUExecutionProvider", "CPUExecutionProvider": "CPUExecutionProvider",
@ -107,78 +32,209 @@ export type OnnxExecutionProvider = typeof OnnxExecutionProvider[keyof typeof On
export const Framework = { export const Framework = {
"PyTorch": "PyTorch", "PyTorch": "PyTorch",
"ONNX": "ONNX", "ONNX": "ONNX",
} } as const
export type Framework = typeof Framework[keyof typeof Framework] export type Framework = typeof Framework[keyof typeof Framework]
export const F0Detector = {
"dio": "dio",
"harvest": "harvest",
} as const
export type F0Detector = typeof F0Detector[keyof typeof F0Detector]
export const ServerSettingKey = { export const ServerSettingKey = {
"srcId": "srcId", "srcId": "srcId",
"dstId": "dstId", "dstId": "dstId",
"convertChunkNum": "convertChunkNum",
"minConvertSize": "minConvertSize",
"gpu": "gpu", "gpu": "gpu",
"crossFadeOffsetRate": "crossFadeOffsetRate", "crossFadeOffsetRate": "crossFadeOffsetRate",
"crossFadeEndRate": "crossFadeEndRate", "crossFadeEndRate": "crossFadeEndRate",
"crossFadeOverlapRate": "crossFadeOverlapRate", "crossFadeOverlapSize": "crossFadeOverlapSize",
"framework": "framework", "framework": "framework",
"onnxExecutionProvider": "onnxExecutionProvider" "onnxExecutionProvider": "onnxExecutionProvider",
"f0Factor": "f0Factor",
"f0Detector": "f0Detector",
"recordIO": "recordIO",
"inputSampleRate": "inputSampleRate",
} as const } as const
export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey] export type ServerSettingKey = typeof ServerSettingKey[keyof typeof ServerSettingKey]
// Defaults
export const DefaultVoiceChangerServerSetting: VoiceChangerServerSetting = {
convertChunkNum: 32, //(★1)
minConvertSize: 0,
srcId: 107,
dstId: 100,
gpu: 0,
crossFadeLowerValue: 0.1,
crossFadeOffsetRate: 0.1,
crossFadeEndRate: 0.9,
crossFadeOverlapRate: 0.5,
framework: "ONNX",
onnxExecutionProvider: "CPUExecutionProvider"
export type VoiceChangerServerSetting = {
srcId: number,
dstId: number,
gpu: number,
crossFadeOffsetRate: number,
crossFadeEndRate: number,
crossFadeOverlapSize: CrossFadeOverlapSize,
framework: Framework
onnxExecutionProvider: OnnxExecutionProvider,
f0Factor: number
f0Detector: F0Detector // dio or harvest
recordIO: number // 0:off, 1:on
inputSampleRate: InputSampleRate
}
export type ServerInfo = VoiceChangerServerSetting & {
status: string
configFile: string,
pyTorchModelFile: string,
onnxModelFile: string,
onnxExecutionProviders: OnnxExecutionProvider[]
}
export const DefaultServerSetting: ServerInfo = {
srcId: 0,
dstId: 101,
gpu: 0,
crossFadeOffsetRate: 0.0,
crossFadeEndRate: 1.0,
crossFadeOverlapSize: CrossFadeOverlapSize[1024],
framework: Framework.PyTorch,
f0Factor: 1.0,
onnxExecutionProvider: OnnxExecutionProvider.CPUExecutionProvider,
f0Detector: F0Detector.dio,
recordIO: 0,
inputSampleRate: 24000,
//
status: "ok",
configFile: "",
pyTorchModelFile: "",
onnxModelFile: "",
onnxExecutionProviders: []
}
///////////////////////
// Worklet settings
///////////////////////
export type WorkletSetting = {
numTrancateTreshold: number,
volTrancateThreshold: number,
volTrancateLength: number
}
export const DefaultWorkletSetting: WorkletSetting = {
numTrancateTreshold: 100,
volTrancateThreshold: 0.0005,
volTrancateLength: 32
}
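For reference, a `WorkletSetting` like the default above ultimately reaches the AudioWorklet as a `config` request (see the `VoiceChangerWorkletProcessor.handleMessage` changes later in this diff). A minimal sketch of posting it, assuming an `AudioWorkletNode` has already been constructed:

```typescript
// Sketch only: mirrors the "config" branch of the worklet processor below.
const sendWorkletConfig = (node: AudioWorkletNode, setting: WorkletSetting) => {
    const req: VoiceChangerWorkletProcessorRequest = {
        requestType: "config",
        voice: new Float32Array(0), // ignored for config requests
        ...setting,
    }
    node.port.postMessage(req)
}
```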
///////////////////////
// Worklet Node settings
///////////////////////
export const Protocol = {
"sio": "sio",
"rest": "rest",
} as const
export type Protocol = typeof Protocol[keyof typeof Protocol]
export const SendingSampleRate = {
"48000": 48000,
"24000": 24000
} as const
export type SendingSampleRate = typeof SendingSampleRate[keyof typeof SendingSampleRate]
export const DownSamplingMode = {
"decimate": "decimate",
"average": "average"
} as const
export type DownSamplingMode = typeof DownSamplingMode[keyof typeof DownSamplingMode]
export type WorkletNodeSetting = {
serverUrl: string,
protocol: Protocol,
sendingSampleRate: SendingSampleRate,
inputChunkNum: number,
downSamplingMode: DownSamplingMode,
}
export const DefaultWorkletNodeSetting: WorkletNodeSetting = {
serverUrl: "",
protocol: "sio",
sendingSampleRate: 24000,
inputChunkNum: 48,
downSamplingMode: "average"
}
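The two `DownSamplingMode` values describe how 48kHz microphone input is reduced to the 24kHz `sendingSampleRate`: `decimate` keeps every other sample, while `average` averages adjacent pairs. A minimal sketch of the difference (not the library's actual implementation):

```typescript
// Halves the sample rate of `input`; assumes an exact 2:1 ratio (e.g. 48000 -> 24000).
const downSampleByHalf = (input: Float32Array, mode: DownSamplingMode): Float32Array => {
    const out = new Float32Array(Math.floor(input.length / 2))
    for (let i = 0; i < out.length; i++) {
        out[i] = mode === "decimate"
            ? input[i * 2]                          // drop every other sample
            : (input[i * 2] + input[i * 2 + 1]) / 2 // average each adjacent pair
    }
    return out
}
```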
///////////////////////
// Client settings
///////////////////////
export const SampleRate = {
"48000": 48000,
} as const
export type SampleRate = typeof SampleRate[keyof typeof SampleRate]
export type Speaker = {
"id": number,
"name": string,
}
export type Correspondence = {
"sid": number,
"correspondence": number,
"dirname": string
}
export type VoiceChangerClientSetting = {
audioInput: string | MediaStream | null,
sampleRate: SampleRate, // 48000Hz
echoCancel: boolean,
noiseSuppression: boolean,
noiseSuppression2: boolean
speakers: Speaker[],
correspondences: Correspondence[],
inputGain: number
outputGain: number
}
export const DefaultVoiceChangerClientSetting: VoiceChangerClientSetting = {
audioInput: null,
sampleRate: 48000,
speakers: [
{
"id": 0,
"name": "user"
},
{
"id": 101,
"name": "ずんだもん"
},
{
"id": 102,
"name": "そら"
},
{
"id": 103,
"name": "めたん"
},
{
"id": 104,
"name": "つむぎ"
}
],
correspondences: [],
echoCancel: false,
noiseSuppression: false,
noiseSuppression2: false,
inputGain: 1.0,
outputGain: 1.0
}
export const DefaultWorkletSetting: WorkletSetting = {
numTrancateTreshold: 188,
volTrancateThreshold: 0.0005,
volTrancateLength: 32
}
////////////////////////////////////
// Exceptions
////////////////////////////////////
export const VOICE_CHANGER_CLIENT_EXCEPTION = {
ERR_SIO_CONNECT_FAILED: "ERR_SIO_CONNECT_FAILED",
ERR_SIO_INVALID_RESPONSE: "ERR_SIO_INVALID_RESPONSE",
@ -192,10 +248,11 @@ export type VOICE_CHANGER_CLIENT_EXCEPTION = typeof VOICE_CHANGER_CLIENT_EXCEPTI
////////////////////////////////////
// indexedDB
////////////////////////////////////
export const INDEXEDDB_DB_APP_NAME = "INDEXEDDB_KEY_VOICE_CHANGER_V.1.5"
export const INDEXEDDB_DB_NAME = "INDEXEDDB_KEY_VOICE_CHANGER_DB_V.1.5"
export const INDEXEDDB_KEY_CLIENT = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_CLIENT"
export const INDEXEDDB_KEY_SERVER = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_SERVER"
export const INDEXEDDB_KEY_WORKLETNODE = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_WORKLETNODE"
export const INDEXEDDB_KEY_MODEL_DATA = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_MODEL_DATA"
export const INDEXEDDB_KEY_WORKLET = "INDEXEDDB_KEY_VOICE_CHANGER_LIB_WORKLET"
@ -2,6 +2,7 @@ import { useEffect, useMemo, useRef, useState } from "react"
import { VoiceChangerClient } from "../VoiceChangerClient"
import { ClientSettingState, useClientSetting } from "./useClientSetting"
import { ServerSettingState, useServerSetting } from "./useServerSetting"
import { useWorkletNodeSetting, WorkletNodeSettingState } from "./useWorkletNodeSetting"
import { useWorkletSetting, WorkletSettingState } from "./useWorkletSetting"
export type UseClientProps = {
@ -10,22 +11,40 @@ export type UseClientProps = {
}
export type ClientState = {
initialized: boolean
// references to the setting interfaces
workletSetting: WorkletSettingState
clientSetting: ClientSettingState
workletNodeSetting: WorkletNodeSettingState
serverSetting: ServerSettingState
// monitoring data
bufferingTime: number;
volume: number;
performance: PerformanceData
// information retrieval
getInfo: () => Promise<void>
// clear settings
clearSetting: () => Promise<void>
}
export type PerformanceData = {
responseTime: number
preprocessTime: number
mainprocessTime: number
postprocessTime: number
}
const InitialPerformanceData: PerformanceData = {
responseTime: 0,
preprocessTime: 0,
mainprocessTime: 0,
postprocessTime: 0
}
export const useClient = (props: UseClientProps): ClientState => {
const [initialized, setInitialized] = useState<boolean>(false)
// (1-1) client
const voiceChangerClientRef = useRef<VoiceChangerClient | null>(null)
const [voiceChangerClient, setVoiceChangerClient] = useState<VoiceChangerClient | null>(voiceChangerClientRef.current)
@ -38,21 +57,20 @@ export const useClient = (props: UseClientProps): ClientState => {
}, [])
// (1-2) setting interfaces
const clientSetting = useClientSetting({ voiceChangerClient, audioContext: props.audioContext })
const workletNodeSetting = useWorkletNodeSetting({ voiceChangerClient })
const workletSetting = useWorkletSetting({ voiceChangerClient })
const serverSetting = useServerSetting({ voiceChangerClient })
// (1-3) monitoring data
const [bufferingTime, setBufferingTime] = useState<number>(0)
const [performance, setPerformance] = useState<PerformanceData>(InitialPerformanceData)
const [volume, setVolume] = useState<number>(0)
// (1-4) error status
const errorCountRef = useRef<number>(0)
// (2-1) initialization
useEffect(() => {
const initialized = async () => {
@ -63,8 +81,12 @@ export const useClient = (props: UseClientProps): ClientState => {
notifySendBufferingTime: (val: number) => {
setBufferingTime(val)
},
notifyResponseTime: (val: number, perf?: number[]) => {
const responseTime = val
const preprocessTime = perf ? Math.ceil(perf[0] * 1000) : 0
const mainprocessTime = perf ? Math.ceil(perf[1] * 1000) : 0
const postprocessTime = perf ? Math.ceil(perf[2] * 1000) : 0
setPerformance({ responseTime, preprocessTime, mainprocessTime, postprocessTime })
},
notifyException: (mes: string) => {
if (mes.length > 0) {
@ -75,8 +97,7 @@ export const useClient = (props: UseClientProps): ClientState => {
errorCountRef.current = 0
}
}
},
{
notifyVolume: (vol: number) => {
setVolume(vol)
}
@ -91,6 +112,7 @@ export const useClient = (props: UseClientProps): ClientState => {
audio.srcObject = voiceChangerClientRef.current.stream
audio.play()
initializedResolveRef.current!()
setInitialized(true)
}
initialized()
}, [props.audioContext])
@ -100,7 +122,7 @@ export const useClient = (props: UseClientProps): ClientState => {
const getInfo = useMemo(() => {
return async () => {
await initializedPromise
await clientSetting.reloadClientSetting() // has no substantive effect
await serverSetting.reloadServerInfo()
}
}, [clientSetting, serverSetting])
@ -108,20 +130,28 @@ export const useClient = (props: UseClientProps): ClientState => {
const clearSetting = async () => {
await clientSetting.clearSetting()
await workletNodeSetting.clearSetting()
await workletSetting.clearSetting()
await serverSetting.clearSetting()
}
return {
initialized,
// references to the setting interfaces
clientSetting,
workletNodeSetting,
workletSetting,
serverSetting,
// monitoring data
bufferingTime,
volume,
performance,
// information retrieval
getInfo,
// clear settings
clearSetting,
}
}
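A short sketch of consuming the hook from application code; only `audioContext` is visible in `UseClientProps` within this hunk, so other props are omitted and the wrapper below is hypothetical:

```typescript
// Sketch: a wrapper hook exposing the monitoring values from ClientState.
const useVoiceChangerMonitor = (audioContext: AudioContext) => {
    const client = useClient({ audioContext })
    return {
        ready: client.initialized,
        // per-stage timings filled in by notifyResponseTime above
        latencyMs: client.performance.responseTime,
        volume: client.volume,
    }
}
```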
@ -1,6 +1,6 @@
import { useState, useMemo, useEffect } from "react"
import { VoiceChangerClientSetting, DefaultVoiceChangerClientSetting, INDEXEDDB_KEY_CLIENT } from "../const"
import { VoiceChangerClient } from "../VoiceChangerClient"
import { useIndexedDB } from "./useIndexedDB"
@ -10,17 +10,10 @@ export type UseClientSettingProps = {
}
export type ClientSettingState = {
clientSetting: VoiceChangerClientSetting;
clearSetting: () => Promise<void>
setServerUrl: (url: string) => void;
updateClientSetting: (clientSetting: VoiceChangerClientSetting) => void
start: () => Promise<void>
stop: () => Promise<void>
@ -28,54 +21,41 @@ export type ClientSettingState = {
}
export const useClientSetting = (props: UseClientSettingProps): ClientSettingState => {
const [clientSetting, setClientSetting] = useState<VoiceChangerClientSetting>(DefaultVoiceChangerClientSetting)
const { setItem, getItem, removeItem } = useIndexedDB()
// initialization step 1: load from DB
useEffect(() => {
const loadCache = async () => {
const setting = await getItem(INDEXEDDB_KEY_CLIENT) as VoiceChangerClientSetting
if (!setting) {
return
}
console.log("[ClientSetting] Load Setting from db", setting)
if (setting.audioInput == "null") {
setting.audioInput = null
}
if (setting) {
setClientSetting({ ...setting })
}
}
loadCache()
}, [])
// initialization step 2: apply to the client
useEffect(() => {
if (!props.voiceChangerClient) return
props.voiceChangerClient.updateClientSetting(clientSetting)
}, [props.voiceChangerClient])
const storeSetting = async (setting: VoiceChangerClientSetting) => {
const storeData = { ...setting }
if (typeof storeData.audioInput != "string") {
storeData.audioInput = null
}
setItem(INDEXEDDB_KEY_CLIENT, storeData)
setClientSetting(setting)
}
const clearSetting = async () => {
@ -85,99 +65,28 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
//////////////
// settings
/////////////
const updateClientSetting = useMemo(() => {
return (_clientSetting: VoiceChangerClientSetting) => {
if (!props.voiceChangerClient) return
for (let k in _clientSetting) {
const cur_v = clientSetting[k as keyof VoiceChangerClientSetting]
const new_v = _clientSetting[k as keyof VoiceChangerClientSetting]
if (cur_v != new_v) {
storeSetting(_clientSetting)
props.voiceChangerClient.updateClientSetting(_clientSetting)
break
}
}
}
}, [props.voiceChangerClient, clientSetting])
const setServerUrl = useMemo(() => {
return (url: string) => {
if (!props.voiceChangerClient) return
props.voiceChangerClient.setServerUrl(url, true)
}
}, [props.voiceChangerClient])
const setProtocol = useMemo(() => {
return (proto: Protocol) => {
if (!props.voiceChangerClient) return
props.voiceChangerClient.setProtocol(proto)
settingRef.current.protocol = proto
setSetting({ ...settingRef.current })
}
}, [props.voiceChangerClient])
const _setInput = async () => {
if (!props.voiceChangerClient) return
// console.log("[useClient] setup!(0)", settingRef.current.audioInput)
if (!settingRef.current.audioInput || settingRef.current.audioInput == "none") {
// console.log("[useClient] setup!(1)", settingRef.current.audioInput)
const ms = createDummyMediaStream(props.audioContext!)
await props.voiceChangerClient.setup(ms, settingRef.current.bufferSize, settingRef.current.forceVfDisable)
} else {
// console.log("[useClient] setup!(2)", settingRef.current.audioInput)
await props.voiceChangerClient.setup(settingRef.current.audioInput, settingRef.current.bufferSize, settingRef.current.forceVfDisable)
}
}
const setAudioInput = useMemo(() => {
return async (audioInput: string | MediaStream | null) => {
if (!props.voiceChangerClient) return
settingRef.current.audioInput = audioInput
await _setInput()
setSetting({ ...settingRef.current })
}
}, [props.voiceChangerClient])
const setBufferSize = useMemo(() => {
return async (bufferSize: BufferSize) => {
if (!props.voiceChangerClient) return
settingRef.current.bufferSize = bufferSize
await _setInput()
setSetting({ ...settingRef.current })
}
}, [props.voiceChangerClient])
const setVfForceDisabled = useMemo(() => {
return async (vfForceDisabled: boolean) => {
if (!props.voiceChangerClient) return
settingRef.current.forceVfDisable = vfForceDisabled
await _setInput()
setSetting({ ...settingRef.current })
}
}, [props.voiceChangerClient])
const setInputChunkNum = useMemo(() => {
return (num: number) => {
if (!props.voiceChangerClient) return
props.voiceChangerClient.setInputChunkNum(num)
settingRef.current.inputChunkNum = num
setSetting({ ...settingRef.current })
}
}, [props.voiceChangerClient])
const setVoiceChangerMode = useMemo(() => {
return (mode: VoiceChangerMode) => {
if (!props.voiceChangerClient) return
props.voiceChangerClient.setVoiceChangerMode(mode)
settingRef.current.voiceChangerMode = mode
setSetting({ ...settingRef.current })
}
}, [props.voiceChangerClient])
const setSampleRate = useMemo(() => {
return (num: SampleRate) => {
if (!props.voiceChangerClient) return
//props.voiceChangerClient.setSampleRate(num) // Not Implemented
settingRef.current.sampleRate = num
setSetting({ ...settingRef.current })
}
}, [props.voiceChangerClient])
const setSpeakers = useMemo(() => {
return (speakers: Speaker[]) => {
if (!props.voiceChangerClient) return
settingRef.current.speakers = speakers
setSetting({ ...settingRef.current })
}
}, [props.voiceChangerClient])
//////////////
// operations
@ -186,10 +95,10 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
const start = useMemo(() => {
return async () => {
if (!props.voiceChangerClient) return
// props.voiceChangerClient.setServerUrl(setting.mmvcServerUrl, true)
props.voiceChangerClient.start()
}
}, [props.voiceChangerClient])
// (2) stop
const stop = useMemo(() => {
return async () => {
@ -204,19 +113,11 @@ export const useClientSetting = (props: UseClientSettingProps): ClientSettingSta
}
}, [props.voiceChangerClient])
return {
clientSetting,
clearSetting,
setServerUrl,
updateClientSetting,
start,
stop,
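`updateClientSetting` replaces the per-field setters removed above: callers pass a whole `VoiceChangerClientSetting` snapshot, and only a detected key difference triggers persistence and a push to the client. A sketch of a caller (the helper name is hypothetical):

```typescript
const toggleNoiseSuppression = (state: ClientSettingState) => {
    const next = {
        ...state.clientSetting,
        noiseSuppression: !state.clientSetting.noiseSuppression,
    }
    // Stored to IndexedDB and forwarded to the client only if a key changed.
    state.updateClientSetting(next)
}
```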
@ -1,15 +1,9 @@
import { useState, useMemo, useEffect } from "react"
import { VoiceChangerServerSetting, ServerInfo, ServerSettingKey, INDEXEDDB_KEY_SERVER, INDEXEDDB_KEY_MODEL_DATA, DefaultServerSetting } from "../const"
import { VoiceChangerClient } from "../VoiceChangerClient"
import { useIndexedDB } from "./useIndexedDB"
// export type FileUploadSetting = {
// pyTorchModel: File | null
// configFile: File | null
// onnxModel: File | null
// }
type ModelData = {
file?: File
data?: ArrayBuffer
@ -22,7 +16,6 @@ export type FileUploadSetting = {
configFile: ModelData | null
}
const InitialFileUploadSetting: FileUploadSetting = {
pyTorchModel: null,
configFile: null,
@ -34,44 +27,34 @@ export type UseServerSettingProps = {
}
export type ServerSettingState = {
serverSetting: ServerInfo
updateServerSettings: (setting: ServerInfo) => Promise<void>
clearSetting: () => Promise<void>
reloadServerInfo: () => Promise<void>;
fileUploadSetting: FileUploadSetting
setFileUploadSetting: (val: FileUploadSetting) => void
loadModel: () => Promise<void>
uploadProgress: number
isUploading: boolean
}
export const useServerSetting = (props: UseServerSettingProps): ServerSettingState => {
// const settingRef = useRef<VoiceChangerServerSetting>(DefaultVoiceChangerServerSetting)
const [serverSetting, setServerSetting] = useState<ServerInfo>(DefaultServerSetting)
const [fileUploadSetting, setFileUploadSetting] = useState<FileUploadSetting>(InitialFileUploadSetting)
const { setItem, getItem, removeItem } = useIndexedDB()
// load settings from DB (initialize from cache)
useEffect(() => {
const loadCache = async () => {
const setting = await getItem(INDEXEDDB_KEY_SERVER)
if (!setting) {
} else {
setServerSetting(setting as ServerInfo)
}
const fileuploadSetting = await getItem(INDEXEDDB_KEY_MODEL_DATA)
if (!fileuploadSetting) {
@ -82,114 +65,47 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
loadCache()
}, [])
// apply settings to the client (reflect cache)
useEffect(() => {
if (!props.voiceChangerClient) return
for (let i = 0; i < Object.values(ServerSettingKey).length; i++) {
const k = Object.values(ServerSettingKey)[i] as keyof VoiceChangerServerSetting
const v = serverSetting[k]
if (v) {
props.voiceChangerClient.updateServerSettings(k, "" + v)
}
}
reloadServerInfo()
}, [props.voiceChangerClient])
//////////////
// settings
/////////////
const updateServerSettings = useMemo(() => {
return async (setting: ServerInfo) => {
if (!props.voiceChangerClient) return
for (let i = 0; i < Object.values(ServerSettingKey).length; i++) {
const k = Object.values(ServerSettingKey)[i] as keyof VoiceChangerServerSetting
const cur_v = serverSetting[k]
const new_v = setting[k]
if (cur_v != new_v) {
const res = await props.voiceChangerClient.updateServerSettings(k, "" + new_v)
if (res.onnxExecutionProviders.length > 0) {
res.onnxExecutionProvider = res.onnxExecutionProviders[0]
} else {
res.onnxExecutionProvider = "CPUExecutionProvider"
}
setServerSetting(res)
const storeData = { ...res }
storeData.recordIO = 0
setItem(INDEXEDDB_KEY_SERVER, storeData)
}
}
}
}, [props.voiceChangerClient, serverSetting])
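`updateServerSettings` applies the same snapshot-diff pattern on the server side: it compares the incoming `ServerInfo` against the current one key by key and sends only the keys that changed. A sketch of flipping a single field (the helper is hypothetical):

```typescript
const selectGpu = async (state: ServerSettingState, gpu: number) => {
    // Spread the current snapshot and override one key; the hook diffs per key.
    await state.updateServerSettings({ ...state.serverSetting, gpu })
}
```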
const setFramework = useMemo(() => {
return async (framework: Framework) => {
return await _set_and_store(ServerSettingKey.framework, "" + framework)
}
}, [props.voiceChangerClient])
const setOnnxExecutionProvider = useMemo(() => {
return async (provider: OnnxExecutionProvider) => {
return await _set_and_store(ServerSettingKey.onnxExecutionProvider, "" + provider)
}
}, [props.voiceChangerClient])
const setSrcId = useMemo(() => {
return async (num: number) => {
return await _set_and_store(ServerSettingKey.srcId, "" + num)
}
}, [props.voiceChangerClient])
const setDstId = useMemo(() => {
return async (num: number) => {
return await _set_and_store(ServerSettingKey.dstId, "" + num)
}
}, [props.voiceChangerClient])
const setConvertChunkNum = useMemo(() => {
return async (num: number) => {
return await _set_and_store(ServerSettingKey.convertChunkNum, "" + num)
}
}, [props.voiceChangerClient])
const setMinConvertSize = useMemo(() => {
return async (num: number) => {
return await _set_and_store(ServerSettingKey.minConvertSize, "" + num)
}
}, [props.voiceChangerClient])
const setGpu = useMemo(() => {
return async (num: number) => {
return await _set_and_store(ServerSettingKey.gpu, "" + num)
}
}, [props.voiceChangerClient])
const setCrossFadeOffsetRate = useMemo(() => {
return async (num: number) => {
return await _set_and_store(ServerSettingKey.crossFadeOffsetRate, "" + num)
}
}, [props.voiceChangerClient])
const setCrossFadeEndRate = useMemo(() => {
return async (num: number) => {
return await _set_and_store(ServerSettingKey.crossFadeEndRate, "" + num)
}
}, [props.voiceChangerClient])
const setCrossFadeOverlapRate = useMemo(() => {
return async (num: number) => {
return await _set_and_store(ServerSettingKey.crossFadeOverlapRate, "" + num)
}
}, [props.voiceChangerClient])
//////////////
// operations
@ -267,69 +183,16 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
}
}, [fileUploadSetting, props.voiceChangerClient])
// const _uploadFile = useMemo(() => {
// return async (file: File, onprogress: (progress: number, end: boolean) => void) => {
// if (!props.voiceChangerClient) return
// const num = await props.voiceChangerClient.uploadFile(file, onprogress)
// const res = await props.voiceChangerClient.concatUploadedFile(file, num)
// console.log("uploaded", num, res)
// }
// }, [props.voiceChangerClient])
// const loadModel = useMemo(() => {
// return async () => {
// if (!fileUploadSetting.pyTorchModel && !fileUploadSetting.onnxModel) {
// alert("PyTorchモデルとONNXモデルのどちらか一つ以上指定する必要があります。")
// return
// }
// if (!fileUploadSetting.configFile) {
// alert("Configファイルを指定する必要があります。")
// return
// }
// if (!props.voiceChangerClient) return
// setUploadProgress(0)
// setIsUploading(true)
// const models = [fileUploadSetting.pyTorchModel, fileUploadSetting.onnxModel].filter(x => { return x != null }) as File[]
// for (let i = 0; i < models.length; i++) {
// const progRate = 1 / models.length
// const progOffset = 100 * i * progRate
// await _uploadFile(models[i], (progress: number, _end: boolean) => {
// // console.log(progress * progRate + progOffset, end, progRate,)
// setUploadProgress(progress * progRate + progOffset)
// })
// }
// await _uploadFile(fileUploadSetting.configFile, (progress: number, end: boolean) => {
// console.log(progress, end)
// })
// await props.voiceChangerClient.loadModel(fileUploadSetting.configFile, fileUploadSetting.pyTorchModel, fileUploadSetting.onnxModel)
// setUploadProgress(0)
// setIsUploading(false)
// reloadServerInfo()
// }
// }, [fileUploadSetting, props.voiceChangerClient])
const reloadServerInfo = useMemo(() => {
return async () => {
if (!props.voiceChangerClient) return
const res = await props.voiceChangerClient.getServerSettings()
setServerSetting(res)
const storeData = { ...res }
storeData.recordIO = 0
setItem(INDEXEDDB_KEY_SERVER, storeData)
}
}, [props.voiceChangerClient])
@ -340,21 +203,12 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
return {
serverSetting,
updateServerSettings,
clearSetting,
reloadServerInfo,
fileUploadSetting,
setFileUploadSetting,
loadModel,
uploadProgress,
@ -0,0 +1,90 @@
import { useState, useMemo, useEffect } from "react"
import { DefaultWorkletNodeSetting, INDEXEDDB_KEY_WORKLETNODE, WorkletNodeSetting } from "../const"
import { VoiceChangerClient } from "../VoiceChangerClient"
import { useIndexedDB } from "./useIndexedDB"
export type UseWorkletNodeSettingProps = {
voiceChangerClient: VoiceChangerClient | null
}
export type WorkletNodeSettingState = {
workletNodeSetting: WorkletNodeSetting;
clearSetting: () => Promise<void>
updateWorkletNodeSetting: (setting: WorkletNodeSetting) => void
startOutputRecording: () => void
stopOutputRecording: () => Promise<Float32Array>
}
export const useWorkletNodeSetting = (props: UseWorkletNodeSettingProps): WorkletNodeSettingState => {
const [workletNodeSetting, _setWorkletNodeSetting] = useState<WorkletNodeSetting>(DefaultWorkletNodeSetting)
const { setItem, getItem, removeItem } = useIndexedDB()
// initialization step 1: load from DB
useEffect(() => {
const loadCache = async () => {
const setting = await getItem(INDEXEDDB_KEY_WORKLETNODE) as WorkletNodeSetting
if (setting) {
_setWorkletNodeSetting(setting)
}
}
loadCache()
}, [])
// initialization step 2: apply to the client
useEffect(() => {
if (!props.voiceChangerClient) return
props.voiceChangerClient.setServerUrl(workletNodeSetting.serverUrl)
props.voiceChangerClient.updateWorkletNodeSetting(workletNodeSetting)
}, [props.voiceChangerClient])
const clearSetting = async () => {
await removeItem(INDEXEDDB_KEY_WORKLETNODE)
}
//////////////
// settings
/////////////
const updateWorkletNodeSetting = useMemo(() => {
return (_workletNodeSetting: WorkletNodeSetting) => {
if (!props.voiceChangerClient) return
for (let k in _workletNodeSetting) {
const cur_v = workletNodeSetting[k as keyof WorkletNodeSetting]
const new_v = _workletNodeSetting[k as keyof WorkletNodeSetting]
if (cur_v != new_v) {
_setWorkletNodeSetting(_workletNodeSetting)
setItem(INDEXEDDB_KEY_WORKLETNODE, _workletNodeSetting)
props.voiceChangerClient.updateWorkletNodeSetting(_workletNodeSetting)
break
}
}
}
}, [props.voiceChangerClient, workletNodeSetting])
const startOutputRecording = useMemo(() => {
return () => {
if (!props.voiceChangerClient) return
props.voiceChangerClient.startOutputRecording()
}
}, [props.voiceChangerClient])
const stopOutputRecording = useMemo(() => {
return async () => {
if (!props.voiceChangerClient) return new Float32Array()
return props.voiceChangerClient.stopOutputRecording()
}
}, [props.voiceChangerClient])
return {
workletNodeSetting,
clearSetting,
updateWorkletNodeSetting,
startOutputRecording,
stopOutputRecording
}
}
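A sketch of driving the recording API of this hook; the timed wrapper below is hypothetical:

```typescript
// Records the converted output for `ms` milliseconds and resolves with the samples.
const useTimedRecording = (client: VoiceChangerClient | null) => {
    const { startOutputRecording, stopOutputRecording } = useWorkletNodeSetting({ voiceChangerClient: client })
    return async (ms: number): Promise<Float32Array> => {
        startOutputRecording()
        await new Promise(resolve => setTimeout(resolve, ms))
        return stopOutputRecording()
    }
}
```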
@ -11,12 +11,13 @@ export type WorkletSettingState = {
setting: WorkletSetting;
clearSetting: () => Promise<void>
setSetting: (setting: WorkletSetting) => void;
}
export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSettingState => {
const [setting, _setSetting] = useState<WorkletSetting>(DefaultWorkletSetting)
const { setItem, getItem, removeItem } = useIndexedDB()
// load settings from DB (initialize from cache)
useEffect(() => {
const loadCache = async () => {
const setting = await getItem(INDEXEDDB_KEY_WORKLET)
@ -32,7 +33,7 @@ export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSetting
})
} else {
_setSetting({
numTrancateTreshold: 100,
volTrancateThreshold: 0.0005,
volTrancateLength: 32,
})
@ -46,27 +47,31 @@ export const useWorkletSetting = (props: UseWorkletSettingProps): WorkletSetting
loadCache()
}, [])
// apply settings to the client (on initialization and on change)
useEffect(() => {
if (!props.voiceChangerClient) return
props.voiceChangerClient.configureWorklet(setting)
}, [props.voiceChangerClient, setting])
// settings: _setSetting triggers the useEffect above, which pushes the setting to the worklet
const setSetting = useMemo(() => {
return (setting: WorkletSetting) => {
if (!props.voiceChangerClient) return
_setSetting(setting)
setItem(INDEXEDDB_KEY_WORKLET, setting)
}
}, [props.voiceChangerClient])
// other operations
const clearSetting = async () => {
await removeItem(INDEXEDDB_KEY_WORKLET)
}
return {
setting,
clearSetting,
setSetting,
}
}
@ -59,7 +59,7 @@ export const fileSelectorAsDataURL = async (regex: string) => {
export const validateUrl = (url: string) => {
if (url?.endsWith("/")) {
return url.substring(0, url.length - 1)
}
return url
@ -4,7 +4,6 @@
"declaration": true, "declaration": true,
"outDir": "./dist", "outDir": "./dist",
/* */ /* */
"forceConsistentCasingInFileNames": true, "forceConsistentCasingInFileNames": true,
@ -1,7 +1,7 @@
{
"compilerOptions": {
"target": "ES2020",
"lib": ["ES2020"],
"outDir": "./worklet/dist",
"declaration": true,
/* */
@ -4,6 +4,9 @@ module.exports = {
entry: "./src/index.ts", entry: "./src/index.ts",
resolve: { resolve: {
extensions: [".ts", ".js"], extensions: [".ts", ".js"],
fallback: {
// "buffer": false
}
},
module: {
rules: [
@ -26,12 +29,6 @@ module.exports = {
libraryTarget: "umd", libraryTarget: "umd",
globalObject: "typeof self !== 'undefined' ? self : this", globalObject: "typeof self !== 'undefined' ? self : this",
}, },
plugins: [
new webpack.ProvidePlugin({
Buffer: ["buffer", "Buffer"],
process: "process/browser",
}),
],
externals: {
react: "react",
"react-dom": "reactDOM",
@ -1,17 +1,35 @@
export const RequestType = {
"voice": "voice",
"config": "config",
"start": "start",
"stop": "stop"
} as const
export type RequestType = typeof RequestType[keyof typeof RequestType]
export const ResponseType = {
"volume": "volume",
"inputData": "inputData"
} as const
export type ResponseType = typeof ResponseType[keyof typeof ResponseType]
export type VoiceChangerWorkletProcessorRequest = {
requestType: RequestType,
voice: Float32Array,
numTrancateTreshold: number
volTrancateThreshold: number
volTrancateLength: number
}
export type VoiceChangerWorkletProcessorResponse = {
responseType: ResponseType,
volume?: number,
recordData?: Float32Array[]
inputData?: Float32Array
}
class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
private BLOCK_SIZE = 128
private initialized = false;
@ -21,6 +39,8 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
private volTrancateLength = 32
private volTrancateCount = 0
private isRecording = false
playBuffer: Float32Array[] = []
/**
* @constructor
@ -40,25 +60,29 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
}
handleMessage(event: any) {
const request = event.data as VoiceChangerWorkletProcessorRequest
if (request.requestType === "config") {
this.numTrancateTreshold = request.numTrancateTreshold
this.volTrancateLength = request.volTrancateLength
this.volTrancateThreshold = request.volTrancateThreshold
console.log("[worklet] worklet configured", request)
return
} else if (request.requestType === "start") {
if (this.isRecording) {
console.warn("[worklet] recoring is already started")
return
}
this.isRecording = true
return
} else if (request.requestType === "stop") {
if (!this.isRecording) {
console.warn("[worklet] recoring is not started")
return
}
this.isRecording = false
return
} }
const arrayBuffer = request.voice
// data is received as int16
const i16Data = new Int16Array(arrayBuffer)
const f32Data = new Float32Array(i16Data.length)
// console.log(`[worklet] f32DataLength${f32Data.length} i16DataLength${i16Data.length}`)
i16Data.forEach((x, i) => {
const float = (x >= 0x8000) ? -(0x10000 - x) / 0x8000 : x / 0x7FFF;
f32Data[i] = float
})
if (this.playBuffer.length > this.numTrancateTreshold) {
console.log("[worklet] Buffer truncated")
while (this.playBuffer.length > 2) {
@ -66,31 +90,35 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
}
}
const f32Data = request.voice
const chunkNum = f32Data.length / this.BLOCK_SIZE
for (let i = 0; i < chunkNum; i++) {
const block = f32Data.slice(i * this.BLOCK_SIZE, (i + 1) * this.BLOCK_SIZE)
this.playBuffer.push(block)
}
}
pushData = (inputData: Float32Array) => {
const volumeResponse: VoiceChangerWorkletProcessorResponse = {
responseType: ResponseType.inputData,
inputData: inputData
}
this.port.postMessage(volumeResponse);
}
process(_inputs: Float32Array[][], outputs: Float32Array[][], _parameters: Record<string, Float32Array>) {
if (!this.initialized) {
console.warn("[worklet] worklet_process not ready");
return true;
}
if (this.isRecording) {
if (_inputs.length > 0 && _inputs[0].length > 0) {
this.pushData(_inputs[0][0])
}
}
if (this.playBuffer.length === 0) {
// console.log("[worklet] no play buffer")
return true
@ -111,16 +139,23 @@ class VoiceChangerWorkletProcessor extends AudioWorkletProcessor {
}
// disabled: as of v.1.5.0, silent skip caused audio dropouts
if (this.volTrancateCount < this.volTrancateLength || this.volTrancateLength < 0) {
break
} else {
break
// console.log("silent...skip")
}
}
if (voice) {
const volumeResponse: VoiceChangerWorkletProcessorResponse = {
responseType: ResponseType.volume,
volume: this.volume
}
this.port.postMessage(volumeResponse);
outputs[0][0].set(voice)
outputs[0][1].set(voice)
}
return true;
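With this change the processor expects `voice` as a Float32Array whose length is a multiple of the 128-sample render quantum; `handleMessage` slices it into blocks for the play buffer. A sketch of feeding it from the main thread (node construction assumed):

```typescript
const sendVoice = (node: AudioWorkletNode, f32Data: Float32Array) => {
    const req: VoiceChangerWorkletProcessorRequest = {
        requestType: "voice",
        voice: f32Data,
        // The trancate/vol fields are only read by "config" requests.
        numTrancateTreshold: 0,
        volTrancateThreshold: 0,
        volTrancateLength: 0,
    }
    node.port.postMessage(req, [f32Data.buffer]) // transfer the buffer to avoid a copy
}
```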
@ -1,6 +0,0 @@
{
"tabWidth": 4,
"useTabs": false,
"semi": true,
"printWidth": 360
}
@ -1,22 +0,0 @@
module.exports = {
packagerConfig: {},
rebuildConfig: {},
makers: [
{
name: '@electron-forge/maker-squirrel',
config: {},
},
{
name: '@electron-forge/maker-zip',
platforms: ['darwin'],
},
{
name: '@electron-forge/maker-deb',
config: {},
},
{
name: '@electron-forge/maker-rpm',
config: {},
},
],
};
@ -1,54 +0,0 @@
const { app, dialog, BrowserWindow } = require('electron')
const parseArgs = require('electron-args');
const cli = parseArgs(`
voice-changer-native-client
Usage
$ <command> <url>
Options
--help show help
--version show version
--url,-u open client
Examples
$ voice-changer-native-client http://localhost:18888/
`, {
alias: {
u: 'url'
},
default: {
url: "http://localhost:18888/"
}
});
console.log(cli.flags);
console.log(cli.flags["url"]);
const url = cli.flags["url"]
const createWindow = () => {
const win = new BrowserWindow({
width: 800,
height: 600
})
app.on('certificate-error', function (event, webContents, url, error, certificate, callback) {
event.preventDefault();
callback(true);
});
win.loadURL(url)
}
app.whenReady().then(() => {
createWindow()
app.on('activate', () => {
if (BrowserWindow.getAllWindows().length === 0) createWindow()
})
})
app.on('window-all-closed', () => {
if (process.platform !== 'darwin') app.quit()
})

File diff suppressed because it is too large
@ -1,29 +0,0 @@
{
"name": "voice-changer-native-client",
"version": "1.0.0",
"description": "",
"main": "main.js",
"scripts": {
"start": "electron-forge start",
"test": "echo \"Error: no test specified\" && exit 1",
"package": "electron-forge package",
"make": "electron-forge make"
},
"author": "",
"license": "ISC",
"devDependencies": {
"@electron-forge/cli": "^6.0.4",
"@electron-forge/maker-deb": "^6.0.4",
"@electron-forge/maker-rpm": "^6.0.4",
"@electron-forge/maker-squirrel": "^6.0.4",
"@electron-forge/maker-zip": "^6.0.4",
"electron": "^22.1.0",
"prettier": "^2.8.3",
"rimraf": "^4.1.1",
"typescript": "^4.9.4"
},
"dependencies": {
"electron-args": "^0.1.0",
"electron-squirrel-startup": "^1.0.0"
}
}
@ -0,0 +1,31 @@
import pyaudio
import json
if __name__ == '__main__':
audio = pyaudio.PyAudio()
audio_input_devices = []
audio_output_devices = []
audio_devices = {}
host_apis = []
for api_index in range(audio.get_host_api_count()):
host_apis.append(audio.get_host_api_info_by_index(api_index)['name'])
for x in range(0, audio.get_device_count()):
device = audio.get_device_info_by_index(x)
try:
deviceName = device['name'].encode('shift-jis').decode('utf-8')
except (UnicodeDecodeError, UnicodeEncodeError):
deviceName = device['name']
deviceIndex = device['index']
hostAPI = host_apis[device['hostApi']]
if device['maxInputChannels'] > 0:
audio_input_devices.append({"kind": "audioinput", "index": deviceIndex, "name": deviceName, "hostAPI": hostAPI})
if device['maxOutputChannels'] > 0:
audio_output_devices.append({"kind": "audiooutput", "index": deviceIndex, "name": deviceName, "hostAPI": hostAPI})
audio_devices["audio_input_devices"] = audio_input_devices
audio_devices["audio_output_devices"] = audio_output_devices
json_str = json.dumps(audio_devices, indent=2, ensure_ascii=True)
print(json_str)
client/python/vc_client.py Normal file
@ -0,0 +1,155 @@
import argparse
import pyaudio
import wave
import struct
import socketio
import ssl
from datetime import datetime
import time
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
import signal
import sys
import numpy as np
BUFFER_SIZE = 2048
def setupArgParser():
parser = argparse.ArgumentParser()
parser.add_argument("--url", type=str, default="http://localhost:18888", help="url")
parser.add_argument("--input", type=int, required=True, help="input device index")
parser.add_argument("--output", type=int, default=-1, help="input device index")
parser.add_argument("--to", type=str, default="", help="sid")
return parser
class MockStream:
def __init__(self, sampling_rate):
self.sampling_rate = sampling_rate
self.start_count = 2
self.end_count = 2
self.fr = None
self.fw = None
def open_inputfile(self, input_filename):
self.fr = wave.open(input_filename, 'rb')
def open_outputfile(self, output_filename):
self.fw = wave.open(output_filename, 'wb')
self.fw.setnchannels(1)
self.fw.setsampwidth(2)
self.fw.setframerate(self.sampling_rate)
def read(self, length, exception_on_overflow=False):
if self.start_count > 0:
wav = bytes(length * 2)
self.start_count -= 1
else:
wav = self.fr.readframes(length)
if len(wav) <= 0:
wav = bytes(length * 2)
self.end_count -= 1
if self.end_count < 0:
Hyperparameters.VC_END_FLAG = True
return wav
def write(self, wav):
self.fw.writeframes(wav)
def stop_stream(self):
pass
def close(self):
if self.fr != None:
self.fr.close()
self.fr = None
if self.fw != None:
self.fw.close()
self.fw = None
class MyCustomNamespace(socketio.ClientNamespace):
def __init__(self, namespace: str, audio_output_stream, file_output_stream):
super().__init__(namespace)
self.audio_output_stream = audio_output_stream
self.file_output_stream = file_output_stream
def on_connect(self):
print(f'connected')
def on_disconnect(self):
print(f'disconnected')
def on_response(self, msg):
timestamp = msg[0]
responseTime = time.time() * 1000 - timestamp
data = msg[1]
perf = msg[2]
print(f"RT:{responseTime}msec", perf)
# unpackedData = struct.unpack('<%sh' % (len(data) // struct.calcsize('<h')), data)
if self.file_output_stream != None:
self.file_output_stream.write(data)
if self.audio_output_stream != None:
self.audio_output_stream.write(data)
if __name__ == '__main__':
parser = setupArgParser()
args, unknown = parser.parse_known_args()
url = args.url
inputDevice = args.input
outputDevice = args.output
to = args.to
audio = pyaudio.PyAudio()
audio_input_stream = audio.open(
format=pyaudio.paInt16,
channels=1,
rate=24000,
frames_per_buffer=BUFFER_SIZE,
input_device_index=inputDevice,
input=True)
print("output device", outputDevice)
if outputDevice >= 0:
audio_output_stream = audio.open(
format=pyaudio.paInt16,
channels=1,
rate=24000,
frames_per_buffer=BUFFER_SIZE,
output_device_index=outputDevice,
output=True)
else:
audio_output_stream = None
# mock_stream_out = MockStream(24000)
# mock_stream_out.open_outputfile("test.wav")
mock_stream_out = None
# mock_stream_in = MockStream(24000)
# mock_stream_in.open_outputfile("test_in.wav")
my_namespace = MyCustomNamespace("/test", audio_output_stream, mock_stream_out)
sio = socketio.Client(ssl_verify=False)
sio.register_namespace(my_namespace)
sio.connect(url)
try:
while True:
in_wav = audio_input_stream.read(BUFFER_SIZE, exception_on_overflow=False)
sio.emit('request_message', [time.time() * 1000, in_wav], namespace="/test")
except KeyboardInterrupt:
audio_input_stream.stop_stream()
audio_input_stream.close()
if audio_output_stream != None:
audio_output_stream.stop_stream()
audio_output_stream.close()
audio.terminate()
if mock_stream_out != None:
mock_stream_out.close()
@ -23,7 +23,7 @@ RUN cd MMVC_Client && git checkout 04f3fec4fd82dea6657026ec4e1cd80fb29a415c && c
WORKDIR /
ADD dummy /
RUN git clone --depth 1 https://github.com/w-okada/voice-changer.git
#########
docker_trainer/Dockerfile Normal file
@ -0,0 +1,56 @@
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04 as base
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get -y update && apt-get install -y emacs git curl python3-pip libsndfile1-dev ffmpeg
RUN pip install torch==1.13.1
RUN pip install torchaudio==0.13.1
RUN pip install pydub==0.25.1
RUN pip install tqdm==4.64.1
RUN pip install librosa==0.9.2
#RUN pip install librosa==0.8.1
RUN pip install psutil==5.9.4
#RUN pip install psutil==5.4.8
RUN pip install tensorboard==2.11.2
RUN pip install pytz==2022.7.1
RUN pip install pyworld==0.3.2
RUN pip install retry==0.9.2
#RUN pip install numpy==1.21.6
RUN pip install h5py==3.8.0
#RUN pip install h5py==3.1.0
RUN pip install matplotlib==3.6.3
#RUN pip install matplotlib==3.2.2
RUN pip install onnx==1.13.0
RUN pip install onnxruntime==1.14.0
RUN pip install onnxsim==0.4.17
ADD /warmup.py /
RUN python3 warmup.py
ADD dummy /
RUN git clone -b v1.5.0.0_SiFiGAN https://github.com/isletennos/MMVC_Trainer.git
WORKDIR /MMVC_Trainer/
#RUN git checkout c242d3d1cf7f768af70d9735082ca2bdd90c45f3
RUN git checkout 8cca023f5f709c70c2c2fc3e880cb1a119e18f44
RUN git clone https://github.com/isletennos/MMVC_Client.git
WORKDIR /MMVC_Trainer/MMVC_Client
#RUN git checkout 3374a1177b73e3f6d600e5dbe93af033c36ee120
RUN git checkout 1424609e53c79e2d629add10ae4bfb16fc0c3c82
WORKDIR /
# ↓ If the test scripts are placed in the Trainer root, they conflict with models.
ADD /scripts /MMVC_Trainer/MMVC_Client/python/
ADD /model/D_v15_best.pth /MMVC_Trainer/fine_model/
ADD /model/G_v15_best.pth /MMVC_Trainer/fine_model/
RUN cp -r /MMVC_Trainer/configs /MMVC_Trainer/configs_org
WORKDIR /MMVC_Trainer/
docker_trainer/README.md Normal file
@ -0,0 +1,69 @@
MMVC Server
----
# How to launch
(1) Place the dataset under `trainer/dataset`
```sh
trainer/dataset/
├── 00_myvoice
│   ├── text
│   │   ├── emotion001.txt
│   │   ├── emotion002.txt
...
│   │   └── emotion100.txt
│   └── wav
│   ├── emotion001.wav
│   ├── emotion002.wav
...
│   └── emotion100.wav
├── 1205_zundamon
│   ├── text
│   │   ├── emoNormal_001.txt
│   │   ├── emoNormal_002.txt
...
│   │   └── emoNormal_100.txt
│   └── wav
│   ├── emoNormal_001.wav
│   ├── emoNormal_002.wav
...
│   └── emoNormal_100.wav
├── 344_tsumugi
│   ├── text
│   │   ├── VOICEACTRESS100_001.txt
│   │   ├── VOICEACTRESS100_002.txt
...
│   │   └── emoNormal_100.txt
│   └── wav
│   ├── VOICEACTRESS100_001.wav
│   ├── VOICEACTRESS100_002.wav
...
│   └── emoNormal_100.wav
└── multi_speaker_correspondence.txt
```
(2) Copy start_trainer.sh to the repository root
(3) Run `bash start_trainer.sh`
(4) Run the following commands inside Docker
Adjust the batch size as needed.
```sh
$ cp configs_org/baseconfig.json configs/
$ python3 normalize.py True
$ python3 create_dataset.py -f train_config -s 24000 -m dataset/multi_speaker_correspondence.txt
$ tensorboard --logdir logs --port 5000 --bind_all &
# change the batch size as needed
$ python3 train_ms.py -c configs/train_config.json -m 20220306_24000 -fg fine_model/G_v15_best.pth -fd fine_model/D_v15_best.pth
$ python3 train_ms.py -c configs/train_config.json -m 20220306_24000
```
(x) Test
```
$ python3 MMVC_Client/python/conver_test.py -m logs/G_40000.pth -c configs/train_config.json -s 0 -t 101 --input dataset/00_myvoice/wav/emotion011.wav --output dataset/test.wav --f0_scale 3
```
(X) onnx
```sh
$ python3 onnx_export.py --config_file logs/train_config.json --convert_pth logs/G_220000.pth
```
docker_trainer/model/.gitignore vendored Normal file
@ -0,0 +1,2 @@
*
!.gitignore
@ -0,0 +1,208 @@
from features import SignalGenerator, dilated_factor
from scipy.interpolate import interp1d
import torch
import numpy as np
import json
import os
hann_window = {}
class TextAudioSpeakerCollate():
""" Zero-pads model inputs and targets
"""
def __init__(
self,
sample_rate,
hop_size,
f0_factor=1.0,
dense_factors=[0.5, 1, 4, 8],
upsample_scales=[8, 4, 2, 2],
sine_amp=0.1,
noise_amp=0.003,
signal_types=["sine"],
):
self.dense_factors = dense_factors
self.prod_upsample_scales = np.cumprod(upsample_scales)
self.sample_rate = sample_rate
self.signal_generator = SignalGenerator(
sample_rate=sample_rate,
hop_size=hop_size,
sine_amp=sine_amp,
noise_amp=noise_amp,
signal_types=signal_types,
)
self.f0_factor = f0_factor
def __call__(self, batch):
"""Collate's training batch from normalized text, audio and speaker identities
PARAMS
------
batch: [text_normalized, spec_normalized, wav_normalized, sid, note]
"""
spec_lengths = torch.LongTensor(len(batch))
sid = torch.LongTensor(len(batch))
spec_padded = torch.FloatTensor(len(batch), batch[0][0].size(0), batch[0][0].size(1))
f0_padded = torch.FloatTensor(len(batch), 1, batch[0][2].size(0))
# initialize the return tensors
spec_padded.zero_()
f0_padded.zero_()
# dfs
dfs_batch = [[] for _ in range(len(self.dense_factors))]
# row spec, sid, f0
for i in range(len(batch)):
row = batch[i]
spec = row[0]
spec_padded[i, :, :spec.size(1)] = spec
spec_lengths[i] = spec.size(1)
sid[i] = row[1]
# at inference time, multiply f0/cf0 by the f0 scale factor
f0 = row[2] * self.f0_factor
f0_padded[i, :, :f0.size(0)] = f0
# dfs
dfs = []
# dilated_factor takes numpy input!!
for df, us in zip(self.dense_factors, self.prod_upsample_scales):
dfs += [
np.repeat(dilated_factor(torch.unsqueeze(f0, dim=1).to('cpu').detach().numpy(), self.sample_rate, df), us)
]
# not fully understood yet; read the paper properly later
for i in range(len(self.dense_factors)):
dfs_batch[i] += [
dfs[i].astype(np.float32).reshape(-1, 1)
] # [(T', 1), ...]
# transpose the dfs (still not fully understood)
for i in range(len(self.dense_factors)):
dfs_batch[i] = torch.FloatTensor(np.array(dfs_batch[i])).transpose(
2, 1
) # (B, 1, T')
# convert f0/cf0 into the signal actually fed to the model
in_batch = self.signal_generator(f0_padded)
return spec_padded, spec_lengths, sid, in_batch, dfs_batch
def convert_continuos_f0(f0, f0_size):
# get start and end of f0
if (f0 == 0).all():
return np.zeros((f0_size,))
start_f0 = f0[f0 != 0][0]
end_f0 = f0[f0 != 0][-1]
# padding start and end of f0 sequence
cf0 = f0
start_idx = np.where(cf0 == start_f0)[0][0]
end_idx = np.where(cf0 == end_f0)[0][-1]
cf0[:start_idx] = start_f0
cf0[end_idx:] = end_f0
# get non-zero frame index
nz_frames = np.where(cf0 != 0)[0]
# perform linear interpolation
f = interp1d(nz_frames, cf0[nz_frames], bounds_error=False, fill_value=0.0)
cf0_ = f(np.arange(0, f0_size))
# print(cf0.shape, cf0_.shape, f0.shape, f0_size)
# print(cf0_)
return f(np.arange(0, f0_size))
def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False):
if torch.min(y) < -1.:
print('min value is ', torch.min(y))
if torch.max(y) > 1.:
print('max value is ', torch.max(y))
dtype_device = str(y.dtype) + '_' + str(y.device)
wnsize_dtype_device = str(win_size) + '_' + dtype_device
if wnsize_dtype_device not in hann_window:
hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), mode='reflect')
y = y.squeeze(1)
spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=True)
spec = torch.view_as_real(spec)
spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
return spec
def get_hparams_from_file(config_path):
with open(config_path, "r", encoding="utf-8") as f:
data = f.read()
config = json.loads(data)
hparams = HParams(**config)
return hparams
class HParams():
def __init__(self, **kwargs):
for k, v in kwargs.items():
if type(v) == dict:
v = HParams(**v)
self[k] = v
def keys(self):
return self.__dict__.keys()
def items(self):
return self.__dict__.items()
def values(self):
return self.__dict__.values()
def __len__(self):
return len(self.__dict__)
def __getitem__(self, key):
return getattr(self, key)
def __setitem__(self, key, value):
return setattr(self, key, value)
def __contains__(self, key):
return key in self.__dict__
def __repr__(self):
return self.__dict__.__repr__()
def load_checkpoint(checkpoint_path, model, optimizer=None):
assert os.path.isfile(checkpoint_path), f"No such file or directory: {checkpoint_path}"
checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
iteration = checkpoint_dict['iteration']
learning_rate = checkpoint_dict['learning_rate']
if optimizer is not None:
optimizer.load_state_dict(checkpoint_dict['optimizer'])
saved_state_dict = {
**checkpoint_dict['pe'],
**checkpoint_dict['flow'],
**checkpoint_dict['text_enc'],
**checkpoint_dict['dec'],
**checkpoint_dict['emb_g']
}
if hasattr(model, 'module'):
state_dict = model.module.state_dict()
else:
state_dict = model.state_dict()
new_state_dict = {}
for k, v in state_dict.items():
try:
new_state_dict[k] = saved_state_dict[k]
except KeyError:
# fall back to the model's current weights for keys missing from the checkpoint
new_state_dict[k] = v
if hasattr(model, 'module'):
model.module.load_state_dict(new_state_dict)
else:
model.load_state_dict(new_state_dict)
return model, optimizer, learning_rate, iteration
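For intuition, a worked example of `convert_continuos_f0`: zero (unvoiced) frames at the edges are clamped to the nearest voiced value, and interior gaps are linearly interpolated. Values are illustrative, and the module is assumed importable as `client_modules`, as conver_test.py below does:

```python
import numpy as np
from client_modules import convert_continuos_f0

# Hypothetical contour: unvoiced (zero) frames at the edges and one gap inside.
f0 = np.array([0.0, 0.0, 100.0, 0.0, 120.0, 0.0])
print(convert_continuos_f0(f0, f0_size=6))
# -> [100. 100. 100. 110. 120. 120.]
```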

View File

@ -0,0 +1,104 @@
import sys
sys.path.append(".")  # the sifigan package must be importable from here.
import argparse
import torch
import numpy as np
from scipy.io.wavfile import write, read
import pyworld as pw
from logging import getLogger
# import utils
from models import SynthesizerTrn
# from mmvc_client import Hyperparameters # <- needs pyaudio etc., so only the required logic is copy-pasted here
from client_modules import convert_continuos_f0, spectrogram_torch, TextAudioSpeakerCollate, get_hparams_from_file, load_checkpoint
logger = getLogger(__name__)
def setupArgParser():
parser = argparse.ArgumentParser()
parser.add_argument("-c", type=str, required=True, help="path for the config.json")
parser.add_argument("-m", type=str, help="path for the pytorch model file")
parser.add_argument("-o", type=str, help="path for the onnx model file")
parser.add_argument("-s", type=int, required=True, help="source speaker id")
parser.add_argument("-t", type=int, required=True, help="target speaker id")
parser.add_argument("--input", type=str, required=True, help="input wav file")
parser.add_argument("--output", type=str, required=True, help="outpu wav file")
parser.add_argument("--f0_scale", type=float, required=True, help="f0 scale")
return parser
def create_model(hps, pytorch_model_file):
net_g = SynthesizerTrn(
spec_channels=hps.data.filter_length // 2 + 1,
segment_size=hps.train.segment_size // hps.data.hop_length,
inter_channels=hps.model.inter_channels,
hidden_channels=hps.model.hidden_channels,
upsample_rates=hps.model.upsample_rates,
upsample_initial_channel=hps.model.upsample_initial_channel,
upsample_kernel_sizes=hps.model.upsample_kernel_sizes,
n_flow=hps.model.n_flow,
dec_out_channels=1,
dec_kernel_size=7,
n_speakers=hps.data.n_speakers,
gin_channels=hps.model.gin_channels,
requires_grad_pe=hps.requires_grad.pe,
requires_grad_flow=hps.requires_grad.flow,
requires_grad_text_enc=hps.requires_grad.text_enc,
requires_grad_dec=hps.requires_grad.dec
)
_ = net_g.eval()
_ = load_checkpoint(pytorch_model_file, net_g, None)
return net_g
def convert(hps, ssid, tsid, input, output, f0_scale):
sr, signal = read(input)
signal = signal / hps.data.max_wav_value
_f0, _time = pw.dio(signal, hps.data.sampling_rate, frame_period=5.5)
f0 = pw.stonemask(signal, _f0, _time, hps.data.sampling_rate)
f0 = convert_continuos_f0(f0, int(signal.shape[0] / hps.data.hop_length))
f0 = torch.from_numpy(f0.astype(np.float32))
signal = torch.from_numpy(signal.astype(np.float32)).clone()
signal = signal.unsqueeze(0)
spec = spectrogram_torch(signal, hps.data.filter_length, hps.data.sampling_rate, hps.data.hop_length, hps.data.win_length, center=False)
spec = torch.squeeze(spec, 0)
sid = torch.LongTensor([int(ssid)])
data = TextAudioSpeakerCollate(
sample_rate=hps.data.sampling_rate,
hop_size=hps.data.hop_length,
f0_factor=f0_scale
)([(spec, sid, f0)])
spec, spec_lengths, sid_src, sin, d = data
spec = spec.cuda()
spec_lengths = spec_lengths.cuda()
sid_src = sid_src.cuda()
sin = sin.cuda()
d = tuple([d[:1].cuda() for d in d])
sid_target = torch.LongTensor([tsid]).cuda()
audio = net_g.cuda().voice_conversion(spec, spec_lengths, sin, d, sid_src, sid_target)[0, 0].data.cpu().float().numpy()
# print(audio)
write(output, 24000, audio)
if __name__ == '__main__':
print("main")
parser = setupArgParser()
args = parser.parse_args()
CONFIG_PATH = args.c
hps = get_hparams_from_file(CONFIG_PATH)
pytorch_model_file = args.m
ssid = args.s
tsid = args.t
input = args.input
output = args.output
f0_scale = args.f0_scale
net_g = create_model(hps, pytorch_model_file)
convert(hps, ssid, tsid, input, output, f0_scale)

View File

@ -0,0 +1,18 @@
#!/bin/bash
set -eu
DOCKER_IMAGE=dannadori/trainer:20230221_085208
# DOCKER_IMAGE=trainer
docker run --gpus all --rm -ti \
-v `pwd`/trainer/dataset:/MMVC_Trainer/dataset \
-v `pwd`/trainer/configs:/MMVC_Trainer/configs \
-v `pwd`/trainer/F0:/MMVC_Trainer/F0 \
-v `pwd`/trainer/cF0:/MMVC_Trainer/cF0 \
-v `pwd`/trainer/units:/MMVC_Trainer/units \
-v `pwd`/trainer/logs:/MMVC_Trainer/logs \
-v `pwd`/trainer/filelists:/MMVC_Trainer/filelists \
-p 5000:5000 \
$DOCKER_IMAGE /bin/bash

3
docker_trainer/warmup.py Normal file
View File

@ -0,0 +1,3 @@
import torch
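# Pre-fetch the HuBERT-soft weights at image build time so they are baked into the torch.hub cache of the image.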
hubert = torch.hub.load("bshall/hubert:main", "hubert_soft")

View File

@ -6,10 +6,9 @@
"scripts": { "scripts": {
"build:docker": "date +%Y%m%d%H%M%S > docker/dummy && DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile docker/ -t voice-changer", "build:docker": "date +%Y%m%d%H%M%S > docker/dummy && DOCKER_BUILDKIT=1 docker build -f docker/Dockerfile docker/ -t voice-changer",
"build:docker:onnx": "DOCKER_BUILDKIT=1 docker build -f docker_onnx/Dockerfile docker/ -t onnx-converter", "build:docker:onnx": "DOCKER_BUILDKIT=1 docker build -f docker_onnx/Dockerfile docker/ -t onnx-converter",
"copy:frontend": "docker run -v `pwd`/frontend/dist:/frontend/dist --entrypoint /bin/bash -ti voice-changer -c \"cp -r /voice-changer-internal/frontend/dist/* /frontend/dist\"", "build:docker:trainer": "date +%Y%m%d%H%M%S > docker_trainer/dummy && DOCKER_BUILDKIT=1 docker build -f docker_trainer/Dockerfile docker_trainer/ -t trainer",
"copy:backend": "docker run -v `pwd`/demo:/demo --entrypoint /bin/bash -ti voice-changer -c \"cp -r /voice-changer-internal/voice-change-service/* /demo/ && rm -rf /demo/MMVC_Trainer/.git && rm -rf /demo/MMVC_Trainer/.gitignore \"",
"create:demo": "run-p copy:frontend copy:backend",
"push:docker": "bash script/001_pushDocker.sh", "push:docker": "bash script/001_pushDocker.sh",
"push:docker:trainer": "bash script/002_pushDockerTrainer.sh",
"test": "echo \"Error: no test specified\" && exit 1" "test": "echo \"Error: no test specified\" && exit 1"
}, },
"repository": { "repository": {

View File

@ -0,0 +1,7 @@
#!/bin/bash
data_tag=`date +%Y%m%d_%H%M%S`
docker login
docker tag trainer dannadori/trainer:$data_tag
docker push dannadori/trainer:$data_tag

View File

@ -8,7 +8,11 @@ $ conda activate mmvc-server
$ pip install -r requirements.txt $ pip install -r requirements.txt
$ git clone https://github.com/isletennos/MMVC_Client.git $ git clone https://github.com/isletennos/MMVC_Client.git
$ cd MMVC_Client && git checkout 04f3fec4fd82dea6657026ec4e1cd80fb29a415c && cd - $ cd MMVC_Client && git checkout 3374a1177b73e3f6d600e5dbe93af033c36ee120 && cd -
$ git clone https://github.com/isletennos/MMVC_Trainer.git
$ cd MMVC_Trainer && git checkout c242d3d1cf7f768af70d9735082ca2bdd90c45f3 && cd -
$ python3 MMVCServerSIO.py -p 18888 --https true $ python3 MMVCServerSIO.py -p 18888 --https true
``` ```

View File

@ -16,6 +16,9 @@ NATIVE_CLIENT_FILE_MAC = os.path.join(sys._MEIPASS, "voice-changer-native-client
"voice-changer-native-client") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client" "voice-changer-native-client") if hasattr(sys, "_MEIPASS") else "voice-changer-native-client"
TMP_DIR = os.path.join(tmpdir.name, "tmp_dir") if hasattr(sys, "_MEIPASS") else "tmp_dir"
os.makedirs(TMP_DIR, exist_ok=True)
# SSL_KEY_DIR = os.path.join(sys._MEIPASS, "keys") if hasattr(sys, "_MEIPASS") else "keys" # SSL_KEY_DIR = os.path.join(sys._MEIPASS, "keys") if hasattr(sys, "_MEIPASS") else "keys"
# MODEL_DIR = os.path.join(sys._MEIPASS, "logs") if hasattr(sys, "_MEIPASS") else "logs" # MODEL_DIR = os.path.join(sys._MEIPASS, "logs") if hasattr(sys, "_MEIPASS") else "logs"
# UPLOAD_DIR = os.path.join(sys._MEIPASS, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir" # UPLOAD_DIR = os.path.join(sys._MEIPASS, "upload_dir") if hasattr(sys, "_MEIPASS") else "upload_dir"

View File

@ -9,7 +9,7 @@ from restapi.MMVC_Rest_Hello import MMVC_Rest_Hello
from restapi.MMVC_Rest_VoiceChanger import MMVC_Rest_VoiceChanger from restapi.MMVC_Rest_VoiceChanger import MMVC_Rest_VoiceChanger
from restapi.MMVC_Rest_Fileuploader import MMVC_Rest_Fileuploader from restapi.MMVC_Rest_Fileuploader import MMVC_Rest_Fileuploader
from restapi.MMVC_Rest_Trainer import MMVC_Rest_Trainer from restapi.MMVC_Rest_Trainer import MMVC_Rest_Trainer
from const import frontend_path from const import frontend_path, TMP_DIR
class ValidationErrorLoggingRoute(APIRoute): class ValidationErrorLoggingRoute(APIRoute):
@ -27,10 +27,11 @@ class ValidationErrorLoggingRoute(APIRoute):
return custom_route_handler return custom_route_handler
class MMVC_Rest: class MMVC_Rest:
@classmethod @classmethod
def get_instance(cls, voiceChangerManager:VoiceChangerManager): def get_instance(cls, voiceChangerManager: VoiceChangerManager):
if not hasattr(cls, "_instance"): if not hasattr(cls, "_instance"):
app_fastapi = FastAPI() app_fastapi = FastAPI()
app_fastapi.router.route_class = ValidationErrorLoggingRoute app_fastapi.router.route_class = ValidationErrorLoggingRoute
@ -50,6 +51,8 @@ class MMVC_Rest:
app_fastapi.mount( app_fastapi.mount(
"/recorder", StaticFiles(directory=f'{frontend_path}', html=True), name="static") "/recorder", StaticFiles(directory=f'{frontend_path}', html=True), name="static")
app_fastapi.mount(
"/tmp", StaticFiles(directory=f'{TMP_DIR}'), name="static")
restHello = MMVC_Rest_Hello() restHello = MMVC_Rest_Hello()
app_fastapi.include_router(restHello.router) app_fastapi.include_router(restHello.router)

View File

@ -1,6 +1,8 @@
import base64, struct import base64
import struct
import numpy as np import numpy as np
import traceback import traceback
import pyaudio
from fastapi import APIRouter from fastapi import APIRouter
from fastapi.encoders import jsonable_encoder from fastapi.encoders import jsonable_encoder
@ -10,17 +12,50 @@ from voice_changer.VoiceChangerManager import VoiceChangerManager
from pydantic import BaseModel from pydantic import BaseModel
import threading import threading
class VoiceModel(BaseModel): class VoiceModel(BaseModel):
timestamp: int timestamp: int
buffer: str buffer: str
class MMVC_Rest_VoiceChanger: class MMVC_Rest_VoiceChanger:
def __init__(self, voiceChangerManager:VoiceChangerManager): def __init__(self, voiceChangerManager: VoiceChangerManager):
self.voiceChangerManager = voiceChangerManager self.voiceChangerManager = voiceChangerManager
self.router = APIRouter() self.router = APIRouter()
self.router.add_api_route("/test", self.test, methods=["POST"]) self.router.add_api_route("/test", self.test, methods=["POST"])
self.router.add_api_route("/device", self.get_device, methods=["GET"])
self.tlock = threading.Lock() self.tlock = threading.Lock()
def get_device(self):
audio = pyaudio.PyAudio()
audio_input_devices = []
audio_output_devices = []
audio_devices = {}
host_apis = []
for api_index in range(audio.get_host_api_count()):
host_apis.append(audio.get_host_api_info_by_index(api_index)['name'])
for x in range(0, audio.get_device_count()):
device = audio.get_device_info_by_index(x)
try:
deviceName = device['name'].encode('shift-jis').decode('utf-8')
except (UnicodeDecodeError, UnicodeEncodeError):
deviceName = device['name']
deviceIndex = device['index']
hostAPI = host_apis[device['hostApi']]
if device['maxInputChannels'] > 0:
audio_input_devices.append({"kind": "audioinput", "index": deviceIndex, "name": deviceName, "hostAPI": hostAPI})
if device['maxOutputChannels'] > 0:
audio_output_devices.append({"kind": "audiooutput", "index": deviceIndex, "name": deviceName, "hostAPI": hostAPI})
audio_devices["audio_input_devices"] = audio_input_devices
audio_devices["audio_output_devices"] = audio_output_devices
json_compatible_item_data = jsonable_encoder(audio_devices)
return JSONResponse(content=json_compatible_item_data)
def test(self, voice: VoiceModel): def test(self, voice: VoiceModel):
try: try:
@ -38,7 +73,7 @@ class MMVC_Rest_VoiceChanger:
# unpackedData.astype(np.int16)) # unpackedData.astype(np.int16))
self.tlock.acquire() self.tlock.acquire()
changedVoice = self.voiceChangerManager.changeVoice( unpackedData) changedVoice = self.voiceChangerManager.changeVoice(unpackedData)
self.tlock.release() self.tlock.release()
changedVoiceBase64 = base64.b64encode(changedVoice).decode('utf-8') changedVoiceBase64 = base64.b64encode(changedVoice).decode('utf-8')
@ -55,6 +90,3 @@ class MMVC_Rest_VoiceChanger:
print(traceback.format_exc()) print(traceback.format_exc())
self.tlock.release() self.tlock.release()
return str(e) return str(e)
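A quick way to exercise the new GET /device route, assuming the server was started as in the server README (port 18888, self-signed HTTPS):

```python
import requests

# verify=False because the bundled certificate is self-signed.
res = requests.get("https://localhost:18888/device", verify=False)
devices = res.json()
print([d["name"] for d in devices["audio_input_devices"]])
```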

View File

@ -1,22 +1,25 @@
import os, shutil import os
import shutil
from fastapi import UploadFile from fastapi import UploadFile
# UPLOAD_DIR = "model_upload_dir" # UPLOAD_DIR = "model_upload_dir"
def upload_file(upload_dirname:str, file:UploadFile, filename: str):
def upload_file(upload_dirname: str, file: UploadFile, filename: str):
if file and filename: if file and filename:
fileobj = file.file fileobj = file.file
upload_dir = open(os.path.join(upload_dirname, filename),'wb+') upload_dir = open(os.path.join(upload_dirname, filename), 'wb+')
shutil.copyfileobj(fileobj, upload_dir) shutil.copyfileobj(fileobj, upload_dir)
upload_dir.close() upload_dir.close()
return {"status":"OK", "msg": f"uploaded files {filename} "} return {"status": "OK", "msg": f"uploaded files {filename} "}
return {"status":"ERROR", "msg": "uploaded file is not found."} return {"status": "ERROR", "msg": "uploaded file is not found."}
def concat_file_chunks(upload_dirname:str, filename:str, chunkNum:int, dest_dirname:str):
def concat_file_chunks(upload_dirname: str, filename: str, chunkNum: int, dest_dirname: str):
target_file_name = os.path.join(dest_dirname, filename) target_file_name = os.path.join(dest_dirname, filename)
if os.path.exists(target_file_name): if os.path.exists(target_file_name):
os.unlink(target_file_name) os.remove(target_file_name)
with open(target_file_name, "ab") as target_file: with open(target_file_name, "ab") as target_file:
for i in range(chunkNum): for i in range(chunkNum):
chunkName = f"{filename}_{i}" chunkName = f"{filename}_{i}"
@ -24,7 +27,6 @@ def concat_file_chunks(upload_dirname:str, filename:str, chunkNum:int, dest_dirn
stored_chunk_file = open(chunk_file_path, 'rb') stored_chunk_file = open(chunk_file_path, 'rb')
target_file.write(stored_chunk_file.read()) target_file.write(stored_chunk_file.read())
stored_chunk_file.close() stored_chunk_file.close()
os.unlink(chunk_file_path) os.remove(chunk_file_path)
target_file.close() target_file.close()
return {"status":"OK", "msg": f"concat files {target_file_name} "} return {"status": "OK", "msg": f"concat files {target_file_name} "}

View File

@ -2,12 +2,13 @@
from fastapi.responses import FileResponse from fastapi.responses import FileResponse
import os import os
def mod_get_model(modelFile:str):
modelPath = os.path.join("MMVC_Trainer/logs", modelFile)
return FileResponse(path=modelPath)
def mod_delete_model(modelFile:str): def mod_get_model(modelFile: str):
modelPath = os.path.join("MMVC_Trainer/logs", modelFile) modelPath = os.path.join("MMVC_Trainer/logs", modelFile)
os.unlink(modelPath) return FileResponse(path=modelPath)
return {"Model deleted": f"{modelFile}"}
def mod_delete_model(modelFile: str):
modelPath = os.path.join("MMVC_Trainer/logs", modelFile)
os.remove(modelPath)
return {"Model deleted": f"{modelFile}"}

View File

@ -4,36 +4,38 @@ import numpy as np
import socketio import socketio
from voice_changer.VoiceChangerManager import VoiceChangerManager from voice_changer.VoiceChangerManager import VoiceChangerManager
class MMVC_Namespace(socketio.AsyncNamespace): class MMVC_Namespace(socketio.AsyncNamespace):
def __init__(self, namespace:str, voiceChangerManager:VoiceChangerManager): def __init__(self, namespace: str, voiceChangerManager: VoiceChangerManager):
super().__init__(namespace) super().__init__(namespace)
self.voiceChangerManager = voiceChangerManager self.voiceChangerManager = voiceChangerManager
@classmethod @classmethod
def get_instance(cls, voiceChangerManager:VoiceChangerManager): def get_instance(cls, voiceChangerManager: VoiceChangerManager):
if not hasattr(cls, "_instance"): if not hasattr(cls, "_instance"):
cls._instance = cls("/test", voiceChangerManager) cls._instance = cls("/test", voiceChangerManager)
return cls._instance return cls._instance
def on_connect(self, sid, environ): def on_connect(self, sid, environ):
# print('[{}] connect sid : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S') , sid)) print('[{}] connect sid : {}'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S'), sid))
pass pass
async def on_request_message(self, sid, msg): async def on_request_message(self, sid, msg):
timestamp = int(msg[0]) timestamp = int(msg[0])
data = msg[1] data = msg[1]
if(isinstance(data, str)): if (isinstance(data, str)):
print(type(data)) print(type(data))
print(data) print(data)
await self.emit('response', [timestamp, 0], to=sid) await self.emit('response', [timestamp, 0], to=sid)
else: else:
unpackedData = np.array(struct.unpack('<%sh' % (len(data) // struct.calcsize('<h')), data)) unpackedData = np.array(struct.unpack('<%sh' % (len(data) // struct.calcsize('<h')), data))
audio1 = self.voiceChangerManager.changeVoice(unpackedData) # audio1, perf = self.voiceChangerManager.changeVoice(unpackedData)
# print("sio result:", len(audio1), audio1.shape) res = self.voiceChangerManager.changeVoice(unpackedData)
audio1 = res[0]
perf = res[1] if len(res) == 2 else [0, 0, 0]
bin = struct.pack('<%sh' % len(audio1), *audio1) bin = struct.pack('<%sh' % len(audio1), *audio1)
await self.emit('response', [timestamp, bin], to=sid) await self.emit('response', [timestamp, bin, perf], to=sid)
def on_disconnect(self, sid): def on_disconnect(self, sid):
# print('[{}] disconnect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))) # print('[{}] disconnect'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
pass pass
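On the wire, the response event now carries the perf triple alongside the converted audio. A minimal listener sketch, assuming the python-socketio client package and the same local server as above:

```python
import socketio

sio = socketio.Client(ssl_verify=False)  # self-signed server certificate

@sio.on("response", namespace="/test")
def on_response(msg):
    # msg mirrors the emit above: [timestamp, wav_bytes, perf]
    timestamp, wav_bytes, perf = msg  # perf = [pre, main, post] in seconds
    print(timestamp, perf)

sio.connect("https://localhost:18888", namespaces=["/test"])
```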

View File

@ -4,31 +4,34 @@ from sio.MMVC_SocketIOServer import MMVC_SocketIOServer
from voice_changer.VoiceChangerManager import VoiceChangerManager from voice_changer.VoiceChangerManager import VoiceChangerManager
from const import frontend_path from const import frontend_path
class MMVC_SocketIOApp(): class MMVC_SocketIOApp():
@classmethod @classmethod
def get_instance(cls, app_fastapi, voiceChangerManager:VoiceChangerManager): def get_instance(cls, app_fastapi, voiceChangerManager: VoiceChangerManager):
if not hasattr(cls, "_instance"): if not hasattr(cls, "_instance"):
sio = MMVC_SocketIOServer.get_instance(voiceChangerManager) sio = MMVC_SocketIOServer.get_instance(voiceChangerManager)
app_socketio = socketio.ASGIApp( app_socketio = socketio.ASGIApp(
sio, sio,
other_asgi_app=app_fastapi, other_asgi_app=app_fastapi,
static_files={ static_files={
'/assets/icons/github.svg': { '/assets/icons/github.svg': {
'filename': f'{frontend_path}/assets/icons/github.svg', 'filename': f'{frontend_path}/assets/icons/github.svg',
'content_type': 'image/svg+xml' 'content_type': 'image/svg+xml'
}, },
'/assets/icons/help-circle.svg': { '/assets/icons/help-circle.svg': {
'filename': f'{frontend_path}/assets/icons/help-circle.svg', 'filename': f'{frontend_path}/assets/icons/help-circle.svg',
'content_type': 'image/svg+xml' 'content_type': 'image/svg+xml'
}, },
'': f'{frontend_path}', '/buymeacoffee.png': {
'filename': f'{frontend_path}/assets/buymeacoffee.png',
'content_type': 'image/png'
},
'': f'{frontend_path}',
'/': f'{frontend_path}/index.html', '/': f'{frontend_path}/index.html',
} }
) )
cls._instance = app_socketio cls._instance = app_socketio
return cls._instance return cls._instance
return cls._instance return cls._instance

2
server/tmp_dir/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

View File

@ -1,38 +1,112 @@
from const import ERROR_NO_ONNX_SESSION from const import ERROR_NO_ONNX_SESSION, TMP_DIR
import torch import torch
import os import os
import traceback import traceback
import numpy as np import numpy as np
from dataclasses import dataclass, asdict from dataclasses import dataclass, asdict
import resampy
import onnxruntime import onnxruntime
from symbols import symbols from symbols import symbols
from models import SynthesizerTrn from models import SynthesizerTrn
from voice_changer.TrainerFunctions import TextAudioSpeakerCollate, spectrogram_torch, load_checkpoint, get_hparams_from_file import pyworld as pw
from voice_changer.client_modules import convert_continuos_f0, spectrogram_torch, TextAudioSpeakerCollate, get_hparams_from_file, load_checkpoint
import time
providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"] providers = ['OpenVINOExecutionProvider', "CUDAExecutionProvider", "DmlExecutionProvider", "CPUExecutionProvider"]
import wave
import matplotlib
matplotlib.use('Agg')
import pylab
import librosa
import librosa.display
SAMPLING_RATE = 24000
class MockStream:
"""gi
オーディオストリーミング入出力をファイル入出力にそのまま置き換えるためのモック
"""
def __init__(self, sampling_rate):
self.sampling_rate = sampling_rate
self.start_count = 2
self.end_count = 2
self.fr = None
self.fw = None
def open_inputfile(self, input_filename):
self.fr = wave.open(input_filename, 'rb')
def open_outputfile(self, output_filename):
self.fw = wave.open(output_filename, 'wb')
self.fw.setnchannels(1)
self.fw.setsampwidth(2)
self.fw.setframerate(self.sampling_rate)
def read(self, length, exception_on_overflow=False):
if self.start_count > 0:
wav = bytes(length * 2)
self.start_count -= 1 # send dummy empty data for the first two reads
else:
wav = self.fr.readframes(length)
if len(wav) <= 0: # after the data runs out, send dummy empty data for the last two reads
wav = bytes(length * 2)
self.end_count -= 1
if self.end_count < 0:
Hyperparameters.VC_END_FLAG = True # leftover from mmvc_client; not reached in this server's write-only usage
return wav
def write(self, wav):
self.fw.writeframes(wav)
def stop_stream(self):
pass
def close(self):
if self.fr != None:
self.fr.close()
self.fr = None
if self.fw != None:
self.fw.close()
self.fw = None
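For orientation, a minimal offline loop that uses MockStream in place of a live PyAudio stream pair (filenames and frame count illustrative):

```python
stream_in = MockStream(24000)
stream_in.open_inputfile("in.wav")
stream_out = MockStream(24000)
stream_out.open_outputfile("out.wav")
for _ in range(10):                # a few frames only, for the sketch
    wav = stream_in.read(4096)     # bytes; dummy silence around the edges
    stream_out.write(wav)
stream_in.close()
stream_out.close()
```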
@dataclass @dataclass
class VocieChangerSettings(): class VocieChangerSettings():
gpu: int = 0 gpu: int = 0
srcId: int = 107 srcId: int = 0
dstId: int = 100 dstId: int = 101
inputSampleRate: int = 24000 # 48000 or 24000
crossFadeOffsetRate: float = 0.1 crossFadeOffsetRate: float = 0.1
crossFadeEndRate: float = 0.9 crossFadeEndRate: float = 0.9
crossFadeOverlapRate: float = 0.9 crossFadeOverlapSize: int = 4096
convertChunkNum: int = 32
minConvertSize: int = 0 f0Factor: float = 1.0
framework: str = "ONNX" # PyTorch or ONNX f0Detector: str = "dio" # dio or harvest
recordIO: int = 0 # 0:off, 1:on
framework: str = "PyTorch" # PyTorch or ONNX
pyTorchModelFile: str = "" pyTorchModelFile: str = ""
onnxModelFile: str = "" onnxModelFile: str = ""
configFile: str = "" configFile: str = ""
# ↓ list only the mutable fields # ↓ list only the mutable fields
intData = ["gpu", "srcId", "dstId", "convertChunkNum", "minConvertSize"] intData = ["gpu", "srcId", "dstId", "inputSampleRate", "crossFadeOverlapSize", "recordIO"]
floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "crossFadeOverlapRate"] floatData = ["crossFadeOffsetRate", "crossFadeEndRate", "f0Factor"]
strData = ["framework"] strData = ["framework", "f0Detector"]
def readMicrophone(queue, sid, deviceIndex):
print("READ MIC", queue, sid, deviceIndex)
class VoiceChanger(): class VoiceChanger():
@ -45,7 +119,7 @@ class VoiceChanger():
self.onnx_session = None self.onnx_session = None
self.currentCrossFadeOffsetRate = 0 self.currentCrossFadeOffsetRate = 0
self.currentCrossFadeEndRate = 0 self.currentCrossFadeEndRate = 0
self.currentCrossFadeOverlapRate = 0 self.currentCrossFadeOverlapSize = 0
self.gpu_num = torch.cuda.device_count() self.gpu_num = torch.cuda.device_count()
self.text_norm = torch.LongTensor([0, 6, 0]) self.text_norm = torch.LongTensor([0, 6, 0])
@ -55,6 +129,33 @@ class VoiceChanger():
print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})") print(f"VoiceChanger Initialized (GPU_NUM:{self.gpu_num}, mps_enabled:{self.mps_enabled})")
def _setupRecordIO(self):
# IO Recorder Setup
if hasattr(self, "stream_out"):
self.stream_out.close()
mock_stream_out = MockStream(24000)
stream_output_file = os.path.join(TMP_DIR, "out.wav")
if os.path.exists(stream_output_file):
print("delete old analyze file.", stream_output_file)
os.remove(stream_output_file)
else:
print("old analyze file not exist.", stream_output_file)
mock_stream_out.open_outputfile(stream_output_file)
self.stream_out = mock_stream_out
if hasattr(self, "stream_in"):
self.stream_in.close()
mock_stream_in = MockStream(24000)
stream_input_file = os.path.join(TMP_DIR, "in.wav")
if os.path.exists(stream_input_file):
print("delete old analyze file.", stream_input_file)
os.remove(stream_input_file)
else:
print("old analyze file not exist.", stream_output_file)
mock_stream_in.open_outputfile(stream_input_file)
self.stream_in = mock_stream_in
def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None): def loadModel(self, config: str, pyTorch_model_file: str = None, onnx_model_file: str = None):
self.settings.configFile = config self.settings.configFile = config
self.hps = get_hparams_from_file(config) self.hps = get_hparams_from_file(config)
@ -66,11 +167,23 @@ class VoiceChanger():
# build the PyTorch model # build the PyTorch model
if pyTorch_model_file != None: if pyTorch_model_file != None:
self.net_g = SynthesizerTrn( self.net_g = SynthesizerTrn(
len(symbols), spec_channels=self.hps.data.filter_length // 2 + 1,
self.hps.data.filter_length // 2 + 1, segment_size=self.hps.train.segment_size // self.hps.data.hop_length,
self.hps.train.segment_size // self.hps.data.hop_length, inter_channels=self.hps.model.inter_channels,
hidden_channels=self.hps.model.hidden_channels,
upsample_rates=self.hps.model.upsample_rates,
upsample_initial_channel=self.hps.model.upsample_initial_channel,
upsample_kernel_sizes=self.hps.model.upsample_kernel_sizes,
n_flow=self.hps.model.n_flow,
dec_out_channels=1,
dec_kernel_size=7,
n_speakers=self.hps.data.n_speakers, n_speakers=self.hps.data.n_speakers,
**self.hps.model) gin_channels=self.hps.model.gin_channels,
requires_grad_pe=self.hps.requires_grad.pe,
requires_grad_flow=self.hps.requires_grad.flow,
requires_grad_text_enc=self.hps.requires_grad.text_enc,
requires_grad_dec=self.hps.requires_grad.dec
)
self.net_g.eval() self.net_g.eval()
load_checkpoint(pyTorch_model_file, self.net_g, None) load_checkpoint(pyTorch_model_file, self.net_g, None)
# utils.load_checkpoint(pyTorch_model_file, self.net_g, None) # utils.load_checkpoint(pyTorch_model_file, self.net_g, None)
@ -92,7 +205,7 @@ class VoiceChanger():
def get_info(self): def get_info(self):
data = asdict(self.settings) data = asdict(self.settings)
data["onnxExecutionProvider"] = self.onnx_session.get_providers() if self.onnx_session != None else [] data["onnxExecutionProviders"] = self.onnx_session.get_providers() if self.onnx_session != None else []
files = ["configFile", "pyTorchModelFile", "onnxModelFile"] files = ["configFile", "pyTorchModelFile", "onnxModelFile"]
for f in files: for f in files:
if data[f] != None and os.path.exists(data[f]): if data[f] != None and os.path.exists(data[f]):
@ -102,6 +215,18 @@ class VoiceChanger():
return data return data
def _get_f0_dio(self, y, sr=SAMPLING_RATE):
_f0, time = pw.dio(y, sr, frame_period=5)
f0 = pw.stonemask(y, _f0, time, sr)
time = np.linspace(0, y.shape[0] / sr, len(time))
return f0, time
def _get_f0_harvest(self, y, sr=SAMPLING_RATE):
_f0, time = pw.harvest(y, sr, frame_period=5)
f0 = pw.stonemask(y, _f0, time, sr)
time = np.linspace(0, y.shape[0] / sr, len(time))
return f0, time
def update_setteings(self, key: str, val: any): def update_setteings(self, key: str, val: any):
if key == "onnxExecutionProvider" and self.onnx_session != None: if key == "onnxExecutionProvider" and self.onnx_session != None:
if val == "CUDAExecutionProvider": if val == "CUDAExecutionProvider":
@ -121,6 +246,36 @@ class VoiceChanger():
self.onnx_session.set_providers(providers=["CUDAExecutionProvider"], provider_options=provider_options) self.onnx_session.set_providers(providers=["CUDAExecutionProvider"], provider_options=provider_options)
if key == "crossFadeOffsetRate" or key == "crossFadeEndRate": if key == "crossFadeOffsetRate" or key == "crossFadeEndRate":
self.unpackedData_length = 0 self.unpackedData_length = 0
if key == "recordIO" and val == 1:
self._setupRecordIO()
if key == "recordIO" and val == 0:
pass
if key == "recordIO" and val == 2:
try:
stream_input_file = os.path.join(TMP_DIR, "in.wav")
analyze_file_dio = os.path.join(TMP_DIR, "analyze-dio.png")
analyze_file_harvest = os.path.join(TMP_DIR, "analyze-harvest.png")
y, sr = librosa.load(stream_input_file, SAMPLING_RATE)
y = y.astype(np.float64)
spec = librosa.amplitude_to_db(np.abs(librosa.stft(y, n_fft=2048, win_length=2048, hop_length=128)), ref=np.max)
f0_dio, times = self._get_f0_dio(y)
f0_harvest, times = self._get_f0_harvest(y)
pylab.close()
HOP_LENGTH = 128
img = librosa.display.specshow(spec, sr=SAMPLING_RATE, hop_length=HOP_LENGTH, x_axis='time', y_axis='log', )
pylab.plot(times, f0_dio, label='f0', color=(0, 1, 1, 0.6), linewidth=3)
pylab.savefig(analyze_file_dio)
pylab.close()
HOP_LENGTH = 128
img = librosa.display.specshow(spec, sr=SAMPLING_RATE, hop_length=HOP_LENGTH, x_axis='time', y_axis='log', )
pylab.plot(times, f0_harvest, label='f0', color=(0, 1, 1, 0.6), linewidth=3)
pylab.savefig(analyze_file_harvest)
except Exception as e:
print("recordIO exception", e)
elif key in self.settings.floatData: elif key in self.settings.floatData:
setattr(self.settings, key, float(val)) setattr(self.settings, key, float(val))
elif key in self.settings.strData: elif key in self.settings.strData:
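The plots generated by recordIO=2 land in TMP_DIR, which MMVC_Rest mounts at /tmp above. For example, to pull the dio analysis image (same local-server assumptions as earlier):

```python
import requests

# Fetch the analysis plot written by the recordIO=2 branch above.
img = requests.get("https://localhost:18888/tmp/analyze-dio.png", verify=False)
with open("analyze-dio.png", "wb") as f:
    f.write(img.content)
```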
@ -132,14 +287,17 @@ class VoiceChanger():
def _generate_strength(self, unpackedData): def _generate_strength(self, unpackedData):
if self.unpackedData_length != unpackedData.shape[0] or self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or self.currentCrossFadeOverlapRate != self.settings.crossFadeOverlapRate: if self.unpackedData_length != unpackedData.shape[0] or \
self.currentCrossFadeOffsetRate != self.settings.crossFadeOffsetRate or \
self.currentCrossFadeEndRate != self.settings.crossFadeEndRate or \
self.currentCrossFadeOverlapSize != self.settings.crossFadeOverlapSize:
self.unpackedData_length = unpackedData.shape[0] self.unpackedData_length = unpackedData.shape[0]
self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate self.currentCrossFadeOffsetRate = self.settings.crossFadeOffsetRate
self.currentCrossFadeEndRate = self.settings.crossFadeEndRate self.currentCrossFadeEndRate = self.settings.crossFadeEndRate
self.currentCrossFadeOverlapRate = self.settings.crossFadeOverlapRate self.currentCrossFadeOverlapSize = self.settings.crossFadeOverlapSize
overlapSize = int(unpackedData.shape[0] * self.settings.crossFadeOverlapRate)
overlapSize = min(self.settings.crossFadeOverlapSize, self.unpackedData_length)
cf_offset = int(overlapSize * self.settings.crossFadeOffsetRate) cf_offset = int(overlapSize * self.settings.crossFadeOffsetRate)
cf_end = int(overlapSize * self.settings.crossFadeEndRate) cf_end = int(overlapSize * self.settings.crossFadeEndRate)
cf_range = cf_end - cf_offset cf_range = cf_end - cf_offset
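For intuition, the strengths regenerated here are complementary fade curves over the (now fixed-size) overlap window. A sketch with illustrative rates and an assumed raised-cosine shape; the exact curve lives in the unchanged code after this hunk and is an assumption here:

```python
import numpy as np

overlapSize = 4096                      # crossFadeOverlapSize, capped by the chunk length
cf_offset = int(overlapSize * 0.1)      # crossFadeOffsetRate
cf_end = int(overlapSize * 0.9)         # crossFadeEndRate
cf_range = cf_end - cf_offset

# Assumed shape: the previous chunk fades out as the current one fades in,
# the two summing to one so the overlap keeps constant gain.
percent = np.arange(cf_range) / cf_range
prev_strength = np.cos(percent * 0.5 * np.pi) ** 2
cur_strength = np.cos((1 - percent) * 0.5 * np.pi) ** 2
assert np.allclose(prev_strength + cur_strength, 1.0)
```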
@ -171,17 +329,37 @@ class VoiceChanger():
audio_norm = audio / self.hps.data.max_wav_value # normalize audio_norm = audio / self.hps.data.max_wav_value # normalize
audio_norm = audio_norm.unsqueeze(0) # unsqueeze audio_norm = audio_norm.unsqueeze(0) # unsqueeze
self.audio_buffer = torch.cat([self.audio_buffer, audio_norm], axis=1) # concatenate with past data self.audio_buffer = torch.cat([self.audio_buffer, audio_norm], axis=1) # concatenate with past data
audio_norm = self.audio_buffer[:, -convertSize:] # extract only the portion to convert # audio_norm = self.audio_buffer[:, -(convertSize + 1280 * 2):] # extract only the portion to convert
audio_norm = self.audio_buffer[:, -(convertSize):] # extract only the portion to convert
self.audio_buffer = audio_norm self.audio_buffer = audio_norm
# TBD: data bounces back and forth between numpy and pytorch; getting it working comes first for now.
audio_norm_np = audio_norm.squeeze().numpy().astype(np.float64)
if self.settings.f0Detector == "dio":
_f0, _time = pw.dio(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5)
f0 = pw.stonemask(audio_norm_np, _f0, _time, self.hps.data.sampling_rate)
else:
f0, t = pw.harvest(audio_norm_np, self.hps.data.sampling_rate, frame_period=5.5, f0_floor=71.0, f0_ceil=1000.0)
f0 = convert_continuos_f0(f0, int(audio_norm_np.shape[0] / self.hps.data.hop_length))
f0 = torch.from_numpy(f0.astype(np.float32))
spec = spectrogram_torch(audio_norm, self.hps.data.filter_length, spec = spectrogram_torch(audio_norm, self.hps.data.filter_length,
self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length, self.hps.data.sampling_rate, self.hps.data.hop_length, self.hps.data.win_length,
center=False) center=False)
# dispose_stft_specs = 2
# spec = spec[:, dispose_stft_specs:-dispose_stft_specs]
# f0 = f0[dispose_stft_specs:-dispose_stft_specs]
spec = torch.squeeze(spec, 0) spec = torch.squeeze(spec, 0)
sid = torch.LongTensor([int(self.settings.srcId)]) sid = torch.LongTensor([int(self.settings.srcId)])
data = (self.text_norm, spec, audio_norm, sid) # data = (self.text_norm, spec, audio_norm, sid)
data = TextAudioSpeakerCollate()([data]) # data = TextAudioSpeakerCollate()([data])
data = TextAudioSpeakerCollate(
sample_rate=self.hps.data.sampling_rate,
hop_size=self.hps.data.hop_length,
f0_factor=self.settings.f0Factor
)([(spec, sid, f0)])
return data return data
def _onnx_inference(self, data, inputSize): def _onnx_inference(self, data, inputSize):
@ -189,7 +367,10 @@ class VoiceChanger():
print("[Voice Changer] No ONNX session.") print("[Voice Changer] No ONNX session.")
return np.zeros(1).astype(np.int16) return np.zeros(1).astype(np.int16)
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data] # x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x for x in data]
# sid_tgt1 = torch.LongTensor([self.settings.dstId])
spec, spec_lengths, sid_src, sin, d = data
sid_tgt1 = torch.LongTensor([self.settings.dstId]) sid_tgt1 = torch.LongTensor([self.settings.dstId])
# if spec.size()[2] >= 8: # if spec.size()[2] >= 8:
audio1 = self.onnx_session.run( audio1 = self.onnx_session.run(
@ -197,11 +378,17 @@ class VoiceChanger():
{ {
"specs": spec.numpy(), "specs": spec.numpy(),
"lengths": spec_lengths.numpy(), "lengths": spec_lengths.numpy(),
"sin": sin.numpy(),
"d0": d[0][:1].numpy(),
"d1": d[1][:1].numpy(),
"d2": d[2][:1].numpy(),
"d3": d[3][:1].numpy(),
"sid_src": sid_src.numpy(), "sid_src": sid_src.numpy(),
"sid_tgt": sid_tgt1.numpy() "sid_tgt": sid_tgt1.numpy()
})[0][0, 0] * self.hps.data.max_wav_value })[0][0, 0] * self.hps.data.max_wav_value
if hasattr(self, 'np_prev_audio1') == True: if hasattr(self, 'np_prev_audio1') == True:
overlapSize = int(inputSize * self.settings.crossFadeOverlapRate) overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
prev_overlap = self.np_prev_audio1[-1 * overlapSize:] prev_overlap = self.np_prev_audio1[-1 * overlapSize:]
cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize] cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize]
# print(prev_overlap.shape, self.np_prev_strength.shape, cur_overlap.shape, self.np_cur_strength.shape) # print(prev_overlap.shape, self.np_prev_strength.shape, cur_overlap.shape, self.np_cur_strength.shape)
@ -224,10 +411,15 @@ class VoiceChanger():
if self.settings.gpu < 0 or self.gpu_num == 0: if self.settings.gpu < 0 or self.gpu_num == 0:
with torch.no_grad(): with torch.no_grad():
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.cpu() for x in data] spec, spec_lengths, sid_src, sin, d = data
sid_tgt1 = torch.LongTensor([self.settings.dstId]).cpu() spec = spec.cpu()
audio1 = (self.net_g.cpu().voice_conversion(spec, spec_lengths, sid_src=sid_src, spec_lengths = spec_lengths.cpu()
sid_tgt=sid_tgt1)[0, 0].data * self.hps.data.max_wav_value) sid_src = sid_src.cpu()
sin = sin.cpu()
d = tuple([d[:1].cpu() for d in d])
sid_target = torch.LongTensor([self.settings.dstId]).cpu()
audio1 = self.net_g.cpu().voice_conversion(spec, spec_lengths, sin, d, sid_src, sid_target)[0, 0].data * self.hps.data.max_wav_value
if self.prev_strength.device != torch.device('cpu'): if self.prev_strength.device != torch.device('cpu'):
print(f"prev_strength move from {self.prev_strength.device} to cpu") print(f"prev_strength move from {self.prev_strength.device} to cpu")
@ -237,7 +429,7 @@ class VoiceChanger():
self.cur_strength = self.cur_strength.cpu() self.cur_strength = self.cur_strength.cpu()
if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cpu'): # skip one round if prev_audio1 is not on the desired device. if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cpu'): # skip one round if prev_audio1 is not on the desired device.
overlapSize = int(inputSize * self.settings.crossFadeOverlapRate) overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
prev_overlap = self.prev_audio1[-1 * overlapSize:] prev_overlap = self.prev_audio1[-1 * overlapSize:]
cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize] cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize]
powered_prev = prev_overlap * self.prev_strength powered_prev = prev_overlap * self.prev_strength
@ -256,10 +448,19 @@ class VoiceChanger():
else: else:
with torch.no_grad(): with torch.no_grad():
x, x_lengths, spec, spec_lengths, y, y_lengths, sid_src = [x.cuda(self.settings.gpu) for x in data] spec, spec_lengths, sid_src, sin, d = data
sid_tgt1 = torch.LongTensor([self.settings.dstId]).cuda(self.settings.gpu) spec = spec.cuda(self.settings.gpu)
audio1 = self.net_g.cuda(self.settings.gpu).voice_conversion(spec, spec_lengths, sid_src=sid_src, spec_lengths = spec_lengths.cuda(self.settings.gpu)
sid_tgt=sid_tgt1)[0, 0].data * self.hps.data.max_wav_value sid_src = sid_src.cuda(self.settings.gpu)
sin = sin.cuda(self.settings.gpu)
d = tuple([d[:1].cuda(self.settings.gpu) for d in d])
sid_target = torch.LongTensor([self.settings.dstId]).cuda(self.settings.gpu)
# audio1 = self.net_g.cuda(self.settings.gpu).voice_conversion(spec, spec_lengths, sid_src=sid_src,
# sid_tgt=sid_tgt1)[0, 0].data * self.hps.data.max_wav_value
audio1 = self.net_g.cuda(self.settings.gpu).voice_conversion(spec, spec_lengths, sin, d,
sid_src, sid_target)[0, 0].data * self.hps.data.max_wav_value
if self.prev_strength.device != torch.device('cuda', self.settings.gpu): if self.prev_strength.device != torch.device('cuda', self.settings.gpu):
print(f"prev_strength move from {self.prev_strength.device} to gpu{self.settings.gpu}") print(f"prev_strength move from {self.prev_strength.device} to gpu{self.settings.gpu}")
@ -269,13 +470,16 @@ class VoiceChanger():
self.cur_strength = self.cur_strength.cuda(self.settings.gpu) self.cur_strength = self.cur_strength.cuda(self.settings.gpu)
if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cuda', self.settings.gpu): if hasattr(self, 'prev_audio1') == True and self.prev_audio1.device == torch.device('cuda', self.settings.gpu):
overlapSize = int(inputSize * self.settings.crossFadeOverlapRate) overlapSize = min(self.settings.crossFadeOverlapSize, inputSize)
prev_overlap = self.prev_audio1[-1 * overlapSize:] prev_overlap = self.prev_audio1[-1 * overlapSize:]
cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize] cur_overlap = audio1[-1 * (inputSize + overlapSize):-1 * inputSize]
powered_prev = prev_overlap * self.prev_strength powered_prev = prev_overlap * self.prev_strength
powered_cur = cur_overlap * self.cur_strength powered_cur = cur_overlap * self.cur_strength
powered_result = powered_prev + powered_cur powered_result = powered_prev + powered_cur
# print(overlapSize, prev_overlap.shape, cur_overlap.shape, self.prev_strength.shape, self.cur_strength.shape)
# print(self.prev_audio1.shape, audio1.shape, inputSize, overlapSize)
cur = audio1[-1 * inputSize:-1 * overlapSize] # the raw part of this input (input minus the next crossfade portion). cur = audio1[-1 * inputSize:-1 * overlapSize] # the raw part of this input (input minus the next crossfade portion).
result = torch.cat([powered_result, cur], axis=0) # join the crossfaded part with the raw part of this input result = torch.cat([powered_result, cur], axis=0) # join the crossfaded part with the raw part of this input
@ -288,32 +492,63 @@ class VoiceChanger():
return result return result
def on_request(self, unpackedData: any): def on_request(self, unpackedData: any):
convertSize = self.settings.convertChunkNum * 128 # 128sample/1chunk
if unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate) + 1024 > convertSize: with Timer("pre-process") as t:
convertSize = int(unpackedData.shape[0] * (1 + self.settings.crossFadeOverlapRate)) + 1024 if self.settings.inputSampleRate != 24000:
if convertSize < self.settings.minConvertSize: unpackedData = resampy.resample(unpackedData, 48000, 24000)
convertSize = self.settings.minConvertSize convertSize = unpackedData.shape[0] + min(self.settings.crossFadeOverlapSize, unpackedData.shape[0])
# print("convert Size", unpackedData.shape[0], unpackedData.shape[0]*(1 + self.settings.crossFadeOverlapRate), convertSize, self.settings.minConvertSize) # print(convertSize, unpackedData.shape[0])
if convertSize < 8192:
convertSize = 8192
if convertSize % 128 != 0: # pad up so the model's output hop size doesn't truncate the chunk.
convertSize = convertSize + (128 - (convertSize % 128))
self._generate_strength(unpackedData)
data = self._generate_input(unpackedData, convertSize)
preprocess_time = t.secs
self._generate_strength(unpackedData) with Timer("main-process") as t:
data = self._generate_input(unpackedData, convertSize) try:
if self.settings.framework == "ONNX":
result = self._onnx_inference(data, unpackedData.shape[0])
else:
result = self._pyTorch_inference(data, unpackedData.shape[0])
try: except Exception as e:
if self.settings.framework == "ONNX": print("VC PROCESSING!!!! EXCEPTION!!!", e)
result = self._onnx_inference(data, unpackedData.shape[0]) print(traceback.format_exc())
else: if hasattr(self, "np_prev_audio1"):
result = self._pyTorch_inference(data, unpackedData.shape[0]) del self.np_prev_audio1
if hasattr(self, "prev_audio1"):
del self.prev_audio1
return np.zeros(1).astype(np.int16)
mainprocess_time = t.secs
except Exception as e: with Timer("post-process") as t:
print("VC PROCESSING!!!! EXCEPTION!!!", e)
print(traceback.format_exc())
if hasattr(self, "np_prev_audio1"):
del self.np_prev_audio1
if hasattr(self, "prev_audio1"):
del self.prev_audio1
return np.zeros(1).astype(np.int16)
result = result.astype(np.int16) result = result.astype(np.int16)
# print("on_request result size:",result.shape) # print("on_request result size:",result.shape)
return result if self.settings.recordIO == 1:
self.stream_in.write(unpackedData.astype(np.int16).tobytes())
self.stream_out.write(result.tobytes())
if self.settings.inputSampleRate != 24000:
result = resampy.resample(result, 24000, 48000).astype(np.int16)
postprocess_time = t.secs
perf = [preprocess_time, mainprocess_time, postprocess_time]
return result, perf
##############
class Timer(object):
def __init__(self, title: str):
self.title = title
def __enter__(self):
self.start = time.time()
return self
def __exit__(self, *args):
self.end = time.time()
self.secs = self.end - self.start
self.msecs = self.secs * 1000 # millisecs
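Timer is a plain timing context manager, used exactly as in on_request above:

```python
import time

with Timer("demo") as t:
    time.sleep(0.01)  # stand-in for real work
print(t.title, t.secs, t.msecs)
```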

View File

@ -36,4 +36,4 @@ class VoiceChangerManager():
return self.voiceChanger.on_request(unpackedData) return self.voiceChanger.on_request(unpackedData)
else: else:
print("Voice Change is not loaded. Did you load a correct model?") print("Voice Change is not loaded. Did you load a correct model?")
return np.zeros(1).astype(np.int16) return np.zeros(1).astype(np.int16), []

View File

@ -0,0 +1,208 @@
from features import SignalGenerator, dilated_factor
from scipy.interpolate import interp1d
import torch
import numpy as np
import json
import os
hann_window = {}
class TextAudioSpeakerCollate():
""" Zero-pads model inputs and targets
"""
def __init__(
self,
sample_rate,
hop_size,
f0_factor=1.0,
dense_factors=[0.5, 1, 4, 8],
upsample_scales=[8, 4, 2, 2],
sine_amp=0.1,
noise_amp=0.003,
signal_types=["sine"],
):
self.dense_factors = dense_factors
self.prod_upsample_scales = np.cumprod(upsample_scales)
self.sample_rate = sample_rate
self.signal_generator = SignalGenerator(
sample_rate=sample_rate,
hop_size=hop_size,
sine_amp=sine_amp,
noise_amp=noise_amp,
signal_types=signal_types,
)
self.f0_factor = f0_factor
def __call__(self, batch):
"""Collate's training batch from normalized text, audio and speaker identities
PARAMS
------
batch: [text_normalized, spec_normalized, wav_normalized, sid, note]
"""
spec_lengths = torch.LongTensor(len(batch))
sid = torch.LongTensor(len(batch))
spec_padded = torch.FloatTensor(len(batch), batch[0][0].size(0), batch[0][0].size(1))
f0_padded = torch.FloatTensor(len(batch), 1, batch[0][2].size(0))
# initialize the return tensors
spec_padded.zero_()
f0_padded.zero_()
# dfs
dfs_batch = [[] for _ in range(len(self.dense_factors))]
# row spec, sid, f0
for i in range(len(batch)):
row = batch[i]
spec = row[0]
spec_padded[i, :, :spec.size(1)] = spec
spec_lengths[i] = spec.size(1)
sid[i] = row[1]
# at inference time, multiply f0/cf0 by the f0 scale factor
f0 = row[2] * self.f0_factor
f0_padded[i, :, :f0.size(0)] = f0
# dfs
dfs = []
# dilated_factor takes numpy input!!
for df, us in zip(self.dense_factors, self.prod_upsample_scales):
dfs += [
np.repeat(dilated_factor(torch.unsqueeze(f0, dim=1).to('cpu').detach().numpy(), self.sample_rate, df), us)
]
# not fully understood yet; read the paper properly later
for i in range(len(self.dense_factors)):
dfs_batch[i] += [
dfs[i].astype(np.float32).reshape(-1, 1)
] # [(T', 1), ...]
# transpose the dfs (still not fully understood)
for i in range(len(self.dense_factors)):
dfs_batch[i] = torch.FloatTensor(np.array(dfs_batch[i])).transpose(
2, 1
) # (B, 1, T')
# convert f0/cf0 into the signal actually fed to the model
in_batch = self.signal_generator(f0_padded)
return spec_padded, spec_lengths, sid, in_batch, dfs_batch
def convert_continuos_f0(f0, f0_size):
# get start and end of f0
if (f0 == 0).all():
return np.zeros((f0_size,))
start_f0 = f0[f0 != 0][0]
end_f0 = f0[f0 != 0][-1]
# padding start and end of f0 sequence
cf0 = f0
start_idx = np.where(cf0 == start_f0)[0][0]
end_idx = np.where(cf0 == end_f0)[0][-1]
cf0[:start_idx] = start_f0
cf0[end_idx:] = end_f0
# get non-zero frame index
nz_frames = np.where(cf0 != 0)[0]
# perform linear interpolation
f = interp1d(nz_frames, cf0[nz_frames], bounds_error=False, fill_value=0.0)
cf0_ = f(np.arange(0, f0_size))
# print(cf0.shape, cf0_.shape, f0.shape, f0_size)
# print(cf0_)
return f(np.arange(0, f0_size))
def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False):
if torch.min(y) < -1.:
print('min value is ', torch.min(y))
if torch.max(y) > 1.:
print('max value is ', torch.max(y))
dtype_device = str(y.dtype) + '_' + str(y.device)
wnsize_dtype_device = str(win_size) + '_' + dtype_device
if wnsize_dtype_device not in hann_window:
hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device)
y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), mode='reflect')
y = y.squeeze(1)
spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device],
center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=True)
spec = torch.view_as_real(spec)
spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6)
return spec
def get_hparams_from_file(config_path):
with open(config_path, "r", encoding="utf-8") as f:
data = f.read()
config = json.loads(data)
hparams = HParams(**config)
return hparams
class HParams():
def __init__(self, **kwargs):
for k, v in kwargs.items():
if type(v) == dict:
v = HParams(**v)
self[k] = v
def keys(self):
return self.__dict__.keys()
def items(self):
return self.__dict__.items()
def values(self):
return self.__dict__.values()
def __len__(self):
return len(self.__dict__)
def __getitem__(self, key):
return getattr(self, key)
def __setitem__(self, key, value):
return setattr(self, key, value)
def __contains__(self, key):
return key in self.__dict__
def __repr__(self):
return self.__dict__.__repr__()
def load_checkpoint(checkpoint_path, model, optimizer=None):
assert os.path.isfile(checkpoint_path), f"No such file or directory: {checkpoint_path}"
checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
iteration = checkpoint_dict['iteration']
learning_rate = checkpoint_dict['learning_rate']
if optimizer is not None:
optimizer.load_state_dict(checkpoint_dict['optimizer'])
saved_state_dict = {
**checkpoint_dict['pe'],
**checkpoint_dict['flow'],
**checkpoint_dict['text_enc'],
**checkpoint_dict['dec'],
**checkpoint_dict['emb_g']
}
if hasattr(model, 'module'):
state_dict = model.module.state_dict()
else:
state_dict = model.state_dict()
new_state_dict = {}
for k, v in state_dict.items():
try:
new_state_dict[k] = saved_state_dict[k]
except KeyError:
# fall back to the model's current weights for keys missing from the checkpoint
new_state_dict[k] = v
if hasattr(model, 'module'):
model.module.load_state_dict(new_state_dict)
else:
model.load_state_dict(new_state_dict)
return model, optimizer, learning_rate, iteration

2
trainer/F0/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

2
trainer/cF0/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

2
trainer/configs/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

2
trainer/dataset/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

2
trainer/logs/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

2
trainer/units/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore