support protect

2023-06-01 13:28:45 +09:00 · 2023-06-01 13:28:45 +09:00 · 8685f9e5bf
commit 8685f9e5bf
parent b53fd5c123 ac7c751439
24 changed files with 2209 additions and 3644 deletions
--- a/client/demo/dist/assets/gui_settings/RVC.json
+++ b/client/demo/dist/assets/gui_settings/RVC.json
@ -175,11 +175,11 @@
                "options": {}
            },
            {
-                "name": "rvcQuality",
+                "name": "silenceFront",
                "options": {}
            },
            {
-                "name": "silenceFront",
+                "name": "protect",
                "options": {}
            }
        ]
--- a/client/demo/dist/index.html
+++ b/client/demo/dist/index.html
@ -1,10 +1 @@
-<!DOCTYPE html>
-<html style="width: 100%; height: 100%; overflow: hidden">
-    <head>
-        <meta charset="utf-8" />
-        <title>Voice Changer Client Demo</title>
-    <script defer src="index.js"></script></head>
-    <body style="width: 100%; height: 100%; margin: 0px">
-        <div id="app" style="width: 100%; height: 100%"></div>
-    </body>
-</html>
+<!doctype html><html style="width:100%;height:100%;overflow:hidden"><head><meta charset="utf-8"/><title>Voice Changer Client Demo</title><script defer="defer" src="index.js"></script></head><body style="width:100%;height:100%;margin:0"><div id="app" style="width:100%;height:100%"></div></body></html>
--- a/client/demo/dist/index.js
+++ b/client/demo/dist/index.js
--- a/client/demo/dist/index.js.LICENSE.txt
+++ b/client/demo/dist/index.js.LICENSE.txt
@ -0,0 +1,31 @@
+/*! regenerator-runtime -- Copyright (c) 2014-present, Facebook, Inc. -- license (MIT): https://github.com/facebook/regenerator/blob/main/LICENSE */
+
+/**
+ * @license React
+ * react-dom.production.min.js
+ *
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/**
+ * @license React
+ * react.production.min.js
+ *
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+/**
+ * @license React
+ * scheduler.production.min.js
+ *
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
--- a/client/demo/package-lock.json
+++ b/client/demo/package-lock.json
--- a/client/demo/package.json
+++ b/client/demo/package.json
@ -19,18 +19,18 @@
    "author": "wataru.okada@flect.co.jp",
    "license": "ISC",
    "devDependencies": {
-        "@babel/core": "^7.22.0",
-        "@babel/plugin-transform-runtime": "^7.22.0",
-        "@babel/preset-env": "^7.22.0",
-        "@babel/preset-react": "^7.22.0",
+        "@babel/core": "^7.22.1",
+        "@babel/plugin-transform-runtime": "^7.22.4",
+        "@babel/preset-env": "^7.22.4",
+        "@babel/preset-react": "^7.22.3",
        "@babel/preset-typescript": "^7.21.5",
-        "@types/node": "^20.2.4",
+        "@types/node": "^20.2.5",
        "@types/react": "^18.2.7",
        "@types/react-dom": "^18.2.4",
        "autoprefixer": "^10.4.14",
        "babel-loader": "^9.1.2",
        "copy-webpack-plugin": "^11.0.0",
-        "css-loader": "^6.7.4",
+        "css-loader": "^6.8.1",
        "eslint": "^8.41.0",
        "eslint-config-prettier": "^8.8.0",
        "eslint-plugin-prettier": "^4.2.1",
@ -39,7 +39,7 @@
        "html-loader": "^4.2.0",
        "html-webpack-plugin": "^5.5.1",
        "npm-run-all": "^4.1.5",
-        "postcss-loader": "^7.3.1",
+        "postcss-loader": "^7.3.2",
        "postcss-nested": "^6.0.1",
        "prettier": "^2.8.8",
        "rimraf": "^5.0.1",
@ -47,12 +47,12 @@
        "ts-loader": "^9.4.3",
        "tsconfig-paths": "^4.2.0",
        "typescript": "^5.0.4",
-        "webpack": "^5.84.1",
+        "webpack": "^5.85.0",
        "webpack-cli": "^5.1.1",
        "webpack-dev-server": "^4.15.0"
    },
    "dependencies": {
-        "@dannadori/voice-changer-client-js": "^1.0.135",
+        "@dannadori/voice-changer-client-js": "^1.0.138",
        "@fortawesome/fontawesome-svg-core": "^6.4.0",
        "@fortawesome/free-brands-svg-icons": "^6.4.0",
        "@fortawesome/free-regular-svg-icons": "^6.4.0",
--- a/client/demo/public/assets/gui_settings/RVC.json
+++ b/client/demo/public/assets/gui_settings/RVC.json
@ -175,11 +175,11 @@
                "options": {}
            },
            {
-                "name": "rvcQuality",
+                "name": "silenceFront",
                "options": {}
            },
            {
-                "name": "silenceFront",
+                "name": "protect",
                "options": {}
            }
        ]
--- a/client/demo/src/components/demo/002_ComponentGenerator.tsx
+++ b/client/demo/src/components/demo/002_ComponentGenerator.tsx
@ -32,7 +32,6 @@ import { CrossFadeOffsetRateRow, CrossFadeOffsetRateRowProps } from "./component
 import { CrossFadeEndRateRow, CrossFadeEndRateRowProps } from "./components/807_CrossFadeEndRateRow"
 import { DownSamplingModeRow, DownSamplingModeRowProps } from "./components/808_DownSamplingModeRow"
 import { TrancateNumTresholdRow, TrancateNumTresholdRowProps } from "./components/809_TrancateNumTresholdRow"
-import { IndexRatioRow, IndexRatioRowProps } from "./components/609_IndexRatioRow"
 import { RVCQualityRow, RVCQualityRowProps } from "./components/810_RVCQualityRow"
 import { ModelSamplingRateRow, ModelSamplingRateRowProps } from "./components/303_ModelSamplingRateRow"
 import { DstIdRow2, DstIdRow2Props } from "./components/602v2_DstIdRow2"
@ -56,6 +55,8 @@ import { DiffMethodRow, DiffMethodRowProps } from "./components/613_DiffMethodRo
 import { ServerOpertationRow, ServerOpertationRowProps } from "./components/207_ServerOpertationRow"
 import { SampleModelSelectRow, SampleModelSelectRowProps } from "./components/301-j_SampleModelSelectRow"
 import { SampleDownloadControlRow, SampleDownloadControlRowProps } from "./components/301-k_SampleDownloadControl"
+import { IndexRatioRow, IndexRatioRowProps } from "./components/609_IndexRatioRow copy"
+import { ProtectRow, ProtectRowProps } from "./components/610_ProtectRow"

 export const catalog: { [key: string]: (props: any) => JSX.Element } = {}

@ -122,6 +123,7 @@ const initialize = () => {
    addToCatalog("noiseScale", (props: NoiseScaleRowProps) => { return <NoiseScaleRow {...props} /> })
    addToCatalog("silentThreshold", (props: SilentThresholdRowProps) => { return <SilentThresholdRow {...props} /> })
    addToCatalog("indexRatio", (props: IndexRatioRowProps) => { return <IndexRatioRow {...props} /> })
+    addToCatalog("protect", (props: ProtectRowProps) => { return <ProtectRow {...props} /> })
    addToCatalog("diffEnabler", (props: DiffEnablerRowProps) => { return <DiffEnablerRow {...props} /> })
    addToCatalog("diffSetting", (props: DiffSettingRowProps) => { return <DiffSettingRow {...props} /> })
    addToCatalog("diffMethod", (props: DiffMethodRowProps) => { return <DiffMethodRow {...props} /> })
--- a/client/demo/src/components/demo/903_StartingNoticeDialog.tsx
+++ b/client/demo/src/components/demo/903_StartingNoticeDialog.tsx
@ -77,7 +77,7 @@ export const StartingNoticeDialog = () => {
        const content = (
            <div className="body-row">
                {lang != "ja" || edition.indexOf("onnxdirectML-cuda") >= 0 ? donationMessage : <></>}
-                {lang != "ja" || edition.indexOf("onnxdirectML-cuda") >= 0 ? directMLMessage : <></>}
+                {edition.indexOf("onnxdirectML-cuda") >= 0 ? directMLMessage : <></>}
                {clickToStartMessage}
            </div>
        )
--- a/client/demo/src/components/demo/components/204_ModelSwitchRow.tsx
+++ b/client/demo/src/components/demo/components/204_ModelSwitchRow.tsx
@ -24,7 +24,10 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
        }


-        const options = appState.serverSetting.serverSetting.modelSlots.map((x, index) => {
+        const modelSlots = appState.serverSetting.serverSetting.modelSlots
+        let options: React.JSX.Element[] = []
+        if (modelSlots) {
+            options = modelSlots.map((x, index) => {
                let filename = ""
                if (x.modelFile && x.modelFile.length > 0) {
                    filename = x.modelFile.replace(/^.*[\\\/]/, '')
@ -53,16 +56,18 @@ export const ModelSwitchRow = (_props: ModelSwitchRowProps) => {
                const tuning = `t:${x.defaultTune}`
                const useIndex = x.indexFile != null ? `i:true` : `i:false`
                const defaultIndexRatio = `ir:${x.defaultIndexRatio}`
-            const subMetadata = `(${tuning},${useIndex},${defaultIndexRatio})`
+                const defaultProtect = `p:${x.defaultProtect}`
+                const subMetadata = `(${tuning},${useIndex},${defaultIndexRatio},${defaultProtect})`
                const displayName = `${metadata} ${x.name || filename}  ${subMetadata}`


                return (
                    <option key={index} value={index}>{displayName}</option>
                )
-        }).filter(x => { return x != null })
+            }).filter(x => { return x != null }) as React.JSX.Element[]
+        }

-        const selectedTermOfUseUrl = appState.serverSetting.serverSetting.modelSlots[slot]?.termsOfUseUrl || null
+        const selectedTermOfUseUrl = modelSlots ? modelSlots[slot]?.termsOfUseUrl || null : null
        const selectedTermOfUseUrlLink = selectedTermOfUseUrl ? <a href={selectedTermOfUseUrl} target="_blank" rel="noopener noreferrer" className="body-item-text-small">[terms of use]</a> : <></>

        return (
--- a/client/demo/src/components/demo/components/609_IndexRatioRow.tsx
+++ b/client/demo/src/components/demo/components/609_IndexRatioRow.tsx
--- a/client/demo/src/components/demo/components/610_ProtectRow.tsx
+++ b/client/demo/src/components/demo/components/610_ProtectRow.tsx
@ -0,0 +1,30 @@
+import React, { useMemo } from "react"
+import { useAppState } from "../../../001_provider/001_AppStateProvider"
+
+export type ProtectRowProps = {
+}
+
+// Slider row for the RVC "protect" server setting (range 0 to 0.5, step 0.1).
+export const ProtectRow = (_props: ProtectRowProps) => {
+    const appState = useAppState()
+
+    const protectRow = useMemo(() => {
+        // An unset value falls back to 0 so the label always agrees with the slider position.
+        const protect = appState.serverSetting.serverSetting.protect || 0
+        return (
+            <div className="body-row split-3-3-4 left-padding-1 guided">
+                <div className="body-item-title left-padding-1 ">protect</div>
+                <div>
+                    <input type="range" className="body-item-input-slider" min="0" max="0.5" step="0.1" value={protect} onChange={(e) => {
+                        // Send the whole settings object back with only `protect` replaced.
+                        appState.serverSetting.updateServerSettings({ ...appState.serverSetting.serverSetting, protect: Number(e.target.value) })
+                    }}></input>
+                    <span className="body-item-input-slider-val">{protect}</span>
+                </div>
+                <div className="body-button-container"></div>
+            </div>
+        )
+    }, [appState.serverSetting.serverSetting, appState.serverSetting.updateServerSettings])
+
+    return protectRow
+}
--- a/client/demo/src/components/demo/components/a01_MergeLab.Row.tsx
+++ b/client/demo/src/components/demo/components/a01_MergeLab.Row.tsx
@ -15,6 +15,9 @@ export const MergeLabRow = (_props: MergeLabRowProps) => {

    // スロットが変更されたときの初期化処理
    const newSlotChangeKey = useMemo(() => {
+        if (!appState.serverSetting.serverSetting.modelSlots) {
+            return ""
+        }
        return appState.serverSetting.serverSetting.modelSlots.reduce((prev, cur) => {
            return prev + "_" + cur.modelFile
        }, "")
@ -50,6 +53,7 @@ export const MergeLabRow = (_props: MergeLabRowProps) => {
                command: "mix",
                defaultTune: defaultTune,
                defaultIndexRatio: 1,
+                defaultProtect: 0.5,
                files: mergeElements
            })
        }
--- a/client/lib/package-lock.json
+++ b/client/lib/package-lock.json
--- a/client/lib/package.json
+++ b/client/lib/package.json
@ -1,6 +1,6 @@
 {
  "name": "@dannadori/voice-changer-client-js",
-  "version": "1.0.136",
+  "version": "1.0.138",
  "description": "",
  "main": "dist/index.js",
  "directories": {
@ -27,7 +27,7 @@
  "license": "ISC",
  "devDependencies": {
    "@types/audioworklet": "^0.0.46",
-    "@types/node": "^20.2.4",
+    "@types/node": "^20.2.5",
    "@types/react": "18.2.7",
    "@types/react-dom": "18.2.4",
    "eslint": "^8.41.0",
@ -41,17 +41,17 @@
    "rimraf": "^5.0.1",
    "ts-loader": "^9.4.3",
    "typescript": "^5.0.4",
-    "webpack": "^5.84.1",
+    "webpack": "^5.85.0",
    "webpack-cli": "^5.1.1",
    "webpack-dev-server": "^4.15.0"
  },
  "dependencies": {
    "@types/readable-stream": "^2.3.15",
-    "amazon-chime-sdk-js": "^3.14.0",
+    "amazon-chime-sdk-js": "^3.14.1",
    "buffer": "^6.0.3",
    "localforage": "^1.10.0",
    "react": "^18.2.0",
    "react-dom": "^18.2.0",
-    "socket.io-client": "^4.6.1"
+    "socket.io-client": "^4.6.2"
  }
 }
--- a/client/lib/src/const.ts
+++ b/client/lib/src/const.ts
@ -118,6 +118,7 @@ export const ServerSettingKey = {
    "clusterInferRatio": "clusterInferRatio",

    "indexRatio": "indexRatio",
+    "protect": "protect",
    "rvcQuality": "rvcQuality",
    "modelSamplingRate": "modelSamplingRate",
    "silenceFront": "silenceFront",
@ -176,6 +177,7 @@ export type VoiceChangerServerSetting = {
    clusterInferRatio: number // so-vits-svc

    indexRatio: number // RVC
+    protect: number // RVC
    rvcQuality: number // 0:low, 1:high
    silenceFront: number // 0:off, 1:on
    modelSamplingRate: ModelSamplingRate // 32000,40000,48000
@ -202,6 +204,7 @@ type ModelSlot = {

    defaultTune: number,
    defaultIndexRatio: number,
+    defaultProtect: number,

    modelType: ModelType,
    embChannels: number,
@ -294,6 +297,7 @@ export const DefaultServerSetting: ServerInfo = {
    clusterInferRatio: 0,

    indexRatio: 0,
+    protect: 0.5,
    rvcQuality: 0,
    modelSamplingRate: 48000,
    silenceFront: 1,
@ -523,5 +527,6 @@ export type MergeModelRequest = {
    command: "mix",
    defaultTune: number,
    defaultIndexRatio: number,
+    defaultProtect: number,
    files: MergeElement[]
 }
--- a/client/lib/src/hooks/useServerSetting.ts
+++ b/client/lib/src/hooks/useServerSetting.ts
@ -15,6 +15,7 @@ export type FileUploadSetting = {
    uploaded: boolean
    defaultTune: number
    defaultIndexRatio: number
+    defaultProtect: number
    framework: Framework
    params: string

@ -48,6 +49,7 @@ const InitialFileUploadSetting: FileUploadSetting = {
    uploaded: false,
    defaultTune: 0,
    defaultIndexRatio: 1,
+    defaultProtect: 0.5,
    framework: Framework.PyTorch,
    params: "{}",

@ -372,6 +374,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
            const params = JSON.stringify({
                defaultTune: fileUploadSetting.defaultTune || 0,
                defaultIndexRatio: fileUploadSetting.defaultIndexRatio || 1,
+                defaultProtect: fileUploadSetting.defaultProtect || 0.5,
                sampleId: fileUploadSetting.isSampleMode ? fileUploadSetting.sampleId || "" : "",
                rvcIndexDownload: fileUploadSetting.rvcIndexDownload || false,
                files: fileUploadSetting.isSampleMode ? {} : {
@ -432,6 +435,7 @@ export const useServerSetting = (props: UseServerSettingProps): ServerSettingSta
                uploaded: false, // キャッシュから読み込まれるときには、まだuploadされていないから。
                defaultTune: fileUploadSetting.defaultTune,
                defaultIndexRatio: fileUploadSetting.defaultIndexRatio,
+                defaultProtect: fileUploadSetting.defaultProtect,
                framework: fileUploadSetting.framework,
                params: fileUploadSetting.params,

--- a/server/voice_changer/RVC/ModelSlot.py
+++ b/server/voice_changer/RVC/ModelSlot.py
@ -9,6 +9,7 @@ class ModelSlot:
    indexFile: str = ""
    defaultTune: int = 0
    defaultIndexRatio: int = 1
+    defaultProtect: float = 0.5
    isONNX: bool = False
    modelType: str = EnumInferenceTypes.pyTorchRVC.value
    samplingRate: int = -1
--- a/server/voice_changer/RVC/RVC.py
+++ b/server/voice_changer/RVC/RVC.py
@ -139,6 +139,7 @@ class RVC:

        slotInfo.defaultTune = params["defaultTune"]
        slotInfo.defaultIndexRatio = params["defaultIndexRatio"]
+        slotInfo.defaultProtect = params["defaultProtect"]
        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")

        if slotInfo.isONNX:
@ -239,6 +240,7 @@ class RVC:
        # その他の設定
        self.next_trans = modelSlot.defaultTune
        self.next_index_ratio = modelSlot.defaultIndexRatio
+        self.next_protect = modelSlot.defaultProtect
        self.next_samplingRate = modelSlot.samplingRate
        self.next_framework = "ONNX" if modelSlot.isONNX else "PyTorch"
        # self.needSwitch = True
@ -251,6 +253,7 @@ class RVC:
        self.pipeline = self.next_pipeline
        self.settings.tran = self.next_trans
        self.settings.indexRatio = self.next_index_ratio
+        self.settings.protect = self.next_protect
        self.settings.modelSamplingRate = self.next_samplingRate
        self.settings.framework = self.next_framework

@ -346,6 +349,7 @@ class RVC:
        sid = 0
        f0_up_key = self.settings.tran
        index_rate = self.settings.indexRatio
+        protect = self.settings.protect
        if_f0 = 1 if self.settings.modelSlots[self.currentSlot].f0 else 0
        embOutputLayer = self.settings.modelSlots[self.currentSlot].embOutputLayer
        useFinalProj = self.settings.modelSlots[self.currentSlot].useFinalProj
@ -360,6 +364,7 @@ class RVC:
            embOutputLayer,
            useFinalProj,
            repeat,
+            protect,
        )

        result = audio_out.detach().cpu().numpy() * np.sqrt(vol)
@ -421,6 +426,7 @@ class RVC:
        params = {
            "defaultTune": req.defaultTune,
            "defaultIndexRatio": req.defaultIndexRatio,
+            "defaultProtect": req.defaultProtect,
            "sampleId": "",
            "files": {"rvcModel": storeFile},
        }
@ -442,6 +448,7 @@ class RVC:
        )
        params["defaultTune"] = self.settings.tran
        params["defaultIndexRatio"] = self.settings.indexRatio
+        params["defaultProtect"] = self.settings.protect

        json.dump(params, open(os.path.join(slotDir, "params.json"), "w"))
        self.loadSlots()
--- a/server/voice_changer/RVC/RVCSettings.py
+++ b/server/voice_changer/RVC/RVCSettings.py
@ -24,6 +24,7 @@ class RVCSettings:
    sampleModels: list[RVCModelSample] = field(default_factory=lambda: [])

    indexRatio: float = 0
+    protect: float = 0.5
    rvcQuality: int = 0
    silenceFront: int = 1  # 0:off, 1:on
    modelSamplingRate: int = 48000
@ -45,5 +46,5 @@ class RVCSettings:
        "isHalf",
        "enableDirectML",
    ]
-    floatData = ["silentThreshold", "indexRatio"]
+    floatData = ["silentThreshold", "indexRatio", "protect"]
    strData = ["framework", "f0Detector"]
--- a/server/voice_changer/RVC/SampleDownloader.py
+++ b/server/voice_changer/RVC/SampleDownloader.py
@ -82,9 +82,9 @@ def downloadInitialSampleModels(sampleJsons: list[str], model_dir: str):
        slotInfo.description = sample.description
        slotInfo.name = sample.name
        slotInfo.termsOfUseUrl = sample.termsOfUseUrl
-
        slotInfo.defaultTune = 0
        slotInfo.defaultIndexRatio = 1
+        slotInfo.defaultProtect = 0.5
        slotInfo.isONNX = slotInfo.modelFile.endswith(".onnx")

        # この時点ではまだファイルはダウンロードされていない
--- a/server/voice_changer/RVC/modelMerger/MergeModelRequest.py
+++ b/server/voice_changer/RVC/modelMerger/MergeModelRequest.py
@ -17,4 +17,5 @@ class MergeModelRequest:
    slot: int = -1
    defaultTune: int = 0
    defaultIndexRatio: int = 1
+    defaultProtect: float = 0.5
    files: List[MergeFile] = field(default_factory=lambda: [])
--- a/server/voice_changer/RVC/pipeline/Pipeline.py
+++ b/server/voice_changer/RVC/pipeline/Pipeline.py
@ -83,7 +83,9 @@ class Pipeline(object):
        embOutputLayer,
        useFinalProj,
        repeat,
+        protect=0.5,
    ):
+        search_index = self.index is not None and self.big_npy is not None and index_rate != 0
        self.t_pad = self.sr * repeat
        self.t_pad_tgt = self.targetSR * repeat

@ -136,10 +138,12 @@ class Pipeline(object):
                raise DeviceChangingException()
            else:
                raise e
+        if protect < 0.5 and search_index:
+             feats0 = feats.clone()   

        # Index - feature抽出
        # if self.index is not None and self.feature is not None and index_rate != 0:
-        if self.index is not None and self.big_npy is not None and index_rate != 0:
+        if search_index:
            npy = feats[0].cpu().numpy()
            if self.isHalf is True:
                npy = npy.astype("float32")
@ -165,7 +169,10 @@ class Pipeline(object):
                + (1 - index_rate) * feats
            )
        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
-
+        if protect < 0.5 and search_index:
+            feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(
+                0, 2, 1
+            )
        # ピッチサイズ調整
        p_len = audio_pad.shape[0] // self.window
        if feats.shape[1] < p_len:
@ -173,6 +180,15 @@ class Pipeline(object):
            if pitch is not None and pitchf is not None:
                pitch = pitch[:, :p_len]
                pitchf = pitchf[:, :p_len]
+
+        # pitchの推定が上手くいかない(pitchf=0)場合、検索前の特徴を混ぜる
+        if protect < 0.5 and search_index:
+            pitchff = pitchf.clone()
+            pitchff[pitchf > 0] = 1
+            pitchff[pitchf < 1] = protect
+            pitchff = pitchff.unsqueeze(-1)
+            feats = feats * pitchff + feats0 * (1 - pitchff)
+            feats = feats.to(feats0.dtype)
        p_len = torch.tensor([p_len], device=self.device).long()

        # 推論実行
--- a/server/voice_changer/RVC/pitchExtractor/CrepePitchExtractor.py
+++ b/server/voice_changer/RVC/pitchExtractor/CrepePitchExtractor.py
@ -29,7 +29,7 @@ class CrepePitchExtractor(PitchExtractor):
        f0_mel_min = 1127 * np.log(1 + f0_min / 700)
        f0_mel_max = 1127 * np.log(1 + f0_max / 700)

-        f0 = torchcrepe.predict(
+        f0, pd = torchcrepe.predict(
            audio.unsqueeze(0),
            sr,
            hop_length=window,
@ -40,8 +40,11 @@ class CrepePitchExtractor(PitchExtractor):
            batch_size=256,
            decoder=torchcrepe.decode.weighted_argmax,
            device=self.device,
+            return_periodicity=True,
        )
-        f0 = torchcrepe.filter.median(f0, 3)
+        f0 = torchcrepe.filter.median(f0, 3)  # 本家だとmeanですが、harvestに合わせmedianフィルタ
+        pd = torchcrepe.filter.median(pd, 3)
+        f0[pd < 0.1] = 0
        f0 = f0.squeeze()

        f0 = torch.nn.functional.pad(