reactor

2023-08-13 18:55:50 +09:00 · 2023-08-13 18:55:50 +09:00 · 9fe14f94e2
commit 9fe14f94e2
parent 46ff05b04f
1 changed files with 83 additions and 56 deletions
--- a/server/voice_changer/RVC/pipeline/Pipeline.py
+++ b/server/voice_changer/RVC/pipeline/Pipeline.py
@ -74,6 +74,66 @@ class Pipeline(object):
    def setPitchExtractor(self, pitchExtractor: PitchExtractor):
        self.pitchExtractor = pitchExtractor

+    def extractPitch(self, audio_pad, if_f0, pitchf, f0_up_key, silence_front):
+        """Run the configured pitch extractor and wrap its output as tensors.
+
+        Args:
+            audio_pad: Input forwarded unchanged to ``self.pitchExtractor.extract``.
+            if_f0: Pitch extraction runs only when this equals ``1``.
+            pitchf: Forwarded to the extractor; replaced by the extractor's
+                second return value.
+            f0_up_key: Forwarded to the extractor (presumably a pitch shift;
+                confirm against the PitchExtractor implementation).
+            silence_front: Forwarded to the extractor as a keyword argument.
+
+        Returns:
+            ``(pitch, pitchf)``. When ``if_f0 == 1`` both are tensors on
+            ``self.device`` with a new leading axis (``unsqueeze(0)``);
+            ``pitch`` is converted to long and ``pitchf`` is created as float.
+            Otherwise ``(None, None)``.
+
+        Raises:
+            NotEnoughDataExtimateF0: If an ``IndexError`` is raised while
+                extracting or converting the pitch. The original error is
+                attached as the cause.
+        """
+        if if_f0 != 1:
+            return None, None
+
+        try:
+            pitch, pitchf = self.pitchExtractor.extract(
+                audio_pad,
+                pitchf,
+                f0_up_key,
+                self.sr,
+                self.window,
+                silence_front=silence_front,
+            )
+            pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
+            pitchf = torch.tensor(pitchf, device=self.device, dtype=torch.float).unsqueeze(0)
+        except IndexError as e:
+            # Chain explicitly so the failing index operation stays visible in
+            # the traceback instead of being hidden behind the domain error.
+            raise NotEnoughDataExtimateF0() from e
+        return pitch, pitchf
+
+    def extractFeatures(self, feats, embOutputLayer, useFinalProj):
+        """Run the embedder on ``feats`` inside an autocast context.
+
+        Args:
+            feats: Input tensor forwarded to ``self.embedder.extractFeatures``.
+            embOutputLayer: Forwarded unchanged to the embedder.
+            useFinalProj: Forwarded unchanged to the embedder.
+
+        Returns:
+            The embedder's output.
+
+        Raises:
+            DeviceCannotSupportHalfPrecisionException: If every element of the
+                embedder output is NaN.
+            HalfPrecisionChangingException: If a ``RuntimeError`` whose message
+                contains "half" (case-insensitive) is raised.
+            DeviceChangingException: If a ``RuntimeError`` whose message
+                contains "same device" is raised.
+            RuntimeError: Any other ``RuntimeError`` is re-raised unchanged.
+        """
+        with autocast(enabled=self.isHalf):
+            try:
+                feats = self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj)
+                if torch.isnan(feats).all():
+                    raise DeviceCannotSupportHalfPrecisionException()
+                return feats
+            except RuntimeError as e:
+                # Classify by message text; the "half" check wins when both match.
+                message = str(e)
+                if "HALF" in message.upper():
+                    raise HalfPrecisionChangingException() from e
+                if "same device" in message:
+                    raise DeviceChangingException() from e
+                raise
+                
+    def infer(self, feats, p_len, pitch, pitchf, sid, out_size):
+        """Run the inferencer and convert its output to an int16 tensor.
+
+        All arguments are forwarded unchanged to ``self.inferencer.infer``.
+        Element ``[0][0, 0]`` of the inferencer's return value is cast to
+        float32, clipped to ``[-1.0, 1.0]``, multiplied by ``32767.5`` and cast
+        to ``torch.int16``. The call runs under ``torch.no_grad()`` and
+        ``autocast(enabled=self.isHalf)``.
+
+        Returns:
+            The converted int16 tensor.
+
+        Raises:
+            HalfPrecisionChangingException: If a ``RuntimeError`` whose message
+                contains "half" (case-insensitive) is raised; the error is
+                printed first and attached as the cause.
+            RuntimeError: Any other ``RuntimeError`` is re-raised unchanged.
+        """
+        try:
+            with torch.no_grad(), autocast(enabled=self.isHalf):
+                result = self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)
+                # Widen to float32 before clipping and scaling.
+                wave = result[0][0, 0].to(dtype=torch.float32)
+                scaled = torch.clip(wave, -1.0, 1.0) * 32767.5
+                audio1 = scaled.data.to(dtype=torch.int16)
+            return audio1
+        except RuntimeError as e:
+            if "HALF" in str(e).upper():
+                print("HalfPresicion Error:", e)
+                raise HalfPrecisionChangingException() from e
+            raise
+
    def exec(
        self,
        sid,
@ -113,30 +173,6 @@ class Pipeline(object):
            pitchf = pitchf if repeat == 0 else np.zeros(p_len)
            out_size = out_size if repeat == 0 else None

-            # ピッチ検出
-            try:
-                if if_f0 == 1:
-                    pitch, pitchf = self.pitchExtractor.extract(
-                        audio_pad,
-                        pitchf,
-                        f0_up_key,
-                        self.sr,
-                        self.window,
-                        silence_front=silence_front,
-                    )
-                    # pitch = pitch[:p_len]
-                    # pitchf = pitchf[:p_len]
-                    pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
-                    pitchf = torch.tensor(pitchf, device=self.device, dtype=torch.float).unsqueeze(0)
-                else:
-                    pitch = None
-                    pitchf = None
-            except IndexError as e:  # NOQA
-                # print(e)
-                # import traceback
-                # traceback.print_exc()
-                raise NotEnoughDataExtimateF0()
-
            # tensor型調整
            feats = audio_pad
            if feats.dim() == 2:  # double channels
@ -144,21 +180,27 @@ class Pipeline(object):
            assert feats.dim() == 1, feats.dim()
            feats = feats.view(1, -1)

+            # ピッチ検出
+            with Timer("main-process", True) as t:
+                pitch, pitchf = self.extractPitch(audio_pad, if_f0, pitchf, f0_up_key, silence_front)
+            print(f"[Perform(Pit)] {t.secs}")
+
            # embedding
-            with Timer("main-process", False) as te:
-                with autocast(enabled=self.isHalf):
-                    try:
-                        feats = self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj)
-                        if torch.isnan(feats).all():
-                            raise DeviceCannotSupportHalfPrecisionException()
-                    except RuntimeError as e:
-                        if "HALF" in e.__str__().upper():
-                            raise HalfPrecisionChangingException()
-                        elif "same device" in e.__str__():
-                            raise DeviceChangingException()
-                        else:
-                            raise e
-            # print(f"[Embedding] {te.secs}")
+            with Timer("main-process", True) as t:
+                # with autocast(enabled=self.isHalf):
+                #     try:
+                #         feats = self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj)
+                #         if torch.isnan(feats).all():
+                #             raise DeviceCannotSupportHalfPrecisionException()
+                #     except RuntimeError as e:
+                #         if "HALF" in e.__str__().upper():
+                #             raise HalfPrecisionChangingException()
+                #         elif "same device" in e.__str__():
+                #             raise DeviceChangingException()
+                #         else:
+                #             raise e
+                feats = self.extractFeatures(feats, embOutputLayer, useFinalProj)
+            print(f"[Perform(Emb)] {t.secs}")

            # Index - feature抽出
            # if self.index is not None and self.feature is not None and index_rate != 0:
@ -227,26 +269,11 @@ class Pipeline(object):
            p_len = torch.tensor([feats_len], device=self.device).long()

            # 推論実行
-            try:
-                with torch.no_grad():
-                    with autocast(enabled=self.isHalf):
-                        audio1 = (
-                            torch.clip(
-                                self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)[0][0, 0].to(dtype=torch.float32),
-                                -1.0,
-                                1.0,
-                            )
-                            * 32767.5
-                        ).data.to(dtype=torch.int16)
-            except RuntimeError as e:
-                if "HALF" in e.__str__().upper():
-                    print("11", e)
-                    raise HalfPrecisionChangingException()
-                else:
-                    raise e
+            with Timer("main-process", True) as t:
+                audio1 = self.infer(feats, p_len, pitch, pitchf, sid, out_size)
+            print(f"[Perform(Inf)] {t.secs}")

            feats_buffer = feats.squeeze(0).detach().cpu()
-
            if pitchf is not None:
                pitchf_buffer = pitchf.squeeze(0).detach().cpu()
            else: