This commit is contained in:
w-okada 2023-08-13 18:55:50 +09:00
parent 46ff05b04f
commit 9fe14f94e2

View File

@ -74,6 +74,66 @@ class Pipeline(object):
def setPitchExtractor(self, pitchExtractor: PitchExtractor):
    self.pitchExtractor = pitchExtractor
def extractPitch(self, audio_pad, if_f0, pitchf, f0_up_key, silence_front):
    """Run pitch (F0) extraction on the padded audio.

    Parameters
    ----------
    audio_pad : padded input waveform handed to ``self.pitchExtractor``.
    if_f0 : ``1`` to perform F0 extraction; any other value skips it.
    pitchf : pitch-contour buffer passed through to the extractor.
    f0_up_key : pitch-shift amount forwarded to the extractor.
    silence_front : leading-silence amount the extractor may skip.

    Returns
    -------
    tuple
        ``(pitch, pitchf)`` as tensors on ``self.device`` (``pitch`` long,
        ``pitchf`` float, both with a leading batch dim), or
        ``(None, None)`` when ``if_f0 != 1``.

    Raises
    ------
    NotEnoughDataExtimateF0
        When the extractor fails with an ``IndexError`` (not enough
        samples to estimate F0).
    """
    # Guard clause: nothing to extract, so avoid the try block entirely.
    if if_f0 != 1:
        return None, None
    try:
        pitch, pitchf = self.pitchExtractor.extract(
            audio_pad,
            pitchf,
            f0_up_key,
            self.sr,
            self.window,
            silence_front=silence_front,
        )
        pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
        pitchf = torch.tensor(pitchf, device=self.device, dtype=torch.float).unsqueeze(0)
    except IndexError as e:
        # Chain the original IndexError so the root cause stays visible
        # in the traceback instead of being silently discarded.
        raise NotEnoughDataExtimateF0() from e
    return pitch, pitchf
def extractFeatures(self, feats, embOutputLayer, useFinalProj):
    """Extract embedding features, translating low-level runtime
    failures into pipeline-specific exceptions.

    Parameters
    ----------
    feats : input tensor handed to ``self.embedder.extractFeatures``.
    embOutputLayer : embedder output-layer selector (passed through).
    useFinalProj : whether the embedder applies its final projection
        (passed through).

    Returns
    -------
    The embedder's output tensor.

    Raises
    ------
    DeviceCannotSupportHalfPrecisionException
        When the output is entirely NaN — the device silently failed
        under half precision.
    HalfPrecisionChangingException
        When the underlying RuntimeError mentions half precision.
    DeviceChangingException
        When the underlying RuntimeError mentions a device mismatch.
    """
    with autocast(enabled=self.isHalf):
        try:
            feats = self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj)
            if torch.isnan(feats).all():
                raise DeviceCannotSupportHalfPrecisionException()
            return feats
        except RuntimeError as e:
            # Classify the failure once from the message text, and chain
            # the original error so the root cause is not lost.
            msg = str(e)
            if "HALF" in msg.upper():
                raise HalfPrecisionChangingException() from e
            if "same device" in msg:
                raise DeviceChangingException() from e
            # Unrecognized failure: re-raise as-is, preserving traceback.
            raise
def infer(self, feats, p_len, pitch, pitchf, sid, out_size):
    """Run the synthesizer and convert its output to int16 PCM.

    Parameters
    ----------
    feats : feature tensor for the inferencer.
    p_len : sequence-length tensor.
    pitch, pitchf : pitch tensors (may be ``None`` — passed through).
    sid : speaker-id tensor.
    out_size : requested output size (passed through; may be ``None``).

    Returns
    -------
    torch.Tensor
        1-D int16 waveform, clipped to [-1, 1] and scaled by 32767.5.

    Raises
    ------
    HalfPrecisionChangingException
        When the underlying RuntimeError mentions half precision.
    """
    try:
        with torch.no_grad():
            with autocast(enabled=self.isHalf):
                # Inferencer returns a tuple; [0][0, 0] selects the waveform.
                wav = self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)[0][0, 0].to(dtype=torch.float32)
                # Clip to valid range, then scale to int16 PCM.
                # (.data removed: legacy autograd accessor, redundant under no_grad.)
                audio1 = (torch.clip(wav, -1.0, 1.0) * 32767.5).to(dtype=torch.int16)
        return audio1
    except RuntimeError as e:
        if "HALF" in str(e).upper():
            print("HalfPrecision Error:", e)  # fixed "HalfPresicion" typo
            raise HalfPrecisionChangingException() from e
        # Unrecognized failure: re-raise as-is, preserving traceback.
        raise
def exec(
    self,
    sid,
@ -113,30 +173,6 @@ class Pipeline(object):
pitchf = pitchf if repeat == 0 else np.zeros(p_len) pitchf = pitchf if repeat == 0 else np.zeros(p_len)
out_size = out_size if repeat == 0 else None out_size = out_size if repeat == 0 else None
# ピッチ検出
try:
if if_f0 == 1:
pitch, pitchf = self.pitchExtractor.extract(
audio_pad,
pitchf,
f0_up_key,
self.sr,
self.window,
silence_front=silence_front,
)
# pitch = pitch[:p_len]
# pitchf = pitchf[:p_len]
pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
pitchf = torch.tensor(pitchf, device=self.device, dtype=torch.float).unsqueeze(0)
else:
pitch = None
pitchf = None
except IndexError as e: # NOQA
# print(e)
# import traceback
# traceback.print_exc()
raise NotEnoughDataExtimateF0()
# tensor型調整 # tensor型調整
feats = audio_pad feats = audio_pad
if feats.dim() == 2: # double channels if feats.dim() == 2: # double channels
@ -144,21 +180,27 @@ class Pipeline(object):
assert feats.dim() == 1, feats.dim() assert feats.dim() == 1, feats.dim()
feats = feats.view(1, -1) feats = feats.view(1, -1)
# ピッチ検出
with Timer("main-process", True) as t:
pitch, pitchf = self.extractPitch(audio_pad, if_f0, pitchf, f0_up_key, silence_front)
print(f"[Perform(Pit)] {t.secs}")
# embedding # embedding
with Timer("main-process", False) as te: with Timer("main-process", True) as t:
with autocast(enabled=self.isHalf): # with autocast(enabled=self.isHalf):
try: # try:
feats = self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj) # feats = self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj)
if torch.isnan(feats).all(): # if torch.isnan(feats).all():
raise DeviceCannotSupportHalfPrecisionException() # raise DeviceCannotSupportHalfPrecisionException()
except RuntimeError as e: # except RuntimeError as e:
if "HALF" in e.__str__().upper(): # if "HALF" in e.__str__().upper():
raise HalfPrecisionChangingException() # raise HalfPrecisionChangingException()
elif "same device" in e.__str__(): # elif "same device" in e.__str__():
raise DeviceChangingException() # raise DeviceChangingException()
else: # else:
raise e # raise e
# print(f"[Embedding] {te.secs}") feats = self.extractFeatures(feats, embOutputLayer, useFinalProj)
print(f"[Perform(Emb)] {t.secs}")
# Index - feature抽出 # Index - feature抽出
# if self.index is not None and self.feature is not None and index_rate != 0: # if self.index is not None and self.feature is not None and index_rate != 0:
@ -227,26 +269,11 @@ class Pipeline(object):
p_len = torch.tensor([feats_len], device=self.device).long() p_len = torch.tensor([feats_len], device=self.device).long()
# 推論実行 # 推論実行
try: with Timer("main-process", True) as t:
with torch.no_grad(): audio1 = self.infer(feats, p_len, pitch, pitchf, sid, out_size)
with autocast(enabled=self.isHalf): print(f"[Perform(Inf)] {t.secs}")
audio1 = (
torch.clip(
self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)[0][0, 0].to(dtype=torch.float32),
-1.0,
1.0,
)
* 32767.5
).data.to(dtype=torch.int16)
except RuntimeError as e:
if "HALF" in e.__str__().upper():
print("11", e)
raise HalfPrecisionChangingException()
else:
raise e
feats_buffer = feats.squeeze(0).detach().cpu() feats_buffer = feats.squeeze(0).detach().cpu()
if pitchf is not None: if pitchf is not None:
pitchf_buffer = pitchf.squeeze(0).detach().cpu() pitchf_buffer = pitchf.squeeze(0).detach().cpu()
else: else: