This commit is contained in:
w-okada 2023-08-13 18:55:50 +09:00
parent 46ff05b04f
commit 9fe14f94e2

View File

@ -74,6 +74,66 @@ class Pipeline(object):
def setPitchExtractor(self, pitchExtractor: PitchExtractor):
    self.pitchExtractor = pitchExtractor
def extractPitch(self, audio_pad, if_f0, pitchf, f0_up_key, silence_front):
    """Run pitch (F0) extraction on the padded audio.

    Parameters
    ----------
    audio_pad : padded input waveform handed to ``self.pitchExtractor``.
    if_f0 : ``1`` to perform F0 extraction; any other value skips it.
    pitchf : pitch-contour buffer passed through to the extractor.
    f0_up_key : pitch-shift amount forwarded to the extractor.
    silence_front : leading-silence amount the extractor may skip.

    Returns
    -------
    tuple
        ``(pitch, pitchf)`` as tensors on ``self.device`` (``pitch`` long,
        ``pitchf`` float, both with a leading batch dim), or
        ``(None, None)`` when ``if_f0 != 1``.

    Raises
    ------
    NotEnoughDataExtimateF0
        When the extractor fails with an ``IndexError`` (not enough
        samples to estimate F0).
    """
    # Guard clause: nothing to extract, so avoid the try block entirely.
    if if_f0 != 1:
        return None, None
    try:
        pitch, pitchf = self.pitchExtractor.extract(
            audio_pad,
            pitchf,
            f0_up_key,
            self.sr,
            self.window,
            silence_front=silence_front,
        )
        pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
        pitchf = torch.tensor(pitchf, device=self.device, dtype=torch.float).unsqueeze(0)
    except IndexError as e:
        # Chain the original IndexError so the root cause stays visible
        # in the traceback instead of being silently discarded.
        raise NotEnoughDataExtimateF0() from e
    return pitch, pitchf
def extractFeatures(self, feats, embOutputLayer, useFinalProj):
    """Extract embedding features, translating low-level runtime
    failures into pipeline-specific exceptions.

    Parameters
    ----------
    feats : input tensor handed to ``self.embedder.extractFeatures``.
    embOutputLayer : embedder output-layer selector (passed through).
    useFinalProj : whether the embedder applies its final projection
        (passed through).

    Returns
    -------
    The embedder's output tensor.

    Raises
    ------
    DeviceCannotSupportHalfPrecisionException
        When the output is entirely NaN — the device silently failed
        under half precision.
    HalfPrecisionChangingException
        When the underlying RuntimeError mentions half precision.
    DeviceChangingException
        When the underlying RuntimeError mentions a device mismatch.
    """
    with autocast(enabled=self.isHalf):
        try:
            feats = self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj)
            if torch.isnan(feats).all():
                raise DeviceCannotSupportHalfPrecisionException()
            return feats
        except RuntimeError as e:
            # Classify the failure once from the message text, and chain
            # the original error so the root cause is not lost.
            msg = str(e)
            if "HALF" in msg.upper():
                raise HalfPrecisionChangingException() from e
            if "same device" in msg:
                raise DeviceChangingException() from e
            # Unrecognized failure: re-raise as-is, preserving traceback.
            raise
def infer(self, feats, p_len, pitch, pitchf, sid, out_size):
    """Run the synthesizer and convert its output to int16 PCM.

    Parameters
    ----------
    feats : feature tensor for the inferencer.
    p_len : sequence-length tensor.
    pitch, pitchf : pitch tensors (may be ``None`` — passed through).
    sid : speaker-id tensor.
    out_size : requested output size (passed through; may be ``None``).

    Returns
    -------
    torch.Tensor
        1-D int16 waveform, clipped to [-1, 1] and scaled by 32767.5.

    Raises
    ------
    HalfPrecisionChangingException
        When the underlying RuntimeError mentions half precision.
    """
    try:
        with torch.no_grad():
            with autocast(enabled=self.isHalf):
                # Inferencer returns a tuple; [0][0, 0] selects the waveform.
                wav = self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)[0][0, 0].to(dtype=torch.float32)
                # Clip to valid range, then scale to int16 PCM.
                # (.data removed: legacy autograd accessor, redundant under no_grad.)
                audio1 = (torch.clip(wav, -1.0, 1.0) * 32767.5).to(dtype=torch.int16)
        return audio1
    except RuntimeError as e:
        if "HALF" in str(e).upper():
            print("HalfPrecision Error:", e)  # fixed "HalfPresicion" typo
            raise HalfPrecisionChangingException() from e
        # Unrecognized failure: re-raise as-is, preserving traceback.
        raise
def exec(
    self,
    sid,
@ -113,30 +173,6 @@ class Pipeline(object):
pitchf = pitchf if repeat == 0 else np.zeros(p_len) pitchf = pitchf if repeat == 0 else np.zeros(p_len)
out_size = out_size if repeat == 0 else None out_size = out_size if repeat == 0 else None
# ピッチ検出
try:
if if_f0 == 1:
pitch, pitchf = self.pitchExtractor.extract(
audio_pad,
pitchf,
f0_up_key,
self.sr,
self.window,
silence_front=silence_front,
)
# pitch = pitch[:p_len]
# pitchf = pitchf[:p_len]
pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
pitchf = torch.tensor(pitchf, device=self.device, dtype=torch.float).unsqueeze(0)
else:
pitch = None
pitchf = None
except IndexError as e: # NOQA
# print(e)
# import traceback
# traceback.print_exc()
raise NotEnoughDataExtimateF0()
# tensor型調整 # tensor型調整
feats = audio_pad feats = audio_pad
if feats.dim() == 2: # double channels if feats.dim() == 2: # double channels
@ -144,21 +180,27 @@ class Pipeline(object):
assert feats.dim() == 1, feats.dim() assert feats.dim() == 1, feats.dim()
feats = feats.view(1, -1) feats = feats.view(1, -1)
# ピッチ検出
with Timer("main-process", True) as t:
pitch, pitchf = self.extractPitch(audio_pad, if_f0, pitchf, f0_up_key, silence_front)
print(f"[Perform(Pit)] {t.secs}")
# embedding # embedding
with Timer("main-process", False) as te: with Timer("main-process", True) as t:
with autocast(enabled=self.isHalf): # with autocast(enabled=self.isHalf):
try: # try:
feats = self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj) # feats = self.embedder.extractFeatures(feats, embOutputLayer, useFinalProj)
if torch.isnan(feats).all(): # if torch.isnan(feats).all():
raise DeviceCannotSupportHalfPrecisionException() # raise DeviceCannotSupportHalfPrecisionException()
except RuntimeError as e: # except RuntimeError as e:
if "HALF" in e.__str__().upper(): # if "HALF" in e.__str__().upper():
raise HalfPrecisionChangingException() # raise HalfPrecisionChangingException()
elif "same device" in e.__str__(): # elif "same device" in e.__str__():
raise DeviceChangingException() # raise DeviceChangingException()
else: # else:
raise e # raise e
# print(f"[Embedding] {te.secs}") feats = self.extractFeatures(feats, embOutputLayer, useFinalProj)
print(f"[Perform(Emb)] {t.secs}")
# Index - feature抽出 # Index - feature抽出
# if self.index is not None and self.feature is not None and index_rate != 0: # if self.index is not None and self.feature is not None and index_rate != 0:
@ -227,26 +269,11 @@ class Pipeline(object):
p_len = torch.tensor([feats_len], device=self.device).long() p_len = torch.tensor([feats_len], device=self.device).long()
# 推論実行 # 推論実行
try: with Timer("main-process", True) as t:
with torch.no_grad(): audio1 = self.infer(feats, p_len, pitch, pitchf, sid, out_size)
with autocast(enabled=self.isHalf): print(f"[Perform(Inf)] {t.secs}")
audio1 = (
torch.clip(
self.inferencer.infer(feats, p_len, pitch, pitchf, sid, out_size)[0][0, 0].to(dtype=torch.float32),
-1.0,
1.0,
)
* 32767.5
).data.to(dtype=torch.int16)
except RuntimeError as e:
if "HALF" in e.__str__().upper():
print("11", e)
raise HalfPrecisionChangingException()
else:
raise e
feats_buffer = feats.squeeze(0).detach().cpu() feats_buffer = feats.squeeze(0).detach().cpu()
if pitchf is not None: if pitchf is not None:
pitchf_buffer = pitchf.squeeze(0).detach().cpu() pitchf_buffer = pitchf.squeeze(0).detach().cpu()
else: else: