WIP: refactor
parent 4e0d2a24a3
commit e780af7fc2
@@ -17,7 +17,6 @@ class VC(object):
         self.sr = 16000  # hubert input sample rate
         self.window = 160  # samples per frame
         self.t_pad = self.sr * x_pad  # pad time before/after each segment
-        print("INITIALIZE", self.sr, x_pad, self.t_pad)
         self.t_pad_tgt = tgt_sr * x_pad
         self.t_pad2 = self.t_pad * 2
         self.t_query = self.sr * x_query  # query time before/after each cut point
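Note on the constants kept above: with the 16 kHz hubert input rate and a 160-sample hop, each frame spans 10 ms, and the pad/query lengths are plain products of that rate. A minimal arithmetic sketch, assuming illustrative values x_pad = 1, x_query = 10 and tgt_sr = 40000 (none of these values come from this hunk):

    # Illustrative values only; x_pad, x_query and tgt_sr are assumptions, not taken from the diff.
    sr = 16000        # hubert input sample rate
    window = 160      # samples per frame -> window / sr = 10 ms
    x_pad, x_query, tgt_sr = 1, 10, 40000

    t_pad = sr * x_pad          # padding per side, in input samples
    t_pad_tgt = tgt_sr * x_pad  # the same duration expressed in output samples
    t_pad2 = t_pad * 2          # total padding added to a segment
    t_query = sr * x_query      # search radius around a candidate cut point
    print(t_pad, t_pad_tgt, t_pad2, t_query)  # 16000 40000 32000 160000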
@@ -28,10 +27,8 @@ class VC(object):

     def get_f0(self, x, p_len, f0_up_key, f0_method, inp_f0=None):
         time_step = self.window / self.sr * 1000
-        # f0_min = 50
-        # f0_max = 1100
-        f0_min = 70
-        f0_max = 1000
+        f0_min = 50
+        f0_max = 1100
         f0_mel_min = 1127 * np.log(1 + f0_min / 700)
         f0_mel_max = 1127 * np.log(1 + f0_max / 700)
         if (f0_method == "pm"):
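This hunk swaps the pitch search range back from 70–1000 Hz to 50–1100 Hz; the mel bounds are then recomputed by the formula already visible in the context lines. A quick standalone check of the resulting values:

    import numpy as np

    f0_min, f0_max = 50, 1100
    f0_mel_min = 1127 * np.log(1 + f0_min / 700)  # ~77.8
    f0_mel_max = 1127 * np.log(1 + f0_max / 700)  # ~1064.4
    print(f0_mel_min, f0_mel_max)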
@@ -69,7 +66,6 @@ class VC(object):
         return f0_coarse, f0bak  # 1-0

     def vc(self, model, net_g, sid, audio0, pitch, pitchf, times, index, big_npy, index_rate):  # ,file_index,file_big_npy
-        print("vc audio len 1,", len(audio0))
         feats = torch.from_numpy(audio0)
         if (self.is_half == True):
             feats = feats.half()
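The surviving lines turn the incoming waveform into a torch tensor and cast it to fp16 when half precision is enabled. A standalone sketch of just that step; the dummy array and the is_half flag below are stand-ins, not values from the diff:

    import numpy as np
    import torch

    audio0 = np.zeros(16000, dtype=np.float32)  # stand-in: 1 s of silence at 16 kHz
    is_half = torch.cuda.is_available()         # assumption: use fp16 only when a GPU is present

    feats = torch.from_numpy(audio0)
    if is_half:
        feats = feats.half()  # match a half-precision model's expected dtype
    print(feats.dtype)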
@@ -142,13 +138,6 @@ class VC(object):
         audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode='reflect')
         print("audio_pad len 1,", len(audio_pad))
         opt_ts = []
-        # if (audio_pad.shape[0] > self.t_max):
-        #     audio_sum = np.zeros_like(audio)
-        #     for i in range(self.window):
-        #         audio_sum += audio_pad[i:i - self.window]
-        #     for t in range(self.t_center, audio.shape[0], self.t_center):
-        #         opt_ts.append(t - self.t_query + np.where(np.abs(audio_sum[t - self.t_query:t + self.t_query])
-        #                       == np.abs(audio_sum[t - self.t_query:t + self.t_query]).min())[0][0])

         print("audio_pad len 2,", len(audio_pad), opt_ts)

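The block removed here was already commented out; it summed the signal over one analysis window and, every t_center samples, picked the quietest point within ±t_query samples as a safe cut position for opt_ts. A standalone sketch of that idea, with illustrative constants (t_center, t_query and the input array are assumptions, not values from this diff):

    import numpy as np

    window, t_center, t_query = 160, 160000, 16000      # illustrative values only
    audio = np.random.randn(480000).astype(np.float32)  # stand-in waveform (30 s at 16 kHz)
    audio_pad = np.pad(audio, (window // 2, window // 2), mode='reflect')

    # Sliding sum over one window: audio_pad[i:i - window] keeps every slice at len(audio).
    audio_sum = np.zeros_like(audio)
    for i in range(window):
        audio_sum += audio_pad[i:i - window]

    # Every t_center samples, cut at the quietest point within +/- t_query samples.
    opt_ts = []
    for t in range(t_center, audio.shape[0], t_center):
        seg = np.abs(audio_sum[t - t_query:t + t_query])
        opt_ts.append(t - t_query + np.where(seg == seg.min())[0][0])
    print(opt_ts)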
@@ -159,19 +148,7 @@ class VC(object):
         audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode='reflect')
         p_len = audio_pad.shape[0] // self.window
         inp_f0 = None
-        print("audio_pad len 3,", len(audio_pad), self.t_pad, len(audio))

-        # if (hasattr(f0_file, 'name') == True):
-        #     print("load pitch !!!!!!!!!!!!", f0_file.name)
-        #     try:
-        #         with open(f0_file.name, "r")as f:
-        #             lines = f.read().strip("\n").split("\n")
-        #         inp_f0 = []
-        #         for line in lines:
-        #             inp_f0.append([float(i)for i in line.split(",")])
-        #         inp_f0 = np.array(inp_f0, dtype="float32")
-        #     except:
-        #         traceback.print_exc()
         sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
         pitch, pitchf = None, None
         if (if_f0 == 1):
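The commented-out block dropped in this hunk loaded an optional, user-supplied f0 contour from a text file (comma-separated values, one line per frame) into a float32 array. A cleaned-up standalone sketch of that parsing, using a hypothetical path "f0.txt":

    import traceback
    import numpy as np

    inp_f0 = None
    try:
        # Hypothetical input file: one line per frame, comma-separated f0 values.
        with open("f0.txt", "r") as f:
            lines = f.read().strip("\n").split("\n")
        inp_f0 = np.array([[float(i) for i in line.split(",")] for line in lines], dtype="float32")
    except Exception:
        traceback.print_exc()  # mirror the original's log-and-continue behaviour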
@@ -183,26 +160,13 @@ class VC(object):
             pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float()
         t2 = ttime()
         times[1] += (t2 - t1)
-        print("opt start")
-        # for t in opt_ts:
-        #     print("opt exec")
-        #     t = t // self.window * self.window
-        #     if (if_f0 == 1):
-        #         audio_opt.append(self.vc(model, net_g, sid, audio_pad[s:t + self.t_pad2 + self.window], pitch[:, s // self.window:(
-        #             t + self.t_pad2) // self.window], pitchf[:, s // self.window:(t + self.t_pad2) // self.window], times, index, big_npy, index_rate)[self.t_pad_tgt:-self.t_pad_tgt])
-        #     else:
-        #         audio_opt.append(self.vc(model, net_g, sid, audio_pad[s:t + self.t_pad2 + self.window],
-        #             None, None, times, index, big_npy, index_rate)[self.t_pad_tgt:-self.t_pad_tgt])
-        #     s = t
-        print("opt end")
-        if (if_f0 == 1):
-            print("TTTTT", t, self.t_pad_tgt)
-            # audio_opt.append(self.vc(model, net_g, sid, audio_pad[t:], pitch[:, t // self.window:]if t is not None else pitch, pitchf[:,
-            #     t // self.window:]if t is not None else pitchf, times, index, big_npy, index_rate)[self.t_pad_tgt:-self.t_pad_tgt])
+        if self.t_pad_tgt == 0:
             audio_opt.append(self.vc(model, net_g, sid, audio_pad[t:], pitch[:, t // self.window:]if t is not None else pitch, pitchf[:,
                              t // self.window:]if t is not None else pitchf, times, index, big_npy, index_rate))
         else:
-            audio_opt.append(self.vc(model, net_g, sid, audio_pad[t:], None, None, times, index, big_npy, index_rate)[self.t_pad_tgt:-self.t_pad_tgt])
+            audio_opt.append(self.vc(model, net_g, sid, audio_pad[t:], pitch[:, t // self.window:]if t is not None else pitch, pitchf[:,
+                             t // self.window:]if t is not None else pitchf, times, index, big_npy, index_rate)[self.t_pad_tgt:-self.t_pad_tgt])

         audio_opt = np.concatenate(audio_opt)
         del pitch, pitchf, sid
         torch.cuda.empty_cache()
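The rewritten tail always forwards pitch/pitchf and branches only on trimming: when self.t_pad_tgt is 0 there is no padding to cut, otherwise t_pad_tgt samples are sliced off each end of the converted chunk before concatenation. A minimal sketch of that decision, with a dummy array standing in for the output of self.vc() (the lengths are assumptions):

    import numpy as np

    t_pad_tgt = 40000                                     # illustrative; 0 means "keep everything"
    segment = np.random.randn(200000).astype(np.float32)  # stand-in for one converted chunk

    audio_opt = []
    if t_pad_tgt == 0:
        audio_opt.append(segment)                        # nothing to trim
    else:
        audio_opt.append(segment[t_pad_tgt:-t_pad_tgt])  # drop the reflected padding on both ends
    audio_opt = np.concatenate(audio_opt)
    print(len(audio_opt))  # 120000 with the values above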