Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 25 additions & 5 deletions OpenUtau.Core/DiffSinger/DiffSingerPitch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ public class DsPitch : IDisposable
DiffSingerSpeakerEmbedManager speakerEmbedManager;
const string PEXP = DiffSingerUtils.PEXP;

public float FrameMs => frameMs;

public DsPitch(string rootPath)
{
this.rootPath = rootPath;
Expand Down Expand Up @@ -107,7 +109,7 @@ int PhonemeTokenize(string phoneme){
return token;
}

public RenderPitchResult Process(RenderPhrase phrase){
public RenderPitchResult Process(RenderPhrase phrase, HashSet<int>? retakeNoteIndexes = null, float[]? existingPitch = null){
var startMs = phrase.phones[0].positionMs - DiffSingerUtils.GetHeadMs(frameMs);
int headFrames = DiffSingerUtils.headFrames;
int tailFrames = DiffSingerUtils.tailFrames;
Expand Down Expand Up @@ -184,21 +186,29 @@ public RenderPitchResult Process(RenderPhrase phrase){
var noteDurMsList = new List<double>();
var noteMidiList = new List<float>();
var noteRestList = new List<bool>();
//paddedToRealNoteIndex is kept in lockstep with noteDurMsList so the retake
//frame mask can map each padded segment to the real note it belongs to.
//Gap-rest segments inserted below follow the preceding real note.
var paddedToRealNoteIndex = new List<int>();
//Head padding
noteDurMsList.Add(Math.Max(0, phrase.notes[0].positionMs - startMs));
noteMidiList.Add(phrase.notes[0].adjustedTone);
noteRestList.Add(true);
paddedToRealNoteIndex.Add(0);
double prevNoteEndMs = phrase.notes[0].positionMs;
foreach (var note in phrase.notes) {
for (int realIdx = 0; realIdx < phrase.notes.Length; realIdx++) {
var note = phrase.notes[realIdx];
double gapMs = note.positionMs - prevNoteEndMs;
if (gapMs > 0) {
//Insert a rest note for the gap
//Insert a rest note for the gap; associate it with the previous real note
noteDurMsList.Add(gapMs);
noteMidiList.Add(note.adjustedTone);
noteRestList.Add(true);
paddedToRealNoteIndex.Add(realIdx - 1);
}
noteDurMsList.Add(note.durationMs);
noteMidiList.Add(note.adjustedTone);
paddedToRealNoteIndex.Add(realIdx);
//Slur notes follow the previous note's rest status
if (note.lyric.StartsWith("+")) {
noteRestList.Add(noteRestList[^1]);
Expand All @@ -217,6 +227,7 @@ public RenderPitchResult Process(RenderPhrase phrase){
noteDurMsList.Add(DiffSingerUtils.GetTailMs(frameMs));
noteMidiList.Add(phrase.notes[^1].adjustedTone);
noteRestList.Add(true);
paddedToRealNoteIndex.Add(phrase.notes.Length - 1);

//Set tone for each rest group using nearest non-rest note
var note_rest = noteRestList;
Expand Down Expand Up @@ -251,6 +262,13 @@ public RenderPitchResult Process(RenderPhrase phrase){
.ToList();
var pitch = Enumerable.Repeat(60f, totalFrames).ToArray();
var retake = Enumerable.Repeat(true, totalFrames).ToArray();
if (retakeNoteIndexes != null && existingPitch != null) {
retake = DiffSingerRetake.BuildRetakeFrameMask(
note_dur, paddedToRealNoteIndex, retakeNoteIndexes, totalFrames);
for (int i = 0; i < totalFrames && i < existingPitch.Length; i++) {
pitch[i] = existingPitch[i];
}
}
Comment thread
KakaruHayate marked this conversation as resolved.
var pitchInputs = new List<NamedOnnxValue>();
pitchInputs.Add(NamedOnnxValue.CreateFromTensor("encoder_out", encoder_out));
pitchInputs.Add(NamedOnnxValue.CreateFromTensor("note_midi",
Expand Down Expand Up @@ -322,14 +340,16 @@ public RenderPitchResult Process(RenderPhrase phrase){
.Select(i=>(float)phrase.timeAxis.MsPosToTickPos(startMs + i*frameMs) - phrase.position)
.Append((float)phrase.duration + 1)
.ToArray(),
tones = pitch_out.Append(pitch_out[^1]).ToArray()
tones = pitch_out.Append(pitch_out[^1]).ToArray(),
retakeMask = retakeNoteIndexes != null ? retake.Append(retake[^1]).ToArray() : null,
};
}else{
return new RenderPitchResult{
ticks = Enumerable.Range(0,totalFrames)
.Select(i=>(float)phrase.timeAxis.MsPosToTickPos(startMs + i*frameMs) - phrase.position)
.ToArray(),
tones = pitch_out
tones = pitch_out,
retakeMask = retakeNoteIndexes != null ? retake : null,
};
}
}
Expand Down
32 changes: 32 additions & 0 deletions OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,38 @@ public RenderPitchResult LoadRenderedPitch(RenderPhrase phrase) {
}
}

/// <summary>
/// Predicts pitch for <paramref name="phrase"/>, optionally limiting the retake to the
/// notes whose absolute positions appear in <paramref name="selectedNotePositions"/>.
/// Falls back to the single-argument overload when the DiffSingerLocalRetaking
/// preference is off, and to a plain full-phrase prediction when the selection maps
/// to none or all of the phrase's notes.
/// </summary>
/// <param name="phrase">Phrase to predict pitch for; its singer is cast to DiffSingerSinger.</param>
/// <param name="selectedNotePositions">Absolute positions of the selected notes.</param>
/// <returns>The result produced by the singer's pitch predictor.</returns>
/// <exception cref="Exception">The singer reports that it has no pitch predictor.</exception>
public RenderPitchResult LoadRenderedPitch(RenderPhrase phrase, HashSet<int> selectedNotePositions) {
    if (!Preferences.Default.DiffSingerLocalRetaking) {
        return LoadRenderedPitch(phrase);
    }

    var dsSinger = (DiffSingerSinger)phrase.singer;
    if (!dsSinger.HasPitchPredictor) {
        throw new Exception("This singer has no pitch predictor.");
    }
    var predictor = dsSinger.getPitchPredictor()!;

    // Translate the selected absolute positions into indexes into phrase.notes.
    int[] relativePositions = phrase.notes.Select(note => note.position).ToArray();
    var retakeNoteIndexes = DiffSingerRetake.MapSelectedPositionsToNoteIndexes(
        phrase.position, relativePositions, selectedNotePositions);

    // Only a strict, non-empty subset of the notes calls for a partial retake.
    int selectedCount = retakeNoteIndexes.Count;
    bool partialRetake = selectedCount != 0 && selectedCount != phrase.notes.Length;
    if (!partialRetake) {
        lock (predictor) {
            return predictor.Process(phrase);
        }
    }

    // Sample the phrase's current pitch curve at the predictor's frame rate so it can
    // be handed to the predictor together with the selected note indexes.
    var frameMs = predictor.FrameMs;
    int head = DiffSingerUtils.headFrames;
    int tail = DiffSingerUtils.tailFrames;
    var phoneFrames = DiffSingerUtils.PaddedPhoneDurations(phrase, frameMs, head, tail);
    int totalFrames = phoneFrames.Sum();
    var sampledPitch = DiffSingerUtils.SampleCurve(
        phrase, phrase.pitches, 0, frameMs, totalFrames, head, tail, x => x * 0.01);
    var existingPitch = sampledPitch.Select(f => (float)f).ToArray();

    lock (predictor) {
        return predictor.Process(phrase, retakeNoteIndexes, existingPitch);
    }
}

public List<RenderRealCurveResult> LoadRenderedRealCurves(RenderPhrase phrase) {
if (!Preferences.Default.DiffSingerTensorCache) {
throw new Exception("Please enable DiffSinger tensor cache and re-render the phrase to display correct base curves.");
Expand Down
52 changes: 52 additions & 0 deletions OpenUtau.Core/DiffSinger/DiffSingerRetake.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
using System.Collections.Generic;

namespace OpenUtau.Core.DiffSinger {
    /// <summary>
    /// Pure helpers for partial ("local") pitch retaking: mapping a note selection to
    /// note indexes, and expanding selected note indexes into a per-frame mask.
    /// </summary>
    public static class DiffSingerRetake {
        /// <summary>
        /// Returns the indexes of the notes whose absolute position
        /// (<paramref name="phrasePosition"/> plus the note's relative position) is contained
        /// in <paramref name="selectedAbsolutePositions"/>.
        /// </summary>
        /// <param name="phrasePosition">Offset added to every relative note position.</param>
        /// <param name="noteRelativePositions">Note positions relative to the phrase, in note order.</param>
        /// <param name="selectedAbsolutePositions">Selected absolute positions; null or empty selects nothing.</param>
        /// <returns>A new set of matching note indexes; empty when nothing matches.</returns>
        public static HashSet<int> MapSelectedPositionsToNoteIndexes(
            int phrasePosition,
            IReadOnlyList<int> noteRelativePositions,
            IReadOnlyCollection<int>? selectedAbsolutePositions) {
            var result = new HashSet<int>();
            if (selectedAbsolutePositions == null || selectedAbsolutePositions.Count == 0) {
                return result;
            }
            // Reuse the caller's collection when it already is a set; copy it into one otherwise.
            var lookup = selectedAbsolutePositions as ISet<int> ?? new HashSet<int>(selectedAbsolutePositions);
            for (int i = 0; i < noteRelativePositions.Count; i++) {
                if (lookup.Contains(phrasePosition + noteRelativePositions[i])) {
                    result.Add(i);
                }
            }
            return result;
        }

        /// <summary>
        /// Builds a per-frame mask that is true for every frame belonging to a padded
        /// segment whose real-note index is contained in <paramref name="retakeNoteIndexes"/>.
        /// </summary>
        /// <param name="paddedNoteDurations">Duration, in frames, of each padded segment, laid out back to back from frame 0.</param>
        /// <param name="paddedToRealNoteIndex">
        /// For each padded segment, the real-note index it follows for retake purposes, or a
        /// negative value for a segment that is never retaken. Must contain an entry for every
        /// padded segment; entries beyond the last segment are ignored.
        /// </param>
        /// <param name="retakeNoteIndexes">Real-note indexes to retake; null or empty yields an all-false mask.</param>
        /// <param name="totalFrames">Length of the returned mask.</param>
        /// <returns>
        /// A mask of length <paramref name="totalFrames"/>. Frames not covered by any segment
        /// stay false; segments running past the end of the mask are clamped to it.
        /// </returns>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="paddedToRealNoteIndex"/> has fewer entries than
        /// <paramref name="paddedNoteDurations"/> while there are notes to retake.
        /// </exception>
        public static bool[] BuildRetakeFrameMask(
            IReadOnlyList<int> paddedNoteDurations,
            IReadOnlyList<int> paddedToRealNoteIndex,
            IReadOnlyCollection<int>? retakeNoteIndexes,
            int totalFrames) {
            var mask = new bool[totalFrames];
            if (retakeNoteIndexes == null || retakeNoteIndexes.Count == 0 || paddedNoteDurations.Count == 0) {
                return mask;
            }
            int segmentCount = paddedNoteDurations.Count;
            // Fail up front with a descriptive message instead of an index error mid-loop.
            if (paddedToRealNoteIndex.Count < segmentCount) {
                throw new System.ArgumentException(
                    $"Expected a real-note index for each of the {segmentCount} padded segments, but got {paddedToRealNoteIndex.Count}.",
                    nameof(paddedToRealNoteIndex));
            }
            var lookup = retakeNoteIndexes as ISet<int> ?? new HashSet<int>(retakeNoteIndexes);
            int frameOffset = 0;
            for (int segIdx = 0; segIdx < segmentCount; segIdx++) {
                int realIdx = paddedToRealNoteIndex[segIdx];
                bool shouldRetake = realIdx >= 0 && lookup.Contains(realIdx);
                int dur = paddedNoteDurations[segIdx];
                if (dur > 0 && frameOffset < totalFrames) {
                    // Clamp the segment to the mask so trailing frames past totalFrames are dropped.
                    int count = System.Math.Min(dur, totalFrames - frameOffset);
                    System.Array.Fill(mask, shouldRetake, frameOffset, count);
                }
                frameOffset += dur;
            }
            return mask;
        }
    }
}
5 changes: 4 additions & 1 deletion OpenUtau.Core/Editing/NoteBatchEdits.cs
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ public void RunAsync(
var commands = new List<SetCurveCommand>();
for (int ph_i = phrases.Count() - 1; ph_i >= 0; ph_i--) {
var phrase = phrases[ph_i];
var result = renderer.LoadRenderedPitch(phrase);
var result = renderer.LoadRenderedPitch(phrase, positions);
if (result == null) {
continue;
}
Expand All @@ -502,6 +502,9 @@ public void RunAsync(
if (result.tones[i] < 0) {
continue;
}
if (result.retakeMask != null && i < result.retakeMask.Length && !result.retakeMask[i]) {
continue;
}
int x = phrase.position - part.position + (int)result.ticks[i];
if (result.ticks[i] < 0) {
if (i + 1 < result.ticks.Length && result.ticks[i + 1] > 0) { } else
Expand Down
6 changes: 6 additions & 0 deletions OpenUtau.Core/Render/IRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ public class RenderPitchResult {
/// Semitone values in MIDI scale.
/// </summary>
public float[] tones;

/// <summary>
/// Per-frame mask indicating retaken frames. Null means full retake.
/// </summary>
public bool[]? retakeMask;
}

public class RenderRealCurveResult {
Expand Down Expand Up @@ -70,6 +75,7 @@ public interface IRenderer {
RenderResult Layout(RenderPhrase phrase);
Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int trackNo, CancellationTokenSource cancellation, bool isPreRender = false);
RenderPitchResult LoadRenderedPitch(RenderPhrase phrase);
RenderPitchResult LoadRenderedPitch(RenderPhrase phrase, HashSet<int> selectedNotePositions) { return LoadRenderedPitch(phrase); }
List<RenderRealCurveResult> LoadRenderedRealCurves(RenderPhrase phrase) { return new List<RenderRealCurveResult>(0);}
UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, URenderSettings renderSettings);
}
Expand Down
1 change: 1 addition & 0 deletions OpenUtau.Core/Util/Preferences.cs
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ public class SerializablePreferences {
public int DiffSingerStepsPitch = 10;
public bool DiffSingerTensorCache = true;
public bool DiffSingerLangCodeHide = false;
public bool DiffSingerLocalRetaking = false;
public bool SkipRenderingMutedTracks = false;
public string Language = string.Empty;
public string? SortingOrder = null;
Expand Down
Loading
Loading