Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: optimize Matcher algorithm #56

Merged
merged 1 commit into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions SubRenamer/Helper/MatcherDataConverter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
using System.Collections.Generic;
using System.Linq;

namespace SubRenamer.Helper;

/// <summary>
/// Converts match items between the observable <c>Model.MatchItem</c> type and the
/// <c>Matcher.MatchItem</c> record used by the matching algorithm.
/// </summary>
public static class MatcherDataConverter
{
    /// <summary>
    /// Builds a new list of matcher records from model items, copying Key, Video and Subtitle.
    /// </summary>
    /// <param name="matchItems">Model items to convert; the input is not modified.</param>
    /// <returns>A new list with one matcher record per input item, in the same order.</returns>
    public static List<Matcher.MatchItem> ConvertMatchItems(IReadOnlyList<Model.MatchItem> matchItems) =>
        (from source in matchItems
         select new Matcher.MatchItem(source.Key, source.Video, source.Subtitle)).ToList();

    /// <summary>
    /// Builds a new list of model items from matcher records, copying Key, Video and Subtitle.
    /// </summary>
    /// <param name="matchItems">Matcher records to convert; the input is not modified.</param>
    /// <returns>A new list with one model item per input record, in the same order.</returns>
    public static List<Model.MatchItem> ConvertMatchItems(IReadOnlyList<Matcher.MatchItem> matchItems) =>
        (from source in matchItems
         select new Model.MatchItem(source.Key, source.Video, source.Subtitle)).ToList();
}
81 changes: 51 additions & 30 deletions SubRenamer/Matcher/Diff.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@

namespace SubRenamer.Matcher;

/// <summary>
/// Mutable holder for the literal text that precedes (<c>Prefix</c>) and follows (<c>Suffix</c>)
/// the match key; both fields default to the empty string.
/// </summary>
// NOTE(review): a nested record Diff.DiffResult with the same two members is declared inside
// the Diff class in this same file. This namespace-level class looks like the pre-refactor
// declaration - confirm it is no longer referenced and remove it.
public class DiffResult
{
public string Prefix = "";
public string Suffix = "";
}

public static class Diff
{
/// <summary>
/// Immutable result of a name diff: the literal text before (<c>Prefix</c>) and after
/// (<c>Suffix</c>) the varying part of a file name.
/// </summary>
public record DiffResult(string Prefix, string Suffix)
{
    /// <summary>
    /// Renders both members with their values in double quotes, which keeps
    /// leading and trailing whitespace visible in log output.
    /// </summary>
    public override string ToString()
    {
        return "DiffResult { Prefix = \"" + Prefix + "\", Suffix = \"" + Suffix + "\" }";
    }
}

public static DiffResult? GetDiffResult(List<string> names)
{
// Note: the names is without extension
Expand All @@ -21,14 +21,14 @@ public static class Diff

for (var i = 0; i < names.Count - 1; i++)
{
for (var j = i + 1; j < names.Count; j++)
for (var j = names.Count - 1; j > i; j--) // Start from the end to avoid two names too similar
{
var prefix = FindCommonPrefix(names[i], names[j]);
var suffix = FindCommonSuffix(prefix, names[i], names[j]);
var suffix = FindCommonSuffix(names[i][prefix.Length..], names[j][prefix.Length..]);

if (!string.IsNullOrEmpty(prefix) && !string.IsNullOrEmpty(suffix))
if (!string.IsNullOrEmpty(prefix))
{
return new DiffResult { Prefix = prefix, Suffix = suffix };
return new DiffResult(prefix, suffix);
}
}
}
Expand All @@ -39,60 +39,81 @@ public static class Diff
/// <summary>
/// Returns the longest leading run of characters shared by <paramref name="a"/> and
/// <paramref name="b"/>, with any trailing digits removed.
/// </summary>
/// <remarks>
/// The comparison is ordinal and case-sensitive. Trailing digits are stripped in every case
/// (including when one name is a full prefix of the other) so that a shared leading digit of
/// the varying number, such as the "0" in "ep01" / "ep02", does not become part of the prefix.
/// </remarks>
/// <param name="a">First name.</param>
/// <param name="b">Second name.</param>
/// <returns>The shared prefix without trailing digits; may be empty.</returns>
private static string FindCommonPrefix(string a, string b)
{
    var minLength = Math.Min(a.Length, b.Length);

    // Count how many leading characters the two names have in common.
    var common = 0;
    while (common < minLength && a[common] == b[common])
    {
        common++;
    }

    // Trim end number
    return Regex.Replace(a[..common], "\\d+$", "");
}

/// <summary>
/// Finds the first non-skipped character at which <paramref name="a"/> and
/// <paramref name="b"/> agree, ignoring case, and returns it as a one-character string.
/// </summary>
/// <remarks>
/// The caller passes the remainders of two names after their common prefix has been cut off.
/// Each string is scanned with its own cursor, because the skipped runs may differ in length.
/// </remarks>
/// <param name="a">Remainder of the first name; the returned character is taken from it.</param>
/// <param name="b">Remainder of the second name.</param>
/// <returns>The one-character suffix marker, or an empty string when none is found.</returns>
private static string FindCommonSuffix(string a, string b)
{
    var i = 0;
    var j = 0;

    while (i < a.Length && j < b.Length)
    {
        // Skip characters that are not allowed as a suffix
        while (i < a.Length && Skip(a[i])) i++;
        while (j < b.Length && Skip(b[j])) j++;

        // If both are still in valid range, compare the current character.
        // Invariant casing keeps the result independent of the current culture.
        if (i < a.Length && j < b.Length
            && char.ToLowerInvariant(a[i]) == char.ToLowerInvariant(b[j]))
        {
            return a[i].ToString();
        }

        i++;
        j++;
    }

    return "";

    // Skip [a-z], [A-Z], [0-9], and whitespace. Which is not allowed as a suffix.
    // Because it may be a part of the `Key` (Episode Number).
    // Such as "file [01A] end" and "file [01B] end".
    //
    // But allows Chinese character as a suffix.
    // Such as "file 01 話" and "file 02 話".
    // @see https://github.com/qwqcode/SubRenamer/pull/45
    bool Skip(char c) => char.IsAsciiLetterOrDigit(c) || c == ' ';
}
/// <summary>
/// Extracts the match key (typically the episode number) from <paramref name="filename"/>.
/// </summary>
/// <remarks>
/// The pattern depends on <paramref name="diff"/>:
/// no diff - the last number in the name; a prefix without suffix - the digits right after
/// the prefix; prefix and suffix - the shortest text between them.
/// A key consisting only of ASCII digits has its leading zeros removed ("01" becomes "1").
/// This is done on the text itself, so digit runs of any length are handled without overflow.
/// </remarks>
/// <param name="diff">Prefix/suffix found by the diff step, or <c>null</c> when there is none.</param>
/// <param name="filename">File name without extension.</param>
/// <returns>The key, or an empty string when the pattern does not match.</returns>
public static string ExtractMatchKeyByDiff(DiffResult? diff, string filename)
{
    string pattern;
    if (diff is null)
    {
        // if matchData is null then fail down to simple number match
        // (in case that filename sample less than 2)
        pattern = "(\\d+)(?!.*\\d)"; // @link https://stackoverflow.com/questions/5320525/regular-expression-to-match-last-number-in-a-string
    }
    else if (string.IsNullOrEmpty(diff.Suffix))
    {
        pattern = $"{Regex.Escape(diff.Prefix)}(\\d+)";
    }
    else
    {
        pattern = $"{Regex.Escape(diff.Prefix)}(.+?){Regex.Escape(diff.Suffix)}";
    }

    var match = Regex.Match(filename, pattern);
    if (!match.Success) return "";

    var key = match.Groups[1].Value.Trim();

    // Normalize a pure number: '01' -> '1'.
    // The empty key (e.g. a whitespace-only capture) and non-ASCII digits are left as they
    // are; neither can be parsed as a number.
    if (key.Length > 0 && key.All(char.IsAsciiDigit))
    {
        key = key.TrimStart('0');
        if (key.Length == 0) key = "0"; // the key was all zeros
    }

    return key;
}
}
22 changes: 10 additions & 12 deletions SubRenamer/Matcher/Helper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,18 @@
using System.Linq;
using System.Text.RegularExpressions;
using SubRenamer.Helper;
using SubRenamer.Model;

namespace SubRenamer.Matcher;

public static class Helper
{
/// <summary>
/// Extracts the match key from <paramref name="filename"/> using a user-supplied regex.
/// </summary>
/// <param name="pattern">Regular expression whose first capture group is the key.</param>
/// <param name="filename">File name to match, case-insensitively.</param>
/// <returns>
/// The value of capture group 1, or an empty string when the pattern does not match
/// or matching fails with an exception (for example an invalid pattern).
/// </returns>
public static string ExtractMatchKeyRegex(string pattern, string filename)
{
    try
    {
        var match = Regex.Match(filename, pattern, RegexOptions.IgnoreCase);
        if (match.Success) return match.Groups[1].Value;
    }
    catch (Exception e)
    {
        // Best effort: a bad pattern must not abort the whole matching run,
        // so the message is logged once and the file simply gets no key.
        Logger.Out.WriteLine(e.Message);
    }

    return "";
}
Expand All @@ -42,18 +41,17 @@ public static void MergeSameKeysItems(List<MatchItem> items)
}
}

/// <summary>
/// Returns a new list in which every item with a null or empty key comes after all keyed items.
/// </summary>
/// <remarks>
/// The relative order inside each of the two groups is kept, and <paramref name="items"/>
/// itself is not modified.
/// </remarks>
/// <param name="items">Items to reorder.</param>
/// <returns>A new list: keyed items first, then items without a key.</returns>
public static List<MatchItem> MoveEmptyKeyItemsToLast(IReadOnlyList<MatchItem> items)
{
    var keyedItems = items.Where(x => !string.IsNullOrEmpty(x.Key));
    var emptyKeyItems = items.Where(x => string.IsNullOrEmpty(x.Key));
    return [..keyedItems, ..emptyKeyItems];
}

/// <summary>
/// Returns a copy of <paramref name="items"/> sorted by key using <c>MixedStringComparer</c>.
/// </summary>
/// <remarks>
/// The input list is not modified. <c>List&lt;T&gt;.Sort</c> is not a stable sort, so items
/// with equal keys may change their relative order.
/// </remarks>
/// <param name="items">Items to sort.</param>
/// <returns>A new, sorted list.</returns>
public static List<MatchItem> SortItemsByKeys(IReadOnlyList<MatchItem> items)
{
    // One comparer serves every comparison instead of allocating a new one per call.
    var comparer = new MixedStringComparer();

    List<MatchItem> result = [..items];
    result.Sort((a, b) => comparer.Compare(a.Key, b.Key));
    return result;
}
}
10 changes: 10 additions & 0 deletions SubRenamer/Matcher/Logger.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using System;
using System.IO;

namespace SubRenamer.Matcher;

/// <summary>
/// Holds the text writer that receives the matcher's diagnostic output.
/// </summary>
public static class Logger
{
    /// <summary>The current output writer. Defaults to <see cref="Console.Out"/>.</summary>
    public static TextWriter Out { get; private set; } = Console.Out;

    /// <summary>Redirects all subsequent log output to <paramref name="writer"/>.</summary>
    /// <param name="writer">The new output writer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="writer"/> is <c>null</c>.</exception>
    public static void SetWriter(TextWriter writer)
    {
        // Fail at the call site rather than with a NullReferenceException on the next log call.
        ArgumentNullException.ThrowIfNull(writer);
        Out = writer;
    }
}
76 changes: 39 additions & 37 deletions SubRenamer/Matcher/Matcher.cs
Original file line number Diff line number Diff line change
@@ -1,99 +1,101 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using SubRenamer.Common;
using SubRenamer.Model;

namespace SubRenamer.Matcher;

/// <summary>
/// One row of the matcher's input or output: a key together with a video and a subtitle file.
/// </summary>
/// <param name="Key">Match key extracted from the file name; empty when no key was found.</param>
/// <param name="Video">Video file path; empty when the row has no video.</param>
/// <param name="Subtitle">Subtitle file path; empty when the row has no subtitle.</param>
public record MatchItem(string Key, string Video, string Subtitle);

/// <summary>
/// Pairs video files with subtitle files by extracting a match key from every file name.
/// </summary>
public static class Matcher
{
    /// <summary>
    /// Computes a key for every video and subtitle in <paramref name="inputItems"/>, then
    /// merges items with equal keys, sorts them by key and moves items without a key to the end.
    /// </summary>
    /// <remarks>
    /// How keys are extracted depends on the configured match mode: in <c>Diff</c> mode the
    /// automatic diff algorithm is used, in <c>Manual</c> mode the manual video/subtitle
    /// regexes, and otherwise the regular video/subtitle regexes.
    /// </remarks>
    /// <param name="inputItems">Items carrying video and/or subtitle paths; not modified.</param>
    /// <returns>A new list of matched items.</returns>
    public static List<MatchItem> Execute(IReadOnlyList<MatchItem> inputItems)
    {
        // Separate Video files and Subtitle files
        var videoFiles = inputItems
            .Select(x => x.Video)
            .Where(x => !string.IsNullOrEmpty(x))
            .ToList();
        var subtitleFiles = inputItems
            .Select(x => x.Subtitle)
            .Where(x => !string.IsNullOrEmpty(x))
            .ToList();

        // Get file keys (a null regex selects the diff algorithm)
        var config = Config.Get();
        var m = config.MatchMode;
        var videoRegex = (m != MatchMode.Diff) ? (m == MatchMode.Manual ? config.ManualVideoRegex : config.VideoRegex) : null;
        var subtitleRegex = (m != MatchMode.Diff) ? (m == MatchMode.Manual ? config.ManualSubtitleRegex : config.SubtitleRegex) : null;

        var video2Keys = CalculateFileKeys(videoFiles, videoRegex);
        var subtitle2Keys = CalculateFileKeys(subtitleFiles, subtitleRegex);

        // Apply keys: one item per file, all videos first, then all subtitles
        List<MatchItem> keyedItems = [];
        foreach (var video in videoFiles)
        {
            var key = video2Keys.TryGetValue(video, out var k) ? k : "";
            keyedItems.Add(new MatchItem(key, video, ""));
        }

        foreach (var subtitle in subtitleFiles)
        {
            var key = subtitle2Keys.TryGetValue(subtitle, out var k) ? k : "";
            keyedItems.Add(new MatchItem(key, "", subtitle));
        }

        // Merge items with same keys
        var result = Helper.MergeSameKeysItems(keyedItems);

        // Sort
        result = Helper.SortItemsByKeys(result);

        // Move empty keys to last
        result = Helper.MoveEmptyKeyItemsToLast(result);

        return result;
    }

    /// <summary>
    /// Maps every file path in <paramref name="files"/> to its match key.
    /// </summary>
    /// <param name="files">File paths to process.</param>
    /// <param name="regexPattern">
    /// Regex applied to each file name (with extension), or <c>null</c> to derive the keys
    /// from a diff of the file names (without extension).
    /// </param>
    /// <returns>A dictionary from file path to key; the key is empty when none was found.</returns>
    private static Dictionary<string, string> CalculateFileKeys(IReadOnlyList<string> files, string? regexPattern)
    {
        var result = new Dictionary<string, string>();

        if (regexPattern is null)
        {
            // 1. Auto Diff Algorithm

            // Diff filenames
            var names = files
                .Select(Path.GetFileNameWithoutExtension)
                .Where(x => !string.IsNullOrEmpty(x))
                .Distinct()
                .ToList();

            var diff = Diff.GetDiffResult(names!);

            Logger.Out.WriteLine("[Diff.GetDiffResult]\n\n {0}\n", diff?.ToString() ?? "null");

            // Extract Match keys
            foreach (var f in files)
            {
                result[f] = Diff.ExtractMatchKeyByDiff(diff, Path.GetFileNameWithoutExtension(f));
            }
        }
        else
        {
            // 2. Regex Algorithm
            foreach (var f in files)
            {
                result[f] = Helper.ExtractMatchKeyRegex(regexPattern, Path.GetFileName(f));
            }
        }

        return result;
    }
}
6 changes: 3 additions & 3 deletions SubRenamer/Model/IRenameService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ namespace SubRenamer.Model;

/// <summary>
/// Operations for building and running rename tasks from matched items.
/// </summary>
// NOTE(review): every member is listed twice, once taking IEnumerable<T> and once taking
// IReadOnlyList<T>. The IEnumerable<T> forms look like the pre-refactor signatures - confirm
// whether implementers are still expected to provide both overloads.
public interface IRenameService
{
// Updates destList from matchList (presumably one rename task per matched item - verify against the implementation).
void UpdateRenameTaskList(IEnumerable<MatchItem> matchList, Collection<RenameTask> destList);
// Executes the given rename tasks.
void ExecuteRename(IEnumerable<RenameTask> taskList);
// Returns the rename commands generated for the given items as a single string.
string GenerateRenameCommands(IEnumerable<MatchItem> list);
// IReadOnlyList<T> counterparts of the three members above.
void UpdateRenameTaskList(IReadOnlyList<MatchItem> matchList, Collection<RenameTask> destList);
void ExecuteRename(IReadOnlyList<RenameTask> taskList);
string GenerateRenameCommands(IReadOnlyList<MatchItem> list);
}
4 changes: 2 additions & 2 deletions SubRenamer/Model/MatchItem.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

namespace SubRenamer.Model;

public partial class MatchItem(string? key = "", string video = "", string subtitle = "", string status = "") : ObservableObject
public partial class MatchItem(string key = "", string video = "", string subtitle = "", string status = "") : ObservableObject
{
/**
* Match Key
*/
[ObservableProperty] private string? _key = key;
[ObservableProperty] private string _key = key;

/**
* Video Absolute Path
Expand Down
Loading