Skip to content

Commit

Permalink
HTML形式判定の追加と整理 [skip ci]
Browse files Browse the repository at this point in the history
  • Loading branch information
onihusube committed Jun 19, 2023
1 parent 69dd466 commit b9bd052
Showing 1 changed file with 64 additions and 27 deletions.
91 changes: 64 additions & 27 deletions HtmlConverter/HtmltoDat.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,14 @@ private set
/// </summary>
public bool IsHttpsReplace { get; set; }

/// <summary>
/// Identifies which read.cgi HTML layout a fetched thread page uses.
/// Gethtml assigns one of these while scanning for the &lt;title&gt; tag and then
/// switches on it to pick the matching HTML-to-dat conversion routine.
/// </summary>
private enum CGIType
{
// Legacy layout from before the API was introduced (one post per line).
// Chosen when <title>...</title> opens and closes on the same scanned line; handled by OldCGIFormat.
Old,
// Layout that was mainstream as of August 2022 (roughly five lines in total).
// Chosen when <title> is not closed on its line and the line contains "</script><title>" or
// "bootstrap.min", or lacks "ad-manager.min"; handled by PresentCGIFormat.
Until202306,
// Layout observed on the krsw server around 2022-08-05 (everything packed into one line).
// Handled by krswCGIFormat. NOTE(review): the condition that selects this branch is outside the visible diff hunk.
Krsw,
// New layout introduced around June 2023. Also the initial value of the detection variable in Gethtml,
// so it is what remains if no <title> line matches; handled by CGI202306_ConvertProcess.
Ver202306
}

/// <summary>
/// 指定されたスレのHTMLをdatへ変換する
/// </summary>
Expand All @@ -85,10 +93,10 @@ private set
public Byte[] Gethtml(String URI, int range, String UA, bool CRReplace, String LastMod = null)
{
// 現在(23/06/18)pinkはまだ新形式ではない
if (URI.Contains(".5ch.net/"))
{
URI = URI.Replace("test/read.cgi/", "test/read.cgi/c/");
}
//if (URI.Contains(".5ch.net/"))
//{
// URI = URI.Replace("test/read.cgi/", "test/read.cgi/c/");
//}

System.Diagnostics.Debug.WriteLine($"{URI} をHTML変換開始");
System.Diagnostics.Debug.WriteLine($"Range:{range}, UA:{this.UserAgent}, CRReplace:{CRReplace}, LastMod:{LastMod}");
Expand All @@ -111,9 +119,12 @@ public Byte[] Gethtml(String URI, int range, String UA, bool CRReplace, String L
{
String title = "もうずっと人大杉";

bool NewCGI = false;
CGIType cgiver = CGIType.Ver202306;

//bool cgi_ver_202306 = false;
//bool cgi_until_202306 = false;
// krsw鯖のHTML形式の検出
bool is_krsw = false;
//bool is_krsw = false;

string line = html.ReadLine();

Expand All @@ -126,19 +137,35 @@ public Byte[] Gethtml(String URI, int range, String UA, bool CRReplace, String L
if (Regex.IsMatch(line, @"<title>(.+?)<\/title>"))
{
title = Regex.Match(line, @"<title>(.+?)<\/title>").Groups[1].Value;
cgiver = CGIType.Old;
break;
}
else if (Regex.IsMatch(line, @"<title>(.+?)$"))
{
title = Regex.Match(line, @"<title>(.+?)$").Groups[1].Value;
NewCGI = true;

// 202306以降かどうかを判定
bool is_until_202306 =
line.Contains("</script><title>") ||
line.Contains("bootstrap.min") ||
!line.Contains("ad-manager.min");

if (is_until_202306)
{
cgiver = CGIType.Until202306;
}
else
{
cgiver = CGIType.Ver202306;
}

break;
}
}
}
else
{
is_krsw = true;
cgiver = CGIType.Krsw;
title = Regex.Match(line, @"<title>(.+?)<\/title>").Groups[1].Value;
}

Expand All @@ -153,28 +180,38 @@ public Byte[] Gethtml(String URI, int range, String UA, bool CRReplace, String L
System.Diagnostics.Debug.WriteLine($"Title:{title}");

StringBuilder Builddat = null;
string ketu;
string ketu = "0";
//新CGI形式と古いCGI形式で処理を分ける
if (is_krsw)
{
// 2022/08/05頃に観測された、krsw鯖の形式(1行に詰まってる)
System.Diagnostics.Debug.WriteLine("krsw鯖形式");

Builddat = this.krswCGIFormat(title, URI, line, out ketu);
}
else if (NewCGI)
switch (cgiver)
{
// 2022年8月時点で主流のHTML形式(全5行くらいのやつ)
System.Diagnostics.Debug.WriteLine("新CGI形式");

Builddat = this.PresentCGIFormat(title, URI, html, out ketu);
}
else
{
// API導入前の古い形式(1レス1行)
System.Diagnostics.Debug.WriteLine("旧CGI形式");

Builddat = this.OldCGIFormat(title, html, out ketu);
case CGIType.Ver202306:
// 2023年6月ごろから導入の新HTML形式
System.Diagnostics.Debug.WriteLine("CGI ver202306形式");

Builddat = this.CGI202306_ConvertProcess(title, URI, line);
break;
case CGIType.Until202306:
// 2022年8月時点で主流のHTML形式(全5行くらいのやつ)
System.Diagnostics.Debug.WriteLine("新CGI形式");

Builddat = this.PresentCGIFormat(title, URI, html, out ketu);
break;
case CGIType.Krsw:
// 2022/08/05頃に観測された、krsw鯖の形式(1行に詰まってる)
System.Diagnostics.Debug.WriteLine("krsw鯖形式");

Builddat = this.krswCGIFormat(title, URI, line, out ketu);
break;
case CGIType.Old:
// API導入前の古い形式(1レス1行)
System.Diagnostics.Debug.WriteLine("旧CGI形式");

Builddat = this.OldCGIFormat(title, html, out ketu);
break;
default:
System.Diagnostics.Debug.WriteLine("未知のCGI形式");
break;
}

//スレッドが生存している場合
Expand Down

1 comment on commit b9bd052

@onihusube
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

#11

Please sign in to comment.