Skip to content

Commit

Permalink
新HTML形式への対応、ひとまず #11
Browse files Browse the repository at this point in the history
  • Loading branch information
onihusube committed Jun 19, 2023
1 parent b9bd052 commit 112007c
Show file tree
Hide file tree
Showing 4 changed files with 1,033 additions and 4 deletions.
29 changes: 26 additions & 3 deletions HtmlConverter/HtmltoDat.cs
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,29 @@ public Byte[] Gethtml(String URI, int range, String UA, bool CRReplace, String L
// 2023年6月ごろから導入の新HTML形式
System.Diagnostics.Debug.WriteLine("CGI ver202306形式");

// 1400行ほど飛ばす
for (int i = 0; i <= 1400; ++i)
{
html.ReadLine();
}

// レス本文探索
for (; !html.EndOfStream; line = html.ReadLine())
{
if (line.Contains("<article"))
{
break;
}
}

// 先に終端に到達したらやめる
if (html.EndOfStream)
{
break;
}

line += html.ReadToEnd();

Builddat = this.CGI202306_ConvertProcess(title, URI, line);
break;
case CGIType.Until202306:
Expand Down Expand Up @@ -272,7 +295,7 @@ private StringBuilder CGI202306_ConvertProcess(string title, string URI, string
// レスの連続抽出はざっくりとやる
var ResMatches = Regex.Matches(allres, @"<article id=.+?</section></article>");
// ↑で抽出した1つのレス内で各要素を抽出
Regex ResContent = new Regex(@"<article id=.(?<num>\d+?).+?<summary>.+?<span class=.postusername.>(?<name><b>.+?</b>)</span></summary><span class=.date.>(?<date>.+?)</span><span class=.uid.>(?<id>.+?)</span>(?<be><span class=.be.+?</span>)?</details><section class=.post-content.>(?<massage>.+?)</section></article>");
Regex ResContent = new Regex(@"<article id=.(?<num>\d+?).+?<summary>.+?<span class=.postusername.>(?<name><b>.*?</b>)</span></summary><span class=.date.>(?<date>.+?)</span><span class=.uid.>(?<id>.*?)</span>(?<be><span class=.be.+?</span>)?</details><section class=.post-content.>(?<massage>.+?)</section></article>");

// 旧型式(API移行直後のhtml形式)の処理を再利用するために、レス部分のhtmlを1レスづつ旧型式に変換する
// 細部のハンドリングを継承するための措置
Expand All @@ -283,7 +306,7 @@ private StringBuilder CGI202306_ConvertProcess(string title, string URI, string
string resnumber = res_content.Groups["num"].Value;
string name = res_content.Groups["name"].Value;
string date = res_content.Groups["date"].Value;
string id = res_content.Groups["id"].Value; // キャプチャ無し(IDなし)の場合は空文字列になる(らしい
string id = res_content.Groups["id"].Value; // 無ければ空文字列
string be = res_content.Groups["be"].Value; // 無ければ空文字列
string message = res_content.Groups["massage"].Value;

Expand Down Expand Up @@ -313,7 +336,7 @@ private StringBuilder CGI202306_ConvertProcess(string title, string URI, string
date = "あぼーん";
id = "";
}
if (string.IsNullOrEmpty(be))
if (res_content.Groups["be"].Success)
{
// beリンクの変換
// <span class="be r2BP"><a href="http://be.5ch.net/user/823355746" target="_blank">?2BP(0)</a></span> これを
Expand Down
Loading

0 comments on commit 112007c

Please sign in to comment.