Skip to content

Commit

Permalink
Fix weird formatting in articles
Browse files Browse the repository at this point in the history
This code is very hacky but it gets the job done

Signed-off-by: Marcus Nilsson <marcus.nilsson@genarp.com>
  • Loading branch information
mkanilsson committed Sep 22, 2023
1 parent 6fa12d9 commit efb6771
Showing 1 changed file with 28 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@

import java.io.File;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -134,7 +136,32 @@ public static String getHtmlPage(RequestManager glide, RssItem rssItem, boolean
}

String description = rssItem.getBody();
if(description.isEmpty() && rssItem.getMediaDescription() != null) {

if (!description.isEmpty()) {
// UUID is used so there is only a very small chance that the placeholder text actually exists in the article
var uuid = UUID.randomUUID().toString();

// pre-blocks shouldn't have their newlines replaced
var matcher = Pattern.compile("<pre>(.*?)</pre>", Pattern.MULTILINE | Pattern.DOTALL).matcher(description);
var preBlocks = new ArrayList<String>();

while (matcher.find()) {
var group = matcher.group();
description = description.replace(group, "PRE_BLOCK_THAT_WILL_BE_REPLACED_" + uuid + "_" + preBlocks.size());
preBlocks.add(group);
}

description = description
.replaceAll("\n\n", "THIS_WILL_BE_BECOME_ONE_NEWLINE_LATER_" + uuid) // This is required because otherwise `\n\n` would become 2 spaces
.replaceAll(">\n", ">") // The first character after a tag shouldn't have a space
.replaceAll("\n", " ")
.replaceAll("THIS_WILL_BE_BECOME_ONE_NEWLINE_LATER_" + uuid, "\n");

for (int i = 0; i < preBlocks.size(); i++) {
description = description.replace("PRE_BLOCK_THAT_WILL_BE_REPLACED_" + uuid + "_" + i, preBlocks.get(i));
}
}
else if(rssItem.getMediaDescription() != null) {
// In case the RSS body is empty, fall back to the media description (e.g. YouTube / TED talks)
description = rssItem.getMediaDescription();
}
Expand Down

0 comments on commit efb6771

Please sign in to comment.