From efb6771f141ead55da9b5890b8be0dd66766d267 Mon Sep 17 00:00:00 2001 From: Marcus Nilsson Date: Fri, 22 Sep 2023 16:55:16 +0200 Subject: [PATCH] Fix weird formatting in articles This code is very hacky but it gets the job done Signed-off-by: Marcus Nilsson --- .../async_tasks/RssItemToHtmlTask.java | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/News-Android-App/src/main/java/de/luhmer/owncloudnewsreader/async_tasks/RssItemToHtmlTask.java b/News-Android-App/src/main/java/de/luhmer/owncloudnewsreader/async_tasks/RssItemToHtmlTask.java index 13359726b..9e622d7ae 100644 --- a/News-Android-App/src/main/java/de/luhmer/owncloudnewsreader/async_tasks/RssItemToHtmlTask.java +++ b/News-Android-App/src/main/java/de/luhmer/owncloudnewsreader/async_tasks/RssItemToHtmlTask.java @@ -22,8 +22,10 @@ import java.io.File; import java.text.DecimalFormat; +import java.util.ArrayList; import java.util.Date; import java.util.List; +import java.util.UUID; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -134,7 +136,32 @@ public static String getHtmlPage(RequestManager glide, RssItem rssItem, boolean } String description = rssItem.getBody(); - if(description.isEmpty() && rssItem.getMediaDescription() != null) { + + if (!description.isEmpty()) { + // UUID is used so there is only a very small chance that the placeholder text actually exists in the article + var uuid = UUID.randomUUID().toString(); + + // pre-blocks shouldn't have their newlines replaced + var matcher = Pattern.compile("<pre>(.*?)</pre>", Pattern.MULTILINE | Pattern.DOTALL).matcher(description); + var preBlocks = new ArrayList<String>(); + + while (matcher.find()) { + var group = matcher.group(); + description = description.replace(group, "PRE_BLOCK_THAT_WILL_BE_REPLACED_" + uuid + "_" + preBlocks.size()); + preBlocks.add(group); + } + + description = description + .replaceAll("\n\n", "THIS_WILL_BE_BECOME_ONE_NEWLINE_LATER_" + uuid) // This is required because otherwise `\n\n` would become 2 spaces + .replaceAll(">\n", ">") // The first character after a tag shouldn't have a space + .replaceAll("\n", " ") + .replaceAll("THIS_WILL_BE_BECOME_ONE_NEWLINE_LATER_" + uuid, "\n"); + + for (int i = 0; i < preBlocks.size(); i++) { + description = description.replace("PRE_BLOCK_THAT_WILL_BE_REPLACED_" + uuid + "_" + i, preBlocks.get(i)); + } + } + else if(rssItem.getMediaDescription() != null) { // in case the rss body is empty, fallback to the media description (e.g. youtube / ted talks) description = rssItem.getMediaDescription(); }