Skip to content

Commit

Permalink
Ignore empty paragraphs in HTML to match browser rendering (#289)
Browse files Browse the repository at this point in the history
  • Loading branch information
MohamedRejeb committed May 30, 2024
1 parent de506ab commit eccef2b
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,14 @@ internal class RichSpan(
*/
fun isEmpty(): Boolean = text.isEmpty() && isChildrenEmpty()

/**
 * Tells whether this rich span is blank.
 *
 * The span counts as blank only when its own text is blank and all of its
 * children are blank as well.
 *
 * @return True if the rich span is blank, false otherwise
 */
fun isBlank(): Boolean {
    // Own text is checked first so the children are only inspected when needed.
    if (!text.isBlank()) return false
    return isChildrenBlank()
}

/**
* Check if the rich span children are empty
*
Expand All @@ -184,6 +192,16 @@ internal class RichSpan(
richSpan.text.isEmpty() && richSpan.isChildrenEmpty()
}

/**
 * Tells whether every child of this rich span is blank.
 *
 * A child is blank when its text is blank and its own children are blank too,
 * so the check descends through the whole subtree. A span without children
 * yields true.
 *
 * @return True if the rich span children are blank, false otherwise
 */
private fun isChildrenBlank(): Boolean {
    for (child in children) {
        // Stop at the first child that has visible text, directly or in its subtree.
        if (!child.text.isBlank() || !child.isChildrenBlank()) return false
    }
    return true
}

/**
* Get the first non-empty child
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,16 @@ internal class RichParagraph(
return true
}

/**
 * Tells whether this paragraph is blank.
 *
 * The paragraph is blank when every child rich span is blank; a paragraph
 * with no children is blank as well.
 *
 * @param ignoreStartRichSpan When false, the start rich span of the paragraph
 * type must also be blank for the paragraph to count as blank. When true
 * (the default), the start rich span is not inspected.
 * @return True if the paragraph is blank, false otherwise
 */
fun isBlank(ignoreStartRichSpan: Boolean = true): Boolean {
    val startSpanMatters = !ignoreStartRichSpan
    if (startSpanMatters && !type.startRichSpan.isBlank()) return false

    // `all` yields true for an empty list, which covers the no-children case.
    return children.all { child -> child.isBlank() }
}

fun getFirstNonEmptyChild(offset: Int = -1): RichSpan? {
children.fastForEach { richSpan ->
if (richSpan.text.isNotEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,13 @@ internal object RichTextStateHtmlParser : RichTextStateParser<String> {
var currentRichSpan: RichSpan? = null
var lastClosedTag: String? = null

var skipText = false

val handler = KsoupHtmlHandler
.Builder()
.onText {
if (skipText) return@onText

// In HTML, text placed directly inside ul/ol tags is skipped
val lastOpenedTag = openedTags.lastOrNull()?.first
if (lastOpenedTag == "ul" || lastOpenedTag == "ol") return@onText

if (lastOpenedTag in skippedHtmlElements) return@onText

val addedText = KsoupEntities.decodeHtml(
Expand Down Expand Up @@ -74,27 +73,47 @@ internal object RichTextStateHtmlParser : RichTextStateParser<String> {
}
}
.onOpenTag { name, attributes, _ ->
val lastOpenedTag = openedTags.lastOrNull()?.first

openedTags.add(name to attributes)

if (name == "ul" || name == "ol") {
skipText = true
// TODO: Apply ul/ol styling if it exists
return@onOpenTag
}

val cssStyleMap = attributes["style"]?.let { CssEncoder.parseCssStyle(it) } ?: emptyMap()
val cssSpanStyle = CssEncoder.parseCssStyleMapToSpanStyle(cssStyleMap)
val tagSpanStyle = htmlElementsSpanStyleEncodeMap[name]

if (name in htmlBlockElements) {
val currentRichParagraph = richParagraphList.lastOrNull()
val isCurrentRichParagraphBlank = currentRichParagraph?.isBlank() == true
val isCurrentTagBlockElement = name in htmlBlockElements
val isLastOpenedTagBlockElement = lastOpenedTag in htmlBlockElements

if (
lastOpenedTag != null &&
isCurrentTagBlockElement &&
isLastOpenedTagBlockElement &&
name == "li" &&
currentRichParagraph != null &&
currentRichParagraph.type is DefaultParagraph &&
isCurrentRichParagraphBlank
) {
val paragraphType = encodeHtmlElementToRichParagraphType(lastOpenedTag)
currentRichParagraph.type = paragraphType

val cssParagraphStyle = CssEncoder.parseCssStyleMapToParagraphStyle(cssStyleMap)
currentRichParagraph.paragraphStyle = currentRichParagraph.paragraphStyle.merge(cssParagraphStyle)
}

if (isCurrentTagBlockElement && (!isLastOpenedTagBlockElement || !isCurrentRichParagraphBlank)) {
stringBuilder.append(' ')

val newRichParagraph = RichParagraph()
var paragraphType: ParagraphType = DefaultParagraph()
if (name == "li") {
skipText = false
openedTags.getOrNull(openedTags.lastIndex - 1)?.first?.let { lastOpenedTag ->
paragraphType = encodeHtmlElementToRichParagraphType(lastOpenedTag)
}
if (name == "li" && lastOpenedTag != null) {
paragraphType = encodeHtmlElementToRichParagraphType(lastOpenedTag)
}
val cssParagraphStyle = CssEncoder.parseCssStyleMapToParagraphStyle(cssStyleMap)

Expand Down Expand Up @@ -158,7 +177,6 @@ internal object RichTextStateHtmlParser : RichTextStateParser<String> {
lastClosedTag = name

if (name == "ul" || name == "ol") {
skipText = false
return@onCloseTag
}

Expand All @@ -173,6 +191,12 @@ internal object RichTextStateHtmlParser : RichTextStateParser<String> {
parser.write(input)
parser.end()

// Drop every blank paragraph from the parsed result so that empty
// paragraphs in the HTML input do not end up in the rich text state.
richParagraphList.removeAll { paragraph -> paragraph.isBlank() }

return RichTextState(
initialRichParagraphList = richParagraphList,
)
Expand Down

0 comments on commit eccef2b

Please sign in to comment.