Skip to content

Commit

Permalink
Accept letters with strokes or diacritics
Browse files Browse the repository at this point in the history
  • Loading branch information
paul-dingemans committed Jul 7, 2022
1 parent 2fc863e commit 55ce3e2
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 90 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package com.pinterest.ktlint.ruleset.standard

import com.pinterest.ktlint.core.Rule
import com.pinterest.ktlint.ruleset.standard.internal.removeDiacriticsFromLetters
import com.pinterest.ktlint.ruleset.standard.internal.regExIgnoringDiacriticsAndStrokesOnLetters
import org.jetbrains.kotlin.com.intellij.lang.ASTNode
import org.jetbrains.kotlin.com.intellij.psi.impl.source.tree.CompositeElement
import org.jetbrains.kotlin.psi.KtEnumEntry
Expand All @@ -11,7 +11,7 @@ import org.jetbrains.kotlin.psi.KtEnumEntry
*/
public class EnumEntryNameCaseRule : Rule("enum-entry-name-case") {
internal companion object {
val regex = Regex("[A-Z]([A-Za-z\\d]*|[A-Z_\\d]*)")
val regex = "[A-Z]([A-Za-z\\d]*|[A-Z_\\d]*)".regExIgnoringDiacriticsAndStrokesOnLetters()
}

override fun visit(
Expand All @@ -25,7 +25,7 @@ public class EnumEntryNameCaseRule : Rule("enum-entry-name-case") {
val enumEntry = node.psi as? KtEnumEntry ?: return
val name = enumEntry.name ?: return

if (!name.removeDiacriticsFromLetters().matches(regex)) {
if (!name.matches(regex)) {
emit(
node.startOffset,
"Enum entry name should be uppercase underscore-separated names like \"ENUM_ENTRY\" or upper camel-case like \"EnumEntry\"",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import com.pinterest.ktlint.core.ast.ElementType.PROPERTY
import com.pinterest.ktlint.core.ast.ElementType.TYPEALIAS
import com.pinterest.ktlint.core.ast.ElementType.TYPE_REFERENCE
import com.pinterest.ktlint.core.ast.children
import com.pinterest.ktlint.ruleset.standard.internal.removeDiacriticsFromLetters
import com.pinterest.ktlint.ruleset.standard.internal.regExIgnoringDiacriticsAndStrokesOnLetters
import java.nio.file.Paths
import org.jetbrains.kotlin.com.intellij.lang.ASTNode
import org.jetbrains.kotlin.com.intellij.lang.FileASTNode
Expand Down Expand Up @@ -167,7 +167,7 @@ public class FilenameRule : Rule(
private fun String.shouldMatchPascalCase(
emit: (offset: Int, errorMessage: String, canBeAutoCorrected: Boolean) -> Unit
) {
if (!this.removeDiacriticsFromLetters().matches(pascalCaseRegEx)) {
if (!this.matches(pascalCaseRegEx)) {
emit(0, "File name '$this.kt' should conform PascalCase", false)
}
}
Expand Down Expand Up @@ -199,7 +199,7 @@ public class FilenameRule : Rule(
}

private companion object {
val pascalCaseRegEx = Regex("""^[A-Z][A-Za-z\d]*$""")
val pascalCaseRegEx = "^[A-Z][A-Za-z\\d]*$".regExIgnoringDiacriticsAndStrokesOnLetters()
val NON_CLASS_RELATED_TOP_LEVEL_DECLARATION_TYPES = listOf(OBJECT_DECLARATION, TYPEALIAS, PROPERTY)
}
}
Original file line number Diff line number Diff line change
@@ -1,23 +1,11 @@
package com.pinterest.ktlint.ruleset.standard.internal

import java.text.Normalizer

/**
* Removes diacritics from letters. Note that ligatures æ (ae), œ (oe), Æ (AE), Œ (OE), and letters with strokes ł (l),
* ø (o), ß (s), Ł (L), Ø (O) are not changed.
* Transforms a string containing regular expression ranges like "A-Z" and "a-z" to a RegEx which checks whether a
* unicode character has an uppercase versus a lowercase mapping to a letter. This function intends to keep the original
* expression more readable.
*/

internal fun String.removeDiacriticsFromLetters(): String {
    val result = StringBuilder()
    for (character in this) {
        // NFD splits a letter carrying a diacritic into its base character followed by the combining mark(s).
        val decomposed = Normalizer.normalize(character.toString(), Normalizer.Form.NFD)
        val baseCharacter = decomposed.first()
        if (baseCharacter.isLetterOrDigit()) {
            // Keep only the base character; everything after it in the decomposition is dropped.
            result.append(baseCharacter)
        } else {
            // Not a letter or digit: keep the normalized form as is.
            result.append(decomposed)
        }
    }
    return result.toString()
}
/**
 * Builds a [Regex] from this pattern string after substituting the literal text "A-Z" with the unicode uppercase
 * letter class (\p{Lu}) and the literal text "a-z" with the unicode lowercase letter class (\p{Ll}). All other parts
 * of the pattern are left untouched.
 */
internal fun String.regExIgnoringDiacriticsAndStrokesOnLetters(): Regex {
    val uppercaseRangeReplaced = replace("A-Z", "\\p{Lu}")
    val lowercaseRangeReplaced = uppercaseRangeReplaced.replace("a-z", "\\p{Ll}")
    return Regex(lowercaseRangeReplaced)
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ class EnumEntryNameCaseRuleTest {
val code =
"""
enum class SomeEnum {
ŸÈŚ_THÎS_IS_ALLOWED,
ŸèśThîsIsAllowed,
ŸÈŚ_THÎS_IS_ALLOWED_123,
ŸèśThîsIsAllowed123,
}
""".trimIndent()
enumEntryNameCaseRuleAssertThat(code).hasNoLintViolations()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ class FilenameRuleTest {
fun `Issue 1530 - Given a file which name should match PascalCase then this name may also contain letters with diacritics`() {
val code = "// some code"
fileNameRuleAssertThat(code)
.asFileWithPath("ŸëšThïsĮsÂllòwed.kt")
.asFileWithPath("ŸëšThïsĮsÂllòwed123.kt")
.hasNoLintViolations()
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,53 +1,38 @@
package com.pinterest.ktlint.ruleset.standard.internal

import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.CsvSource
import org.junit.jupiter.params.provider.ValueSource

class RemoveDiacriticsFromLettersTest {
@ParameterizedTest(name = "Original character: {0}, expected result: {1}")
@CsvSource(
value = [
"àáâäãåā,aaaaaaa",
"çćč,ccc",
"èéêëēėę,eeeeeee",
"îïíīįì,iiiiii",
"ñń,nn",
"ôöòóōõ,oooooo",
"śš,ss",
"ûüùúū,uuuuu",
"ÿ,y",
"žźż,zzz",
"ÀÁÂÄÃÅĀ,AAAAAAA",
"ÇĆČ,CCC",
"ÈÉÊËĒĖĘ,EEEEEEE",
"ÎÏÍĪĮÌ,IIIIII",
"ÑŃ,NN",
"ÔÖÒÓŌÕ,OOOOOO",
"ŚŠ,SS",
"ÛÜÙÚŪ,UUUUU",
"Ÿ,Y",
"ŽŹŻ,ZZZ"
]
)
fun `Given a letter with a diacritic then remove it`(original: String, expected: String) {
assertThat(original.removeDiacriticsFromLetters()).isEqualTo(expected)
}

@ParameterizedTest(name = "Character: {0}")
@ValueSource(
strings = [
"æ", "ł", "œ", "ø", "ß", "Æ", "Ł", "Œ", "Ø"
"àáâäæãåā",
"çćč",
"èéêëēėę",
"îïíīįì",
"ł",
"ñń",
"ôöòóœøōõ",
"ßśš",
"ûüùúū",
"ÿ",
"žźż",
"ÀÁÂÄÆÃÅĀ",
"ÇĆČ",
"ÈÉÊËĒĖĘ",
"ÎÏÍĪĮÌ",
"Ł",
"ÑŃ",
"ÔÖÒÓŒØŌÕ",
"ŚŠ",
"ÛÜÙÚŪ",
"Ÿ",
"ŽŹŻ"
]
)
fun `Given a ligature or letter with stroke then keep it unchanged`(original: String) {
assertThat(original.removeDiacriticsFromLetters()).isEqualTo(original)
}

@Test
fun `Given a string containing`() {
assertThat("ÅÄÖāăąēîïĩíĝġńñšŝśûůŷ".removeDiacriticsFromLetters()).isEqualTo("AAOaaaeiiiiggnnsssuuy")
fun `Given a letter with a diacritic then remove it`(original: String) {
assertThat(original.matches("[A-Za-z]*".regExIgnoringDiacriticsAndStrokesOnLetters())).isTrue
}
}
64 changes: 41 additions & 23 deletions ktlint/src/main/kotlin/com/pinterest/ktlint/internal/FileUtils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import java.nio.file.Paths
import java.nio.file.SimpleFileVisitor
import java.nio.file.attribute.BasicFileAttributes
import kotlin.system.exitProcess
import kotlin.system.measureTimeMillis
import mu.KotlinLogging

private val logger = KotlinLogging.logger {}.initKtLintKLogger()
Expand Down Expand Up @@ -66,33 +67,50 @@ internal fun FileSystem.fileSequence(
}
}

Files.walkFileTree(
rootDir,
object : SimpleFileVisitor<Path>() {
override fun visitFile(
filePath: Path,
fileAttrs: BasicFileAttributes
): FileVisitResult {
if (negatedPathMatchers.none { it.matches(filePath) } &&
pathMatchers.any { it.matches(filePath) }
) {
result.add(filePath)
logger.debug {
"""
Start walkFileTree for rootDir: '$rootDir'
include:
${pathMatchers.map { " - $it" }}
exlcude:
${negatedPathMatchers.map { " - $it" }}
""".trimIndent()
}
val duration = measureTimeMillis {
Files.walkFileTree(
rootDir,
object : SimpleFileVisitor<Path>() {
override fun visitFile(
filePath: Path,
fileAttrs: BasicFileAttributes
): FileVisitResult {
if (negatedPathMatchers.none { it.matches(filePath) } &&
pathMatchers.any { it.matches(filePath) }
) {
logger.debug { "- File: $filePath: Include" }
result.add(filePath)
} else {
logger.debug { "- File: $filePath: Ignore" }
}
return FileVisitResult.CONTINUE
}
return FileVisitResult.CONTINUE
}

override fun preVisitDirectory(
dirPath: Path,
dirAttr: BasicFileAttributes
): FileVisitResult {
return if (Files.isHidden(dirPath)) {
FileVisitResult.SKIP_SUBTREE
} else {
FileVisitResult.CONTINUE
override fun preVisitDirectory(
dirPath: Path,
dirAttr: BasicFileAttributes
): FileVisitResult {
return if (Files.isHidden(dirPath)) {
logger.debug { "- Dir: $dirPath: Ignore" }
FileVisitResult.SKIP_SUBTREE
} else {
logger.debug { "- Dir: $dirPath: Traverse" }
FileVisitResult.CONTINUE
}
}
}
}
)
)
}
logger.debug { "Results: include ${result.count()} files in $duration ms" }

return result.asSequence()
}
Expand Down

0 comments on commit 55ce3e2

Please sign in to comment.