Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use StringConcatFactory for string concatenation on JDK 9+ #12929

Merged
merged 3 commits into from
Jul 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 88 additions & 9 deletions compiler/src/dotty/tools/backend/jvm/BCodeBodyBuilder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1063,30 +1063,109 @@ trait BCodeBodyBuilder extends BCodeSkelBuilder {
}
}

/* Generate string concatenation
*
* On JDK 8: create and append using `StringBuilder`
* On JDK 9+: use `invokedynamic` with `StringConcatFactory`
*/
def genStringConcat(tree: Tree): BType = {
lineNumber(tree)
liftStringConcat(tree) match {
// Optimization for expressions of the form "" + x. We can avoid the StringBuilder.
// Optimization for expressions of the form "" + x
case List(Literal(Constant("")), arg) =>
genLoad(arg, ObjectReference)
genCallMethod(defn.String_valueOf_Object, InvokeStyle.Static)

case concatenations =>
bc.genStartConcat
for (elem <- concatenations) {
val loadedElem = elem match {
val concatArguments = concatenations.view
.filter {
case Literal(Constant("")) => false // empty strings are no-ops in concatenation
case _ => true
}
.map {
case Apply(boxOp, value :: Nil) if Erasure.Boxing.isBox(boxOp.symbol) && boxOp.symbol.denot.owner != defn.UnitModuleClass =>
// Eliminate boxing of primitive values. Boxing is introduced by erasure because
// there's only a single synthetic `+` method "added" to the string class.
value
case other => other
}
.toList

// `StringConcatFactory` only got added in JDK 9, so use `StringBuilder` for lower
if (classfileVersion < asm.Opcodes.V9) {

// Estimate capacity needed for the string builder
val approxBuilderSize = concatArguments.view.map {
case Literal(Constant(s: String)) => s.length
case Literal(c @ Constant(_)) if c.isNonUnitAnyVal => String.valueOf(c).length
case _ => 0
}.sum
bc.genNewStringBuilder(approxBuilderSize)

for (elem <- concatArguments) {
val elemType = tpeTK(elem)
genLoad(elem, elemType)
bc.genStringBuilderAppend(elemType)
}
bc.genStringBuilderEnd
} else {

/* `StringConcatFactory#makeConcatWithConstants` accepts max 200 argument slots. If
* the string concatenation is longer (unlikely), we spill into multiple calls
*/
val MaxIndySlots = 200
val TagArg = '\u0001' // indicates a hole (in the recipe string) for an argument
val TagConst = '\u0002' // indicates a hole (in the recipe string) for a constant

val recipe = new StringBuilder()
val argTypes = Seq.newBuilder[asm.Type]
val constVals = Seq.newBuilder[String]
var totalArgSlots = 0
var countConcats = 1 // ie. 1 + how many times we spilled

for (elem <- concatArguments) {
val tpe = tpeTK(elem)
val elemSlots = tpe.size

// Unlikely spill case
if (totalArgSlots + elemSlots >= MaxIndySlots) {
bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result())
countConcats += 1
totalArgSlots = 0
recipe.setLength(0)
argTypes.clear()
constVals.clear()
}

case _ => elem
elem match {
case Literal(Constant(s: String)) =>
if (s.contains(TagArg) || s.contains(TagConst)) {
totalArgSlots += elemSlots
recipe.append(TagConst)
constVals += s
} else {
recipe.append(s)
}

case other =>
totalArgSlots += elemSlots
recipe.append(TagArg)
val tpe = tpeTK(elem)
argTypes += tpe.toASMType
genLoad(elem, tpe)
}
}
bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result())

// If we spilled, generate one final concat
if (countConcats > 1) {
bc.genIndyStringConcat(
TagArg.toString * countConcats,
Seq.fill(countConcats)(StringRef.toASMType),
Seq.empty
)
}
val elemType = tpeTK(loadedElem)
genLoad(loadedElem, elemType)
bc.genConcat(elemType)
}
bc.genEndConcat
}
StringRef
}
Expand Down
42 changes: 35 additions & 7 deletions compiler/src/dotty/tools/backend/jvm/BCodeIdiomatic.scala
Original file line number Diff line number Diff line change
Expand Up @@ -224,24 +224,27 @@ trait BCodeIdiomatic {

} // end of method genPrimitiveShift()

/*
/* Creates a new `StringBuilder` instance with the requested capacity
*
* can-multi-thread
*/
final def genStartConcat: Unit = {
final def genNewStringBuilder(size: Int): Unit = {
jmethod.visitTypeInsn(Opcodes.NEW, JavaStringBuilderClassName)
jmethod.visitInsn(Opcodes.DUP)
jmethod.visitLdcInsn(Integer.valueOf(size))
invokespecial(
JavaStringBuilderClassName,
INSTANCE_CONSTRUCTOR_NAME,
"()V",
"(I)V",
itf = false
)
}

/*
/* Issue a call to `StringBuilder#append` for the right element type
*
* can-multi-thread
*/
def genConcat(elemType: BType): Unit = {
final def genStringBuilderAppend(elemType: BType): Unit = {
val paramType = elemType match {
case ct: ClassBType if ct.isSubtypeOf(StringRef) => StringRef
case ct: ClassBType if ct.isSubtypeOf(jlStringBufferRef) => jlStringBufferRef
Expand All @@ -257,13 +260,38 @@ trait BCodeIdiomatic {
invokevirtual(JavaStringBuilderClassName, "append", bt.descriptor)
}

/*
/* Extract the built `String` from the `StringBuilder`
*
* can-multi-thread
*/
final def genEndConcat: Unit = {
final def genStringBuilderEnd: Unit = {
invokevirtual(JavaStringBuilderClassName, "toString", "()Ljava/lang/String;")
}

/* Concatenate top N arguments on the stack with `StringConcatFactory#makeConcatWithConstants`
* (only works for JDK 9+)
*
* can-multi-thread
*/
final def genIndyStringConcat(
recipe: String,
argTypes: Seq[asm.Type],
constants: Seq[String]
): Unit = {
jmethod.visitInvokeDynamicInsn(
"makeConcatWithConstants",
asm.Type.getMethodDescriptor(StringRef.toASMType, argTypes:_*),
new asm.Handle(
asm.Opcodes.H_INVOKESTATIC,
"java/lang/invoke/StringConcatFactory",
"makeConcatWithConstants",
"(Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;Ljava/lang/String;[Ljava/lang/Object;)Ljava/lang/invoke/CallSite;",
false
),
(recipe +: constants):_*
)
}

/*
* Emits one or more conversion instructions based on the types given as arguments.
*
Expand Down
4 changes: 2 additions & 2 deletions compiler/test/dotty/tools/backend/jvm/StringConcatTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ class StringConcatTest extends DottyBytecodeTest {
}

assertEquals(List(
"<init>()V",
"<init>(I)V",
"toString()Ljava/lang/String;",
"append(Ljava/lang/String;)Ljava/lang/StringBuilder;",
"append(Ljava/lang/Object;)Ljava/lang/StringBuilder;",
Expand All @@ -82,7 +82,7 @@ class StringConcatTest extends DottyBytecodeTest {
)

assertEquals(List(
"<init>()V",
"<init>(I)V",
"toString()Ljava/lang/String;",
"append(Ljava/lang/String;)Ljava/lang/StringBuilder;",
"append(Ljava/lang/String;)Ljava/lang/StringBuilder;",
Expand Down
Binary file added tests/run/StringConcat.check
Binary file not shown.
79 changes: 79 additions & 0 deletions tests/run/StringConcat.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
@main def Test() = {

// This should generally obey 15.18.1. of the JLS (String Concatenation Operator +)
def concatenatingVariousTypes(): String = {
val str: String = "some string"
val sb: StringBuffer = new StringBuffer("some stringbuffer")
val cs: CharSequence = java.nio.CharBuffer.allocate(50).append("charsequence")
val i: Int = 123456789
val s: Short = 345
val b: Byte = 12
val z: Boolean = true
val f: Float = 3.14f
val j: Long = 98762147483647L
val d: Double = 3.1415d

"String " + str + "\n" +
"StringBuffer " + sb + "\n" +
"CharSequence " + cs + "\n" +
"Int " + i + "\n" +
"Short " + s + "\n" +
"Byte " + b + "\n" +
"Boolean " + z + "\n" +
"Float " + f + "\n" +
"Long " + j + "\n" +
"Double " + d + "\n"
}
// The characters `\u0001` and `\u0002` play a special role in `StringConcatFactory`
def concatenationInvolvingSpecialCharacters(): String = {
val s1 = "Qux"
val s2 = "Quux"

s"Foo \u0001 $s1 Bar \u0002 $s2 Baz"
}
// Concatenation involving more than 200 elements
def largeConcatenation(): String = {
val s00 = "s00"
val s01 = "s01"
val s02 = "s02"
val s03 = "s03"
val s04 = "s04"
val s05 = "s05"
val s06 = "s06"
val s07 = "s07"
val s08 = "s08"

// 24 rows follow
((s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n") +
(s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n")) +
((s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n") +
(s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" +
s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n"))
}
println("----------")
println(concatenatingVariousTypes())
println("----------")
println(concatenationInvolvingSpecialCharacters())
println("----------")
println(largeConcatenation())
println("----------")
}