From d4cd2a868bdfbe181898cc3045477bda29a6cf19 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Thu, 19 Dec 2024 21:09:21 +0100 Subject: [PATCH] Better solution for reference candidate --- .../aisec/cpg/frontends/Language.kt | 26 +++++++++++++++++++ .../aisec/cpg/passes/SymbolResolver.kt | 19 +++++++++++--- .../aisec/cpg/frontends/java/JavaLanguage.kt | 24 +++++++++++++++++ .../aisec/cpg/passes/JavaExtraPass.kt | 1 + 4 files changed, 66 insertions(+), 4 deletions(-) diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/Language.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/Language.kt index 6192a7f1308..66d53d71d9b 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/Language.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/Language.kt @@ -33,12 +33,15 @@ import com.fasterxml.jackson.databind.annotation.JsonSerialize import de.fraunhofer.aisec.cpg.* import de.fraunhofer.aisec.cpg.graph.Name import de.fraunhofer.aisec.cpg.graph.Node +import de.fraunhofer.aisec.cpg.graph.declarations.Declaration import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration import de.fraunhofer.aisec.cpg.graph.edges.ast.TemplateArguments import de.fraunhofer.aisec.cpg.graph.statements.expressions.BinaryOperator import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference import de.fraunhofer.aisec.cpg.graph.types.* import de.fraunhofer.aisec.cpg.graph.unknownType +import de.fraunhofer.aisec.cpg.helpers.Util import de.fraunhofer.aisec.cpg.passes.SymbolResolver import java.io.File import kotlin.reflect.KClass @@ -361,6 +364,29 @@ abstract class Language> : Node() { Pair(list.toSet(), CallResolutionResult.SuccessKind.SUCCESSFUL) } } + + /** + * This function returns the best viable declaration when resolving a [Reference]. 
The + * candidates to choose from are stored in [Reference.candidates]. In most cases the languages + * can keep the default implementation, which only returns a declaration, if the list contains + * one single item. Otherwise, we have an ambiguous result and cannot determine the result with + * certainty. + * + * If we encounter an ambiguous result, a warning is issued. + */ + open fun bestViableReferenceCandidate(ref: Reference): Declaration? { + return if (ref.candidates.size > 1) { + Util.warnWithFileLocation( + ref, + log, + "Resolution of reference {} was ambiguous, cannot set refersTo correctly.", + ref.name + ) + null + } else { + ref.candidates.singleOrNull() + } + } } /** diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt index c544e77f5ac..dd00dcfa8ea 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt @@ -193,6 +193,16 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { protected fun handleReference(currentClass: RecordDeclaration?, ref: Reference) { val language = ref.language + if (language == null) { + Util.warnWithFileLocation( + ref, + log, + "Language for reference {} is empty, we cannot resolve this reference correctly.", + ref.name + ) + return + } + // Ignore references to anonymous identifiers, if the language supports it (e.g., the _ // identifier in Go) if ( @@ -211,10 +221,11 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // resolution, but in future this will also be used in resolving regular references. ref.candidates = scopeManager.lookupSymbolByNameOfNode(ref).toSet() - // Preparation for a future without legacy call resolving. Taking the first candidate is not - // ideal since we are running into an issue with function pointers here (see workaround - // below). 
- var wouldResolveTo = ref.candidates.firstOrNull() + // We need to choose the best viable candidate out of the ones we have for our reference. + // Hopefully we have only one, but there might be instances where more than one is a valid + // candidate. We let the language have a chance at overriding the default behaviour (which + // takes only a single one). + var wouldResolveTo = language.bestViableReferenceCandidate(ref) // For now, we need to ignore reference expressions that are directly embedded into call // expressions, because they are the "callee" property. In the future, we will use this diff --git a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguage.kt b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguage.kt index f0f6497559e..070dcf4aaf4 100644 --- a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguage.kt +++ b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/frontends/java/JavaLanguage.kt @@ -28,9 +28,14 @@ package de.fraunhofer.aisec.cpg.frontends.java import com.fasterxml.jackson.annotation.JsonIgnore import de.fraunhofer.aisec.cpg.ScopeManager import de.fraunhofer.aisec.cpg.frontends.* +import de.fraunhofer.aisec.cpg.graph.declarations.Declaration +import de.fraunhofer.aisec.cpg.graph.declarations.FunctionDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.RecordDeclaration +import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration import de.fraunhofer.aisec.cpg.graph.statements.expressions.BinaryOperator +import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference import de.fraunhofer.aisec.cpg.graph.types.* import de.fraunhofer.aisec.cpg.passes.JavaCallResolverHelper import kotlin.reflect.KClass @@ -115,6 +120,25 @@ open class JavaLanguage : scopeManager: 
ScopeManager, ) = JavaCallResolverHelper.handleSuperExpression(memberExpression, curClass, scopeManager) + /** + * This function handles some specifics of the Java language when choosing a reference target + * before invoking [Language.bestViableReferenceCandidate]. + */ + override fun bestViableReferenceCandidate(ref: Reference): Declaration? { + // Java allows "ambiguous" symbols when importing static fields and methods. + // Therefore, it can be that we both import a field and a method with the same name. We + // therefore do some additional filtering of the candidates here, before handling it. + if (ref.candidates.size > 1) { + if (ref.resolutionHelper is CallExpression) { + ref.candidates = ref.candidates.filter { it is FunctionDeclaration }.toSet() + } else { + ref.candidates = ref.candidates.filter { it is VariableDeclaration }.toSet() + } + } + + return super.bestViableReferenceCandidate(ref) + } + override val startCharacter = '<' override val endCharacter = '>' override val receiverName: String diff --git a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExtraPass.kt b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExtraPass.kt index 7984ca5dfca..c7a518e1f21 100644 --- a/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExtraPass.kt +++ b/cpg-language-java/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/JavaExtraPass.kt @@ -73,6 +73,7 @@ class JavaExtraPass(ctx: TranslationContext) : TranslationUnitPass(ctx) { newReference(type.name.fqn(me.name.localName), type = me.type) .codeAndLocationFrom(me) .apply { isStaticAccess = true } + ref.language = me.language walker.replace(parent, me, ref) } }