From df45a0e3f966dd24feb0fece101dfa469ff24d1b Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 4 Nov 2024 13:31:38 +0000 Subject: [PATCH] [red-knot] Add MRO resolution for classes (#14027) --- Cargo.lock | 7 +- Cargo.toml | 1 + crates/red_knot_python_semantic/Cargo.toml | 6 +- .../resources/mdtest/assignment/augmented.md | 2 +- .../resources/mdtest/attributes.md | 35 ++ .../resources/mdtest/binary/instances.md | 6 +- .../resources/mdtest/mro.md | 409 ++++++++++++++ .../mdtest/scopes/moduletype_attrs.md | 9 +- .../resources/mdtest/stubs/class.md | 9 +- crates/red_knot_python_semantic/src/types.rs | 151 +++-- .../src/types/infer.rs | 137 +++-- .../red_knot_python_semantic/src/types/mro.rs | 518 ++++++++++++++++++ crates/ruff_benchmark/benches/red_knot.rs | 1 + 13 files changed, 1171 insertions(+), 120 deletions(-) create mode 100644 crates/red_knot_python_semantic/resources/mdtest/mro.md create mode 100644 crates/red_knot_python_semantic/src/types/mro.rs diff --git a/Cargo.lock b/Cargo.lock index 4574e7cb9f5c99..8c287a07cb7597 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1162,12 +1162,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.14.5", + "hashbrown 0.15.0", "serde", ] @@ -2112,6 +2112,7 @@ dependencies = [ "countme", "dir-test", "hashbrown 0.15.0", + "indexmap", "insta", "itertools 0.13.0", "memchr", diff --git a/Cargo.toml b/Cargo.toml index d50490971156ef..1002410da691ae 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -81,6 +81,7 @@ hashbrown = { version = "0.15.0", default-features = false, features = [ ignore = { version = "0.4.22" } imara-diff = { version = "0.1.5" } imperative = { version = "1.0.4" } +indexmap = {version = "2.6.0" } indicatif = { version = "0.17.8" } indoc = { version = "2.0.4" } insta = { version = "1.35.1" } diff --git a/crates/red_knot_python_semantic/Cargo.toml b/crates/red_knot_python_semantic/Cargo.toml index 27b7684544ec8b..ab4d63ddb225fa 100644 --- a/crates/red_knot_python_semantic/Cargo.toml +++ b/crates/red_knot_python_semantic/Cargo.toml @@ -24,7 +24,8 @@ bitflags = { workspace = true } camino = { workspace = true } compact_str = { workspace = true } countme = { workspace = true } -itertools = { workspace = true} +indexmap = { workspace = true } +itertools = { workspace = true } ordermap = { workspace = true } salsa = { workspace = true } thiserror = { workspace = true } @@ -43,10 +44,9 @@ red_knot_test = { workspace = true } red_knot_vendored = { workspace = true } anyhow = { workspace = true } -dir-test = {workspace = true} +dir-test = { workspace = true } insta = { workspace = true } tempfile = { workspace = true } [lints] workspace = true - diff --git a/crates/red_knot_python_semantic/resources/mdtest/assignment/augmented.md b/crates/red_knot_python_semantic/resources/mdtest/assignment/augmented.md index dd96fdc8193be0..1e28506e1f5d3b 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/assignment/augmented.md +++ b/crates/red_knot_python_semantic/resources/mdtest/assignment/augmented.md @@ -85,7 +85,7 @@ f = Foo() # that `Foo.__iadd__` may be unbound as additional context. f += "Hello, world!" -reveal_type(f) # revealed: int | @Todo +reveal_type(f) # revealed: int | Unknown ``` ## Partially bound with `__add__` diff --git a/crates/red_knot_python_semantic/resources/mdtest/attributes.md b/crates/red_knot_python_semantic/resources/mdtest/attributes.md index 3278a7e48a2d79..f0ecdff1d551c6 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/attributes.md +++ b/crates/red_knot_python_semantic/resources/mdtest/attributes.md @@ -18,3 +18,38 @@ else: reveal_type(C.x) # revealed: Literal[1, 2] ``` + +## Inherited attributes + +```py +class A: + X = "foo" + +class B(A): ... +class C(B): ... + +reveal_type(C.X) # revealed: Literal["foo"] +``` + +## Inherited attributes (multiple inheritance) + +```py +class O: ... + +class F(O): + X = 56 + +class E(O): + X = 42 + +class D(O): ... +class C(D, F): ... +class B(E, D): ... +class A(B, C): ... + +# revealed: tuple[Literal[A], Literal[B], Literal[E], Literal[C], Literal[D], Literal[F], Literal[O], Literal[object]] +reveal_type(A.__mro__) + +# `E` is earlier in the MRO than `F`, so we should use the type of `E.X` +reveal_type(A.X) # revealed: Literal[42] +``` diff --git a/crates/red_knot_python_semantic/resources/mdtest/binary/instances.md b/crates/red_knot_python_semantic/resources/mdtest/binary/instances.md index c2f8f2f1a844ca..efc619132cbba5 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/binary/instances.md +++ b/crates/red_knot_python_semantic/resources/mdtest/binary/instances.md @@ -202,11 +202,7 @@ reveal_type(A() + B()) # revealed: MyString # N.B. Still a subtype of `A`, even though `A` does not appear directly in the class's `__bases__` class C(B): ... -# TODO: we currently only understand direct subclasses as subtypes of the superclass. -# We need to iterate through the full MRO rather than just the class's bases; -# if we do, we'll understand `C` as a subtype of `A`, and correctly understand this as being -# `MyString` rather than `str` -reveal_type(A() + C()) # revealed: str +reveal_type(A() + C()) # revealed: MyString ``` ## Reflected precedence 2 diff --git a/crates/red_knot_python_semantic/resources/mdtest/mro.md b/crates/red_knot_python_semantic/resources/mdtest/mro.md new file mode 100644 index 00000000000000..2dc5ff0d5d5617 --- /dev/null +++ b/crates/red_knot_python_semantic/resources/mdtest/mro.md @@ -0,0 +1,409 @@ +# Method Resolution Order tests + +Tests that assert that we can infer the correct type for a class's `__mro__` attribute. + +This attribute is rarely accessed directly at runtime. However, it's extremely important for *us* to +know the precise possible values of a class's Method Resolution Order, or we won't be able to infer +the correct type of attributes accessed from instances. + +For documentation on method resolution orders, see: + +- +- + +## No bases + +```py +class C: ... + +reveal_type(C.__mro__) # revealed: tuple[Literal[C], Literal[object]] +``` + +## The special case: `object` itself + +```py +reveal_type(object.__mro__) # revealed: tuple[Literal[object]] +``` + +## Explicit inheritance from `object` + +```py +class C(object): ... + +reveal_type(C.__mro__) # revealed: tuple[Literal[C], Literal[object]] +``` + +## Explicit inheritance from non-`object` single base + +```py +class A: ... +class B(A): ... + +reveal_type(B.__mro__) # revealed: tuple[Literal[B], Literal[A], Literal[object]] +``` + +## Linearization of multiple bases + +```py +class A: ... +class B: ... +class C(A, B): ... + +reveal_type(C.__mro__) # revealed: tuple[Literal[C], Literal[A], Literal[B], Literal[object]] +``` + +## Complex diamond inheritance (1) + +This is "ex_2" from + +```py +class O: ... +class X(O): ... +class Y(O): ... +class A(X, Y): ... +class B(Y, X): ... + +reveal_type(A.__mro__) # revealed: tuple[Literal[A], Literal[X], Literal[Y], Literal[O], Literal[object]] +reveal_type(B.__mro__) # revealed: tuple[Literal[B], Literal[Y], Literal[X], Literal[O], Literal[object]] +``` + +## Complex diamond inheritance (2) + +This is "ex_5" from + +```py +class O: ... +class F(O): ... +class E(O): ... +class D(O): ... +class C(D, F): ... +class B(D, E): ... +class A(B, C): ... + +# revealed: tuple[Literal[C], Literal[D], Literal[F], Literal[O], Literal[object]] +reveal_type(C.__mro__) +# revealed: tuple[Literal[B], Literal[D], Literal[E], Literal[O], Literal[object]] +reveal_type(B.__mro__) +# revealed: tuple[Literal[A], Literal[B], Literal[C], Literal[D], Literal[E], Literal[F], Literal[O], Literal[object]] +reveal_type(A.__mro__) +``` + +## Complex diamond inheritance (3) + +This is "ex_6" from + +```py +class O: ... +class F(O): ... +class E(O): ... +class D(O): ... +class C(D, F): ... +class B(E, D): ... +class A(B, C): ... + +# revealed: tuple[Literal[C], Literal[D], Literal[F], Literal[O], Literal[object]] +reveal_type(C.__mro__) +# revealed: tuple[Literal[B], Literal[E], Literal[D], Literal[O], Literal[object]] +reveal_type(B.__mro__) +# revealed: tuple[Literal[A], Literal[B], Literal[E], Literal[C], Literal[D], Literal[F], Literal[O], Literal[object]] +reveal_type(A.__mro__) +``` + +## Complex diamond inheritance (4) + +This is "ex_9" from + +```py +class O: ... +class A(O): ... +class B(O): ... +class C(O): ... +class D(O): ... +class E(O): ... +class K1(A, B, C): ... +class K2(D, B, E): ... +class K3(D, A): ... +class Z(K1, K2, K3): ... + +# revealed: tuple[Literal[K1], Literal[A], Literal[B], Literal[C], Literal[O], Literal[object]] +reveal_type(K1.__mro__) +# revealed: tuple[Literal[K2], Literal[D], Literal[B], Literal[E], Literal[O], Literal[object]] +reveal_type(K2.__mro__) +# revealed: tuple[Literal[K3], Literal[D], Literal[A], Literal[O], Literal[object]] +reveal_type(K3.__mro__) +# revealed: tuple[Literal[Z], Literal[K1], Literal[K2], Literal[K3], Literal[D], Literal[A], Literal[B], Literal[C], Literal[E], Literal[O], Literal[object]] +reveal_type(Z.__mro__) +``` + +## Inheritance from `Unknown` + +```py +from does_not_exist import DoesNotExist # error: [unresolved-import] + +class A(DoesNotExist): ... +class B: ... +class C: ... +class D(A, B, C): ... +class E(B, C): ... +class F(E, A): ... + +reveal_type(A.__mro__) # revealed: tuple[Literal[A], Unknown, Literal[object]] +reveal_type(D.__mro__) # revealed: tuple[Literal[D], Literal[A], Unknown, Literal[B], Literal[C], Literal[object]] +reveal_type(E.__mro__) # revealed: tuple[Literal[E], Literal[B], Literal[C], Literal[object]] +reveal_type(F.__mro__) # revealed: tuple[Literal[F], Literal[E], Literal[B], Literal[C], Literal[A], Unknown, Literal[object]] +``` + +## `__bases__` lists that cause errors at runtime + +If the class's `__bases__` cause an exception to be raised at runtime and therefore the class +creation to fail, we infer the class's `__mro__` as being `[, Unknown, object]`: + +```py +# error: [inconsistent-mro] "Cannot create a consistent method resolution order (MRO) for class `Foo` with bases list `[, ]`" +class Foo(object, int): ... + +reveal_type(Foo.__mro__) # revealed: tuple[Literal[Foo], Unknown, Literal[object]] + +class Bar(Foo): ... + +reveal_type(Bar.__mro__) # revealed: tuple[Literal[Bar], Literal[Foo], Unknown, Literal[object]] + +# This is the `TypeError` at the bottom of "ex_2" +# in the examples at +class O: ... +class X(O): ... +class Y(O): ... +class A(X, Y): ... +class B(Y, X): ... + +reveal_type(A.__mro__) # revealed: tuple[Literal[A], Literal[X], Literal[Y], Literal[O], Literal[object]] +reveal_type(B.__mro__) # revealed: tuple[Literal[B], Literal[Y], Literal[X], Literal[O], Literal[object]] + +# error: [inconsistent-mro] "Cannot create a consistent method resolution order (MRO) for class `Z` with bases list `[, ]`" +class Z(A, B): ... + +reveal_type(Z.__mro__) # revealed: tuple[Literal[Z], Unknown, Literal[object]] + +class AA(Z): ... + +reveal_type(AA.__mro__) # revealed: tuple[Literal[AA], Literal[Z], Unknown, Literal[object]] +``` + +## `__bases__` includes a `Union` + +We don't support union types in a class's bases; a base must resolve to a single `ClassLiteralType`. +If we find a union type in a class's bases, we infer the class's `__mro__` as being +`[, Unknown, object]`, the same as for MROs that cause errors at runtime. + +```py +def returns_bool() -> bool: + return True + +class A: ... +class B: ... + +if returns_bool(): + x = A +else: + x = B + +reveal_type(x) # revealed: Literal[A, B] + +# error: 11 [invalid-base] "Invalid class base with type `Literal[A, B]` (all bases must be a class, `Any`, `Unknown` or `Todo`)" +class Foo(x): ... + +reveal_type(Foo.__mro__) # revealed: tuple[Literal[Foo], Unknown, Literal[object]] +``` + +## `__bases__` includes multiple `Union`s + +```py +def returns_bool() -> bool: + return True + +class A: ... +class B: ... +class C: ... +class D: ... + +if returns_bool(): + x = A +else: + x = B + +if returns_bool(): + y = C +else: + y = D + +reveal_type(x) # revealed: Literal[A, B] +reveal_type(y) # revealed: Literal[C, D] + +# error: 11 [invalid-base] "Invalid class base with type `Literal[A, B]` (all bases must be a class, `Any`, `Unknown` or `Todo`)" +# error: 14 [invalid-base] "Invalid class base with type `Literal[C, D]` (all bases must be a class, `Any`, `Unknown` or `Todo`)" +class Foo(x, y): ... + +reveal_type(Foo.__mro__) # revealed: tuple[Literal[Foo], Unknown, Literal[object]] +``` + +## `__bases__` lists that cause errors... now with `Union`s + +```py +def returns_bool() -> bool: + return True + +class O: ... +class X(O): ... +class Y(O): ... + +if bool(): + foo = Y +else: + foo = object + +# error: 21 [invalid-base] "Invalid class base with type `Literal[Y, object]` (all bases must be a class, `Any`, `Unknown` or `Todo`)" +class PossibleError(foo, X): ... + +reveal_type(PossibleError.__mro__) # revealed: tuple[Literal[PossibleError], Unknown, Literal[object]] + +class A(X, Y): ... + +reveal_type(A.__mro__) # revealed: tuple[Literal[A], Literal[X], Literal[Y], Literal[O], Literal[object]] + +if returns_bool(): + class B(X, Y): ... + +else: + class B(Y, X): ... + +# revealed: tuple[Literal[B], Literal[X], Literal[Y], Literal[O], Literal[object]] | tuple[Literal[B], Literal[Y], Literal[X], Literal[O], Literal[object]] +reveal_type(B.__mro__) + +# error: 12 [invalid-base] "Invalid class base with type `Literal[B, B]` (all bases must be a class, `Any`, `Unknown` or `Todo`)" +class Z(A, B): ... + +reveal_type(Z.__mro__) # revealed: tuple[Literal[Z], Unknown, Literal[object]] +``` + +## `__bases__` lists with duplicate bases + +```py +class Foo(str, str): ... # error: 16 [duplicate-base] "Duplicate base class `str`" + +reveal_type(Foo.__mro__) # revealed: tuple[Literal[Foo], Unknown, Literal[object]] + +class Spam: ... +class Eggs: ... +class Ham( + Spam, + Eggs, + Spam, # error: [duplicate-base] "Duplicate base class `Spam`" + Eggs, # error: [duplicate-base] "Duplicate base class `Eggs`" +): ... + +reveal_type(Ham.__mro__) # revealed: tuple[Literal[Ham], Unknown, Literal[object]] + +class Mushrooms: ... +class Omelette(Spam, Eggs, Mushrooms, Mushrooms): ... # error: [duplicate-base] + +reveal_type(Omelette.__mro__) # revealed: tuple[Literal[Omelette], Unknown, Literal[object]] +``` + +## `__bases__` lists with duplicate `Unknown` bases + +```py +# error: [unresolved-import] +# error: [unresolved-import] +from does_not_exist import unknown_object_1, unknown_object_2 + +reveal_type(unknown_object_1) # revealed: Unknown +reveal_type(unknown_object_2) # revealed: Unknown + +# We *should* emit an error here to warn the user that we have no idea +# what the MRO of this class should really be. +# However, we don't complain about "duplicate base classes" here, +# even though two classes are both inferred as being `Unknown`. +# +# (TODO: should we revisit this? Does it violate the gradual guarantee? +# Should we just silently infer `[Foo, Unknown, object]` as the MRO here +# without emitting any error at all? Not sure...) +# +# error: [inconsistent-mro] "Cannot create a consistent method resolution order (MRO) for class `Foo` with bases list `[Unknown, Unknown]`" +class Foo(unknown_object_1, unknown_object_2): ... + +reveal_type(Foo.__mro__) # revealed: tuple[Literal[Foo], Unknown, Literal[object]] +``` + +## Unrelated objects inferred as `Any`/`Unknown` do not have special `__mro__` attributes + +```py +from does_not_exist import unknown_object # error: [unresolved-import] + +reveal_type(unknown_object) # revealed: Unknown +reveal_type(unknown_object.__mro__) # revealed: Unknown +``` + +## Classes that inherit from themselves + +These are invalid, but we need to be able to handle them gracefully without panicking. + +```py path=a.pyi +class Foo(Foo): ... # error: [cyclic-class-def] + +reveal_type(Foo) # revealed: Literal[Foo] +reveal_type(Foo.__mro__) # revealed: tuple[Literal[Foo], Unknown, Literal[object]] + +class Bar: ... +class Baz: ... +class Boz(Bar, Baz, Boz): ... # error: [cyclic-class-def] + +reveal_type(Boz) # revealed: Literal[Boz] +reveal_type(Boz.__mro__) # revealed: tuple[Literal[Boz], Unknown, Literal[object]] +``` + +## Classes with indirect cycles in their MROs + +These are similarly unlikely, but we still shouldn't crash: + +```py path=a.pyi +class Foo(Bar): ... # error: [cyclic-class-def] +class Bar(Baz): ... # error: [cyclic-class-def] +class Baz(Foo): ... # error: [cyclic-class-def] + +reveal_type(Foo.__mro__) # revealed: tuple[Literal[Foo], Unknown, Literal[object]] +reveal_type(Bar.__mro__) # revealed: tuple[Literal[Bar], Unknown, Literal[object]] +reveal_type(Baz.__mro__) # revealed: tuple[Literal[Baz], Unknown, Literal[object]] +``` + +## Classes with cycles in their MROs, and multiple inheritance + +```py path=a.pyi +class Spam: ... +class Foo(Bar): ... # error: [cyclic-class-def] +class Bar(Baz): ... # error: [cyclic-class-def] +class Baz(Foo, Spam): ... # error: [cyclic-class-def] + +reveal_type(Foo.__mro__) # revealed: tuple[Literal[Foo], Unknown, Literal[object]] +reveal_type(Bar.__mro__) # revealed: tuple[Literal[Bar], Unknown, Literal[object]] +reveal_type(Baz.__mro__) # revealed: tuple[Literal[Baz], Unknown, Literal[object]] +``` + +## Classes with cycles in their MRO, and a sub-graph + +```py path=a.pyi +class FooCycle(BarCycle): ... # error: [cyclic-class-def] +class Foo: ... +class BarCycle(FooCycle): ... # error: [cyclic-class-def] +class Bar(Foo): ... + +# TODO: can we avoid emitting the errors for these? +# The classes have cyclic superclasses, +# but are not themselves cyclic... +class Baz(Bar, BarCycle): ... # error: [cyclic-class-def] +class Spam(Baz): ... # error: [cyclic-class-def] + +reveal_type(FooCycle.__mro__) # revealed: tuple[Literal[FooCycle], Unknown, Literal[object]] +reveal_type(BarCycle.__mro__) # revealed: tuple[Literal[BarCycle], Unknown, Literal[object]] +reveal_type(Baz.__mro__) # revealed: tuple[Literal[Baz], Unknown, Literal[object]] +reveal_type(Spam.__mro__) # revealed: tuple[Literal[Spam], Unknown, Literal[object]] +``` diff --git a/crates/red_knot_python_semantic/resources/mdtest/scopes/moduletype_attrs.md b/crates/red_knot_python_semantic/resources/mdtest/scopes/moduletype_attrs.md index f854018156df3b..0763c129f0d15b 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/scopes/moduletype_attrs.md +++ b/crates/red_knot_python_semantic/resources/mdtest/scopes/moduletype_attrs.md @@ -58,11 +58,10 @@ reveal_type(typing.__name__) # revealed: str reveal_type(typing.__init__) # revealed: Literal[__init__] # These come from `builtins.object`, not `types.ModuleType`: -# TODO: we don't currently understand `types.ModuleType` as inheriting from `object`; -# these should not reveal `Unknown`: -reveal_type(typing.__eq__) # revealed: Unknown -reveal_type(typing.__class__) # revealed: Unknown -reveal_type(typing.__module__) # revealed: Unknown +reveal_type(typing.__eq__) # revealed: Literal[__eq__] + +# TODO: understand properties +reveal_type(typing.__class__) # revealed: Literal[__class__] # TODO: needs support for attribute access on instances, properties and generics; # should be `dict[str, Any]` diff --git a/crates/red_knot_python_semantic/resources/mdtest/stubs/class.md b/crates/red_knot_python_semantic/resources/mdtest/stubs/class.md index fc5ddc8b43013b..63d60c4d5dac1a 100644 --- a/crates/red_knot_python_semantic/resources/mdtest/stubs/class.md +++ b/crates/red_knot_python_semantic/resources/mdtest/stubs/class.md @@ -6,7 +6,12 @@ In type stubs, classes can reference themselves in their base class definitions. `typeshed`, we have `class str(Sequence[str]): ...`. ```py path=a.pyi -class C(C): ... +class Foo[T]: ... -reveal_type(C) # revealed: Literal[C] +# TODO: actually is subscriptable +# error: [non-subscriptable] +class Bar(Foo[Bar]): ... + +reveal_type(Bar) # revealed: Literal[Bar] +reveal_type(Bar.__mro__) # revealed: tuple[Literal[Bar], Unknown, Literal[object]] ``` diff --git a/crates/red_knot_python_semantic/src/types.rs b/crates/red_knot_python_semantic/src/types.rs index fd4d34e96843d9..1703518d0ab0f4 100644 --- a/crates/red_knot_python_semantic/src/types.rs +++ b/crates/red_knot_python_semantic/src/types.rs @@ -1,6 +1,9 @@ +use mro::{ClassBase, Mro, MroError, MroIterator}; use ruff_db::files::File; use ruff_python_ast as ast; +use itertools::Itertools; + use crate::module_resolver::file_to_module; use crate::semantic_index::ast_ids::HasScopedAstId; use crate::semantic_index::definition::{Definition, DefinitionKind}; @@ -26,6 +29,7 @@ mod builder; mod diagnostic; mod display; mod infer; +mod mro; mod narrow; mod unpacker; @@ -1808,12 +1812,11 @@ pub struct ClassType<'db> { known: Option, } +#[salsa::tracked] impl<'db> ClassType<'db> { + /// Return `true` if this class represents `known_class` pub fn is_known(self, db: &'db dyn Db, known_class: KnownClass) -> bool { - match self.known(db) { - Some(known) => known == known_class, - None => false, - } + self.known(db) == Some(known_class) } /// Return true if this class is a standard library type with given module name and name. @@ -1824,70 +1827,122 @@ impl<'db> ClassType<'db> { }) } - /// Return an iterator over the types of this class's bases. + /// Return an iterator over the inferred types of this class's *explicit* bases. /// - /// # Panics: - /// If `definition` is not a `DefinitionKind::Class`. - pub fn bases(&self, db: &'db dyn Db) -> impl Iterator> { + /// Note that any class (except for `object`) that has no explicit + /// bases will implicitly inherit from `object` at runtime. Nonetheless, + /// this method does *not* include `object` in the bases it iterates over. + fn explicit_bases(self, db: &'db dyn Db) -> impl Iterator> { let definition = self.definition(db); - let DefinitionKind::Class(class_stmt_node) = definition.kind(db) else { - panic!("Class type definition must have DefinitionKind::Class"); - }; - class_stmt_node + let class_stmt = self.node(db); + let has_type_params = class_stmt.type_params.is_some(); + + class_stmt .bases() .iter() - .map(move |base_expr: &ast::Expr| { - if class_stmt_node.type_params.is_some() { - // when we have a specialized scope, we'll look up the inference - // within that scope - let model: SemanticModel<'db> = SemanticModel::new(db, definition.file(db)); - base_expr.ty(&model) - } else { - // Otherwise, we can do the lookup based on the definition scope - definition_expression_ty(db, definition, base_expr) - } - }) + .map(move |base_node| infer_class_base_type(db, base_node, definition, has_type_params)) + } + + /// Return the original [`ast::StmtClassDef`] node associated with this class + fn node(self, db: &'db dyn Db) -> &'db ast::StmtClassDef { + match self.definition(db).kind(db) { + DefinitionKind::Class(class_stmt_node) => class_stmt_node, + _ => unreachable!("Class type definition should always have DefinitionKind::Class"), + } } + /// Attempt to resolve the [method resolution order] ("MRO") for this class. + /// If the MRO is unresolvable, return an error indicating why the class's MRO + /// cannot be accurately determined. The error returned contains a fallback MRO + /// that will be used instead for the purposes of type inference. + /// + /// The MRO is the tuple of classes that can be retrieved as the `__mro__` + /// attribute on a class at runtime. + /// + /// [method resolution order]: https://docs.python.org/3/glossary.html#term-method-resolution-order + #[salsa::tracked(return_ref)] + fn try_mro(self, db: &'db dyn Db) -> Result, MroError<'db>> { + Mro::of_class(db, self) + } + + /// Iterate over the [method resolution order] ("MRO") of the class. + /// + /// If the MRO could not be accurately resolved, this method falls back to iterating + /// over an MRO that has the class directly inheriting from `Unknown`. Use + /// [`ClassType::try_mro`] if you need to distinguish between the success and failure + /// cases rather than simply iterating over the inferred resolution order for the class. + /// + /// [method resolution order]: https://docs.python.org/3/glossary.html#term-method-resolution-order + fn iter_mro(self, db: &'db dyn Db) -> impl Iterator> { + MroIterator::new(db, self) + } + + /// Return `true` if `other` is present in this class's MRO. pub fn is_subclass_of(self, db: &'db dyn Db, other: ClassType) -> bool { - // TODO: we need to iterate over the *MRO* here, not the bases - (other == self) - || self.bases(db).any(|base| match base { - Type::ClassLiteral(base_class) => base_class == other, - // `is_subclass_of` is checking the subtype relation, in which gradual types do not - // participate, so we should not return `True` if we find `Any/Unknown` in the - // bases. - _ => false, - }) + // `is_subclass_of` is checking the subtype relation, in which gradual types do not + // participate, so we should not return `True` if we find `Any/Unknown` in the MRO. + self.iter_mro(db).contains(&ClassBase::Class(other)) } /// Returns the class member of this class named `name`. /// - /// The member resolves to a member of the class itself or any of its bases. + /// The member resolves to a member on the class itself or any of its proper superclasses. pub(crate) fn class_member(self, db: &'db dyn Db, name: &str) -> Symbol<'db> { - let member = self.own_class_member(db, name); - if !member.is_unbound() { - return member; + if name == "__mro__" { + let tuple_elements: Box<_> = self.iter_mro(db).map(Type::from).collect(); + return Symbol::Type( + Type::Tuple(TupleType::new(db, tuple_elements)), + Boundness::Bound, + ); } - self.inherited_class_member(db, name) + for superclass in self.iter_mro(db) { + match superclass { + // TODO we may instead want to record the fact that we encountered dynamic, and intersect it with + // the type found on the next "real" class. + ClassBase::Any | ClassBase::Unknown | ClassBase::Todo => { + return Type::from(superclass).member(db, name) + } + ClassBase::Class(class) => { + let member = class.own_class_member(db, name); + if !member.is_unbound() { + return member; + } + } + } + } + + Symbol::Unbound } /// Returns the inferred type of the class member named `name`. + /// + /// Returns [`Symbol::Unbound`] if `name` cannot be found in this class's scope + /// directly. Use [`ClassType::class_member`] if you require a method that will + /// traverse through the MRO until it finds the member. pub(crate) fn own_class_member(self, db: &'db dyn Db, name: &str) -> Symbol<'db> { let scope = self.body_scope(db); symbol(db, scope, name) } +} - pub(crate) fn inherited_class_member(self, db: &'db dyn Db, name: &str) -> Symbol<'db> { - for base in self.bases(db) { - let member = base.member(db, name); - if !member.is_unbound() { - return member; - } - } - - Symbol::Unbound +/// Infer the type of a node representing an explicit class base. +/// +/// For example, infer the type of `Foo` in the statement `class Bar(Foo, Baz): ...`. +fn infer_class_base_type<'db>( + db: &'db dyn Db, + base_node: &'db ast::Expr, + class_definition: Definition<'db>, + class_has_type_params: bool, +) -> Type<'db> { + if class_has_type_params { + // when we have a specialized scope, we'll look up the inference + // within that scope + let model = SemanticModel::new(db, class_definition.file(db)); + base_node.ty(&model) + } else { + // Otherwise, we can do the lookup based on the definition scope + definition_expression_ty(db, class_definition, base_node) } } @@ -2131,6 +2186,10 @@ mod tests { #[test_case(Ty::Tuple(vec![Ty::IntLiteral(42), Ty::StringLiteral("foo")]), Ty::Tuple(vec![Ty::BuiltinInstance("int"), Ty::BuiltinInstance("str")]))] #[test_case(Ty::Tuple(vec![Ty::BuiltinInstance("int"), Ty::StringLiteral("foo")]), Ty::Tuple(vec![Ty::BuiltinInstance("int"), Ty::BuiltinInstance("str")]))] #[test_case(Ty::Tuple(vec![Ty::IntLiteral(42), Ty::BuiltinInstance("str")]), Ty::Tuple(vec![Ty::BuiltinInstance("int"), Ty::BuiltinInstance("str")]))] + #[test_case( + Ty::BuiltinInstance("FloatingPointError"), + Ty::BuiltinInstance("Exception") + )] fn is_subtype_of(from: Ty, to: Ty) { let db = setup_db(); assert!(from.into_type(&db).is_subtype_of(&db, to.into_type(&db))); diff --git a/crates/red_knot_python_semantic/src/types/infer.rs b/crates/red_knot_python_semantic/src/types/infer.rs index 43ad1ab30cb7b4..c7838114e11067 100644 --- a/crates/red_knot_python_semantic/src/types/infer.rs +++ b/crates/red_knot_python_semantic/src/types/infer.rs @@ -62,6 +62,8 @@ use crate::unpack::Unpack; use crate::util::subscript::{PyIndex, PySlice}; use crate::Db; +use super::mro::MroErrorKind; + /// Infer all types for a [`ScopeId`], including all definitions and expressions in that scope. /// Use when checking a scope, or needing to provide a type for an arbitrary expression in the /// scope. @@ -431,19 +433,83 @@ impl<'db> TypeInferenceBuilder<'db> { } if self.types.has_deferred { - let mut deferred_expression_types: FxHashMap> = - FxHashMap::default(); // invariant: only annotations and base classes are deferred, and both of these only // occur within a declaration (annotated assignment, function or class definition) for definition in self.types.declarations.keys() { if infer_definition_types(self.db, *definition).has_deferred { let deferred = infer_deferred_types(self.db, *definition); - deferred_expression_types.extend(deferred.expressions.iter()); + self.types.expressions.extend(&deferred.expressions); + self.diagnostics.extend(&deferred.diagnostics); + } + } + } + + self.check_class_definitions(); + } + + /// Iterate over all class definitions to check that Python will be able to create a + /// consistent "[method resolution order]" for each class at runtime. If not, issue a diagnostic. + /// + /// [method resolution order]: https://docs.python.org/3/glossary.html#term-method-resolution-order + fn check_class_definitions(&mut self) { + let class_definitions = self + .types + .declarations + .values() + .filter_map(|ty| ty.into_class_literal_type()); + + let invalid_mros = class_definitions.filter_map(|class| { + class + .try_mro(self.db) + .as_ref() + .err() + .map(|mro_error| (class, mro_error)) + }); + + for (class, mro_error) in invalid_mros { + match mro_error.reason() { + MroErrorKind::DuplicateBases(duplicates) => { + let base_nodes = class.node(self.db).bases(); + for (index, duplicate) in duplicates { + self.diagnostics.add( + (&base_nodes[*index]).into(), + "duplicate-base", + format_args!("Duplicate base class `{}`", duplicate.name(self.db)) + ); + } } + MroErrorKind::CyclicClassDefinition => self.diagnostics.add( + class.node(self.db).into(), + "cyclic-class-def", + format_args!( + "Cyclic definition of `{}` or bases of `{}` (class cannot inherit from itself)", + class.name(self.db), + class.name(self.db) + ) + ), + MroErrorKind::InvalidBases(bases) => { + let base_nodes = class.node(self.db).bases(); + for (index, base_ty) in bases { + self.diagnostics.add( + (&base_nodes[*index]).into(), + "invalid-base", + format_args!( + "Invalid class base with type `{}` (all bases must be a class, `Any`, `Unknown` or `Todo`)", + base_ty.display(self.db) + ) + ); + } + }, + MroErrorKind::UnresolvableMro{bases_list} => self.diagnostics.add( + class.node(self.db).into(), + "inconsistent-mro", + format_args!( + "Cannot create a consistent method resolution order (MRO) for class `{}` with bases list `[{}]`", + class.name(self.db), + bases_list.iter().map(|base| base.display(self.db)).join(", ") + ) + ) } - self.types - .expressions - .extend(deferred_expression_types.iter()); } } @@ -4154,9 +4220,7 @@ mod tests { use crate::semantic_index::definition::Definition; use crate::semantic_index::symbol::FileScopeId; use crate::semantic_index::{global_scope, semantic_index, symbol_table, use_def_map}; - use crate::types::{ - check_types, global_symbol, infer_definition_types, symbol, TypeCheckDiagnostics, - }; + use crate::types::check_types; use crate::{HasTy, ProgramSettings, SemanticModel}; use ruff_db::files::{system_path_to_file, File}; use ruff_db::parsed::parsed_module; @@ -4164,7 +4228,7 @@ mod tests { use ruff_db::testing::assert_function_query_was_not_run; use ruff_python_ast::name::Name; - use super::TypeInferenceBuilder; + use super::*; fn setup_db() -> TestDb { let db = TestDb::new(); @@ -4276,36 +4340,6 @@ mod tests { Ok(()) } - #[test] - fn resolve_base_class_by_name() -> anyhow::Result<()> { - let mut db = setup_db(); - - db.write_dedented( - "src/mod.py", - " - class Base: - pass - - class Sub(Base): - pass - ", - )?; - - let mod_file = system_path_to_file(&db, "src/mod.py").expect("file to exist"); - let ty = global_symbol(&db, mod_file, "Sub").expect_type(); - - let class = ty.expect_class_literal(); - - let base_names: Vec<_> = class - .bases(&db) - .map(|base_ty| format!("{}", base_ty.display(&db))) - .collect(); - - assert_eq!(base_names, vec!["Literal[Base]"]); - - Ok(()) - } - #[test] fn resolve_method() -> anyhow::Result<()> { let mut db = setup_db(); @@ -4534,13 +4568,13 @@ mod tests { let a = system_path_to_file(&db, "src/a.py").expect("file to exist"); let c_ty = global_symbol(&db, a, "C").expect_type(); let c_class = c_ty.expect_class_literal(); - let mut c_bases = c_class.bases(&db); - let b_ty = c_bases.next().unwrap(); - let b_class = b_ty.expect_class_literal(); + let mut c_mro = c_class.iter_mro(&db); + let b_ty = c_mro.nth(1).unwrap(); + let b_class = b_ty.expect_class(); assert_eq!(b_class.name(&db), "B"); - let mut b_bases = b_class.bases(&db); - let a_ty = b_bases.next().unwrap(); - let a_class = a_ty.expect_class_literal(); + let mut b_mro = b_class.iter_mro(&db); + let a_ty = b_mro.nth(1).unwrap(); + let a_class = a_ty.expect_class(); assert_eq!(a_class.name(&db), "A"); Ok(()) @@ -4689,15 +4723,8 @@ mod tests { db.write_file("/src/a.pyi", "class C(object): pass")?; let file = system_path_to_file(&db, "/src/a.pyi").unwrap(); let ty = global_symbol(&db, file, "C").expect_type(); - - let base = ty - .expect_class_literal() - .bases(&db) - .next() - .expect("there should be at least one base"); - - assert_eq!(base.display(&db).to_string(), "Literal[object]"); - + let base = ty.expect_class_literal().iter_mro(&db).nth(1).unwrap(); + assert_eq!(base.display(&db).to_string(), ""); Ok(()) } diff --git a/crates/red_knot_python_semantic/src/types/mro.rs b/crates/red_knot_python_semantic/src/types/mro.rs new file mode 100644 index 00000000000000..ad8d991d66f52b --- /dev/null +++ b/crates/red_knot_python_semantic/src/types/mro.rs @@ -0,0 +1,518 @@ +use std::collections::VecDeque; +use std::ops::Deref; + +use indexmap::IndexSet; +use itertools::Either; +use rustc_hash::FxHashSet; + +use ruff_python_ast as ast; + +use super::{infer_class_base_type, ClassType, KnownClass, Type}; +use crate::semantic_index::definition::Definition; +use crate::Db; + +/// The inferred method resolution order of a given class. +/// +/// See [`ClassType::iter_mro`] for more details. +#[derive(PartialEq, Eq, Clone, Debug)] +pub(super) struct Mro<'db>(Box<[ClassBase<'db>]>); + +impl<'db> Mro<'db> { + /// Attempt to resolve the MRO of a given class + /// + /// In the event that a possible list of bases would (or could) lead to a + /// `TypeError` being raised at runtime due to an unresolvable MRO, we infer + /// the MRO of the class as being `[, Unknown, object]`. + /// This seems most likely to reduce the possibility of cascading errors + /// elsewhere. + /// + /// (We emit a diagnostic warning about the runtime `TypeError` in + /// [`super::infer::TypeInferenceBuilder::infer_region_scope`].) + pub(super) fn of_class(db: &'db dyn Db, class: ClassType<'db>) -> Result> { + Self::of_class_impl(db, class).map_err(|error_kind| { + let fallback_mro = Self::from([ + ClassBase::Class(class), + ClassBase::Unknown, + ClassBase::object(db), + ]); + MroError { + kind: error_kind, + fallback_mro, + } + }) + } + + fn of_class_impl(db: &'db dyn Db, class: ClassType<'db>) -> Result> { + let class_stmt_node = class.node(db); + let class_bases = class_stmt_node.bases(); + + match class_bases { + // `builtins.object` is the special case: + // the only class in Python that has an MRO with length <2 + [] if class.is_known(db, KnownClass::Object) => { + Ok(Self::from([ClassBase::Class(class)])) + } + + // All other classes in Python have an MRO with length >=2. + // Even if a class has no explicit base classes, + // it will implicitly inherit from `object` at runtime; + // `object` will appear in the class's `__bases__` list and `__mro__`: + // + // ```pycon + // >>> class Foo: ... + // ... + // >>> Foo.__bases__ + // (,) + // >>> Foo.__mro__ + // (, ) + // ``` + [] => Ok(Self::from([ClassBase::Class(class), ClassBase::object(db)])), + + // Fast path for a class that has only a single explicit base. + // + // This *could* theoretically be handled by the final branch below, + // but it's a common case (i.e., worth optimizing for), + // and the `c3_merge` function requires lots of allocations. + [single_base_node] => { + let single_base = ClassBase::try_from_node( + db, + single_base_node, + class.definition(db), + class_stmt_node.type_params.is_some(), + ); + single_base.map_or_else( + |invalid_base_ty| { + let bases_info = Box::from([(0, invalid_base_ty)]); + Err(MroErrorKind::InvalidBases(bases_info)) + }, + |single_base| { + if let ClassBase::Class(class_base) = single_base { + if class_is_cyclically_defined(db, class_base) { + return Err(MroErrorKind::CyclicClassDefinition); + } + } + let mro = std::iter::once(ClassBase::Class(class)) + .chain(single_base.mro(db)) + .collect(); + Ok(mro) + }, + ) + } + + // The class has multiple explicit bases. + // + // We'll fallback to a full implementation of the C3-merge algorithm to determine + // what MRO Python will give this class at runtime + // (if an MRO is indeed resolvable at all!) + multiple_bases => { + if class_is_cyclically_defined(db, class) { + return Err(MroErrorKind::CyclicClassDefinition); + } + + let definition = class.definition(db); + let has_type_params = class_stmt_node.type_params.is_some(); + let mut valid_bases = vec![]; + let mut invalid_bases = vec![]; + + for (i, base_node) in multiple_bases.iter().enumerate() { + match ClassBase::try_from_node(db, base_node, definition, has_type_params) { + Ok(valid_base) => valid_bases.push(valid_base), + Err(invalid_base) => invalid_bases.push((i, invalid_base)), + } + } + + if !invalid_bases.is_empty() { + return Err(MroErrorKind::InvalidBases(invalid_bases.into_boxed_slice())); + } + + let mut seqs = vec![VecDeque::from([ClassBase::Class(class)])]; + for base in &valid_bases { + seqs.push(base.mro(db).collect()); + } + seqs.push(valid_bases.iter().copied().collect()); + + c3_merge(seqs).ok_or_else(|| { + let mut seen_bases = FxHashSet::default(); + let mut duplicate_bases = vec![]; + for (index, base) in valid_bases + .iter() + .enumerate() + .filter_map(|(index, base)| Some((index, base.into_class_literal_type()?))) + { + if !seen_bases.insert(base) { + duplicate_bases.push((index, base)); + } + } + + if duplicate_bases.is_empty() { + MroErrorKind::UnresolvableMro { + bases_list: valid_bases.into_boxed_slice(), + } + } else { + MroErrorKind::DuplicateBases(duplicate_bases.into_boxed_slice()) + } + }) + } + } + } +} + +impl<'db, const N: usize> From<[ClassBase<'db>; N]> for Mro<'db> { + fn from(value: [ClassBase<'db>; N]) -> Self { + Self(Box::from(value)) + } +} + +impl<'db> From>> for Mro<'db> { + fn from(value: Vec>) -> Self { + Self(value.into_boxed_slice()) + } +} + +impl<'db> Deref for Mro<'db> { + type Target = [ClassBase<'db>]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'db> FromIterator> for Mro<'db> { + fn from_iter>>(iter: T) -> Self { + Self(iter.into_iter().collect()) + } +} + +/// Iterator that yields elements of a class's MRO. +/// +/// We avoid materialising the *full* MRO unless it is actually necessary: +/// - Materialising the full MRO is expensive +/// - We need to do it for every class in the code that we're checking, as we need to make sure +/// that there are no class definitions in the code we're checking that would cause an +/// exception to be raised at runtime. But the same does *not* necessarily apply for every class +/// in third-party and stdlib dependencies: we never emit diagnostics about non-first-party code. +/// - However, we *do* need to resolve attribute accesses on classes/instances from +/// third-party and stdlib dependencies. That requires iterating over the MRO of third-party/stdlib +/// classes, but not necessarily the *whole* MRO: often just the first element is enough. +/// Luckily we know that for any class `X`, the first element of `X`'s MRO will always be `X` itself. +/// We can therefore avoid resolving the full MRO for many third-party/stdlib classes while still +/// being faithful to the runtime semantics. +/// +/// Even for first-party code, where we will have to resolve the MRO for every class we encounter, +/// loading the cached MRO comes with a certain amount of overhead, so it's best to avoid calling the +/// Salsa-tracked [`ClassType::try_mro`] method unless it's absolutely necessary. +pub(super) struct MroIterator<'db> { + db: &'db dyn Db, + + /// The class whose MRO we're iterating over + class: ClassType<'db>, + + /// Whether or not we've already yielded the first element of the MRO + first_element_yielded: bool, + + /// Iterator over all elements of the MRO except the first. + /// + /// The full MRO is expensive to materialize, so this field is `None` + /// unless we actually *need* to iterate past the first element of the MRO, + /// at which point it is lazily materialized. + subsequent_elements: Option>>, +} + +impl<'db> MroIterator<'db> { + pub(super) fn new(db: &'db dyn Db, class: ClassType<'db>) -> Self { + Self { + db, + class, + first_element_yielded: false, + subsequent_elements: None, + } + } + + /// Materialize the full MRO of the class. + /// Return an iterator over that MRO which skips the first element of the MRO. + fn full_mro_except_first_element(&mut self) -> impl Iterator> + '_ { + self.subsequent_elements + .get_or_insert_with(|| { + let mut full_mro_iter = match self.class.try_mro(self.db) { + Ok(mro) => mro.iter(), + Err(error) => error.fallback_mro().iter(), + }; + full_mro_iter.next(); + full_mro_iter + }) + .copied() + } +} + +impl<'db> Iterator for MroIterator<'db> { + type Item = ClassBase<'db>; + + fn next(&mut self) -> Option { + if !self.first_element_yielded { + self.first_element_yielded = true; + return Some(ClassBase::Class(self.class)); + } + self.full_mro_except_first_element().next() + } +} + +impl std::iter::FusedIterator for MroIterator<'_> {} + +#[derive(Debug, PartialEq, Eq)] +pub(super) struct MroError<'db> { + kind: MroErrorKind<'db>, + fallback_mro: Mro<'db>, +} + +impl<'db> MroError<'db> { + /// Return an [`MroErrorKind`] variant describing why we could not resolve the MRO for this class. + pub(super) fn reason(&self) -> &MroErrorKind<'db> { + &self.kind + } + + /// Return the fallback MRO we should infer for this class during type inference + /// (since accurate resolution of its "true" MRO was impossible) + pub(super) fn fallback_mro(&self) -> &Mro<'db> { + &self.fallback_mro + } +} + +/// Possible ways in which attempting to resolve the MRO of a class might fail. +#[derive(Debug, PartialEq, Eq)] +pub(super) enum MroErrorKind<'db> { + /// The class inherits from one or more invalid bases. + /// + /// To avoid excessive complexity in our implementation, + /// we only permit classes to inherit from class-literal types, + /// `Todo`, `Unknown` or `Any`. Anything else results in us + /// emitting a diagnostic. + /// + /// This variant records the indices and types of class bases + /// that we deem to be invalid. The indices are the indices of nodes + /// in the bases list of the class's [`ast::StmtClassDef`] node. + /// Each index is the index of a node representing an invalid base. + InvalidBases(Box<[(usize, Type<'db>)]>), + + /// The class inherits from itself! + /// + /// This is very unlikely to happen in working real-world code, + /// but it's important to explicitly account for it. + /// If we don't, there's a possibility of an infinite loop and a panic. + CyclicClassDefinition, + + /// The class has one or more duplicate bases. + /// + /// This variant records the indices and [`ClassType`]s + /// of the duplicate bases. The indices are the indices of nodes + /// in the bases list of the class's [`ast::StmtClassDef`] node. + /// Each index is the index of a node representing a duplicate base. + DuplicateBases(Box<[(usize, ClassType<'db>)]>), + + /// The MRO is otherwise unresolvable through the C3-merge algorithm. + /// + /// See [`c3_merge`] for more details. + UnresolvableMro { bases_list: Box<[ClassBase<'db>]> }, +} + +/// Enumeration of the possible kinds of types we allow in class bases. +/// +/// This is much more limited than the [`Type`] enum: +/// all types that would be invalid to have as a class base are +/// transformed into [`ClassBase::Unknown`] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub(super) enum ClassBase<'db> { + Any, + Unknown, + Todo, + Class(ClassType<'db>), +} + +impl<'db> ClassBase<'db> { + pub(super) fn display(self, db: &'db dyn Db) -> impl std::fmt::Display + 'db { + struct Display<'db> { + base: ClassBase<'db>, + db: &'db dyn Db, + } + + impl std::fmt::Display for Display<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.base { + ClassBase::Any => f.write_str("Any"), + ClassBase::Todo => f.write_str("Todo"), + ClassBase::Unknown => f.write_str("Unknown"), + ClassBase::Class(class) => write!(f, "", class.name(self.db)), + } + } + } + + Display { base: self, db } + } + + #[cfg(test)] + #[track_caller] + pub(super) fn expect_class(self) -> ClassType<'db> { + match self { + ClassBase::Class(class) => class, + _ => panic!("Expected a `ClassBase::Class()` variant"), + } + } + + /// Return a `ClassBase` representing the class `builtins.object` + fn object(db: &'db dyn Db) -> Self { + KnownClass::Object + .to_class(db) + .into_class_literal_type() + .map_or(Self::Unknown, Self::Class) + } + + /// Attempt to resolve the node `base_node` into a `ClassBase`. + /// + /// If the inferred type of `base_node` is not an acceptable class-base type, + /// return an error indicating what the inferred type was. + fn try_from_node( + db: &'db dyn Db, + base_node: &'db ast::Expr, + class_definition: Definition<'db>, + class_has_type_params: bool, + ) -> Result> { + let base_ty = infer_class_base_type(db, base_node, class_definition, class_has_type_params); + Self::try_from_ty(base_ty).ok_or(base_ty) + } + + /// Attempt to resolve `ty` into a `ClassBase`. + /// + /// Return `None` if `ty` is not an acceptable type for a class base. + fn try_from_ty(ty: Type<'db>) -> Option { + match ty { + Type::Any => Some(Self::Any), + Type::Unknown => Some(Self::Unknown), + Type::Todo => Some(Self::Todo), + Type::ClassLiteral(class) => Some(Self::Class(class)), + Type::Union(_) => None, // TODO -- forces consideration of multiple possible MROs? + Type::Intersection(_) => None, // TODO -- probably incorrect? + Type::Instance(_) => None, // TODO -- handle `__mro_entries__`? + Type::Never + | Type::BooleanLiteral(_) + | Type::FunctionLiteral(_) + | Type::BytesLiteral(_) + | Type::IntLiteral(_) + | Type::StringLiteral(_) + | Type::LiteralString + | Type::Tuple(_) + | Type::SliceLiteral(_) + | Type::ModuleLiteral(_) => None, + } + } + + fn into_class_literal_type(self) -> Option> { + match self { + Self::Class(class) => Some(class), + _ => None, + } + } + + /// Iterate over the MRO of this base + fn mro( + self, + db: &'db dyn Db, + ) -> Either>, impl Iterator>> { + match self { + ClassBase::Any => Either::Left([ClassBase::Any, ClassBase::object(db)].into_iter()), + ClassBase::Unknown => { + Either::Left([ClassBase::Unknown, ClassBase::object(db)].into_iter()) + } + ClassBase::Todo => Either::Left([ClassBase::Todo, ClassBase::object(db)].into_iter()), + ClassBase::Class(class) => Either::Right(class.iter_mro(db)), + } + } +} + +impl<'db> From> for Type<'db> { + fn from(value: ClassBase<'db>) -> Self { + match value { + ClassBase::Any => Type::Any, + ClassBase::Todo => Type::Todo, + ClassBase::Unknown => Type::Unknown, + ClassBase::Class(class) => Type::ClassLiteral(class), + } + } +} + +/// Implementation of the [C3-merge algorithm] for calculating a Python class's +/// [method resolution order]. +/// +/// [C3-merge algorithm]: https://docs.python.org/3/howto/mro.html#python-2-3-mro +/// [method resolution order]: https://docs.python.org/3/glossary.html#term-method-resolution-order +fn c3_merge(mut sequences: Vec>) -> Option { + // Most MROs aren't that long... + let mut mro = Vec::with_capacity(8); + + loop { + sequences.retain(|sequence| !sequence.is_empty()); + + if sequences.is_empty() { + return Some(Mro::from(mro)); + } + + // If the candidate exists "deeper down" in the inheritance hierarchy, + // we should refrain from adding it to the MRO for now. Add the first candidate + // for which this does not hold true. If this holds true for all candidates, + // return `None`; it will be impossible to find a consistent MRO for the class + // with the given bases. + let mro_entry = sequences.iter().find_map(|outer_sequence| { + let candidate = outer_sequence[0]; + + let not_head = sequences + .iter() + .all(|sequence| sequence.iter().skip(1).all(|base| base != &candidate)); + + not_head.then_some(candidate) + })?; + + mro.push(mro_entry); + + // Make sure we don't try to add the candidate to the MRO twice: + for sequence in &mut sequences { + if sequence[0] == mro_entry { + sequence.pop_front(); + } + } + } +} + +/// Return `true` if this class appears to be a cyclic definition, +/// i.e., it inherits either directly or indirectly from itself. +/// +/// A class definition like this will fail at runtime, +/// but we must be resilient to it or we could panic. +fn class_is_cyclically_defined(db: &dyn Db, class: ClassType) -> bool { + fn is_cyclically_defined_recursive<'db>( + db: &'db dyn Db, + class: ClassType<'db>, + classes_to_watch: &mut IndexSet>, + ) -> bool { + if !classes_to_watch.insert(class) { + return true; + } + for explicit_base_class in class + .explicit_bases(db) + .filter_map(Type::into_class_literal_type) + { + // Each base must be considered in isolation. + // This is due to the fact that if a class uses multiple inheritance, + // there could easily be a situation where two bases have the same class in their MROs; + // that isn't enough to constitute the class being cyclically defined. + let classes_to_watch_len = classes_to_watch.len(); + if is_cyclically_defined_recursive(db, explicit_base_class, classes_to_watch) { + return true; + } + classes_to_watch.truncate(classes_to_watch_len); + } + false + } + + class + .explicit_bases(db) + .filter_map(Type::into_class_literal_type) + .any(|base_class| is_cyclically_defined_recursive(db, base_class, &mut IndexSet::default())) +} diff --git a/crates/ruff_benchmark/benches/red_knot.rs b/crates/ruff_benchmark/benches/red_knot.rs index f7d40a68c805cc..cc1c4f90284220 100644 --- a/crates/ruff_benchmark/benches/red_knot.rs +++ b/crates/ruff_benchmark/benches/red_knot.rs @@ -27,6 +27,7 @@ static EXPECTED_DIAGNOSTICS: &[&str] = &[ "/src/tomllib/_parser.py:7:29: Module `collections.abc` has no member `Iterable`", // We don't support terminal statements in control flow yet: "/src/tomllib/_parser.py:246:15: Method `__class_getitem__` of type `Literal[frozenset]` is possibly unbound", + "/src/tomllib/_parser.py:692:8354: Invalid class base with type `GenericAlias` (all bases must be a class, `Any`, `Unknown` or `Todo`)", "/src/tomllib/_parser.py:66:18: Name `s` used when possibly not defined", "/src/tomllib/_parser.py:98:12: Name `char` used when possibly not defined", "/src/tomllib/_parser.py:101:12: Name `char` used when possibly not defined",