From 9df55cc6c507ea5d78f88166664ab1325d1fd30d Mon Sep 17 00:00:00 2001 From: Jesse Grabowski <48652735+jessegrabowski@users.noreply.github.com> Date: Mon, 11 Dec 2023 22:28:06 +0100 Subject: [PATCH] Update aliases to reflect package name (#547) * Change `import pytensor.tensor as at` to `as pt` everywhere in the docs * Change `import pytensor.tensor as at` to `as pt` everywhere Change `import pytensor.scalar as aes` to `as ps` everywhere Change `import pytensor.tensor.random as aer` to `as ptr` everywhere Change test variables with `_at` suffix or `at_` prefix to `_pt` and `pt_`, respectively * More renaming * Rename remaining instances of `aes` and `aer` --- doc/extending/creating_a_numba_jax_op.rst | 6 +- doc/extending/creating_an_op.rst | 10 +- .../extending_pytensor_solution_1.py | 30 +- doc/extending/graph_rewriting.rst | 28 +- doc/extending/graphstructures.rst | 6 +- doc/extending/tips.rst | 4 +- doc/extending/unittest.rst | 12 +- doc/glossary.rst | 6 +- doc/introduction.rst | 6 +- doc/library/compile/debugmode.rst | 4 +- doc/library/compile/io.rst | 20 +- doc/library/compile/nanguardmode.rst | 6 +- doc/library/d3viz/index.ipynb | 38 +- doc/library/d3viz/index.rst | 38 +- doc/library/printing.rst | 10 +- doc/library/scan.rst | 78 ++-- doc/library/tensor/basic.rst | 100 ++--- doc/library/tensor/fft.rst | 4 +- doc/sandbox/logistic_regression_example.rst | 20 +- doc/tutorial/adding.rst | 28 +- doc/tutorial/aliasing.rst | 4 +- doc/tutorial/broadcasting.rst | 8 +- doc/tutorial/conditions.rst | 10 +- doc/tutorial/debug_faq.rst | 28 +- doc/tutorial/examples.rst | 34 +- doc/tutorial/gradients.rst | 80 ++-- doc/tutorial/index.rst | 2 +- doc/tutorial/loop.rst | 104 ++--- doc/tutorial/loop_solution_1.py | 22 +- doc/tutorial/modes.rst | 14 +- doc/tutorial/modes_solution_1.py | 14 +- doc/tutorial/printing_drawing.rst | 12 +- pytensor/breakpoint.py | 8 +- pytensor/compile/builders.py | 16 +- pytensor/compile/profiling.py | 104 ++--- pytensor/graph/basic.py | 12 +- pytensor/graph/rewriting/kanren.py | 8 +- pytensor/ifelse.py | 26 +- pytensor/link/jax/dispatch/random.py | 62 +-- pytensor/link/jax/dispatch/shape.py | 6 +- pytensor/link/jax/dispatch/subtensor.py | 12 +- pytensor/link/jax/dispatch/tensor_basic.py | 4 +- pytensor/link/numba/dispatch/random.py | 58 +-- pytensor/raise_op.py | 10 +- pytensor/scan/basic.py | 26 +- pytensor/scan/checkpoints.py | 10 +- pytensor/scan/op.py | 34 +- pytensor/scan/rewriting.py | 42 +- pytensor/scan/utils.py | 12 +- pytensor/sparse/basic.py | 126 +++--- pytensor/sparse/rewriting.py | 20 +- pytensor/sparse/sandbox/sp.py | 34 +- pytensor/sparse/sandbox/sp2.py | 10 +- pytensor/sparse/type.py | 4 +- pytensor/tensor/basic.py | 94 ++--- pytensor/tensor/blas.py | 40 +- pytensor/tensor/conv/abstract_conv.py | 36 +- pytensor/tensor/elemwise.py | 4 +- pytensor/tensor/extra_ops.py | 116 +++--- pytensor/tensor/math.py | 30 +- pytensor/tensor/nlinalg.py | 52 +-- pytensor/tensor/rewriting/basic.py | 28 +- pytensor/tensor/rewriting/blas.py | 30 +- pytensor/tensor/rewriting/blas_c.py | 6 +- pytensor/tensor/rewriting/elemwise.py | 44 +- pytensor/tensor/rewriting/extra_ops.py | 4 +- pytensor/tensor/rewriting/jax.py | 8 +- pytensor/tensor/rewriting/math.py | 172 ++++---- pytensor/tensor/rewriting/special.py | 4 +- pytensor/tensor/rewriting/subtensor.py | 16 +- pytensor/tensor/rewriting/uncanonicalize.py | 4 +- pytensor/tensor/shape.py | 38 +- pytensor/tensor/slinalg.py | 28 +- pytensor/tensor/subtensor.py | 42 +- pytensor/tensor/type.py | 34 +- pytensor/tensor/variable.py | 248 
++++++------ pytensor/tensor/xlogx.py | 18 +- pytensor/typed_list/basic.py | 8 +- tests/compile/function/test_pfunc.py | 12 +- tests/compile/function/test_types.py | 10 +- tests/compile/test_builders.py | 38 +- tests/compile/test_debugmode.py | 16 +- tests/compile/test_misc.py | 4 +- tests/compile/test_nanguardmode.py | 4 +- tests/compile/test_profiling.py | 6 +- tests/d3viz/models.py | 10 +- tests/graph/rewriting/test_kanren.py | 60 +-- tests/graph/rewriting/test_unify.py | 138 +++---- tests/graph/test_basic.py | 28 +- tests/graph/test_compute_test_value.py | 24 +- tests/graph/test_op.py | 12 +- tests/link/c/test_cmodule.py | 4 +- tests/link/c/test_op.py | 16 +- tests/link/c/test_params_type.py | 6 +- tests/link/c/test_type.py | 10 +- tests/link/jax/test_elemwise.py | 72 ++-- tests/link/jax/test_extra_ops.py | 34 +- tests/link/jax/test_nlinalg.py | 28 +- tests/link/jax/test_random.py | 196 ++++----- tests/link/jax/test_scalar.py | 44 +- tests/link/jax/test_scan.py | 48 +-- tests/link/jax/test_shape.py | 34 +- tests/link/jax/test_slinalg.py | 28 +- tests/link/jax/test_subtensor.py | 222 +++++----- tests/link/jax/test_tensor_basic.py | 46 +-- tests/link/numba/test_basic.py | 286 ++++++------- tests/link/numba/test_elemwise.py | 176 ++++---- tests/link/numba/test_extra_ops.py | 122 +++--- tests/link/numba/test_nlinalg.py | 58 +-- tests/link/numba/test_random.py | 206 +++++----- tests/link/numba/test_scalar.py | 48 +-- tests/link/numba/test_scan.py | 90 ++--- tests/link/numba/test_tensor_basic.py | 126 +++--- tests/scalar/test_basic.py | 4 +- tests/scalar/test_math.py | 32 +- tests/scan/test_basic.py | 146 +++---- tests/scan/test_printing.py | 24 +- tests/scan/test_rewriting.py | 52 +-- tests/scan/test_utils.py | 56 +-- tests/scan/test_views.py | 18 +- tests/sparse/test_basic.py | 56 +-- tests/sparse/test_rewriting.py | 4 +- tests/sparse/test_var.py | 22 +- tests/tensor/conv/test_abstract_conv.py | 40 +- tests/tensor/random/rewriting/test_basic.py | 46 +-- tests/tensor/random/test_basic.py | 112 +++--- tests/tensor/random/test_op.py | 18 +- tests/tensor/rewriting/test_basic.py | 252 ++++++------ tests/tensor/rewriting/test_elemwise.py | 116 +++--- tests/tensor/rewriting/test_extra_ops.py | 6 +- tests/tensor/rewriting/test_linalg.py | 8 +- tests/tensor/rewriting/test_math.py | 378 +++++++++--------- tests/tensor/rewriting/test_shape.py | 18 +- tests/tensor/rewriting/test_subtensor.py | 142 +++---- tests/tensor/rewriting/test_uncanonicalize.py | 36 +- tests/tensor/test_basic.py | 258 ++++++------ tests/tensor/test_blas.py | 44 +- tests/tensor/test_blas_c.py | 32 +- tests/tensor/test_blas_scipy.py | 4 +- tests/tensor/test_complex.py | 14 +- tests/tensor/test_elemwise.py | 130 +++--- tests/tensor/test_extra_ops.py | 56 +-- tests/tensor/test_keepdims.py | 20 +- tests/tensor/test_math.py | 56 +-- tests/tensor/test_math_scipy.py | 110 ++--- tests/tensor/test_merge.py | 10 +- tests/tensor/test_sharedvar.py | 4 +- tests/tensor/test_subtensor.py | 116 +++--- tests/tensor/test_type.py | 4 +- tests/tensor/test_utils.py | 4 +- tests/test_gradient.py | 22 +- tests/test_ifelse.py | 12 +- tests/test_raise_op.py | 20 +- tests/test_rop.py | 24 +- tests/typed_list/test_rewriting.py | 4 +- tests/unittest_tools.py | 4 +- 156 files changed, 3663 insertions(+), 3667 deletions(-) diff --git a/doc/extending/creating_a_numba_jax_op.rst b/doc/extending/creating_a_numba_jax_op.rst index f6e50556bf..abf3f528bf 100644 --- a/doc/extending/creating_a_numba_jax_op.rst +++ b/doc/extending/creating_a_numba_jax_op.rst @@ 
-135,16 +135,16 @@ Here's a small example of a test for :class:`Eye`: .. code:: python - import pytensor.tensor as at + import pytensor.tensor as pt def test_jax_Eye(): """Test JAX conversion of the `Eye` `Op`.""" # Create a symbolic input for `Eye` - x_at = at.scalar() + x_at = pt.scalar() # Create a variable that is the output of an `Eye` `Op` - eye_var = at.eye(x_at) + eye_var = pt.eye(x_at) # Create an PyTensor `FunctionGraph` out_fg = FunctionGraph(outputs=[eye_var]) diff --git a/doc/extending/creating_an_op.rst b/doc/extending/creating_an_op.rst index 47745fcaf1..746342ad4a 100644 --- a/doc/extending/creating_an_op.rst +++ b/doc/extending/creating_an_op.rst @@ -786,7 +786,7 @@ signature: .. testcode:: asop import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt import numpy as np from pytensor import function from pytensor.compile.ops import as_op @@ -797,8 +797,8 @@ signature: return [ashp[:-1] + bshp[-1:]] - @as_op(itypes=[at.matrix, at.matrix], - otypes=[at.matrix], infer_shape=infer_shape_numpy_dot) + @as_op(itypes=[pt.matrix, pt.matrix], + otypes=[pt.matrix], infer_shape=infer_shape_numpy_dot) def numpy_dot(a, b): return np.dot(a, b) @@ -806,8 +806,8 @@ You can try it as follows: .. testcode:: asop - x = at.matrix() - y = at.matrix() + x = pt.matrix() + y = pt.matrix() f = function([x, y], numpy_dot(x, y)) inp1 = np.random.random_sample((5, 4)) inp2 = np.random.random_sample((4, 7)) diff --git a/doc/extending/extending_pytensor_solution_1.py b/doc/extending/extending_pytensor_solution_1.py index d8bb773134..45329c73d6 100644 --- a/doc/extending/extending_pytensor_solution_1.py +++ b/doc/extending/extending_pytensor_solution_1.py @@ -14,8 +14,8 @@ class ProdOp(Op): def make_node(self, x, y): - x = at.as_tensor_variable(x) - y = at.as_tensor_variable(y) + x = pt.as_tensor_variable(x) + y = pt.as_tensor_variable(y) outdim = x.type.ndim output = TensorType( dtype=pytensor.scalar.upcast(x.dtype, y.dtype), shape=(None,) * outdim @@ -39,8 +39,8 @@ def grad(self, inputs, output_grads): class SumDiffOp(Op): def make_node(self, x, y): - x = at.as_tensor_variable(x) - y = at.as_tensor_variable(y) + x = pt.as_tensor_variable(x) + y = pt.as_tensor_variable(y) outdim = x.type.ndim output1 = TensorType( dtype=pytensor.scalar.upcast(x.dtype, y.dtype), shape=(None,) * outdim @@ -62,20 +62,16 @@ def infer_shape(self, fgraph, node, i0_shapes): def grad(self, inputs, output_grads): og1, og2 = output_grads if og1 is None: - og1 = at.zeros_like(og2) + og1 = pt.zeros_like(og2) if og2 is None: - og2 = at.zeros_like(og1) + og2 = pt.zeros_like(og1) return [og1 + og2, og1 - og2] # 3. 
Testing apparatus - -import numpy as np - from tests import unittest_tools as utt -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.graph.basic import Apply -from pytensor.graph.op import Op from pytensor.tensor.type import dmatrix, matrix @@ -182,8 +178,8 @@ def infer_shape_numpy_dot(fgraph, node, input_shapes): @as_op( - itypes=[at.fmatrix, at.fmatrix], - otypes=[at.fmatrix], + itypes=[pt.fmatrix, pt.fmatrix], + otypes=[pt.fmatrix], infer_shape=infer_shape_numpy_dot, ) def numpy_add(a, b): @@ -197,8 +193,8 @@ def infer_shape_numpy_add_sub(fgraph, node, input_shapes): @as_op( - itypes=[at.fmatrix, at.fmatrix], - otypes=[at.fmatrix], + itypes=[pt.fmatrix, pt.fmatrix], + otypes=[pt.fmatrix], infer_shape=infer_shape_numpy_add_sub, ) def numpy_add(a, b): @@ -206,8 +202,8 @@ def numpy_add(a, b): @as_op( - itypes=[at.fmatrix, at.fmatrix], - otypes=[at.fmatrix], + itypes=[pt.fmatrix, pt.fmatrix], + otypes=[pt.fmatrix], infer_shape=infer_shape_numpy_add_sub, ) def numpy_sub(a, b): diff --git a/doc/extending/graph_rewriting.rst b/doc/extending/graph_rewriting.rst index acf2c4f3d8..2112d5f276 100644 --- a/doc/extending/graph_rewriting.rst +++ b/doc/extending/graph_rewriting.rst @@ -443,7 +443,7 @@ The following is an example that distributes dot products across additions. .. code:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt from pytensor.graph.rewriting.kanren import KanrenRelationSub from pytensor.graph.rewriting.basic import EquilibriumGraphRewriter from pytensor.graph.rewriting.utils import rewrite_graph @@ -462,7 +462,7 @@ The following is an example that distributes dot products across additions. ) # Tell `kanren` that `add` is associative - fact(associative, at.add) + fact(associative, pt.add) def dot_distributeo(in_lv, out_lv): @@ -473,13 +473,13 @@ The following is an example that distributes dot products across additions. # Make sure the input is a `_dot` eq(in_lv, etuple(_dot, A_lv, add_term_lv)), # Make sure the term being `_dot`ed is an `add` - heado(at.add, add_term_lv), + heado(pt.add, add_term_lv), # Flatten the associative pairings of `add` operations assoc_flatten(add_term_lv, add_flat_lv), # Get the flattened `add` arguments tailo(add_cdr_lv, add_flat_lv), # Add all the `_dot`ed arguments and set the output - conso(at.add, dot_cdr_lv, out_lv), + conso(pt.add, dot_cdr_lv, out_lv), # Apply the `_dot` to all the flattened `add` arguments mapo(lambda x, y: conso(_dot, etuple(A_lv, x), y), add_cdr_lv, dot_cdr_lv), ) @@ -490,10 +490,10 @@ The following is an example that distributes dot products across additions. Below, we apply `dot_distribute_rewrite` to a few example graphs. First we create simple test graph: ->>> x_at = at.vector("x") ->>> y_at = at.vector("y") ->>> A_at = at.matrix("A") ->>> test_at = A_at.dot(x_at + y_at) +>>> x_at = pt.vector("x") +>>> y_at = pt.vector("y") +>>> A_at = pt.matrix("A") +>>> test_at = A_at.dot(x_at + y_at) >>> print(pytensor.pprint(test_at)) (A @ (x + y)) @@ -506,18 +506,18 @@ Next we apply the rewrite to the graph: We see that the dot product has been distributed, as desired.
Now, let's try a few more test cases: ->>> z_at = at.vector("z") ->>> w_at = at.vector("w") ->>> test_at = A_at.dot((x_at + y_at) + (z_at + w_at)) +>>> z_at = pt.vector("z") +>>> w_at = pt.vector("w") +>>> test_at = A_at.dot((x_at + y_at) + (z_at + w_at)) >>> print(pytensor.pprint(test_at)) (A @ ((x + y) + (z + w))) >>> res = rewrite_graph(test_at, include=[], custom_rewrite=dot_distribute_rewrite, clone=False) >>> print(pytensor.pprint(res)) (((A @ x) + (A @ y)) + ((A @ z) + (A @ w))) ->>> B_at = at.matrix("B") ->>> w_at = at.vector("w") ->>> test_at = A_at.dot(x_at + (y_at + B_at.dot(z_at + w_at))) +>>> B_at = pt.matrix("B") +>>> w_at = pt.vector("w") +>>> test_at = A_at.dot(x_at + (y_at + B_at.dot(z_at + w_at))) >>> print(pytensor.pprint(test_at)) (A @ (x + (y + ((B @ z) + (B @ w))))) >>> res = rewrite_graph(test_at, include=[], custom_rewrite=dot_distribute_rewrite, clone=False) diff --git a/doc/extending/graphstructures.rst b/doc/extending/graphstructures.rst index 24e0365ac5..c6dacf73ed 100644 --- a/doc/extending/graphstructures.rst +++ b/doc/extending/graphstructures.rst @@ -28,10 +28,10 @@ The following illustrates these elements: .. testcode:: - import pytensor.tensor as at + import pytensor.tensor as pt - x = at.dmatrix('x') - y = at.dmatrix('y') + x = pt.dmatrix('x') + y = pt.dmatrix('y') z = x + y **Diagram** diff --git a/doc/extending/tips.rst b/doc/extending/tips.rst index a2d284c0e0..a5f724b301 100644 --- a/doc/extending/tips.rst +++ b/doc/extending/tips.rst @@ -20,10 +20,10 @@ simple function: .. code:: - from pytensor import tensor as at + from pytensor import tensor as pt def sum_square_difference(a, b): - return at.sum((a - b)**2) + return pt.sum((a - b)**2) Even without taking PyTensor's rewrites into account, it is likely to work just as well as a custom implementation. It also supports all diff --git a/doc/extending/unittest.rst b/doc/extending/unittest.rst index a22a3c3217..17f472fac0 100644 --- a/doc/extending/unittest.rst +++ b/doc/extending/unittest.rst @@ -98,13 +98,13 @@ Example: .. code-block:: python import numpy as np - import pytensor.tensor as at + import pytensor.tensor as pt def test_dot_validity(): - a = at.dmatrix('a') - b = at.dmatrix('b') - c = at.dot(a, b) + a = pt.dmatrix('a') + b = pt.dmatrix('b') + c = pt.dot(a, b) c_fn = pytensor.function([a, b], [c]) @@ -187,7 +187,7 @@ symbolic variable: def test_verify_exprgrad(): def fun(x,y,z): - return (x + at.cos(y)) / (4 * z)**2 + return (x + pt.cos(y)) / (4 * z)**2 x_val = np.asarray([[1], [1.1], [1.2]]) y_val = np.asarray([0.1, 0.2]) @@ -207,7 +207,7 @@ Here is an example showing how to use :func:`verify_grad` on an :class:`Op` inst """ a_val = np.asarray([[0,1,2],[3,4,5]], dtype='float64') rng = np.random.default_rng(42) - pytensor.gradient.verify_grad(at.Flatten(), [a_val], rng=rng) + pytensor.gradient.verify_grad(pt.Flatten(), [a_val], rng=rng) .. note:: diff --git a/doc/glossary.rst b/doc/glossary.rst index e86078f031..36c4f1f037 100644 --- a/doc/glossary.rst +++ b/doc/glossary.rst @@ -6,7 +6,7 @@ Glossary .. testsetup:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt .. glossary:: @@ -31,7 +31,7 @@ Glossary A variable with an immutable value. For example, when you type - >>> x = at.ivector() + >>> x = pt.ivector() >>> y = x + 3 Then a `constant` is created to represent the ``3`` in the graph. @@ -151,7 +151,7 @@ Glossary The the main data structure you work with when using PyTensor.
For example, - >>> x = at.ivector() + >>> x = pt.ivector() >>> y = -x**2 ``x`` and ``y`` are both :class:`Variable`\s, i.e. instances of the :class:`Variable` class. diff --git a/doc/introduction.rst b/doc/introduction.rst index d0325f1da1..cfbfeaf90f 100644 --- a/doc/introduction.rst +++ b/doc/introduction.rst @@ -66,11 +66,11 @@ its features, but it illustrates concretely what PyTensor is. .. code-block:: python import pytensor - from pytensor import tensor as at + from pytensor import tensor as pt # declare two symbolic floating-point scalars - a = at.dscalar() - b = at.dscalar() + a = pt.dscalar() + b = pt.dscalar() # create a simple expression c = a + b diff --git a/doc/library/compile/debugmode.rst b/doc/library/compile/debugmode.rst index ad324fa680..4844bab3b6 100644 --- a/doc/library/compile/debugmode.rst +++ b/doc/library/compile/debugmode.rst @@ -28,10 +28,10 @@ a cluster. .. testcode:: import pytensor - from pytensor import tensor as at + from pytensor import tensor as pt from pytensor.compile.debugmode import DebugMode - x = at.dscalar('x') + x = pt.dscalar('x') f = pytensor.function([x], 10*x, mode='DebugMode') diff --git a/doc/library/compile/io.rst b/doc/library/compile/io.rst index 406ac89548..02458468d4 100644 --- a/doc/library/compile/io.rst +++ b/doc/library/compile/io.rst @@ -80,10 +80,10 @@ A non-None `value` argument makes an In() instance an optional parameter of the compiled function. For example, in the following code we are defining an arity-2 function ``inc``. ->>> import pytensor.tensor as at +>>> import pytensor.tensor as pt >>> from pytensor import function >>> from pytensor.compile.io import In ->>> u, x, s = at.scalars('u', 'x', 's') +>>> u, x, s = pt.scalars('u', 'x', 's') >>> inc = function([u, In(x, value=3), In(s, update=(s+x*u), value=10.0)], []) Since we provided a ``value`` for ``s`` and ``x``, we can call it with just a value for ``u`` like this: @@ -183,8 +183,8 @@ method to access values by indexing a Function directly by typing To show some examples of these access methods... ->>> from pytensor import tensor as at, function ->>> a, b, c = at.scalars('xys') # set the internal names of graph nodes +>>> from pytensor import tensor as pt, function +>>> a, b, c = pt.scalars('xys') # set the internal names of graph nodes >>> # Note that the name of c is 's', not 'c'! >>> fn = function([a, b, ((c, c+a+b), 10.0)], []) @@ -236,12 +236,12 @@ Every element of the inputs list will be upgraded to an In instance if necessary Example: >>> import pytensor ->>> from pytensor import tensor as at +>>> from pytensor import tensor as pt >>> from pytensor.compile.io import In ->>> x = at.scalar() ->>> y = at.scalar('y') ->>> z = at.scalar('z') ->>> w = at.scalar('w') +>>> x = pt.scalar() +>>> y = pt.scalar('y') +>>> z = pt.scalar('z') +>>> w = pt.scalar('w') >>> fn = pytensor.function(inputs=[x, y, In(z, value=42), ((w, w+x), 0)], ... outputs=x + y + z) @@ -308,7 +308,7 @@ If a list of ``Variable`` or ``Out`` instances is given as argument, then the co >>> import numpy >>> from pytensor.compile.io import Out ->>> x, y, s = at.matrices('xys') +>>> x, y, s = pt.matrices('xys') >>> # print a list of 2 ndarrays >>> fn1 = pytensor.function([x], [x+x, Out((x+x).T, borrow=True)]) diff --git a/doc/library/compile/nanguardmode.rst b/doc/library/compile/nanguardmode.rst index 9cca7dca29..7f12003618 100644 --- a/doc/library/compile/nanguardmode.rst +++ b/doc/library/compile/nanguardmode.rst @@ -25,12 +25,12 @@ of abnormal values: NaNs, Infs, and abnormally big values. 
import numpy as np import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt from pytensor.compile.nanguardmode import NanGuardMode - x = at.matrix() + x = pt.matrix() w = pytensor.shared(np.random.standard_normal((5, 7)).astype(pytensor.config.floatX)) - y = at.dot(x, w) + y = pt.dot(x, w) fun = pytensor.function( [x], y, mode=NanGuardMode(nan_is_error=True, inf_is_error=True, big_is_error=True) diff --git a/doc/library/d3viz/index.ipynb b/doc/library/d3viz/index.ipynb index 83e350a6b9..778647daa3 100644 --- a/doc/library/d3viz/index.ipynb +++ b/doc/library/d3viz/index.ipynb @@ -70,8 +70,8 @@ }, "outputs": [], "source": [ - "import pytensor as th\n", - "import pytensor.tensor as at\n", + "import pytensor\n", + "import pytensor.tensor as pt\n", "import numpy as np" ] }, @@ -96,16 +96,16 @@ "nhiddens = 50\n", "\n", "rng = np.random.RandomState(0)\n", - "x = at.dmatrix('x')\n", - "wh = th.shared(rng.normal(0, 1, (nfeatures, nhiddens)), borrow=True)\n", - "bh = th.shared(np.zeros(nhiddens), borrow=True)\n", - "h = at.sigmoid(at.dot(x, wh) + bh)\n", + "x = pt.dmatrix('x')\n", + "wh = pytensor.shared(rng.normal(0, 1, (nfeatures, nhiddens)), borrow=True)\n", + "bh = pytensor.shared(np.zeros(nhiddens), borrow=True)\n", + "h = pt.sigmoid(pt.dot(x, wh) + bh)\n", "\n", - "wy = th.shared(rng.normal(0, 1, (nhiddens, noutputs)))\n", - "by = th.shared(np.zeros(noutputs), borrow=True)\n", - "y = at.special.softmax(at.dot(h, wy) + by)\n", + "wy = pytensor.shared(rng.normal(0, 1, (nhiddens, noutputs)))\n", + "by = pytensor.shared(np.zeros(noutputs), borrow=True)\n", + "y = pt.special.softmax(pt.dot(h, wy) + by)\n", "\n", - "predict = th.function([x], y)" + "predict = pytensor.function([x], y)" ] }, { @@ -276,7 +276,7 @@ }, "outputs": [], "source": [ - "predict_profiled = th.function([x], y, profile=True)\n", + "predict_profiled = pytensor.function([x], y, profile=True)\n", "\n", "x_val = rng.normal(0, 1, (ninputs, nfeatures))\n", "y_val = predict_profiled(x_val)" @@ -388,12 +388,12 @@ }, "outputs": [], "source": [ - "x, y, z = at.scalars('xyz')\n", - "e = at.sigmoid((x + y + z)**2)\n", - "op = th.compile.builders.OpFromGraph([x, y, z], [e])\n", + "x, y, z = pt.scalars('xyz')\n", + "e = pt.sigmoid((x + y + z)**2)\n", + "op = pytensor.compile.builders.OpFromGraph([x, y, z], [e])\n", "\n", "e2 = op(x, y, z) + op(z, y, x)\n", - "f = th.function([x, y, z], e2)" + "f = pytensor.function([x, y, z], e2)" ] }, { @@ -433,13 +433,13 @@ }, "outputs": [], "source": [ - "x, y, z = at.scalars('xyz')\n", + "x, y, z = pt.scalars('xyz')\n", "e = x * y\n", - "op = th.compile.builders.OpFromGraph([x, y], [e])\n", + "op = pytensor.compile.builders.OpFromGraph([x, y], [e])\n", "e2 = op(x, y) + z\n", - "op2 = th.compile.builders.OpFromGraph([x, y, z], [e2])\n", + "op2 = pytensor.compile.builders.OpFromGraph([x, y, z], [e2])\n", "e3 = op2(x, y, z) + z\n", - "f = th.function([x, y, z], [e3])" + "f = pytensor.function([x, y, z], [e3])" ] }, { diff --git a/doc/library/d3viz/index.rst b/doc/library/d3viz/index.rst index 6df979213d..4053312533 100644 --- a/doc/library/d3viz/index.rst +++ b/doc/library/d3viz/index.rst @@ -49,8 +49,8 @@ hidden layer and a softmax output layer. .. code:: python - import pytensor as th - import pytensor.tensor as at + import pytensor + import pytensor.tensor as pt import numpy as np ninputs = 1000 @@ -59,16 +59,16 @@ hidden layer and a softmax output layer. 
nhiddens = 50 rng = np.random.RandomState(0) - x = at.dmatrix('x') - wh = th.shared(rng.normal(0, 1, (nfeatures, nhiddens)), borrow=True) - bh = th.shared(np.zeros(nhiddens), borrow=True) - h = at.sigmoid(at.dot(x, wh) + bh) + x = pt.dmatrix('x') + wh = pytensor.shared(rng.normal(0, 1, (nfeatures, nhiddens)), borrow=True) + bh = pytensor.shared(np.zeros(nhiddens), borrow=True) + h = pt.sigmoid(pt.dot(x, wh) + bh) - wy = th.shared(rng.normal(0, 1, (nhiddens, noutputs))) - by = th.shared(np.zeros(noutputs), borrow=True) - y = at.special.softmax(at.dot(h, wy) + by) + wy = pytensor.shared(rng.normal(0, 1, (nhiddens, noutputs))) + by = pytensor.shared(np.zeros(noutputs), borrow=True) + y = pt.special.softmax(pt.dot(h, wy) + by) - predict = th.function([x], y) + predict = pytensor.function([x], y) The function ``predict`` outputs the probability of 10 classes. You can visualize it with :py:func:`pytensor.printing.pydotprint` as follows: @@ -151,7 +151,7 @@ random data: .. code:: python - predict_profiled = th.function([x], y, profile=True) + predict_profiled = pytensor.function([x], y, profile=True) x_val = rng.normal(0, 1, (ninputs, nfeatures)) y_val = predict_profiled(x_val) @@ -209,12 +209,12 @@ node defines a nested graph, which will be visualized accordingly by ``d3viz``. .. code:: python - x, y, z = at.scalars('xyz') - e = at.sigmoid((x + y + z)**2) - op = th.compile.builders.OpFromGraph([x, y, z], [e]) + x, y, z = pt.scalars('xyz') + e = pt.sigmoid((x + y + z)**2) + op = pytensor.compile.builders.OpFromGraph([x, y, z], [e]) e2 = op(x, y, z) + op(z, y, x) - f = th.function([x, y, z], e2) + f = pytensor.function([x, y, z], e2) .. code:: python @@ -238,13 +238,13 @@ the following example. .. code:: python - x, y, z = at.scalars('xyz') + x, y, z = pt.scalars('xyz') e = x * y - op = th.compile.builders.OpFromGraph([x, y], [e]) + op = pytensor.compile.builders.OpFromGraph([x, y], [e]) e2 = op(x, y) + z - op2 = th.compile.builders.OpFromGraph([x, y, z], [e2]) + op2 = pytensor.compile.builders.OpFromGraph([x, y, z], [e2]) e3 = op2(x, y, z) + z - f = th.function([x, y, z], [e3]) + f = pytensor.function([x, y, z], [e3]) .. code:: python diff --git a/doc/library/printing.rst b/doc/library/printing.rst index 0087702ae9..56bccf691d 100644 --- a/doc/library/printing.rst +++ b/doc/library/printing.rst @@ -23,8 +23,8 @@ Intermediate values in a computation cannot be printed in the normal python way with the print statement, because PyTensor has no *statements*. Instead there is the :class:`Print` Op. ->>> from pytensor import tensor as at, function, printing ->>> x = at.dvector() +>>> from pytensor import tensor as pt, function, printing +>>> x = pt.dvector() >>> hello_world_op = printing.Print('hello world') >>> printed_x = hello_world_op(x) >>> f = function([x], printed_x) @@ -52,8 +52,8 @@ PyTensor also provides :func:`pytensor.printing.pydotprint` that creates a png i 1) The first is :func:`pytensor.pp`. >>> from pytensor import pp, grad, ->>> from pytensor import tensor as at ->>> x = at.dscalar('x') +>>> from pytensor import tensor as pt +>>> x = pt.dscalar('x') >>> y = x ** 2 >>> gy = grad(y, x) >>> pp(gy) # print out the gradient prior to rewriting @@ -62,7 +62,7 @@ PyTensor also provides :func:`pytensor.printing.pydotprint` that creates a png i >>> pp(f.maker.fgraph.outputs[0]) '(TensorConstant{2.0} * x)' -The parameter in at.dscalar('x') in the first line is the name of this variable +The parameter in pt.dscalar('x') in the first line is the name of this variable in the graph. 
This name is used when printing the graph to make it more readable. If no name is provided the variable x is printed as its type as returned by ``x.type()``. In this example - ````. diff --git a/doc/library/scan.rst b/doc/library/scan.rst index 5f11fa64e9..eb2940c029 100644 --- a/doc/library/scan.rst +++ b/doc/library/scan.rst @@ -38,14 +38,14 @@ The equivalent PyTensor code would be: .. testcode:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt - k = at.iscalar("k") - A = at.vector("A") + k = pt.iscalar("k") + A = pt.vector("A") # Symbolic description of the result result, updates = pytensor.scan(fn=lambda prior_result, A: prior_result * A, - outputs_info=at.ones_like(A), + outputs_info=pt.ones_like(A), non_sequences=A, n_steps=k) @@ -103,7 +103,7 @@ from a list of its coefficients: import numpy coefficients = pytensor.tensor.vector("coefficients") - x = at.scalar("x") + x = pt.scalar("x") max_coefficients_supported = 10000 @@ -164,21 +164,21 @@ downcast** of the latter. import numpy as np import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt - up_to = at.iscalar("up_to") + up_to = pt.iscalar("up_to") # define a named function, rather than using lambda def accumulate_by_adding(arange_val, sum_to_date): return sum_to_date + arange_val - seq = at.arange(up_to) + seq = pt.arange(up_to) # An unauthorized implicit downcast from the dtype of 'seq', to that of - # 'at.as_tensor_variable(0)' which is of dtype 'int8' by default would occur + # 'pt.as_tensor_variable(0)' which is of dtype 'int8' by default would occur # if this instruction were to be used instead of the next one: - # outputs_info = at.as_tensor_variable(0) + # outputs_info = pt.as_tensor_variable(0) - outputs_info = at.as_tensor_variable(np.asarray(0, seq.dtype)) + outputs_info = pt.as_tensor_variable(np.asarray(0, seq.dtype)) scan_result, scan_updates = pytensor.scan(fn=accumulate_by_adding, outputs_info=outputs_info, sequences=seq) @@ -206,14 +206,14 @@ with all values set to zero except at the provided array indices. .. testcode:: - location = at.imatrix("location") - values = at.vector("values") - output_model = at.matrix("output_model") + location = pt.imatrix("location") + values = pt.vector("values") + output_model = pt.matrix("output_model") def set_value_at_position(a_location, a_value, output_model): - zeros = at.zeros_like(output_model) + zeros = pt.zeros_like(output_model) zeros_subtensor = zeros[a_location[0], a_location[1]] - return at.set_subtensor(zeros_subtensor, a_value) + return pt.set_subtensor(zeros_subtensor, a_value) result, updates = pytensor.scan(fn=set_value_at_position, outputs_info=None, @@ -257,7 +257,7 @@ the following: .. 
testcode:: scan1 import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt import numpy as np rng = np.random.default_rng(203940) @@ -269,16 +269,16 @@ the following: bvis = pytensor.shared(bvis_values) bhid = pytensor.shared(bhid_values) - srng = at.random.RandomStream(1234) + srng = pt.random.RandomStream(1234) def one_step(vsample): - hmean = at.sigmoid(at.dot(vsample, W) + bhid) + hmean = pt.sigmoid(pt.dot(vsample, W) + bhid) hsample = srng.binomial(1, hmean, size=hmean.shape) - vmean = at.sigmoid(at.dot(hsample, W.T) + bvis) + vmean = pt.sigmoid(pt.dot(hsample, W.T) + bvis) return srng.binomial(1, vmean, size=vsample.shape) - sample = at.lvector() + sample = pt.lvector() values, updates = pytensor.scan(one_step, outputs_info=sample, n_steps=10) @@ -353,9 +353,9 @@ updated: # OneStep, with explicit use of the shared variables (W, bvis, bhid) def OneStep(vsample, W, bvis, bhid): - hmean = at.sigmoid(pytensor.dot(vsample, W) + bhid) + hmean = pt.sigmoid(pytensor.dot(vsample, W) + bhid) hsample = trng.binomial(size=hmean.shape, n=1, p=hmean) - vmean = at.sigmoid(pytensor.dot(hsample, W.T) + bvis) + vmean = pt.sigmoid(pytensor.dot(hsample, W.T) + bvis) return trng.binomial(size=vsample.shape, n=1, p=vmean, dtype=pytensor.config.floatX) @@ -389,9 +389,9 @@ Using the original Gibbs sampling example, with ``strict=True`` added to the # Same OneStep as in original example. def OneStep(vsample) : - hmean = at.sigmoid(pytensor.dot(vsample, W) + bhid) + hmean = pt.sigmoid(pytensor.dot(vsample, W) + bhid) hsample = trng.binomial(size=hmean.shape, n=1, p=hmean) - vmean = at.sigmoid(pytensor.dot(hsample, W.T) + bvis) + vmean = pt.sigmoid(pytensor.dot(hsample, W.T) + bvis) return trng.binomial(size=vsample.shape, n=1, p=vmean, dtype=pytensor.config.floatX) @@ -418,9 +418,9 @@ variables passed explicitly to ``OneStep`` and to scan: # OneStep, with explicit use of the shared variables (W, bvis, bhid) def OneStep(vsample, W, bvis, bhid) : - hmean = at.sigmoid(pytensor.dot(vsample, W) + bhid) + hmean = pt.sigmoid(pytensor.dot(vsample, W) + bhid) hsample = trng.binomial(size=hmean.shape, n=1, p=hmean) - vmean = at.sigmoid(pytensor.dot(hsample, W.T) + bvis) + vmean = pt.sigmoid(pytensor.dot(hsample, W.T) + bvis) return trng.binomial(size=vsample.shape, n=1, p=vmean, dtype=pytensor.config.floatX) @@ -460,13 +460,13 @@ construct a function that computes one iteration step : .. testsetup:: scan3 import pytensor - from pytensor import tensor as at + from pytensor import tensor as pt .. testcode:: scan3 def oneStep(u_tm4, u_t, x_tm3, x_tm1, y_tm1, W, W_in_1, W_in_2, W_feedback, W_out): - x_t = at.tanh(pytensor.dot(x_tm1, W) + \ + x_t = pt.tanh(pytensor.dot(x_tm1, W) + \ pytensor.dot(u_t, W_in_1) + \ pytensor.dot(u_tm4, W_in_2) + \ pytensor.dot(y_tm1, W_feedback)) @@ -487,16 +487,16 @@ the PyTensor variables needed we construct our RNN as follows : .. 
testcode:: scan3 - W = at.matrix() - W_in_1 = at.matrix() - W_in_2 = at.matrix() - W_feedback = at.matrix() - W_out = at.matrix() + W = pt.matrix() + W_in_1 = pt.matrix() + W_in_2 = pt.matrix() + W_feedback = pt.matrix() + W_out = pt.matrix() - u = at.matrix() # it is a sequence of vectors - x0 = at.matrix() # initial state of x has to be a matrix, since + u = pt.matrix() # it is a sequence of vectors + x0 = pt.matrix() # initial state of x has to be a matrix, since # it has to cover x[-3] - y0 = at.vector() # y0 is just a vector since scan has only to provide + y0 = pt.vector() # y0 is just a vector since scan has only to provide # y[-1] @@ -536,9 +536,9 @@ value ``max_value``. def power_of_2(previous_power, max_value): return previous_power*2, pytensor.scan.utils.until(previous_power*2 > max_value) - max_value = at.scalar() + max_value = pt.scalar() values, _ = pytensor.scan(power_of_2, - outputs_info = at.constant(1.), + outputs_info = pt.constant(1.), non_sequences = max_value, n_steps = 1024) diff --git a/doc/library/tensor/basic.rst b/doc/library/tensor/basic.rst index 911583da92..1f73220888 100644 --- a/doc/library/tensor/basic.rst +++ b/doc/library/tensor/basic.rst @@ -10,21 +10,21 @@ Basic Tensor Functionality import numpy as np import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt from pytensor.tensor.type import scalar, iscalar, TensorType, dmatrix, ivector, fmatrix from pytensor.tensor import set_subtensor, inc_subtensor, batched_dot from pytensor import shared PyTensor supports symbolic tensor expressions. When you type, ->>> import pytensor.tensor as at ->>> x = at.fmatrix() +>>> import pytensor.tensor as pt +>>> x = pt.fmatrix() the ``x`` is a :class:`TensorVariable` instance. -The ``at.fmatrix`` object itself is an instance of :class:`TensorType`. +The ``pt.fmatrix`` object itself is an instance of :class:`TensorType`. PyTensor knows what type of variable ``x`` is because ``x.type`` -points back to ``at.fmatrix``. +points back to ``pt.fmatrix``. This section explains the various ways in which a tensor variable can be created, the attributes and methods of :class:`TensorVariable` and :class:`TensorType`, @@ -45,9 +45,9 @@ and all of these constructors accept an optional ``name`` argument. For example, the following each produce a `TensorVariable` instance that stands for a 0-dimensional `ndarray` of integers with the name ``'myvar'``: ->>> x = at.scalar('myvar', dtype='int32') ->>> x = at.iscalar('myvar') ->>> x = at.tensor(dtype='int32', shape=(), name='myvar') +>>> x = pt.scalar('myvar', dtype='int32') +>>> x = pt.iscalar('myvar') +>>> x = pt.tensor(dtype='int32', shape=(), name='myvar') >>> from pytensor.tensor.type import TensorType >>> x = TensorType(dtype='int32', shape=())('myvar') @@ -119,9 +119,9 @@ They are all callable, and accept an optional ``name`` argument. So for example .. testcode:: constructors - x = at.dmatrix() # creates one Variable with no name - x = at.dmatrix('x') # creates one Variable with name 'x' - xyz = at.dmatrix('xyz') # creates one Variable with name 'xyz' + x = pt.dmatrix() # creates one Variable with no name + x = pt.dmatrix('x') # creates one Variable with name 'x' + xyz = pt.dmatrix('xyz') # creates one Variable with name 'xyz' .. #COMMENT table generated by @@ -247,9 +247,9 @@ name. For example: .. 
testcode:: constructors # Creates three matrix `Variable`s with no names - x, y, z = at.dmatrices(3) + x, y, z = pt.dmatrices(3) # Creates three matrix `Variables` named 'x', 'y' and 'z' - x, y, z = at.dmatrices('x', 'y', 'z') + x, y, z = pt.dmatrices('x', 'y', 'z') Custom tensor types @@ -547,7 +547,7 @@ them perfectly, but a `dscalar` otherwise. Transpose of this tensor. - >>> x = at.zmatrix() + >>> x = pt.zmatrix() >>> y = 3+.2j * x.T .. method:: {any,all}(axis=None, keepdims=False) @@ -843,10 +843,10 @@ Creating Tensors :param tensors: one or more tensors of the same rank :returns: A tensor such that ``rval[0] == tensors[0]``, ``rval[1] == tensors[1]``, etc. - >>> x0 = at.scalar() - >>> x1 = at.scalar() - >>> x2 = at.scalar() - >>> x = at.stack(x0, x1, x2) + >>> x0 = pt.scalar() + >>> x1 = pt.scalar() + >>> x2 = pt.scalar() + >>> x = pt.stack(x0, x1, x2) >>> x.ndim # x is a vector of length 3. 1 @@ -859,10 +859,10 @@ Creating Tensors :param axis: Tensors will be joined along this axis, so they may have different ``shape[axis]`` - >>> x0 = at.fmatrix() - >>> x1 = at.ftensor3() - >>> x2 = at.fvector() - >>> x = at.concatenate([x0, x1[0], at.shape_padright(x2)], axis=1) + >>> x0 = pt.fmatrix() + >>> x1 = pt.ftensor3() + >>> x2 = pt.fvector() + >>> x = pt.concatenate([x0, x1[0], pt.shape_padright(x2)], axis=1) >>> x.ndim 2 @@ -1166,7 +1166,7 @@ Operator Support Many Python operators are supported. ->>> a, b = at.itensor3(), at.itensor3() # example inputs +>>> a, b = pt.itensor3(), pt.itensor3() # example inputs Arithmetic -------------- @@ -1174,13 +1174,13 @@ Arithmetic .. doctest:: :options: +SKIP - >>> a + 3 # at.add(a, 3) -> itensor3 - >>> 3 - a # at.sub(3, a) - >>> a * 3.5 # at.mul(a, 3.5) -> ftensor3 or dtensor3 (depending on casting) - >>> 2.2 / a # at.truediv(2.2, a) - >>> 2.2 // a # at.intdiv(2.2, a) - >>> 2.2**a # at.pow(2.2, a) - >>> b % a # at.mod(b, a) + >>> a + 3 # pt.add(a, 3) -> itensor3 + >>> 3 - a # pt.sub(3, a) + >>> a * 3.5 # pt.mul(a, 3.5) -> ftensor3 or dtensor3 (depending on casting) + >>> 2.2 / a # pt.truediv(2.2, a) + >>> 2.2 // a # pt.intdiv(2.2, a) + >>> 2.2**a # pt.pow(2.2, a) + >>> b % a # pt.mod(b, a) Bitwise ------------- @@ -1188,10 +1188,10 @@ Bitwise .. doctest:: :options: +SKIP - >>> a & b # at.and_(a,b) bitwise and (alias at.bitwise_and) - >>> a ^ 1 # at.xor(a,1) bitwise xor (alias at.bitwise_xor) - >>> a | b # at.or_(a,b) bitwise or (alias at.bitwise_or) - >>> ~a # at.invert(a) bitwise invert (alias at.bitwise_not) + >>> a & b # pt.and_(a,b) bitwise and (alias pt.bitwise_and) + >>> a ^ 1 # pt.xor(a,1) bitwise xor (alias pt.bitwise_xor) + >>> a | b # pt.or_(a,b) bitwise or (alias pt.bitwise_or) + >>> ~a # pt.invert(a) bitwise invert (alias pt.bitwise_not) Inplace ------- @@ -1220,9 +1220,9 @@ Casting .. testcode:: cast - import pytensor.tensor as at - x = at.matrix() - x_as_int = at.cast(x, 'int32') + import pytensor.tensor as pt + x = pt.matrix() + x_as_int = pt.cast(x, 'int32') Attempting to casting a complex value to a real value is ambiguous and will raise an exception. Use `real`, `imag`, `abs`, or `angle`. @@ -1256,9 +1256,9 @@ The six usual equality and inequality operators share the same interface. .. testcode:: oper - import pytensor.tensor as at - x,y = at.dmatrices('x','y') - z = at.le(x,y) + import pytensor.tensor as pt + x,y = pt.dmatrices('x','y') + z = pt.le(x,y) .. function:: lt(a, b) @@ -1349,10 +1349,10 @@ Condition .. 
testcode:: switch - import pytensor.tensor as at - a,b = at.dmatrices('a','b') - x,y = at.dmatrices('x','y') - z = at.switch(at.lt(a,b), x, y) + import pytensor.tensor as pt + a,b = pt.dmatrices('a','b') + x,y = pt.dmatrices('x','y') + z = pt.switch(pt.lt(a,b), x, y) .. function:: where(cond, ift, iff) @@ -1420,8 +1420,8 @@ Here is an example using the bit-wise ``and_`` via the ``&`` operator: .. testcode:: bitwise - import pytensor.tensor as at - x,y = at.imatrices('x','y') + import pytensor.tensor as pt + x,y = pt.imatrices('x','y') z = x & y @@ -1667,8 +1667,8 @@ Linear Algebra Returns a tensor of size e.g. if it is 3D: (dim1, dim3, dim4) Example: - >>> first = at.tensor3('first') - >>> second = at.tensor3('second') + >>> first = pt.tensor3('first') + >>> second = pt.tensor3('second') >>> result = batched_dot(first, second) :note: This is a subset of `numpy.einsum`, but we do not provide it for now. @@ -1725,7 +1725,7 @@ Linear Algebra Example: - >>> a = at.mgrid[0:5, 0:3] + >>> a = pt.mgrid[0:5, 0:3] >>> a[0].eval() array([[0, 0, 0], [1, 1, 1], @@ -1749,7 +1749,7 @@ Linear Algebra Example: - >>> b = at.ogrid[0:5, 0:3] + >>> b = pt.ogrid[0:5, 0:3] >>> b[0].eval() array([[0], [1], diff --git a/doc/library/tensor/fft.rst b/doc/library/tensor/fft.rst index 1835dde2b5..de20c19442 100644 --- a/doc/library/tensor/fft.rst +++ b/doc/library/tensor/fft.rst @@ -24,10 +24,10 @@ oscillates due to the box function being shifted to the middle of the array. import numpy as np import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt from pytensor.tensor import fft - x = at.matrix('x', dtype='float64') + x = pt.matrix('x', dtype='float64') rfft = fft.rfft(x, norm='ortho') f_rfft = pytensor.function([x], rfft) diff --git a/doc/sandbox/logistic_regression_example.rst b/doc/sandbox/logistic_regression_example.rst index 73a1e7fa0c..1631dcce1e 100644 --- a/doc/sandbox/logistic_regression_example.rst +++ b/doc/sandbox/logistic_regression_example.rst @@ -13,25 +13,25 @@ BUT, YOU GOTTA RUN THIS CODE AND MAKE SURE IT STILL WORKS NICELY, HEY? 
def build_logistic_regression_model(n_in, n_out, l2_coef=30.0) # DECLARE SOME VARIABLES - import tensor as at + import pytensor.tensor as pt - x = at.matrix() #our points, one point per row - y = at.matrix() #store our labels as place codes (label 3 of 5 is vector [00100]) + x = pt.matrix() #our points, one point per row + y = pt.matrix() #store our labels as place codes (label 3 of 5 is vector [00100]) - w = at.matrix() #the linear transform to apply to our input points - b = at.vector() #a vector of biases, which make our transform affine instead of linear + w = pt.matrix() #the linear transform to apply to our input points + b = pt.vector() #a vector of biases, which make our transform affine instead of linear - stepsize = at.scalar('stepsize') # a stepsize for gradient descent + stepsize = pt.scalar('stepsize') # a stepsize for gradient descent # REGRESSION MODEL AND COSTS TO MINIMIZE - prediction = at.softmax(at.dot(x, w) + b) - cross_entropy = at.sum(y * at.log(prediction), axis=1) - cost = at.sum(cross_entropy) + l2_coef * at.sum(at.sum(w*w)) + prediction = pt.softmax(pt.dot(x, w) + b) + cross_entropy = pt.sum(y * pt.log(prediction), axis=1) + cost = pt.sum(cross_entropy) + l2_coef * pt.sum(pt.sum(w*w)) # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS - grad_w, grad_b = at.grad(cost, [w, b]) + grad_w, grad_b = pt.grad(cost, [w, b]) # # GET THE GRADIENTS NECESSARY TO FIT OUR PARAMETERS diff --git a/doc/tutorial/adding.rst b/doc/tutorial/adding.rst index 1b39b0b90a..d558217dc7 100644 --- a/doc/tutorial/adding.rst +++ b/doc/tutorial/adding.rst @@ -12,10 +12,10 @@ let's make a simple function: add two numbers together. Here is how you do it: >>> import numpy ->>> import pytensor.tensor as at +>>> import pytensor.tensor as pt >>> from pytensor import function ->>> x = at.dscalar('x') ->>> y = at.dscalar('y') +>>> x = pt.dscalar('x') +>>> y = pt.dscalar('y') >>> z = x + y >>> f = function([x, y], z) @@ -55,10 +55,10 @@ instruction. Behind the scene, *f* was being compiled into C code. **Step 1** ->>> x = at.dscalar('x') ->>> y = at.dscalar('y') +>>> x = pt.dscalar('x') +>>> y = pt.dscalar('y') -In PyTensor, all symbols must be typed. In particular, ``at.dscalar`` +In PyTensor, all symbols must be typed. In particular, ``pt.dscalar`` is the type we assign to "0-dimensional arrays (`scalar`) of doubles (`d`)". It is an PyTensor :ref:`type`. @@ -72,12 +72,12 @@ field, as you can see here: >>> x.type TensorType(float64, ()) ->>> at.dscalar +>>> pt.dscalar TensorType(float64, ()) ->>> x.type is at.dscalar +>>> x.type is pt.dscalar True -By calling ``at.dscalar`` with a string argument, you create a +By calling ``pt.dscalar`` with a string argument, you create a *Variable* representing a floating-point scalar quantity with the given name. If you provide no argument, the symbol will be unnamed. Names are not required, but they can help debugging. @@ -124,9 +124,9 @@ then be used like a normal Python function. you to import :func:`function` . Here is how :func:`eval` works: >>> import numpy - >>> import pytensor.tensor as at - >>> x = at.dscalar('x') - >>> y = at.dscalar('y') + >>> import pytensor.tensor as pt + >>> x = pt.dscalar('x') + >>> y = pt.dscalar('y') >>> z = x + y >>> numpy.allclose(z.eval({x : 16.3, y : 12.1}), 28.4) True @@ -149,8 +149,8 @@ You might already have guessed how to do this. 
Indeed, the only change from the previous example is that you need to instantiate *x* and *y* using the matrix Types: ->>> x = at.dmatrix('x') ->>> y = at.dmatrix('y') +>>> x = pt.dmatrix('x') +>>> y = pt.dmatrix('y') >>> z = x + y >>> f = function([x, y], z) diff --git a/doc/tutorial/aliasing.rst b/doc/tutorial/aliasing.rst index 35bcf9e697..3de6b4e305 100644 --- a/doc/tutorial/aliasing.rst +++ b/doc/tutorial/aliasing.rst @@ -232,10 +232,10 @@ that control how ``pytensor.function`` handles its argument[s] and return value[ .. testcode:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt from pytensor.compile.io import In, Out - x = at.matrix() + x = pt.matrix() y = 2 * x f = pytensor.function([In(x, borrow=True)], Out(y, borrow=True)) diff --git a/doc/tutorial/broadcasting.rst b/doc/tutorial/broadcasting.rst index 06700c18aa..6bf0c55e33 100644 --- a/doc/tutorial/broadcasting.rst +++ b/doc/tutorial/broadcasting.rst @@ -2,7 +2,7 @@ import numpy as np import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt .. _tutbroadcasting: @@ -39,10 +39,10 @@ information is given in the :ref:`type` of a *Variable*. The following code illustrates how rows and columns are broadcasted in order to perform an addition operation with a matrix: ->>> r = at.row() +>>> r = pt.row() >>> r.broadcastable (True, False) ->>> mtr = at.matrix() +>>> mtr = pt.matrix() >>> mtr.broadcastable (False, False) >>> f_row = pytensor.function([r, mtr], [r + mtr]) @@ -58,7 +58,7 @@ array([[0, 1, 2], [array([[ 0., 2., 4.], [ 3., 5., 7.], [ 6., 8., 10.]])] ->>> c = at.col() +>>> c = pt.col() >>> c.broadcastable (False, True) >>> f_col = pytensor.function([c, mtr], [c + mtr]) diff --git a/doc/tutorial/conditions.rst b/doc/tutorial/conditions.rst index b7ab188e83..3382bfe7fb 100644 --- a/doc/tutorial/conditions.rst +++ b/doc/tutorial/conditions.rst @@ -20,15 +20,15 @@ IfElse vs Switch .. testcode:: - from pytensor import tensor as at + from pytensor import tensor as pt from pytensor.ifelse import ifelse import pytensor, time, numpy - a,b = at.scalars('a', 'b') - x,y = at.matrices('x', 'y') + a,b = pt.scalars('a', 'b') + x,y = pt.matrices('x', 'y') - z_switch = at.switch(at.lt(a, b), at.mean(x), at.mean(y)) - z_lazy = ifelse(at.lt(a, b), at.mean(x), at.mean(y)) + z_switch = pt.switch(pt.lt(a, b), pt.mean(x), pt.mean(y)) + z_lazy = ifelse(pt.lt(a, b), pt.mean(x), pt.mean(y)) f_switch = pytensor.function([a, b, x, y], z_switch, mode=pytensor.compile.mode.Mode(linker='vm')) diff --git a/doc/tutorial/debug_faq.rst b/doc/tutorial/debug_faq.rst index a5ae5b2fa3..74c8b49e5e 100644 --- a/doc/tutorial/debug_faq.rst +++ b/doc/tutorial/debug_faq.rst @@ -27,10 +27,10 @@ messages. Consider the following faulty code. import numpy as np import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt - x = at.vector() - y = at.vector() + x = pt.vector() + y = pt.vector() z = x + x z = z + y f = pytensor.function([x, y], z) @@ -102,7 +102,7 @@ following example. Here, we use ``exception_verbosity=high`` and import numpy as np import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt # compute_test_value is 'off' by default, meaning this feature is inactive pytensor.config.compute_test_value = 'off' # Use 'warn' to activate this feature @@ -114,7 +114,7 @@ following example. 
Here, we use ``exception_verbosity=high`` and W2 = pytensor.shared(W2val, 'W2') # input which will be of shape (5,10) - x = at.matrix('x') + x = pt.matrix('x') # provide PyTensor with a default test-value #x.tag.test_value = np.random.random((5, 10)) @@ -123,10 +123,10 @@ following example. Here, we use ``exception_verbosity=high`` and func_of_W1 = W1.dimshuffle(2, 0, 1).flatten(2).T # source of error: dot product of 5x10 with 20x10 - h1 = at.dot(x, func_of_W1) + h1 = pt.dot(x, func_of_W1) # do more stuff - h2 = at.dot(h1, W2.T) + h2 = pt.dot(h1, W2.T) # compile and call the actual function f = pytensor.function([x], h2) @@ -171,7 +171,7 @@ so slightly, we can get PyTensor to reveal the exact source of the error. ... # Input which will have the shape (5, 10) - x = at.matrix('x') + x = pt.matrix('x') # Provide PyTensor with a default test-value x.tag.test_value = np.random.random((5, 10)) @@ -186,7 +186,7 @@ following error message, which properly identifies *line 24* as the culprit. Traceback (most recent call last): File "test2.py", line 24, in - h1 = at.dot(x, func_of_W1) + h1 = pt.dot(x, func_of_W1) File "PATH_TO_PYTENSOR/pytensor/tensor/basic.py", line 4734, in dot return _dot(a, b) File "PATH_TO_PYTENSOR/pytensor/graph/op.py", line 545, in __call__ @@ -224,12 +224,12 @@ It is also possible to override variables ``__repr__`` method to have them retur .. testsetup:: printtestvalue import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt .. testcode:: printtestvalue - x = at.scalar('x') + x = pt.scalar('x') # Assigning test value x.tag.test_value = 42 @@ -461,10 +461,10 @@ Consider this example script (``ex.py``): import numpy as np import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt - a = at.dmatrix('a') - b = at.dmatrix('b') + a = pt.dmatrix('a') + b = pt.dmatrix('b') f = pytensor.function([a, b], [a * b]) diff --git a/doc/tutorial/examples.rst b/doc/tutorial/examples.rst index 7937f951aa..51ea8496b2 100644 --- a/doc/tutorial/examples.rst +++ b/doc/tutorial/examples.rst @@ -41,9 +41,9 @@ Well, what you do is this: .. tests/test_tutorial.py:T_examples.test_examples_1 >>> import pytensor ->>> import pytensor.tensor as at ->>> x = at.dmatrix('x') ->>> s = 1 / (1 + at.exp(-x)) +>>> import pytensor.tensor as pt +>>> x = pt.dmatrix('x') +>>> s = 1 / (1 + pt.exp(-x)) >>> logistic = pytensor.function([x], s) >>> logistic([[0, 1], [-1, -2]]) array([[ 0.5 , 0.73105858], @@ -64,7 +64,7 @@ We can verify that this alternate form produces the same values: .. If you modify this code, also change : .. tests/test_tutorial.py:T_examples.test_examples_2 ->>> s2 = (1 + at.tanh(x / 2)) / 2 +>>> s2 = (1 + pt.tanh(x / 2)) / 2 >>> logistic2 = pytensor.function([x], s2) >>> logistic2([[0, 1], [-1, -2]]) array([[ 0.5 , 0.73105858], @@ -81,7 +81,7 @@ squared difference between two matrices ``a`` and ``b`` at the same time: .. If you modify this code, also change : .. tests/test_tutorial.py:T_examples.test_examples_3 ->>> a, b = at.dmatrices('a', 'b') +>>> a, b = pt.dmatrices('a', 'b') >>> diff = a - b >>> abs_diff = abs(diff) >>> diff_squared = diff**2 @@ -114,7 +114,7 @@ one. You can do it like this: >>> from pytensor.compile.io import In >>> from pytensor import function ->>> x, y = at.dscalars('x', 'y') +>>> x, y = pt.dscalars('x', 'y') >>> z = x + y >>> f = function([x, In(y, value=1)], z) >>> f(33) @@ -135,7 +135,7 @@ parameters can be set positionally or by name, as in standard Python: .. If you modify this code, also change : .. 
tests/test_tutorial.py:T_examples.test_examples_7 ->>> x, y, w = at.dscalars('x', 'y', 'w') +>>> x, y, w = pt.dscalars('x', 'y', 'w') >>> z = (x + y) * w >>> f = function([x, In(y, value=1), In(w, value=2, name='w_by_name')], z) >>> f(33) @@ -180,7 +180,7 @@ internal state and returns the old state value. >>> from pytensor import shared >>> state = shared(0) ->>> inc = at.iscalar('inc') +>>> inc = pt.iscalar('inc') >>> accumulator = function([inc], state, updates=[(state, state+inc)]) This code introduces a few new concepts. The ``shared`` function constructs @@ -255,7 +255,7 @@ for the purpose of one particular function. >>> fn_of_state = state * 2 + inc >>> # The type of foo must match the shared variable we are replacing >>> # with the ``givens`` ->>> foo = at.scalar(dtype=state.dtype) +>>> foo = pt.scalar(dtype=state.dtype) >>> skip_shared = function([inc, foo], fn_of_state, givens=[(state, foo)]) >>> skip_shared(1, 3) # we're using 3 for the state, not state.value array(7) @@ -298,9 +298,9 @@ needs to be performed once. Let's start from the accumulator defined above: >>> import pytensor ->>> import pytensor.tensor as at +>>> import pytensor.tensor as pt >>> state = pytensor.shared(0) ->>> inc = at.iscalar('inc') +>>> inc = pt.iscalar('inc') >>> accumulator = pytensor.function([inc], state, updates=[(state, state+inc)]) We can use it to increment the state as usual: @@ -461,7 +461,7 @@ It will be used repeatedly. import numpy as np import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt rng = np.random.default_rng(2882) @@ -474,8 +474,8 @@ It will be used repeatedly. training_steps = 10000 # Declare PyTensor symbolic variables - x = at.dmatrix("x") - y = at.dvector("y") + x = pt.dmatrix("x") + y = pt.dvector("y") # initialize the weight vector w randomly # @@ -492,11 +492,11 @@ It will be used repeatedly. print(b.get_value()) # Construct PyTensor expression graph - p_1 = 1 / (1 + at.exp(-at.dot(x, w) - b)) # Probability that target = 1 + p_1 = 1 / (1 + pt.exp(-pt.dot(x, w) - b)) # Probability that target = 1 prediction = p_1 > 0.5 # The prediction thresholded - xent = -y * at.log(p_1) - (1-y) * at.log(1-p_1) # Cross-entropy loss function + xent = -y * pt.log(p_1) - (1-y) * pt.log(1-p_1) # Cross-entropy loss function cost = xent.mean() + 0.01 * (w ** 2).sum() # The cost to minimize - gw, gb = at.grad(cost, [w, b]) # Compute the gradient of the cost + gw, gb = pt.grad(cost, [w, b]) # Compute the gradient of the cost # w.r.t weight vector w and # bias term b (we shall # return to this in a diff --git a/doc/tutorial/gradients.rst b/doc/tutorial/gradients.rst index 0d573b86df..28cdda7165 100644 --- a/doc/tutorial/gradients.rst +++ b/doc/tutorial/gradients.rst @@ -11,7 +11,7 @@ Computing Gradients Now let's use PyTensor for a slightly more sophisticated task: create a function which computes the derivative of some expression ``y`` with -respect to its parameter ``x``. To do this we will use the macro `at.grad`. +respect to its parameter ``x``. To do this we will use the macro `pt.grad`. For instance, we can compute the gradient of :math:`x^2` with respect to :math:`x`. Note that: :math:`d(x^2)/dx = 2 \cdot x`. 
@@ -22,11 +22,11 @@ Here is the code to compute this gradient: >>> import numpy >>> import pytensor ->>> import pytensor.tensor as at +>>> import pytensor.tensor as pt >>> from pytensor import pp ->>> x = at.dscalar('x') +>>> x = pt.dscalar('x') >>> y = x ** 2 ->>> gy = at.grad(y, x) +>>> gy = pt.grad(y, x) >>> pp(gy) # print out the gradient prior to optimization '((fill((x ** TensorConstant{2}), TensorConstant{1.0}) * TensorConstant{2}) * (x ** (TensorConstant{2} - TensorConstant{1})))' >>> f = pytensor.function([x], gy) @@ -64,30 +64,30 @@ logistic is: :math:`ds(x)/dx = s(x) \cdot (1 - s(x))`. .. If you modify this code, also change : .. tests/test_tutorial.py:T_examples.test_examples_5 ->>> x = at.dmatrix('x') ->>> s = at.sum(1 / (1 + at.exp(-x))) ->>> gs = at.grad(s, x) +>>> x = pt.dmatrix('x') +>>> s = pt.sum(1 / (1 + pt.exp(-x))) +>>> gs = pt.grad(s, x) >>> dlogistic = pytensor.function([x], gs) >>> dlogistic([[0, 1], [-1, -2]]) array([[ 0.25 , 0.19661193], [ 0.19661193, 0.10499359]]) -In general, for any **scalar** expression ``s``, ``at.grad(s, w)`` provides +In general, for any **scalar** expression ``s``, ``pt.grad(s, w)`` provides the PyTensor expression for computing :math:`\frac{\partial s}{\partial w}`. In this way PyTensor can be used for doing **efficient** symbolic differentiation -(as the expression returned by `at.grad` will be optimized during compilation), even for +(as the expression returned by `pt.grad` will be optimized during compilation), even for function with many inputs. (see `automatic differentiation `_ for a description of symbolic differentiation). .. note:: - The second argument of `at.grad` can be a list, in which case the + The second argument of `pt.grad` can be a list, in which case the output is also a list. The order in both lists is important: element ``i`` of the output list is the gradient of the first argument of - `at.grad` with respect to the ``i``-th element of the list given as second argument. - The first argument of `at.grad` has to be a scalar (a tensor + `pt.grad` with respect to the ``i``-th element of the list given as second argument. + The first argument of `pt.grad` has to be a scalar (a tensor of size 1). For more information on the semantics of the arguments of - `at.grad` and details about the implementation, see + `pt.grad` and details about the implementation, see :ref:`this` section of the library. Additional information on the inner workings of differentiation may also be @@ -117,25 +117,25 @@ do is to loop over the entries in ``y`` and compute the gradient of shall return to :ref:`scan` later in this tutorial. >>> import pytensor ->>> import pytensor.tensor as at ->>> x = at.dvector('x') +>>> import pytensor.tensor as pt +>>> x = pt.dvector('x') >>> y = x ** 2 ->>> J, updates = pytensor.scan(lambda i, y, x : at.grad(y[i], x), sequences=at.arange(y.shape[0]), non_sequences=[y, x]) +>>> J, updates = pytensor.scan(lambda i, y, x : pt.grad(y[i], x), sequences=pt.arange(y.shape[0]), non_sequences=[y, x]) >>> f = pytensor.function([x], J, updates=updates) >>> f([4, 4]) array([[ 8., 0.], [ 0., 8.]]) What we do in this code is to generate a sequence of integers from ``0`` to -``y.shape[0]`` using `at.arange`. Then we loop through this sequence, and +``y.shape[0]`` using `pt.arange`. Then we loop through this sequence, and at each step, we compute the gradient of element ``y[i]`` with respect to ``x``. `scan` automatically concatenates all these rows, generating a matrix which corresponds to the Jacobian. .. 
note:: - There are some pitfalls to be aware of regarding `at.grad`. One of them is that you + There are some pitfalls to be aware of regarding `pt.grad`. One of them is that you cannot re-write the above expression of the Jacobian as - ``pytensor.scan(lambda y_i,x: at.grad(y_i,x), sequences=y, non_sequences=x)``, + ``pytensor.scan(lambda y_i,x: pt.grad(y_i,x), sequences=y, non_sequences=x)``, even though from the documentation of scan this seems possible. The reason is that ``y_i`` will not be a function of ``x`` anymore, while ``y[i]`` still is. @@ -152,14 +152,14 @@ to do it manually. You can compute the Hessian manually similarly to the Jacobian. The only difference is that now, instead of computing the Jacobian of some expression -``y``, we compute the Jacobian of ``at.grad(cost,x)``, where ``cost`` is some +``y``, we compute the Jacobian of ``pt.grad(cost,x)``, where ``cost`` is some scalar. ->>> x = at.dvector('x') +>>> x = pt.dvector('x') >>> y = x ** 2 >>> cost = y.sum() ->>> gy = at.grad(cost, x) ->>> H, updates = pytensor.scan(lambda i, gy,x : at.grad(gy[i], x), sequences=at.arange(gy.shape[0]), non_sequences=[gy, x]) +>>> gy = pt.grad(cost, x) +>>> H, updates = pytensor.scan(lambda i, gy,x : pt.grad(gy[i], x), sequences=pt.arange(gy.shape[0]), non_sequences=[gy, x]) >>> f = pytensor.function([x], H, updates=updates) >>> f([4, 4]) array([[ 2., 0.], @@ -195,10 +195,10 @@ form of the operation. In order to evaluate the R-operation of expression ``y``, with respect to ``x``, multiplying the Jacobian with ``V`` you need to do something similar to this: ->>> W = at.dmatrix('W') ->>> V = at.dmatrix('V') ->>> x = at.dvector('x') ->>> y = at.dot(x, W) +>>> W = pt.dmatrix('W') +>>> V = pt.dmatrix('V') +>>> x = pt.dvector('x') +>>> y = pt.dot(x, W) >>> JV = pytensor.gradient.Rop(y, W, V) >>> f = pytensor.function([W, V, x], JV) >>> f([[1, 1], [1, 1]], [[2, 2], [2, 2]], [0,1]) @@ -214,10 +214,10 @@ the Jacobian. The mathematical formula would be :math:`v \frac{\partial f(x)}{\partial x}`. The L-operator is also supported for generic tensors (not only for vectors). Similarly, it can be implemented as follows: ->>> W = at.dmatrix('W') ->>> v = at.dvector('v') ->>> x = at.dvector('x') ->>> y = at.dot(x, W) +>>> W = pt.dmatrix('W') +>>> v = pt.dvector('v') +>>> x = pt.dvector('x') +>>> y = pt.dot(x, W) >>> VJ = pytensor.gradient.Lop(y, W, v) >>> f = pytensor.function([v,x], VJ) >>> f([2, 2], [0, 1]) @@ -246,11 +246,11 @@ Hessian matrix, you have two options that will give you the same result, though these options might exhibit differing performances. 
Hence, we suggest profiling the methods before using either one of the two: ->>> x = at.dvector('x') ->>> v = at.dvector('v') ->>> y = at.sum(x ** 2) ->>> gy = at.grad(y, x) ->>> vH = at.grad(at.sum(gy * v), x) +>>> x = pt.dvector('x') +>>> v = pt.dvector('v') +>>> y = pt.sum(x ** 2) +>>> gy = pt.grad(y, x) +>>> vH = pt.grad(pt.sum(gy * v), x) >>> f = pytensor.function([x, v], vH) >>> f([4, 4], [2, 2]) array([ 4., 4.]) @@ -258,10 +258,10 @@ array([ 4., 4.]) or, making use of the R-operator: ->>> x = at.dvector('x') ->>> v = at.dvector('v') ->>> y = at.sum(x ** 2) ->>> gy = at.grad(y, x) +>>> x = pt.dvector('x') +>>> v = pt.dvector('v') +>>> y = pt.sum(x ** 2) +>>> gy = pt.grad(y, x) >>> Hv = pytensor.gradient.Rop(gy, x, v) >>> f = pytensor.function([x, v], Hv) >>> f([4, 4], [2, 2]) diff --git a/doc/tutorial/index.rst index 06eeceacf1..ec0640ec53 100644 --- a/doc/tutorial/index.rst +++ b/doc/tutorial/index.rst @@ -13,7 +13,7 @@ Several of the symbols you will need to use are in the ``tensor`` subpackage of PyTensor. Let us import that subpackage under a handy name like ``pt`` (the tutorials will frequently use this convention). ->>> import pytensor.tensor as at +>>> import pytensor.tensor as pt If that succeeded you are ready for the tutorial, otherwise check your installation (see :ref:`install`). diff --git a/doc/tutorial/loop.rst index 0ca5079d8f..39a8396946 100644 --- a/doc/tutorial/loop.rst +++ b/doc/tutorial/loop.rst @@ -31,15 +31,15 @@ The full documentation can be found in the library: :ref:`Scan `. .. testcode:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt import numpy as np # defining the tensor variables - X = at.matrix("X") - W = at.matrix("W") - b_sym = at.vector("b_sym") + X = pt.matrix("X") + W = pt.matrix("W") + b_sym = pt.vector("b_sym") - results, updates = pytensor.scan(lambda v: at.tanh(at.dot(v, W) + b_sym), sequences=X) + results, updates = pytensor.scan(lambda v: pt.tanh(pt.dot(v, W) + b_sym), sequences=X) compute_elementwise = pytensor.function(inputs=[X, W, b_sym], outputs=results) # test values @@ -65,19 +65,19 @@ The full documentation can be found in the library: :ref:`Scan `. .. testcode:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt import numpy as np # define tensor variables - X = at.vector("X") - W = at.matrix("W") - b_sym = at.vector("b_sym") - U = at.matrix("U") - Y = at.matrix("Y") - V = at.matrix("V") - P = at.matrix("P") - - results, updates = pytensor.scan(lambda y, p, x_tm1: at.tanh(at.dot(x_tm1, W) + at.dot(y, U) + at.dot(p, V)), + X = pt.vector("X") + W = pt.matrix("W") + b_sym = pt.vector("b_sym") + U = pt.matrix("U") + Y = pt.matrix("Y") + V = pt.matrix("V") + P = pt.matrix("P") + + results, updates = pytensor.scan(lambda y, p, x_tm1: pt.tanh(pt.dot(x_tm1, W) + pt.dot(y, U) + pt.dot(p, V)), sequences=[Y, P[::-1]], outputs_info=[X]) compute_seq = pytensor.function(inputs=[X, W, Y, U, P, V], outputs=results) @@ -119,12 +119,12 @@ The full documentation can be found in the library: :ref:`Scan `. ..
testcode:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt import numpy as np # define tensor variable - X = at.matrix("X") - results, updates = pytensor.scan(lambda x_i: at.sqrt((x_i ** 2).sum()), sequences=[X]) + X = pt.matrix("X") + results, updates = pytensor.scan(lambda x_i: pt.sqrt((x_i ** 2).sum()), sequences=[X]) compute_norm_lines = pytensor.function(inputs=[X], outputs=results) # test value @@ -144,12 +144,12 @@ The full documentation can be found in the library: :ref:`Scan `. .. testcode:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt import numpy as np # define tensor variable - X = at.matrix("X") - results, updates = pytensor.scan(lambda x_i: at.sqrt((x_i ** 2).sum()), sequences=[X.T]) + X = pt.matrix("X") + results, updates = pytensor.scan(lambda x_i: pt.sqrt((x_i ** 2).sum()), sequences=[X.T]) compute_norm_cols = pytensor.function(inputs=[X], outputs=results) # test value @@ -169,14 +169,14 @@ The full documentation can be found in the library: :ref:`Scan `. .. testcode:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt import numpy as np floatX = "float32" # define tensor variable - X = at.matrix("X") - results, updates = pytensor.scan(lambda i, j, t_f: at.cast(X[i, j] + t_f, floatX), - sequences=[at.arange(X.shape[0]), at.arange(X.shape[1])], + X = pt.matrix("X") + results, updates = pytensor.scan(lambda i, j, t_f: pt.cast(X[i, j] + t_f, floatX), + sequences=[pt.arange(X.shape[0]), pt.arange(X.shape[1])], outputs_info=np.asarray(0., dtype=floatX)) result = results[-1] compute_trace = pytensor.function(inputs=[X], outputs=result) @@ -200,18 +200,18 @@ The full documentation can be found in the library: :ref:`Scan `. .. testcode:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt import numpy as np # define tensor variables - X = at.matrix("X") - W = at.matrix("W") - b_sym = at.vector("b_sym") - U = at.matrix("U") - V = at.matrix("V") - n_sym = at.iscalar("n_sym") - - results, updates = pytensor.scan(lambda x_tm2, x_tm1: at.dot(x_tm2, U) + at.dot(x_tm1, V) + at.tanh(at.dot(x_tm1, W) + b_sym), + X = pt.matrix("X") + W = pt.matrix("W") + b_sym = pt.vector("b_sym") + U = pt.matrix("U") + V = pt.matrix("V") + n_sym = pt.iscalar("n_sym") + + results, updates = pytensor.scan(lambda x_tm2, x_tm1: pt.dot(x_tm2, U) + pt.dot(x_tm1, V) + pt.tanh(pt.dot(x_tm1, W) + b_sym), n_steps=n_sym, outputs_info=[dict(initial=X, taps=[-2, -1])]) compute_seq2 = pytensor.function(inputs=[X, U, V, W, b_sym, n_sym], outputs=results) @@ -265,14 +265,14 @@ The full documentation can be found in the library: :ref:`Scan `. .. testcode:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt import numpy as np # define tensor variables - v = at.vector() - A = at.matrix() - y = at.tanh(at.dot(v, A)) - results, updates = pytensor.scan(lambda i: at.grad(y[i], v), sequences=[at.arange(y.shape[0])]) + v = pt.vector() + A = pt.matrix() + y = pt.tanh(pt.dot(v, A)) + results, updates = pytensor.scan(lambda i: pt.grad(y[i], v), sequences=[pt.arange(y.shape[0])]) compute_jac_t = pytensor.function([A, v], results, allow_input_downcast=True) # shape (d_out, d_in) # test values @@ -300,12 +300,12 @@ Note that we need to iterate over the indices of ``y`` and not over the elements .. 
testcode:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt import numpy as np # define shared variables k = pytensor.shared(0) - n_sym = at.iscalar("n_sym") + n_sym = pt.iscalar("n_sym") results, updates = pytensor.scan(lambda:{k:(k + 1)}, n_steps=n_sym) accumulator = pytensor.function([n_sym], [], updates=updates, allow_input_downcast=True) @@ -319,19 +319,19 @@ Note that we need to iterate over the indices of ``y`` and not over the elements .. testcode:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt import numpy as np # define tensor variables - X = at.matrix("X") - W = at.matrix("W") - b_sym = at.vector("b_sym") + X = pt.matrix("X") + W = pt.matrix("W") + b_sym = pt.vector("b_sym") # define shared random stream trng = pytensor.tensor.random.utils.RandomStream(1234) d=trng.binomial(size=W[1].shape) - results, updates = pytensor.scan(lambda v: at.tanh(at.dot(v, W) + b_sym) * d, sequences=X) + results, updates = pytensor.scan(lambda v: pt.tanh(pt.dot(v, W) + b_sym) * d, sequences=X) compute_with_bnoise = pytensor.function(inputs=[X, W, b_sym], outputs=results, updates=updates, allow_input_downcast=True) x = np.eye(10, 2, dtype=pytensor.config.floatX) @@ -360,17 +360,17 @@ Note that if you want to use a random variable ``d`` that will not be updated th .. testcode:: import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt - k = at.iscalar("k") - A = at.vector("A") + k = pt.iscalar("k") + A = pt.vector("A") def inner_fct(prior_result, B): return prior_result * B # Symbolic description of the result result, updates = pytensor.scan(fn=inner_fct, - outputs_info=at.ones_like(A), + outputs_info=pt.ones_like(A), non_sequences=A, n_steps=k) # Scan has provided us with A ** 1 through A ** k. Keep only the last @@ -393,10 +393,10 @@ Note that if you want to use a random variable ``d`` that will not be updated th import numpy import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt coefficients = pytensor.tensor.vector("coefficients") - x = at.scalar("x") + x = pt.scalar("x") max_coefficients_supported = 10000 # Generate the components of the polynomial diff --git a/doc/tutorial/loop_solution_1.py b/doc/tutorial/loop_solution_1.py index 0c108d8b32..b0d6a3941c 100755 --- a/doc/tutorial/loop_solution_1.py +++ b/doc/tutorial/loop_solution_1.py @@ -5,13 +5,13 @@ import numpy as np import pytensor -import pytensor.tensor as at +import pytensor.tensor as pt # 1. First example -k = at.iscalar("k") -A = at.vector("A") +k = pt.iscalar("k") +A = pt.vector("A") def inner_fct(prior_result, A): @@ -19,7 +19,7 @@ def inner_fct(prior_result, A): # Symbolic description of the result result, updates = pytensor.scan(fn=inner_fct, - outputs_info=at.ones_like(A), + outputs_info=pt.ones_like(A), non_sequences=A, n_steps=k) # Scan has provided us with A ** 1 through A ** k. Keep only the last @@ -35,12 +35,12 @@ def inner_fct(prior_result, A): # 2. Second example -coefficients = at.vector("coefficients") -x = at.scalar("x") +coefficients = pt.vector("coefficients") +x = pt.scalar("x") max_coefficients_supported = 10000 # Generate the components of the polynomial -full_range = at.arange(max_coefficients_supported) +full_range = pt.arange(max_coefficients_supported) components, updates = pytensor.scan(fn=lambda coeff, power, free_var: coeff * (free_var ** power), sequences=[coefficients, full_range], @@ -56,15 +56,15 @@ def inner_fct(prior_result, A): # 3. 
Reduction performed inside scan -coefficients = at.vector("coefficients") -x = at.scalar("x") +coefficients = pt.vector("coefficients") +x = pt.scalar("x") max_coefficients_supported = 10000 # Generate the components of the polynomial -full_range = at.arange(max_coefficients_supported) +full_range = pt.arange(max_coefficients_supported) -outputs_info = at.as_tensor_variable(np.asarray(0, 'float64')) +outputs_info = pt.as_tensor_variable(np.asarray(0, 'float64')) components, updates = pytensor.scan(fn=lambda coeff, power, prior_value, free_var: prior_value + (coeff * (free_var ** power)), diff --git a/doc/tutorial/modes.rst b/doc/tutorial/modes.rst index 62c5aa41c3..bb9420ff9c 100644 --- a/doc/tutorial/modes.rst +++ b/doc/tutorial/modes.rst @@ -47,7 +47,7 @@ Consider the logistic regression: import numpy as np import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt rng = np.random.default_rng(2498) @@ -59,19 +59,19 @@ Consider the logistic regression: training_steps = 10000 # Declare PyTensor symbolic variables - x = at.matrix("x") - y = at.vector("y") + x = pt.matrix("x") + y = pt.vector("y") w = pytensor.shared(rng.standard_normal(feats).astype(pytensor.config.floatX), name="w") b = pytensor.shared(np.asarray(0., dtype=pytensor.config.floatX), name="b") x.tag.test_value = D[0] y.tag.test_value = D[1] # Construct PyTensor expression graph - p_1 = 1 / (1 + at.exp(-at.dot(x, w)-b)) # Probability of having a one + p_1 = 1 / (1 + pt.exp(-pt.dot(x, w)-b)) # Probability of having a one prediction = p_1 > 0.5 # The prediction that is done: 0 or 1 - xent = -y*at.log(p_1) - (1-y)*at.log(1-p_1) # Cross-entropy + xent = -y*pt.log(p_1) - (1-y)*pt.log(1-p_1) # Cross-entropy cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize - gw,gb = at.grad(cost, [w,b]) + gw,gb = pt.grad(cost, [w,b]) # Compile expressions to functions train = pytensor.function( @@ -254,7 +254,7 @@ use it only during development. .. 
testcode:: - x = at.dvector('x') + x = pt.dvector('x') f = pytensor.function([x], 10 * x, mode='DebugMode') diff --git a/doc/tutorial/modes_solution_1.py b/doc/tutorial/modes_solution_1.py index 66e23db01a..6868d2a4e8 100755 --- a/doc/tutorial/modes_solution_1.py +++ b/doc/tutorial/modes_solution_1.py @@ -4,7 +4,7 @@ import numpy as np import pytensor -import pytensor.tensor as at +import pytensor.tensor as pt pytensor.config.floatX = "float32" @@ -20,8 +20,8 @@ training_steps = 10000 # Declare PyTensor symbolic variables -x = at.matrix("x") -y = at.vector("y") +x = pt.matrix("x") +y = pt.vector("y") w = pytensor.shared(rng.standard_normal(feats).astype(pytensor.config.floatX), name="w") b = pytensor.shared(np.asarray(0.0, dtype=pytensor.config.floatX), name="b") x.tag.test_value = D[0] @@ -30,11 +30,11 @@ # print w.get_value(), b.get_value() # Construct PyTensor expression graph -p_1 = 1 / (1 + at.exp(-at.dot(x, w) - b)) # Probability of having a one +p_1 = 1 / (1 + pt.exp(-pt.dot(x, w) - b)) # Probability of having a one prediction = p_1 > 0.5 # The prediction that is done: 0 or 1 -xent = -y * at.log(p_1) - (1 - y) * at.log(1 - p_1) # Cross-entropy -cost = at.cast(xent.mean(), "float32") + 0.01 * (w**2).sum() # The cost to optimize -gw, gb = at.grad(cost, [w, b]) +xent = -y * pt.log(p_1) - (1 - y) * pt.log(1 - p_1) # Cross-entropy +cost = pt.cast(xent.mean(), "float32") + 0.01 * (w**2).sum() # The cost to optimize +gw, gb = pt.grad(cost, [w, b]) # Compile expressions to functions train = pytensor.function( diff --git a/doc/tutorial/printing_drawing.rst b/doc/tutorial/printing_drawing.rst index 23236cc45f..93e77f25f1 100644 --- a/doc/tutorial/printing_drawing.rst +++ b/doc/tutorial/printing_drawing.rst @@ -26,7 +26,7 @@ Consider again the logistic regression example: >>> import numpy as np >>> import pytensor ->>> import pytensor.tensor as at +>>> import pytensor.tensor as pt >>> rng = np.random.default_rng(2382) >>> # Training data >>> N = 400 @@ -34,19 +34,19 @@ Consider again the logistic regression example: >>> D = (rng.standard_normal(N, feats).astype(pytensor.config.floatX), rng.integers(size=N,low=0, high=2).astype(pytensor.config.floatX)) >>> training_steps = 10000 >>> # Declare PyTensor symbolic variables ->>> x = at.matrix("x") ->>> y = at.vector("y") +>>> x = pt.matrix("x") +>>> y = pt.vector("y") >>> w = pytensor.shared(rng.standard_normal(feats).astype(pytensor.config.floatX), name="w") >>> b = pytensor.shared(np.asarray(0., dtype=pytensor.config.floatX), name="b") >>> x.tag.test_value = D[0] >>> y.tag.test_value = D[1] >>> # Construct PyTensor expression graph ->>> p_1 = 1 / (1 + at.exp(-at.dot(x, w)-b)) # Probability of having a one +>>> p_1 = 1 / (1 + pt.exp(-pt.dot(x, w)-b)) # Probability of having a one >>> prediction = p_1 > 0.5 # The prediction that is done: 0 or 1 >>> # Compute gradients ->>> xent = -y*at.log(p_1) - (1-y)*at.log(1-p_1) # Cross-entropy +>>> xent = -y*pt.log(p_1) - (1-y)*pt.log(1-p_1) # Cross-entropy >>> cost = xent.mean() + 0.01*(w**2).sum() # The cost to optimize ->>> gw,gb = at.grad(cost, [w,b]) +>>> gw,gb = pt.grad(cost, [w,b]) >>> # Training and prediction function >>> train = pytensor.function(inputs=[x,y], outputs=[prediction, xent], updates=[[w, w-0.01*gw], [b, b-0.01*gb]], name = "train") >>> predict = pytensor.function(inputs=[x], outputs=prediction, name = "predict") diff --git a/pytensor/breakpoint.py b/pytensor/breakpoint.py index 783f9d57de..f81711f39b 100644 --- a/pytensor/breakpoint.py +++ b/pytensor/breakpoint.py @@ -36,11 +36,11 @@ 
class PdbBreakpoint(Op): .. code-block:: python import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt from pytensor.breakpoint import PdbBreakpoint - input = at.fvector() - target = at.fvector() + input = pt.fvector() + target = pt.fvector() # Mean squared error between input and target mse = (input - target) ** 2 @@ -49,7 +49,7 @@ class PdbBreakpoint(Op): # than 100. The breakpoint will monitor the inputs, targets as well # as the individual error values breakpointOp = PdbBreakpoint("MSE too high") - condition = at.gt(mse.sum(), 100) + condition = pt.gt(mse.sum(), 100) mse, monitored_input, monitored_target = breakpointOp(condition, mse, input, target) diff --git a/pytensor/compile/builders.py b/pytensor/compile/builders.py index e02df51d5a..19e0146f8b 100644 --- a/pytensor/compile/builders.py +++ b/pytensor/compile/builders.py @@ -5,7 +5,7 @@ from functools import partial from typing import Optional, cast -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import function from pytensor.compile.function.pfunc import rebuild_collect_shared from pytensor.compile.mode import optdb @@ -205,9 +205,9 @@ class OpFromGraph(Op, HasInnerGraph): .. code-block:: python - from pytensor import function, tensor as at + from pytensor import function, tensor as pt from pytensor.compile.builders import OpFromGraph - x, y, z = at.scalars('xyz') + x, y, z = pt.scalars('xyz') e = x + y * z op = OpFromGraph([x, y, z], [e]) # op behaves like a normal pytensor op @@ -220,10 +220,10 @@ class OpFromGraph(Op, HasInnerGraph): import numpy as np import pytensor - from pytensor import config, function, tensor as at + from pytensor import config, function, tensor as pt from pytensor.compile.builders import OpFromGraph - x, y, z = at.scalars('xyz') + x, y, z = pt.scalars('xyz') s = pytensor.shared(np.random.random((2, 2)).astype(config.floatX)) e = x + y * z + s op = OpFromGraph([x, y, z], [e]) @@ -235,10 +235,10 @@ class OpFromGraph(Op, HasInnerGraph): .. 
code-block:: python - from pytensor import function, tensor as at, grad + from pytensor import function, tensor as pt, grad from pytensor.compile.builders import OpFromGraph - x, y, z = at.scalars('xyz') + x, y, z = pt.scalars('xyz') e = x + y * z def rescale_dy(inps, grads): x, y, z = inps @@ -289,7 +289,7 @@ def _filter_grad_var(grad, inp): if hasattr(inp, "zeros_like"): return inp.zeros_like(), grad else: - return at.constant(0.0), grad + return pt.constant(0.0), grad else: return grad, None diff --git a/pytensor/compile/profiling.py b/pytensor/compile/profiling.py index 78a1a091d0..986f6dc108 100644 --- a/pytensor/compile/profiling.py +++ b/pytensor/compile/profiling.py @@ -1485,65 +1485,65 @@ def print_tips(self, file): file=file, ) - from pytensor import scalar as aes + from pytensor import scalar as ps from pytensor.tensor.elemwise import Elemwise from pytensor.tensor.math import Dot scalar_op_amdlibm_no_speed_up = [ - aes.LT, - aes.GT, - aes.LE, - aes.GE, - aes.EQ, - aes.NEQ, - aes.InRange, - aes.Switch, - aes.OR, - aes.XOR, - aes.AND, - aes.Invert, - aes.ScalarMaximum, - aes.ScalarMinimum, - aes.Add, - aes.Mul, - aes.Sub, - aes.TrueDiv, - aes.IntDiv, - aes.Clip, - aes.Second, - aes.Identity, - aes.Cast, - aes.Sign, - aes.Neg, - aes.Reciprocal, - aes.Sqr, + ps.LT, + ps.GT, + ps.LE, + ps.GE, + ps.EQ, + ps.NEQ, + ps.InRange, + ps.Switch, + ps.OR, + ps.XOR, + ps.AND, + ps.Invert, + ps.ScalarMaximum, + ps.ScalarMinimum, + ps.Add, + ps.Mul, + ps.Sub, + ps.TrueDiv, + ps.IntDiv, + ps.Clip, + ps.Second, + ps.Identity, + ps.Cast, + ps.Sign, + ps.Neg, + ps.Reciprocal, + ps.Sqr, ] scalar_op_amdlibm_speed_up = [ - aes.Mod, - aes.Pow, - aes.Ceil, - aes.Floor, - aes.RoundHalfToEven, - aes.RoundHalfAwayFromZero, - aes.Log, - aes.Log2, - aes.Log10, - aes.Log1p, - aes.Exp, - aes.Sqrt, - aes.Abs, - aes.Cos, - aes.Sin, - aes.Tan, - aes.Tanh, - aes.Cosh, - aes.Sinh, - aes.Sigmoid, - aes.Softplus, + ps.Mod, + ps.Pow, + ps.Ceil, + ps.Floor, + ps.RoundHalfToEven, + ps.RoundHalfAwayFromZero, + ps.Log, + ps.Log2, + ps.Log10, + ps.Log1p, + ps.Exp, + ps.Sqrt, + ps.Abs, + ps.Cos, + ps.Sin, + ps.Tan, + ps.Tanh, + ps.Cosh, + ps.Sinh, + ps.Sigmoid, + ps.Softplus, ] def get_scalar_ops(s): - if isinstance(s, aes.Composite): + if isinstance(s, ps.Composite): l = [] for node in s.fgraph.toposort(): l += get_scalar_ops(node.op) @@ -1552,7 +1552,7 @@ def get_scalar_ops(s): return [s] def list_scalar_op(op): - if isinstance(op.scalar_op, aes.Composite): + if isinstance(op.scalar_op, ps.Composite): return get_scalar_ops(op.scalar_op) else: return [op.scalar_op] @@ -1579,7 +1579,7 @@ def exp_float32_op(op): return False else: l = list_scalar_op(op) - return any(s_op.__class__ in [aes.Exp] for s_op in l) + return any(s_op.__class__ in [ps.Exp] for s_op in l) printed_tip = False # tip 1 diff --git a/pytensor/graph/basic.py b/pytensor/graph/basic.py index 9b1399b72f..037ec0ca9a 100644 --- a/pytensor/graph/basic.py +++ b/pytensor/graph/basic.py @@ -383,10 +383,10 @@ class Variable(Node, Generic[_TypeType, OptionalApplyType]): .. 
code-block:: python import pytensor - import pytensor.tensor as at + import pytensor.tensor as pt - a = at.constant(1.5) # declare a symbolic constant - b = at.fscalar() # declare a symbolic floating-point scalar + a = pt.constant(1.5) # declare a symbolic constant + b = pt.fscalar() # declare a symbolic floating-point scalar c = a + b # create a simple expression @@ -565,9 +565,9 @@ def eval(self, inputs_to_values=None): -------- >>> import numpy as np - >>> import pytensor.tensor as at - >>> x = at.dscalar('x') - >>> y = at.dscalar('y') + >>> import pytensor.tensor as pt + >>> x = pt.dscalar('x') + >>> y = pt.dscalar('y') >>> z = x + y >>> np.allclose(z.eval({x : 16.3, y : 12.1}), 28.4) True diff --git a/pytensor/graph/rewriting/kanren.py b/pytensor/graph/rewriting/kanren.py index 7c9e123cd4..1ac1b57a56 100644 --- a/pytensor/graph/rewriting/kanren.py +++ b/pytensor/graph/rewriting/kanren.py @@ -24,15 +24,15 @@ class KanrenRelationSub(NodeRewriter): from kanren import eq, conso, var - import pytensor.tensor as at + import pytensor.tensor as pt from pytensor.graph.rewriting.kanren import KanrenRelationSub def relation(in_lv, out_lv): - # A `kanren` goal that changes `at.log` terms to `at.exp` + # A `kanren` goal that changes `pt.log` terms to `pt.exp` cdr_lv = var() - return eq(conso(at.log, cdr_lv, in_lv), - conso(at.exp, cdr_lv, out_lv)) + return eq(conso(pt.log, cdr_lv, in_lv), + conso(pt.exp, cdr_lv, out_lv)) kanren_sub_opt = KanrenRelationSub(relation) diff --git a/pytensor/ifelse.py b/pytensor/ifelse.py index 343c1e1b63..7858f51eba 100644 --- a/pytensor/ifelse.py +++ b/pytensor/ifelse.py @@ -17,7 +17,7 @@ import numpy as np -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import as_symbolic from pytensor.compile import optdb from pytensor.configdefaults import config @@ -159,7 +159,7 @@ def make_node(self, condition: "TensorLike", *true_false_branches: Any): f"{int(2 * self.n_outs)}, got {len(true_false_branches)}" ) - condition = at.basic.as_tensor_variable(condition) + condition = pt.basic.as_tensor_variable(condition) if condition.type.ndim > 0: raise TypeError("The condition argument must be a truthy scalar value") @@ -185,7 +185,7 @@ def make_node(self, condition: "TensorLike", *true_false_branches: Any): input_f.type, HasDataType ): # TODO: Be smarter about dtype casting. 
- # up_dtype = aes.upcast(input_t.type.dtype, input_f.type.dtype) + # up_dtype = ps.upcast(input_t.type.dtype, input_f.type.dtype) if input_t.type.dtype != input_f.type.dtype: raise TypeError( @@ -257,14 +257,14 @@ def grad(self, ins, grads): [condition] + grads + [ - at.basic.zeros_like(t, dtype=grads[i].dtype) + pt.basic.zeros_like(t, dtype=grads[i].dtype) for i, t in enumerate(inputs_true_branch) ] ) inputs_false_grad = ( [condition] + [ - at.basic.zeros_like(f, dtype=grads[i].dtype) + pt.basic.zeros_like(f, dtype=grads[i].dtype) for i, f in enumerate(inputs_false_branch) ] + grads @@ -476,14 +476,14 @@ def cond_make_inplace(fgraph, node): SpecifyShape, Reshape, Unbroadcast, - at.math.Dot, - at.math.MaxAndArgmax, - at.subtensor.Subtensor, - at.subtensor.IncSubtensor, - at.basic.Alloc, - at.elemwise.Elemwise, - at.elemwise.DimShuffle, - at.blockwise.Blockwise, + pt.math.Dot, + pt.math.MaxAndArgmax, + pt.subtensor.Subtensor, + pt.subtensor.IncSubtensor, + pt.basic.Alloc, + pt.elemwise.Elemwise, + pt.elemwise.DimShuffle, + pt.blockwise.Blockwise, ) diff --git a/pytensor/link/jax/dispatch/random.py b/pytensor/link/jax/dispatch/random.py index 05d8957b6b..1afe6610ff 100644 --- a/pytensor/link/jax/dispatch/random.py +++ b/pytensor/link/jax/dispatch/random.py @@ -7,7 +7,7 @@ _coerce_to_uint32_array, ) -import pytensor.tensor.random.basic as aer +import pytensor.tensor.random.basic as ptr from pytensor.link.jax.dispatch.basic import jax_funcify, jax_typify from pytensor.link.jax.dispatch.shape import JAXShapeTuple from pytensor.tensor.shape import Shape, Shape_i @@ -26,13 +26,13 @@ SIZE_NOT_COMPATIBLE = """JAX random variables require concrete values for the `size` parameter of the distributions. Concrete values are either constants: ->>> import pytensor.tensor as at ->>> x_rv = at.random.normal(0, 1, size=(3, 2)) +>>> import pytensor.tensor as pt +>>> x_rv = pt.random.normal(0, 1, size=(3, 2)) or the shape of an array: ->>> m = at.matrix() ->>> x_rv = at.random.normal(0, 1, size=m.shape) +>>> m = pt.matrix() +>>> x_rv = pt.random.normal(0, 1, size=m.shape) """ @@ -86,7 +86,7 @@ def jax_typify_Generator(rng, **kwargs): return state -@jax_funcify.register(aer.RandomVariable) +@jax_funcify.register(ptr.RandomVariable) def jax_funcify_RandomVariable(op, node, **kwargs): """JAX implementation of random variables.""" rv = node.outputs[1] @@ -121,10 +121,10 @@ def jax_sample_fn(op): ) -@jax_sample_fn.register(aer.BetaRV) -@jax_sample_fn.register(aer.DirichletRV) -@jax_sample_fn.register(aer.PoissonRV) -@jax_sample_fn.register(aer.MvNormalRV) +@jax_sample_fn.register(ptr.BetaRV) +@jax_sample_fn.register(ptr.DirichletRV) +@jax_sample_fn.register(ptr.PoissonRV) +@jax_sample_fn.register(ptr.MvNormalRV) def jax_sample_fn_generic(op): """Generic JAX implementation of random variables.""" name = op.name @@ -140,12 +140,12 @@ def sample_fn(rng, size, dtype, *parameters): return sample_fn -@jax_sample_fn.register(aer.CauchyRV) -@jax_sample_fn.register(aer.GumbelRV) -@jax_sample_fn.register(aer.LaplaceRV) -@jax_sample_fn.register(aer.LogisticRV) -@jax_sample_fn.register(aer.NormalRV) -@jax_sample_fn.register(aer.StandardNormalRV) +@jax_sample_fn.register(ptr.CauchyRV) +@jax_sample_fn.register(ptr.GumbelRV) +@jax_sample_fn.register(ptr.LaplaceRV) +@jax_sample_fn.register(ptr.LogisticRV) +@jax_sample_fn.register(ptr.NormalRV) +@jax_sample_fn.register(ptr.StandardNormalRV) def jax_sample_fn_loc_scale(op): """JAX implementation of random variables in the loc-scale families. 
@@ -168,8 +168,8 @@ def sample_fn(rng, size, dtype, *parameters): return sample_fn -@jax_sample_fn.register(aer.BernoulliRV) -@jax_sample_fn.register(aer.CategoricalRV) +@jax_sample_fn.register(ptr.BernoulliRV) +@jax_sample_fn.register(ptr.CategoricalRV) def jax_sample_fn_no_dtype(op): """Generic JAX implementation of random variables.""" name = op.name @@ -185,9 +185,9 @@ def sample_fn(rng, size, dtype, *parameters): return sample_fn -@jax_sample_fn.register(aer.RandIntRV) -@jax_sample_fn.register(aer.IntegersRV) -@jax_sample_fn.register(aer.UniformRV) +@jax_sample_fn.register(ptr.RandIntRV) +@jax_sample_fn.register(ptr.IntegersRV) +@jax_sample_fn.register(ptr.UniformRV) def jax_sample_fn_uniform(op): """JAX implementation of random variables with uniform density. @@ -197,7 +197,7 @@ def jax_sample_fn_uniform(op): """ name = op.name # IntegersRV is equivalent to RandintRV - if isinstance(op, aer.IntegersRV): + if isinstance(op, ptr.IntegersRV): name = "randint" jax_op = getattr(jax.random, name) @@ -214,8 +214,8 @@ def sample_fn(rng, size, dtype, *parameters): return sample_fn -@jax_sample_fn.register(aer.ParetoRV) -@jax_sample_fn.register(aer.GammaRV) +@jax_sample_fn.register(ptr.ParetoRV) +@jax_sample_fn.register(ptr.GammaRV) def jax_sample_fn_shape_scale(op): """JAX implementation of random variables in the shape-scale family. @@ -236,7 +236,7 @@ def sample_fn(rng, size, dtype, shape, scale): return sample_fn -@jax_sample_fn.register(aer.ExponentialRV) +@jax_sample_fn.register(ptr.ExponentialRV) def jax_sample_fn_exponential(op): """JAX implementation of `ExponentialRV`.""" @@ -251,7 +251,7 @@ def sample_fn(rng, size, dtype, *parameters): return sample_fn -@jax_sample_fn.register(aer.StudentTRV) +@jax_sample_fn.register(ptr.StudentTRV) def jax_sample_fn_t(op): """JAX implementation of `StudentTRV`.""" @@ -270,7 +270,7 @@ def sample_fn(rng, size, dtype, *parameters): return sample_fn -@jax_sample_fn.register(aer.ChoiceRV) +@jax_sample_fn.register(ptr.ChoiceRV) def jax_funcify_choice(op): """JAX implementation of `ChoiceRV`.""" @@ -285,7 +285,7 @@ def sample_fn(rng, size, dtype, *parameters): return sample_fn -@jax_sample_fn.register(aer.PermutationRV) +@jax_sample_fn.register(ptr.PermutationRV) def jax_sample_fn_permutation(op): """JAX implementation of `PermutationRV`.""" @@ -300,7 +300,7 @@ def sample_fn(rng, size, dtype, *parameters): return sample_fn -@jax_sample_fn.register(aer.BinomialRV) +@jax_sample_fn.register(ptr.BinomialRV) def jax_sample_fn_binomial(op): if not numpyro_available: raise NotImplementedError( @@ -323,7 +323,7 @@ def sample_fn(rng, size, dtype, n, p): return sample_fn -@jax_sample_fn.register(aer.MultinomialRV) +@jax_sample_fn.register(ptr.MultinomialRV) def jax_sample_fn_multinomial(op): if not numpyro_available: raise NotImplementedError( @@ -346,7 +346,7 @@ def sample_fn(rng, size, dtype, n, p): return sample_fn -@jax_sample_fn.register(aer.VonMisesRV) +@jax_sample_fn.register(ptr.VonMisesRV) def jax_sample_fn_vonmises(op): if not numpyro_available: raise NotImplementedError( diff --git a/pytensor/link/jax/dispatch/shape.py b/pytensor/link/jax/dispatch/shape.py index 7f8b44608d..7dd1ea3323 100644 --- a/pytensor/link/jax/dispatch/shape.py +++ b/pytensor/link/jax/dispatch/shape.py @@ -31,13 +31,13 @@ def shape_tuple_fn(*x): SHAPE_NOT_COMPATIBLE = """JAX requires concrete values for the `shape` parameter of `jax.numpy.reshape`. 
Concrete values are either constants: ->>> import pytensor.tensor as at ->>> x = at.ones(6) +>>> import pytensor.tensor as pt +>>> x = pt.ones(6) >>> y = x.reshape((2, 3)) Or the shape of an array: ->>> mat = at.matrix('mat') +>>> mat = pt.matrix('mat') >>> y = x.reshape(mat.shape) """ diff --git a/pytensor/link/jax/dispatch/subtensor.py b/pytensor/link/jax/dispatch/subtensor.py index d20dee4dfc..f3dfd5768c 100644 --- a/pytensor/link/jax/dispatch/subtensor.py +++ b/pytensor/link/jax/dispatch/subtensor.py @@ -15,15 +15,15 @@ masks. In some cases, however, it is possible to re-express your model in a form that JAX can compile: ->>> import pytensor.tensor as at ->>> x_at = at.vector('x') ->>> y_at = x_at[x_at > 0].sum() +>>> import pytensor.tensor as pt +>>> x_pt = pt.vector('x') +>>> y_pt = x_pt[x_pt > 0].sum() can be re-expressed as: ->>> import pytensor.tensor as at ->>> x_at = at.vector('x') ->>> y_at = at.where(x_at > 0, x_at, 0).sum() +>>> import pytensor.tensor as pt +>>> x_pt = pt.vector('x') +>>> y_pt = pt.where(x_pt > 0, x_pt, 0).sum() """ DYNAMIC_SLICE_LENGTH_ERROR = """JAX does not support slicing arrays with a dynamic diff --git a/pytensor/link/jax/dispatch/tensor_basic.py b/pytensor/link/jax/dispatch/tensor_basic.py index 5acf5b1565..bf1a93ce5b 100644 --- a/pytensor/link/jax/dispatch/tensor_basic.py +++ b/pytensor/link/jax/dispatch/tensor_basic.py @@ -27,8 +27,8 @@ ARANGE_CONCRETE_VALUE_ERROR = """JAX requires the arguments of `jax.numpy.arange` to be constants. The graph that you defined thus cannot be JIT-compiled by JAX. An example of a graph that can be compiled to JAX: ->>> import pytensor.tensor basic ->>> at.arange(1, 10, 2) +>>> import pytensor.tensor as pt +>>> pt.arange(1, 10, 2) """ diff --git a/pytensor/link/numba/dispatch/random.py b/pytensor/link/numba/dispatch/random.py index 5cd657170c..39bb79ea56 100644 --- a/pytensor/link/numba/dispatch/random.py +++ b/pytensor/link/numba/dispatch/random.py @@ -8,7 +8,7 @@ from numba.extending import NativeValue, box, models, register_model, typeof_impl, unbox from numpy.random import RandomState -import pytensor.tensor.random.basic as aer +import pytensor.tensor.random.basic as ptr from pytensor.graph.basic import Apply from pytensor.graph.op import Op from pytensor.link.numba.dispatch import basic as numba_basic @@ -189,28 +189,28 @@ def {sized_fn_name}({random_fn_input_names}): return random_fn -@numba_funcify.register(aer.UniformRV) -@numba_funcify.register(aer.TriangularRV) -@numba_funcify.register(aer.BetaRV) -@numba_funcify.register(aer.NormalRV) -@numba_funcify.register(aer.LogNormalRV) -@numba_funcify.register(aer.GammaRV) -@numba_funcify.register(aer.ParetoRV) -@numba_funcify.register(aer.GumbelRV) -@numba_funcify.register(aer.ExponentialRV) -@numba_funcify.register(aer.WeibullRV) -@numba_funcify.register(aer.LogisticRV) -@numba_funcify.register(aer.VonMisesRV) -@numba_funcify.register(aer.PoissonRV) -@numba_funcify.register(aer.GeometricRV) -@numba_funcify.register(aer.HyperGeometricRV) -@numba_funcify.register(aer.WaldRV) -@numba_funcify.register(aer.LaplaceRV) -@numba_funcify.register(aer.BinomialRV) -@numba_funcify.register(aer.MultinomialRV) -@numba_funcify.register(aer.RandIntRV) # only the first two arguments are supported -@numba_funcify.register(aer.ChoiceRV) # the `p` argument is not supported -@numba_funcify.register(aer.PermutationRV) +@numba_funcify.register(ptr.UniformRV) +@numba_funcify.register(ptr.TriangularRV) +@numba_funcify.register(ptr.BetaRV) +@numba_funcify.register(ptr.NormalRV) 
+@numba_funcify.register(ptr.LogNormalRV) +@numba_funcify.register(ptr.GammaRV) +@numba_funcify.register(ptr.ParetoRV) +@numba_funcify.register(ptr.GumbelRV) +@numba_funcify.register(ptr.ExponentialRV) +@numba_funcify.register(ptr.WeibullRV) +@numba_funcify.register(ptr.LogisticRV) +@numba_funcify.register(ptr.VonMisesRV) +@numba_funcify.register(ptr.PoissonRV) +@numba_funcify.register(ptr.GeometricRV) +@numba_funcify.register(ptr.HyperGeometricRV) +@numba_funcify.register(ptr.WaldRV) +@numba_funcify.register(ptr.LaplaceRV) +@numba_funcify.register(ptr.BinomialRV) +@numba_funcify.register(ptr.MultinomialRV) +@numba_funcify.register(ptr.RandIntRV) # only the first two arguments are supported +@numba_funcify.register(ptr.ChoiceRV) # the `p` argument is not supported +@numba_funcify.register(ptr.PermutationRV) def numba_funcify_RandomVariable(op, node, **kwargs): name = op.name np_random_func = getattr(np.random, name) @@ -266,12 +266,12 @@ def {np_random_fn_name}({np_input_names}): return make_numba_random_fn(node, np_random_fn) -@numba_funcify.register(aer.NegBinomialRV) +@numba_funcify.register(ptr.NegBinomialRV) def numba_funcify_NegBinomialRV(op, node, **kwargs): return make_numba_random_fn(node, np.random.negative_binomial) -@numba_funcify.register(aer.CauchyRV) +@numba_funcify.register(ptr.CauchyRV) def numba_funcify_CauchyRV(op, node, **kwargs): def body_fn(loc, scale): return f" return ({loc} + np.random.standard_cauchy()) / {scale}" @@ -279,7 +279,7 @@ def body_fn(loc, scale): return create_numba_random_fn(op, node, body_fn) -@numba_funcify.register(aer.HalfNormalRV) +@numba_funcify.register(ptr.HalfNormalRV) def numba_funcify_HalfNormalRV(op, node, **kwargs): def body_fn(a, b): return f" return {a} + {b} * abs(np.random.normal(0, 1))" @@ -287,7 +287,7 @@ def body_fn(a, b): return create_numba_random_fn(op, node, body_fn) -@numba_funcify.register(aer.BernoulliRV) +@numba_funcify.register(ptr.BernoulliRV) def numba_funcify_BernoulliRV(op, node, **kwargs): out_dtype = node.outputs[1].type.numpy_dtype @@ -307,7 +307,7 @@ def body_fn(a): ) -@numba_funcify.register(aer.CategoricalRV) +@numba_funcify.register(ptr.CategoricalRV) def numba_funcify_CategoricalRV(op, node, **kwargs): out_dtype = node.outputs[1].type.numpy_dtype size_len = int(get_vector_length(node.inputs[1])) @@ -336,7 +336,7 @@ def categorical_rv(rng, size, dtype, p): return categorical_rv -@numba_funcify.register(aer.DirichletRV) +@numba_funcify.register(ptr.DirichletRV) def numba_funcify_DirichletRV(op, node, **kwargs): out_dtype = node.outputs[1].type.numpy_dtype alphas_ndim = node.inputs[3].type.ndim diff --git a/pytensor/raise_op.py b/pytensor/raise_op.py index 7325532b9c..25e1aebf52 100644 --- a/pytensor/raise_op.py +++ b/pytensor/raise_op.py @@ -72,13 +72,13 @@ def make_node(self, value: Variable, *conds: Variable): conds The conditions to evaluate. 
""" - import pytensor.tensor as at + import pytensor.tensor as pt if not isinstance(value, Variable): - value = at.as_tensor_variable(value) + value = pt.as_tensor_variable(value) conds = [ - at.as_tensor_variable(c) if not isinstance(c, Variable) else c + pt.as_tensor_variable(c) if not isinstance(c, Variable) else c for c in conds ] @@ -182,9 +182,9 @@ class Assert(CheckAndRaise): Examples -------- >>> import pytensor - >>> import pytensor.tensor as at + >>> import pytensor.tensor as pt >>> from pytensor.raise_op import Assert - >>> x = at.vector("x") + >>> x = pt.vector("x") >>> assert_op = Assert("This assert failed") >>> func = pytensor.function([x], assert_op(x, x.size < 2)) diff --git a/pytensor/scan/basic.py b/pytensor/scan/basic.py index ddb4983d4c..93f8b98d37 100644 --- a/pytensor/scan/basic.py +++ b/pytensor/scan/basic.py @@ -2,7 +2,7 @@ import numpy as np -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile.function.pfunc import construct_pfunc_ins_and_outs from pytensor.compile.sharedvalue import SharedVariable, collect_new_shareds from pytensor.configdefaults import config @@ -238,13 +238,13 @@ def scan( .. code-block:: python - import pytensor.tensor as at + import pytensor.tensor as pt - W = at.matrix() + W = pt.matrix() W_2 = W**2 def f(x): - return at.dot(x,W_2) + return pt.dot(x,W_2) The function `fn` is expected to return two things. One is a list of outputs ordered in the same order as `outputs_info`, with the @@ -462,7 +462,7 @@ def wrap_into_list(x): non_seqs = [] for elem in wrap_into_list(non_sequences): if not isinstance(elem, Variable): - non_seqs.append(at.as_tensor_variable(elem)) + non_seqs.append(pt.as_tensor_variable(elem)) else: non_seqs.append(elem) @@ -476,7 +476,7 @@ def wrap_into_list(x): n_fixed_steps = int(n_steps) else: try: - n_fixed_steps = at.get_underlying_scalar_constant_value(n_steps) + n_fixed_steps = pt.get_underlying_scalar_constant_value(n_steps) except NotScalarConstantError: n_fixed_steps = None @@ -592,7 +592,7 @@ def wrap_into_list(x): # If not we need to use copies, that will be replaced at # each frame by the corresponding slice actual_slice = seq["input"][k - mintap_proxy] - _seq_val = at.as_tensor_variable(seq["input"]) + _seq_val = pt.as_tensor_variable(seq["input"]) _seq_val_slice = _seq_val[k - mintap_proxy] nw_slice = _seq_val_slice.type() @@ -652,7 +652,7 @@ def wrap_into_list(x): if not isNaN_or_Inf_or_None(n_steps): # ^ N_steps should also be considered - lengths_vec.append(at.as_tensor(n_steps)) + lengths_vec.append(pt.as_tensor(n_steps)) if len(lengths_vec) == 0: # ^ No information about the number of steps @@ -670,7 +670,7 @@ def wrap_into_list(x): for contestant in lengths_vec[1:]: actual_n_steps = minimum(actual_n_steps, contestant) else: - actual_n_steps = at.as_tensor(n_steps) + actual_n_steps = pt.as_tensor(n_steps) scan_seqs = [seq[:actual_n_steps] for seq in scan_seqs] # Conventions : @@ -716,7 +716,7 @@ def wrap_into_list(x): if init_out.get("taps", None) == [-1]: actual_arg = init_out["initial"] if not isinstance(actual_arg, Variable): - actual_arg = at.as_tensor_variable(actual_arg) + actual_arg = pt.as_tensor_variable(actual_arg) arg = safe_new(actual_arg) if isinstance(arg, Constant): # safe new returns a clone of the constants, but that is not @@ -774,7 +774,7 @@ def wrap_into_list(x): for k in init_out["taps"]: # create a new slice actual_nw_slice = init_out["initial"][k + mintap] - _init_out_var = at.as_tensor_variable(init_out["initial"]) + _init_out_var = 
pt.as_tensor_variable(init_out["initial"]) _init_out_var_slice = _init_out_var[k + mintap] nw_slice = _init_out_var_slice.type() @@ -1003,7 +1003,7 @@ def wrap_into_list(x): ) ) - tensor_update = at.as_tensor_variable(input.update) + tensor_update = pt.as_tensor_variable(input.update) sit_sot_inner_outputs.append(tensor_update) # Note that `pos` is not a negative index. The sign of `pos` is used # as a flag to indicate if this output should be part of the @@ -1154,7 +1154,7 @@ def wrap_into_list(x): scan_inputs = [] for arg in [actual_n_steps] + _scan_inputs: try: - arg = at.as_tensor_variable(arg) + arg = pt.as_tensor_variable(arg) except TypeError: # This happens for Random States for e.g. but it is a good way # to make sure all inputs are tensors. diff --git a/pytensor/scan/checkpoints.py b/pytensor/scan/checkpoints.py index 14cffc2b51..36dc2af1fe 100644 --- a/pytensor/scan/checkpoints.py +++ b/pytensor/scan/checkpoints.py @@ -1,4 +1,4 @@ -import pytensor.tensor.basic as at +import pytensor.tensor.basic as ptb from pytensor.scan.basic import scan from pytensor.tensor.basic import Join from pytensor.tensor.math import ceil, eq @@ -117,12 +117,12 @@ def scan_checkpoints( n_steps = sequences[0].shape[0] # Compute the number of steps of the outer scan - o_n_steps = at.cast(ceil(n_steps / save_every_N), "int64") + o_n_steps = ptb.cast(ceil(n_steps / save_every_N), "int64") # Compute the number of steps of the inner scan - i_n_steps = save_every_N * at.ones((o_n_steps,), "int64") + i_n_steps = save_every_N * ptb.ones((o_n_steps,), "int64") mod = n_steps % save_every_N - last_n_steps = at.switch(eq(mod, 0), save_every_N, mod) + last_n_steps = ptb.switch(eq(mod, 0), save_every_N, mod) i_n_steps = set_subtensor(i_n_steps[-1], last_n_steps) # Pad the sequences if needed @@ -131,7 +131,7 @@ def scan_checkpoints( join = Join(view=0) for i, s in enumerate(sequences): n = s.shape[0] % save_every_N - z = at.zeros((n, s.shape[1:]), dtype=s.dtype) + z = ptb.zeros((n, s.shape[1:]), dtype=s.dtype) sequences[i] = join(0, [s, z]) # Establish the input variables of the outer scan diff --git a/pytensor/scan/op.py b/pytensor/scan/op.py index d650ba93b5..b7cc7fa276 100644 --- a/pytensor/scan/op.py +++ b/pytensor/scan/op.py @@ -55,7 +55,7 @@ import pytensor import pytensor.link.utils as link_utils -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.compile.builders import construct_nominal_fgraph, infer_shape from pytensor.compile.function.pfunc import pfunc from pytensor.compile.io import In, Out @@ -2568,7 +2568,7 @@ def compute_all_gradients(known_grads): # mask inputs that get no gradients for dx in range(len(dC_dinps_t)): if not dC_dinps_t[dx]: - dC_dinps_t[dx] = at.zeros_like(diff_inputs[dx]) + dC_dinps_t[dx] = pt.zeros_like(diff_inputs[dx]) else: disconnected_dC_dinps_t[dx] = False for Xt, Xt_placeholder in zip(diff_outputs[info.n_mit_mot_outs :], Xts): @@ -2696,7 +2696,7 @@ def compute_all_gradients(known_grads): for idx, taps in enumerate(info.mit_mot_in_slices): if isinstance(dC_douts[idx].type, DisconnectedType): out = outs[idx] - outer_inp_mitmot.append(at.zeros_like(out)) + outer_inp_mitmot.append(pt.zeros_like(out)) else: outer_inp_mitmot.append(dC_douts[idx][::-1]) mitmot_inp_taps.append([]) @@ -2723,7 +2723,7 @@ def compute_all_gradients(known_grads): # We cannot use Null in the inner graph, so we # use a zero tensor of the appropriate shape instead. 
inner_out_mitmot.append( - at.zeros(diff_inputs[ins_pos].shape, dtype=config.floatX) + pt.zeros(diff_inputs[ins_pos].shape, dtype=config.floatX) ) undefined_msg = dC_dinps_t[ins_pos].type.why_null else: @@ -2792,7 +2792,7 @@ def compute_all_gradients(known_grads): # We cannot use Null in the inner graph, so we # use a zero tensor of the appropriate shape instead. inner_out_mitmot.append( - at.zeros(diff_inputs[ins_pos].shape, dtype=config.floatX) + pt.zeros(diff_inputs[ins_pos].shape, dtype=config.floatX) ) undefined_msg = dC_dinps_t[ins_pos].type.why_null else: @@ -2834,11 +2834,11 @@ def compute_all_gradients(known_grads): # floatX instead, as it is a dummy value that will not # be used anyway. outer_inp_mitmot.append( - at.zeros(outs[idx + offset].shape, dtype=config.floatX) + pt.zeros(outs[idx + offset].shape, dtype=config.floatX) ) else: outer_inp_mitmot.append( - at.zeros( + pt.zeros( outs[idx + offset].shape, dtype=dC_dinps_t[ins_pos].dtype ) ) @@ -2847,7 +2847,7 @@ def compute_all_gradients(known_grads): # We cannot use Null in the inner graph, so we # use a zero tensor of the appropriate shape instead. inner_out_mitmot.append( - at.zeros(diff_inputs[ins_pos].shape, dtype=config.floatX) + pt.zeros(diff_inputs[ins_pos].shape, dtype=config.floatX) ) else: inner_out_mitmot.append(dC_dinps_t[ins_pos]) @@ -2887,7 +2887,7 @@ def compute_all_gradients(known_grads): type_outs.append(vl.type.why_null) # Replace the inner output with a zero tensor of # the right shape - inner_out_sitsot[_p] = at.zeros( + inner_out_sitsot[_p] = pt.zeros( diff_inputs[ins_pos + _p].shape, dtype=config.floatX ) elif through_shared: @@ -2906,7 +2906,7 @@ def compute_all_gradients(known_grads): type_outs.append(vl.type.why_null) # Replace the inner output with a zero tensor of # the right shape - inner_out_nitsot[_p] = at.zeros( + inner_out_nitsot[_p] = pt.zeros( diff_inputs[_p].shape, dtype=config.floatX ) @@ -2924,19 +2924,19 @@ def compute_all_gradients(known_grads): if isinstance(y.type, NullType): # Cannot use dC_dXtm1s.dtype, so we use floatX instead. 
outer_inp_sitsot.append( - at.zeros( + pt.zeros( [grad_steps + 1] + [x.shape[i] for i in range(x.ndim)], dtype=config.floatX, ) ) # replace y by a zero tensor of the right shape - inner_inp_sitsot[_idx] = at.zeros( + inner_inp_sitsot[_idx] = pt.zeros( diff_inputs[ins_pos + _idx].shape, dtype=config.floatX ) else: outer_inp_sitsot.append( - at.zeros( + pt.zeros( [grad_steps + 1] + [x.shape[i] for i in range(x.ndim)], dtype=y.dtype, ) @@ -3008,8 +3008,8 @@ def compute_all_gradients(known_grads): shp = (n_zeros,) if x.ndim > 1: shp = shp + tuple(x.shape[i] for i in range(1, x.ndim)) - z = at.zeros(shp, dtype=x.dtype) - x = at.concatenate([x[::-1], z], axis=0) + z = pt.zeros(shp, dtype=x.dtype) + x = pt.concatenate([x[::-1], z], axis=0) gradients.append(x) else: gradients.append(x[::-1]) @@ -3036,8 +3036,8 @@ def compute_all_gradients(known_grads): shp = (n_zeros,) if x.ndim > 1: shp = shp + tuple(x.shape[i] for i in range(1, x.ndim)) - z = at.zeros(shp, dtype=x.dtype) - x = at.concatenate([x[::-1], z], axis=0) + z = pt.zeros(shp, dtype=x.dtype) + x = pt.concatenate([x[::-1], z], axis=0) gradients.append(x) else: gradients.append(x[::-1]) diff --git a/pytensor/scan/rewriting.py b/pytensor/scan/rewriting.py index b84bcf7bf7..80240f85e7 100644 --- a/pytensor/scan/rewriting.py +++ b/pytensor/scan/rewriting.py @@ -9,8 +9,8 @@ import numpy as np import pytensor -from pytensor import scalar as aes -from pytensor import tensor as at +from pytensor import scalar as ps +from pytensor import tensor as pt from pytensor.compile import optdb from pytensor.compile.function.types import deep_copy_op from pytensor.configdefaults import config @@ -391,7 +391,7 @@ def add_to_replace(y): x = node.outputs[local_fgraph_outs_map[out]] y = replace_with_out[idx] y_shape = list(y.shape) - replace_with[x] = at.alloc(y, node.inputs[0], *y_shape) + replace_with[x] = pt.alloc(y, node.inputs[0], *y_shape) # We need to add one extra dimension to the outputs # because the scan op expects for a tensor3, to which an @@ -668,7 +668,7 @@ def inner_sitsot_only_last_step_used( client = fgraph.clients[outer_var][0][0] if isinstance(client, Apply) and isinstance(client.op, Subtensor): lst = get_idx_list(client.inputs, client.op.idx_list) - if len(lst) == 1 and at.extract_constant(lst[0]) == -1: + if len(lst) == 1 and pt.extract_constant(lst[0]) == -1: return True return False @@ -851,7 +851,7 @@ def push_out_add_scan(fgraph, node): for nd in local_fgraph_topo: if ( isinstance(nd.op, Elemwise) - and isinstance(nd.op.scalar_op, aes.Add) + and isinstance(nd.op.scalar_op, ps.Add) and nd.out in args.inner_out_sit_sot and inner_sitsot_only_last_step_used(fgraph, nd.out, args) ): @@ -897,7 +897,7 @@ def push_out_add_scan(fgraph, node): # so that they become matrices. 
This is because a # dot is usually faster on two large matrices than # a bunch of small ones - outer_dot_inputs[0] = at.flatten( + outer_dot_inputs[0] = pt.flatten( outer_dot_inputs[0].dimshuffle(1, 0, 2), ndim=2 ) @@ -1114,7 +1114,7 @@ def sanitize(x): if x is None: return None else: - return at.as_tensor_variable(x) + return pt.as_tensor_variable(x) @node_rewriter([Scan]) @@ -1167,12 +1167,12 @@ def while_scan_merge_subtensor_last_element(fgraph, scan_node): if ( len(slice1) == 1 and isinstance(slice1[0], slice) - and isinstance(slice1[0].start, aes.ScalarConstant) + and isinstance(slice1[0].start, ps.ScalarConstant) and slice1[0].start.data == min_tap and slice1[0].stop is None and slice1[0].step is None and len(slice2) == 1 - and isinstance(slice2[0], aes.ScalarConstant) + and isinstance(slice2[0], ps.ScalarConstant) and slice2[0].data == -1 ): out = assert_non_zero_steps_op(x[-1], non_zero_steps_cond) @@ -1342,10 +1342,10 @@ def save_mem_new_scan(fgraph, node): if isinstance(this_slice[0], slice) and this_slice[0].stop is None: global_nsteps = None if isinstance(cf_slice[0], slice): - stop = at.extract_constant(cf_slice[0].stop) + stop = pt.extract_constant(cf_slice[0].stop) else: - stop = at.extract_constant(cf_slice[0]) + 1 - if stop == maxsize or stop == at.extract_constant(length): + stop = pt.extract_constant(cf_slice[0]) + 1 + if stop == maxsize or stop == pt.extract_constant(length): stop = None else: # there is a **gotcha** here ! Namely, scan returns an @@ -1449,9 +1449,9 @@ def save_mem_new_scan(fgraph, node): cf_slice = get_canonical_form_slice(this_slice[0], length) if isinstance(cf_slice[0], slice): - start = at.extract_constant(cf_slice[0].start) + start = pt.extract_constant(cf_slice[0].start) else: - start = at.extract_constant(cf_slice[0]) + start = pt.extract_constant(cf_slice[0]) if start == 0 or store_steps[i] == 0: store_steps[i] = 0 @@ -1534,13 +1534,13 @@ def save_mem_new_scan(fgraph, node): ) ): _nw_input = nw_inputs[offset + idx].owner.inputs[1] - cval = at.as_tensor_variable(val) - initl = at.as_tensor_variable(init_l[i]) - tmp_idx = at.switch(cval < initl, cval + initl, cval - initl) + cval = pt.as_tensor_variable(val) + initl = pt.as_tensor_variable(init_l[i]) + tmp_idx = pt.switch(cval < initl, cval + initl, cval - initl) nw_input = expand_empty(_nw_input, tmp_idx) else: - tmp = at.as_tensor_variable(val) - initl = at.as_tensor_variable(init_l[i]) + tmp = pt.as_tensor_variable(val) + initl = pt.as_tensor_variable(init_l[i]) tmp = maximum(tmp, initl) nw_input = nw_inputs[offset + idx][:tmp] @@ -1628,7 +1628,7 @@ def save_mem_new_scan(fgraph, node): # 3.6 Compose the new scan # TODO: currently we don't support scan with 0 step. So # don't create one. 
- if at.extract_constant(node_ins[0]) == 0: + if pt.extract_constant(node_ins[0]) == 0: return False # Do not call make_node for test_value @@ -2303,7 +2303,7 @@ def push_out_dot1_scan(fgraph, node): if ( out.owner and isinstance(out.owner.op, Elemwise) - and isinstance(out.owner.op.scalar_op, aes.Add) + and isinstance(out.owner.op.scalar_op, ps.Add) and inp in out.owner.inputs and len(fgraph.clients[outer_out]) == 1 and not isinstance(fgraph.clients[outer_out][0][0], str) diff --git a/pytensor/scan/utils.py b/pytensor/scan/utils.py index 2a75269434..e7d52e9c33 100644 --- a/pytensor/scan/utils.py +++ b/pytensor/scan/utils.py @@ -11,8 +11,8 @@ import numpy as np -from pytensor import scalar as aes -from pytensor import tensor as at +from pytensor import scalar as ps +from pytensor import tensor as pt from pytensor.compile.profiling import ProfileStats from pytensor.configdefaults import config from pytensor.graph.basic import Constant, Variable, equal_computations, graph_inputs @@ -66,9 +66,9 @@ def safe_new( # Note, `as_tensor_variable` will convert the `ScalarType` into a # `TensorScalar` that will require a `ScalarFromTensor` `Op`, making the # push-out optimization fail - elif isinstance(x, aes.ScalarVariable): + elif isinstance(x, ps.ScalarVariable): if dtype: - nw_x = aes.get_scalar_type(dtype=dtype)() + nw_x = ps.get_scalar_type(dtype=dtype)() else: nw_x = x.type() nw_x.name = nw_name @@ -84,7 +84,7 @@ def safe_new( return nw_x else: try: - x = at.as_tensor_variable(x) + x = pt.as_tensor_variable(x) except TypeError: # This could happen for example for random states pass @@ -127,7 +127,7 @@ class until: """ def __init__(self, condition): - self.condition = at.as_tensor_variable(condition) + self.condition = pt.as_tensor_variable(condition) assert self.condition.ndim == 0 diff --git a/pytensor/sparse/basic.py b/pytensor/sparse/basic.py index b84cfb144e..363400416f 100644 --- a/pytensor/sparse/basic.py +++ b/pytensor/sparse/basic.py @@ -15,7 +15,7 @@ import pytensor from pytensor import _as_symbolic, as_symbolic -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor.configdefaults import config from pytensor.gradient import DisconnectedType, grad_not_implemented, grad_undefined from pytensor.graph.basic import Apply, Constant, Variable @@ -25,14 +25,14 @@ from pytensor.misc.safe_asarray import _asarray from pytensor.sparse.type import SparseTensorType, _is_sparse from pytensor.sparse.utils import hash_from_sparse -from pytensor.tensor import basic as at +from pytensor.tensor import basic as ptb from pytensor.tensor.basic import Split from pytensor.tensor.math import _conj -from pytensor.tensor.math import add as at_add +from pytensor.tensor.math import add as pt_add from pytensor.tensor.math import arcsin, arcsinh, arctan, arctanh, ceil, deg2rad -from pytensor.tensor.math import dot as at_dot +from pytensor.tensor.math import dot as pt_dot from pytensor.tensor.math import exp, expm1, floor, log, log1p, maximum, minimum -from pytensor.tensor.math import pow as at_pow +from pytensor.tensor.math import pow as pt_pow from pytensor.tensor.math import ( rad2deg, round_half_to_even, @@ -215,7 +215,7 @@ def sp_ones_like(x): """ # TODO: don't restrict to CSM formats data, indices, indptr, _shape = csm_properties(x) - return CSM(format=x.format)(at.ones_like(data), indices, indptr, _shape) + return CSM(format=x.format)(ptb.ones_like(data), indices, indptr, _shape) def sp_zeros_like(x): @@ -239,7 +239,7 @@ def sp_zeros_like(x): return CSM(format=x.format)( 
data=np.array([], dtype=x.type.dtype), indices=np.array([], dtype="int32"), - indptr=at.zeros_like(indptr), + indptr=ptb.zeros_like(indptr), shape=_shape, ) @@ -391,7 +391,7 @@ def toarray(self): @property def shape(self): - # TODO: The plan is that the ShapeFeature in at.opt will do shape + # TODO: The plan is that the ShapeFeature in ptb.opt will do shape # propagation and remove the dense_from_sparse from the graph. This # will *NOT* actually expand your sparse matrix just to get the shape. return shape(dense_from_sparse(self)) @@ -689,7 +689,7 @@ def make_node(self, data, indices, indptr, shape): matrix to construct. """ - data = at.as_tensor_variable(data) + data = ptb.as_tensor_variable(data) if not isinstance(indices, Variable): indices_ = np.asarray(indices) @@ -707,9 +707,9 @@ def make_node(self, data, indices, indptr, shape): assert (shape_ == shape_32).all() shape = shape_32 - indices = at.as_tensor_variable(indices) - indptr = at.as_tensor_variable(indptr) - shape = at.as_tensor_variable(shape) + indices = ptb.as_tensor_variable(indices) + indptr = ptb.as_tensor_variable(indptr) + shape = ptb.as_tensor_variable(shape) if data.type.ndim != 1: raise TypeError("data argument must be a vector", data.type, data.type.ndim) @@ -1052,7 +1052,7 @@ def make_node(self, x): A dense matrix. """ - x = at.as_tensor_variable(x) + x = ptb.as_tensor_variable(x) if x.ndim > 2: raise TypeError( "PyTensor does not have sparse tensor types with more " @@ -1114,7 +1114,7 @@ def make_node(self, x, index): x = as_sparse_variable(x) assert x.format in ("csr", "csc") - ind = at.as_tensor_variable(index) + ind = ptb.as_tensor_variable(index) assert ind.ndim == 1 assert ind.dtype in integer_dtypes @@ -1152,7 +1152,7 @@ def make_node(self, x, index, gz): assert x.format in ("csr", "csc") assert gz.format in ("csr", "csc") - ind = at.as_tensor_variable(index) + ind = ptb.as_tensor_variable(index) assert ind.ndim == 1 assert ind.dtype in integer_dtypes @@ -1201,8 +1201,8 @@ def make_node(self, x, ind1, ind2): """ x = as_sparse_variable(x) assert x.format in ("csr", "csc") - ind1 = at.as_tensor_variable(ind1) - ind2 = at.as_tensor_variable(ind2) + ind1 = ptb.as_tensor_variable(ind1) + ind2 = ptb.as_tensor_variable(ind2) assert ind1.dtype in integer_dtypes assert ind2.dtype in integer_dtypes @@ -1241,8 +1241,8 @@ def make_node(self, x, ind1, ind2, gz): assert x.format in ("csr", "csc") - ind1 = at.as_tensor_variable(ind1) - ind2 = at.as_tensor_variable(ind2) + ind1 = ptb.as_tensor_variable(ind1) + ind2 = ptb.as_tensor_variable(ind2) assert ind1.ndim == 1 assert ind2.ndim == 1 assert ind1.dtype in integer_dtypes @@ -1332,7 +1332,7 @@ def make_node(self, x, index): step = generic_None else: if not isinstance(step, Variable): - step = at.as_tensor_variable(step) + step = ptb.as_tensor_variable(step) if not (step.ndim == 0 and step.dtype in tensor_discrete_dtypes): raise ValueError( ( @@ -1347,7 +1347,7 @@ def make_node(self, x, index): start = generic_None else: if not isinstance(start, Variable): - start = at.as_tensor_variable(start) + start = ptb.as_tensor_variable(start) if not (start.ndim == 0 and start.dtype in tensor_discrete_dtypes): raise ValueError( ( @@ -1362,7 +1362,7 @@ def make_node(self, x, index): stop = generic_None else: if not isinstance(stop, Variable): - stop = at.as_tensor_variable(stop) + stop = ptb.as_tensor_variable(stop) if not (stop.ndim == 0 and stop.dtype in tensor_discrete_dtypes): raise ValueError( ( @@ -1441,7 +1441,7 @@ def make_node(self, x, index): # in case of indexing using int 
instead of pytensor variable elif isinstance(ind, int): - ind = at.constant(ind) + ind = ptb.constant(ind) input_op += [ind] # in case of indexing using pytensor variable @@ -1777,9 +1777,9 @@ def grad(self, inputs, gout): if _is_sparse_variable(gz): gz = dense_from_sparse(gz) if self.axis is None: - r = at.second(x, gz) + r = ptb.second(x, gz) else: - ones = at.ones_like(x) + ones = ptb.ones_like(x) if self.axis == 0: r = specify_broadcastable(gz.dimshuffle("x", 0), 0) * ones elif self.axis == 1: @@ -1903,7 +1903,7 @@ def make_node(self, diag): Dense vector for the diagonal. """ - diag = at.as_tensor_variable(diag) + diag = ptb.as_tensor_variable(diag) if diag.type.ndim != 1: raise TypeError("data argument must be a vector", diag.type) @@ -2026,7 +2026,7 @@ def make_node(self, x, y): x, y = map(as_sparse_variable, [x, y]) assert x.format in ("csr", "csc") assert y.format in ("csr", "csc") - out_dtype = aes.upcast(x.type.dtype, y.type.dtype) + out_dtype = ps.upcast(x.type.dtype, y.type.dtype) return Apply( self, [x, y], [SparseTensorType(dtype=out_dtype, format=x.type.format)()] ) @@ -2117,9 +2117,9 @@ class AddSD(Op): __props__ = () def make_node(self, x, y): - x, y = as_sparse_variable(x), at.as_tensor_variable(y) + x, y = as_sparse_variable(x), ptb.as_tensor_variable(y) assert x.format in ("csr", "csc") - out_dtype = aes.upcast(x.type.dtype, y.type.dtype) + out_dtype = ps.upcast(x.type.dtype, y.type.dtype) # The magic number two here arises because L{scipy.sparse} # objects must be matrices (have dimension 2) @@ -2180,7 +2180,7 @@ def make_node(self, x, y): """ x = as_sparse_variable(x) assert x.format in ("csr", "csc") - y = at.as_tensor_variable(y) + y = ptb.as_tensor_variable(y) assert y.type.ndim == 1 @@ -2244,9 +2244,9 @@ def add(x, y): if hasattr(y, "getnnz"): y = as_sparse_variable(y) if not isinstance(x, Variable): - x = at.as_tensor_variable(x) + x = ptb.as_tensor_variable(x) if not isinstance(y, Variable): - y = at.as_tensor_variable(y) + y = ptb.as_tensor_variable(y) x_is_sparse_variable = _is_sparse_variable(x) y_is_sparse_variable = _is_sparse_variable(y) @@ -2301,7 +2301,7 @@ def make_node(self, x, y): x, y = as_sparse_variable(x), as_sparse_variable(y) assert x.format in ("csr", "csc") assert y.format in ("csr", "csc") - out_dtype = aes.upcast(x.type.dtype, y.type.dtype) + out_dtype = ps.upcast(x.type.dtype, y.type.dtype) return Apply( self, [x, y], [SparseTensorType(dtype=out_dtype, format=x.type.format)()] ) @@ -2334,12 +2334,12 @@ class MulSD(Op): __props__ = () def make_node(self, x, y): - x, y = as_sparse_variable(x), at.as_tensor_variable(y) + x, y = as_sparse_variable(x), ptb.as_tensor_variable(y) assert x.format in ("csr", "csc") # upcast the tensor. Is the cast of sparse done implemented? 
- dtype = aes.upcast(x.type.dtype, y.type.dtype) + dtype = ps.upcast(x.type.dtype, y.type.dtype) # The magic number two here arises because L{scipy.sparse} # objects must be matrices (have dimension 2) @@ -2442,7 +2442,7 @@ def make_node(self, x, y): """ x = as_sparse_variable(x) assert x.format in ("csr", "csc") - y = at.as_tensor_variable(y) + y = ptb.as_tensor_variable(y) assert y.type.ndim == 1 @@ -2605,7 +2605,7 @@ def comparison(self, x, y): raise NotImplementedError() def make_node(self, x, y): - x, y = as_sparse_variable(x), at.as_tensor_variable(y) + x, y = as_sparse_variable(x), ptb.as_tensor_variable(y) assert y.type.ndim == 2 out = TensorType(dtype="uint8", shape=(None, None))() @@ -2660,9 +2660,9 @@ def helper(x, y): if hasattr(y, "getnnz"): y = as_sparse_variable(y) if not isinstance(x, Variable): - x = at.as_tensor_variable(x) + x = ptb.as_tensor_variable(x) if not isinstance(y, Variable): - y = at.as_tensor_variable(y) + y = ptb.as_tensor_variable(y) x_is_sparse_variable = _is_sparse_variable(x) y_is_sparse_variable = _is_sparse_variable(y) @@ -2837,7 +2837,7 @@ def grad(self, inputs, gout): if _is_sparse_variable(gz): gz = dense_from_sparse(gz) - split = Split(len(inputs))(gz, 1, at.stack([x.shape[1] for x in inputs])) + split = Split(len(inputs))(gz, 1, ptb.stack([x.shape[1] for x in inputs])) if not isinstance(split, list): split = [split] @@ -2892,7 +2892,7 @@ def hstack(blocks, format=None, dtype=None): blocks = [as_sparse_variable(i) for i in blocks] if dtype is None: - dtype = aes.upcast(*[i.dtype for i in blocks]) + dtype = ps.upcast(*[i.dtype for i in blocks]) return HStack(format=format, dtype=dtype)(*blocks) @@ -2916,7 +2916,7 @@ def grad(self, inputs, gout): if _is_sparse_variable(gz): gz = dense_from_sparse(gz) - split = Split(len(inputs))(gz, 0, at.stack([x.shape[0] for x in inputs])) + split = Split(len(inputs))(gz, 0, ptb.stack([x.shape[0] for x in inputs])) if not isinstance(split, list): split = [split] @@ -2968,7 +2968,7 @@ def vstack(blocks, format=None, dtype=None): blocks = [as_sparse_variable(i) for i in blocks] if dtype is None: - dtype = aes.upcast(*[i.dtype for i in blocks]) + dtype = ps.upcast(*[i.dtype for i in blocks]) return VStack(format=format, dtype=dtype)(*blocks) @@ -3041,7 +3041,7 @@ def wrapper(*args): x = as_sparse_variable(args[0]) assert x.format in ("csr", "csc") - xs = [aes.as_scalar(arg) for arg in args[1:]] + xs = [ps.as_scalar(arg) for arg in args[1:]] data, ind, ptr, _shape = csm_properties(x) @@ -3079,7 +3079,7 @@ def structured_log(x): """ -@structured_monoid(at_pow) +@structured_monoid(pt_pow) def structured_pow(x, y): """ Structured elemwise power of sparse matrix x by scalar y. @@ -3103,7 +3103,7 @@ def structured_maximum(x, y): """ -@structured_monoid(at_add) +@structured_monoid(pt_add) def structured_add(x): """ Structured addition of sparse matrix x and scalar y. @@ -3436,7 +3436,7 @@ def make_node(self, a, b): raise TypeError( "First argument must be of type SparseVariable or SparseConstant" ) - dtype_out = aes.upcast(a.type.dtype, b.type.dtype) + dtype_out = ps.upcast(a.type.dtype, b.type.dtype) if b.type.ndim != 2: raise NotImplementedError("non-matrix b") @@ -3887,8 +3887,8 @@ def make_node(self, x, y, p): Sparse matrix in csr format. """ - x = at.as_tensor_variable(x) - y = at.as_tensor_variable(y) + x = ptb.as_tensor_variable(x) + y = ptb.as_tensor_variable(y) p = as_sparse_variable(p) assert p.format in ("csr", "csc") @@ -3896,7 +3896,7 @@ def make_node(self, x, y, p): raise TypeError(p) # TODO: use it. 
- # dtype_out = aes.upcast(x.type.dtype, y.type.dtype, p.type.dtype) + # dtype_out = ps.upcast(x.type.dtype, y.type.dtype, p.type.dtype) return Apply(self, [x, y, p], [p.type()]) @@ -3948,7 +3948,7 @@ def infer_shape(self, fgraph, node, shapes): raise NotImplementedError() def make_node(self, x, y): - dtype_out = aes.upcast(x.dtype, y.dtype) + dtype_out = ps.upcast(x.dtype, y.dtype) # Sparse dot product should have at least one sparse variable # as input. If the other one is not sparse, it has to be converted @@ -3971,7 +3971,7 @@ def make_node(self, x, y): if x_is_sparse_var: shape_x = (None,) * x.type.ndim else: - x = at.as_tensor_variable(x) + x = ptb.as_tensor_variable(x) shape_x = x.type.shape assert y.format in ("csr", "csc") if x.ndim not in (1, 2): @@ -3983,7 +3983,7 @@ def make_node(self, x, y): if y_is_sparse_var: shape_y = (None,) * y.type.ndim else: - y = at.as_tensor_variable(y) + y = ptb.as_tensor_variable(y) shape_y = y.type.shape assert x.format in ("csr", "csc") if y.ndim not in (1, 2): @@ -4022,11 +4022,11 @@ def grad(self, inputs, gout): rval = [] if _is_dense_variable(y): - rval.append(at_dot(gz, y.T)) + rval.append(pt_dot(gz, y.T)) else: rval.append(dot(gz, y.T)) if _is_dense_variable(x): - rval.append(at_dot(x.T, gz)) + rval.append(pt_dot(x.T, gz)) else: rval.append(dot(x.T, gz)) @@ -4112,20 +4112,20 @@ def make_node(self, alpha, x, y, z): # We should use Dot22 and Gemm in that case. raise TypeError(x) - dtype_out = aes.upcast( + dtype_out = ps.upcast( alpha.type.dtype, x.type.dtype, y.type.dtype, z.type.dtype ) - alpha = at.as_tensor_variable(alpha) - z = at.as_tensor_variable(z) + alpha = ptb.as_tensor_variable(alpha) + z = ptb.as_tensor_variable(z) assert z.type.ndim == 2 assert alpha.type.shape == (1,) * alpha.type.ndim if not _is_sparse_variable(x): - x = at.as_tensor_variable(x) + x = ptb.as_tensor_variable(x) assert y.format in ("csr", "csc") assert x.type.ndim == 2 if not _is_sparse_variable(y): - y = at.as_tensor_variable(y) + y = ptb.as_tensor_variable(y) assert x.format in ("csr", "csc") assert y.type.ndim == 2 @@ -4198,9 +4198,9 @@ def make_node(self, x, values, ilist): It specifies where in the output to put the corresponding rows. 
""" - x_ = at.as_tensor_variable(x) - values_ = at.as_tensor_variable(values) - ilist_ = at.as_tensor_variable(ilist) + x_ = ptb.as_tensor_variable(x) + values_ = ptb.as_tensor_variable(values) + ilist_ = ptb.as_tensor_variable(ilist) if ilist_.type.dtype not in integer_dtypes: raise TypeError("index must be integers") diff --git a/pytensor/sparse/rewriting.py b/pytensor/sparse/rewriting.py index 47ea1284ba..69bf87dcf8 100644 --- a/pytensor/sparse/rewriting.py +++ b/pytensor/sparse/rewriting.py @@ -1,7 +1,7 @@ import scipy import pytensor -import pytensor.scalar as aes +import pytensor.scalar as ps from pytensor.configdefaults import config from pytensor.graph.basic import Apply from pytensor.graph.rewriting.basic import ( @@ -116,7 +116,7 @@ def __str__(self): def make_node(self, x, y): x, y = sparse.as_sparse_variable(x), as_tensor_variable(y) - out_dtype = aes.upcast(x.type.dtype, y.type.dtype) + out_dtype = ps.upcast(x.type.dtype, y.type.dtype) if self.inplace: assert out_dtype == y.dtype @@ -195,7 +195,7 @@ def c_code_cache_version(self): def local_inplace_addsd_ccode(fgraph, node): """Rewrite to insert inplace versions of `AddSD`.""" if isinstance(node.op, sparse.AddSD) and config.cxx: - out_dtype = aes.upcast(*node.inputs) + out_dtype = ps.upcast(*node.inputs) if out_dtype != node.inputs[1].dtype: return new_node = AddSD_ccode(format=node.inputs[0].type.format, inplace=True)( @@ -266,7 +266,7 @@ class StructuredDotCSC(COp): __props__ = () def make_node(self, a_val, a_ind, a_ptr, a_nrows, b): - dtype_out = aes.upcast(a_val.type.dtype, b.type.dtype) + dtype_out = ps.upcast(a_val.type.dtype, b.type.dtype) r = Apply( self, [a_val, a_ind, a_ptr, a_nrows, b], @@ -465,7 +465,7 @@ class StructuredDotCSR(COp): __props__ = () def make_node(self, a_val, a_ind, a_ptr, b): - self.dtype_out = aes.upcast(a_val.type.dtype, b.type.dtype) + self.dtype_out = ps.upcast(a_val.type.dtype, b.type.dtype) r = Apply( self, [a_val, a_ind, a_ptr, b], @@ -691,7 +691,7 @@ def make_node(self, alpha, x_val, x_ind, x_ptr, x_nrows, y, z): assert y.ndim == 2 assert z.ndim == 2 - dtype_out = aes.upcast( + dtype_out = ps.upcast( alpha.type.dtype, x_val.type.dtype, y.type.dtype, z.type.dtype ) @@ -957,7 +957,7 @@ def local_usmm_csx(fgraph, node): if x.type.format == "csc": x_val, x_ind, x_ptr, x_shape = csm_properties(x) x_nsparse = x_shape[0] - dtype_out = aes.upcast( + dtype_out = ps.upcast( alpha.type.dtype, x.type.dtype, y.type.dtype, z.type.dtype ) if dtype_out not in ("float32", "float64"): @@ -1860,8 +1860,8 @@ def make_node(self, x, y, p_data, p_ind, p_ptr, p_ncols): assert p_ncols.dtype == "int32" - dtype_out = aes.upcast(x.type.dtype, y.type.dtype, p_data.type.dtype) - dot_out = aes.upcast(x.type.dtype, y.type.dtype) + dtype_out = ps.upcast(x.type.dtype, y.type.dtype, p_data.type.dtype) + dot_out = ps.upcast(x.type.dtype, y.type.dtype) # We call blas ?dot function that take only param of the same type x = cast(x, dot_out) @@ -1905,7 +1905,7 @@ def c_code(self, node, name, inputs, outputs, sub): if node.inputs[2].type.dtype in ("complex64", "complex128"): raise NotImplementedError("Complex types are not supported for pattern") - dot_out = aes.upcast(node.inputs[0].type.dtype, node.inputs[1].type.dtype) + dot_out = ps.upcast(node.inputs[0].type.dtype, node.inputs[1].type.dtype) if dot_out == "float32": conv_type = "float" diff --git a/pytensor/sparse/sandbox/sp.py b/pytensor/sparse/sandbox/sp.py index 35a23f2a86..ce21dc3cce 100644 --- a/pytensor/sparse/sandbox/sp.py +++ b/pytensor/sparse/sandbox/sp.py @@ -14,10 
+14,10 @@ import pytensor import pytensor.sparse from pytensor import sparse -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.graph.op import Op from pytensor.tensor.math import dot -from pytensor.tensor.math import max as at_max +from pytensor.tensor.math import max as pt_max from pytensor.tensor.shape import reshape from pytensor.tensor.subtensor import DimShuffle @@ -353,10 +353,10 @@ def convolve( patches = (sparse.structured_dot(csc, images.T)).T # compute output of linear classifier - pshape = at.stack( + pshape = pt.stack( [ - images.shape[0] * at.as_tensor(np.prod(outshp)), - at.as_tensor(imgshp[0] * kern_size), + images.shape[0] * pt.as_tensor(np.prod(outshp)), + pt.as_tensor(imgshp[0] * kern_size), ] ) patch_stack = reshape(patches, pshape, ndim=2) @@ -371,13 +371,13 @@ def convolve( # now to have feature maps in raster order ... # go from bsize*outshp x nkern to bsize x nkern*outshp - newshp = at.stack( - [images.shape[0], at.as_tensor(np.prod(outshp)), at.as_tensor(nkern)] + newshp = pt.stack( + [images.shape[0], pt.as_tensor(np.prod(outshp)), pt.as_tensor(nkern)] ) tensout = reshape(output, newshp, ndim=3) output = DimShuffle((False,) * tensout.ndim, (0, 2, 1))(tensout) if flatten: - output = at.flatten(output, 2) + output = pt.flatten(output, 2) return output, np.hstack((nkern, outshp)) @@ -423,26 +423,26 @@ def max_pool(images, imgshp, maxpoolshp): ) patches = sparse.structured_dot(csc, images.T).T - pshape = at.stack( + pshape = pt.stack( [ - images.shape[0] * at.as_tensor(np.prod(outshp)), - at.as_tensor(imgshp[0]), - at.as_tensor(poolsize), + images.shape[0] * pt.as_tensor(np.prod(outshp)), + pt.as_tensor(imgshp[0]), + pt.as_tensor(poolsize), ] ) patch_stack = reshape(patches, pshape, ndim=3) - out1 = at_max(patch_stack, axis=2) + out1 = pt_max(patch_stack, axis=2) - pshape = at.stack( + pshape = pt.stack( [ images.shape[0], - at.as_tensor(np.prod(outshp)), - at.as_tensor(imgshp[0]), + pt.as_tensor(np.prod(outshp)), + pt.as_tensor(imgshp[0]), ] ) out2 = reshape(out1, pshape, ndim=3) out3 = DimShuffle(out2.broadcastable, (0, 2, 1))(out2) - return at.flatten(out3, 2), outshp + return pt.flatten(out3, 2), outshp diff --git a/pytensor/sparse/sandbox/sp2.py b/pytensor/sparse/sandbox/sp2.py index e8d44e6311..af95cfdb0f 100644 --- a/pytensor/sparse/sandbox/sp2.py +++ b/pytensor/sparse/sandbox/sp2.py @@ -2,7 +2,7 @@ import scipy.sparse import pytensor -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.graph.basic import Apply from pytensor.graph.op import Op from pytensor.sparse.basic import ( @@ -99,9 +99,9 @@ def __init__(self, format, dtype): self.dtype = dtype def make_node(self, n, p, shape): - n = at.as_tensor_variable(n) - p = at.as_tensor_variable(p) - shape = at.as_tensor_variable(shape) + n = pt.as_tensor_variable(n) + p = pt.as_tensor_variable(p) + shape = pt.as_tensor_variable(shape) assert n.dtype in discrete_dtypes assert p.dtype in float_dtypes @@ -171,7 +171,7 @@ class Multinomial(Op): __props__ = () def make_node(self, n, p): - n = at.as_tensor_variable(n) + n = pt.as_tensor_variable(n) p = as_sparse_variable(p) assert p.format in ("csr", "csc") diff --git a/pytensor/sparse/type.py b/pytensor/sparse/type.py index c3e1b61873..ba53c519b6 100644 --- a/pytensor/sparse/type.py +++ b/pytensor/sparse/type.py @@ -5,7 +5,7 @@ import scipy.sparse import pytensor -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor.graph.basic import Variable from pytensor.graph.type import 
HasDataType from pytensor.tensor.type import DenseTensorType, TensorType @@ -123,7 +123,7 @@ def filter(self, value, strict=False, allow_downcast=None): sp = self.format_cls[self.format](value, dtype=self.dtype) else: data = self.format_cls[self.format](value) - up_dtype = aes.upcast(self.dtype, data.dtype) + up_dtype = ps.upcast(self.dtype, data.dtype) if up_dtype != self.dtype: raise TypeError(f"Expected {self.dtype} dtype but got {data.dtype}") sp = data.astype(up_dtype) diff --git a/pytensor/tensor/basic.py b/pytensor/tensor/basic.py index 9ab328266a..207fd4909a 100644 --- a/pytensor/tensor/basic.py +++ b/pytensor/tensor/basic.py @@ -20,7 +20,7 @@ import pytensor import pytensor.scalar.sharedvar from pytensor import compile, config, printing -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor.gradient import DisconnectedType, grad_undefined from pytensor.graph import RewriteDatabaseQuery from pytensor.graph.basic import Apply, Constant, Variable @@ -161,7 +161,7 @@ def extract_constants(i): # In this instance, we have a sequence of constants with which we # want to construct a vector, so we can use `MakeVector` directly. if dtype is None: - dtype = aes.upcast(*[i.dtype for i in x if hasattr(i, "dtype")]) + dtype = ps.upcast(*[i.dtype for i in x if hasattr(i, "dtype")]) return MakeVector(dtype)(*x) # In this case, we have at least one non-`Constant` term, so we @@ -215,7 +215,7 @@ def constant(x, name=None, ndim=None, dtype=None) -> TensorConstant: else: x = x.data - x_ = aes.convert(x, dtype=dtype) + x_ = ps.convert(x, dtype=dtype) if ndim is not None: if x_.ndim < ndim: @@ -244,22 +244,22 @@ def _obj_is_wrappable_as_tensor(x): _scalar_constant_value_elemwise_ops = ( - aes.Cast, - aes.Switch, - aes.NEQ, - aes.EQ, - aes.LT, - aes.GT, - aes.LE, - aes.GE, - aes.Sub, - aes.Add, - aes.Mod, - aes.Mul, - aes.IntDiv, - aes.TrueDiv, - aes.ScalarMinimum, - aes.ScalarMaximum, + ps.Cast, + ps.Switch, + ps.NEQ, + ps.EQ, + ps.LT, + ps.GT, + ps.LE, + ps.GE, + ps.Sub, + ps.Add, + ps.Mod, + ps.Mul, + ps.IntDiv, + ps.TrueDiv, + ps.ScalarMinimum, + ps.ScalarMaximum, ) @@ -392,8 +392,8 @@ def get_underlying_scalar_constant_value( if builtins.all(0 == c.ndim and c != 0 for c in conds): v = v.owner.inputs[0] continue - elif isinstance(v.owner.op, aes.ScalarOp): - if isinstance(v.owner.op, aes.Second): + elif isinstance(v.owner.op, ps.ScalarOp): + if isinstance(v.owner.op, ps.Second): # We don't need both input to be constant for second shp, val = v.owner.inputs v = val @@ -410,7 +410,7 @@ def get_underlying_scalar_constant_value( # we need to investigate Second as Alloc. So elemwise # don't disable the check for Second. 
elif isinstance(v.owner.op, Elemwise): - if isinstance(v.owner.op.scalar_op, aes.Second): + if isinstance(v.owner.op.scalar_op, ps.Second): # We don't need both input to be constant for second shp, val = v.owner.inputs v = val @@ -560,7 +560,7 @@ class TensorFromScalar(COp): __props__ = () def make_node(self, s): - if not isinstance(s.type, aes.ScalarType): + if not isinstance(s.type, ps.ScalarType): raise TypeError("Input must be a `ScalarType` `Type`") return Apply(self, [s], [tensor(dtype=s.type.dtype, shape=())]) @@ -622,7 +622,7 @@ def make_node(self, t): raise TypeError("Input must be a scalar `TensorType`") return Apply( - self, [t], [aes.get_scalar_type(dtype=t.type.dtype).make_variable()] + self, [t], [ps.get_scalar_type(dtype=t.type.dtype).make_variable()] ) def perform(self, node, inp, out_): @@ -675,49 +675,49 @@ def _conversion(real_value: Op, name: str) -> Op: # what types you are casting to what. That logic is implemented by the # `cast()` function below. -_convert_to_bool: Elemwise = _conversion(Elemwise(aes.convert_to_bool), "bool") +_convert_to_bool: Elemwise = _conversion(Elemwise(ps.convert_to_bool), "bool") """Cast to boolean""" -_convert_to_int8: Elemwise = _conversion(Elemwise(aes.convert_to_int8), "int8") +_convert_to_int8: Elemwise = _conversion(Elemwise(ps.convert_to_int8), "int8") """Cast to 8-bit integer""" -_convert_to_int16: Elemwise = _conversion(Elemwise(aes.convert_to_int16), "int16") +_convert_to_int16: Elemwise = _conversion(Elemwise(ps.convert_to_int16), "int16") """Cast to 16-bit integer""" -_convert_to_int32: Elemwise = _conversion(Elemwise(aes.convert_to_int32), "int32") +_convert_to_int32: Elemwise = _conversion(Elemwise(ps.convert_to_int32), "int32") """Cast to 32-bit integer""" -_convert_to_int64: Elemwise = _conversion(Elemwise(aes.convert_to_int64), "int64") +_convert_to_int64: Elemwise = _conversion(Elemwise(ps.convert_to_int64), "int64") """Cast to 64-bit integer""" -_convert_to_uint8: Elemwise = _conversion(Elemwise(aes.convert_to_uint8), "uint8") +_convert_to_uint8: Elemwise = _conversion(Elemwise(ps.convert_to_uint8), "uint8") """Cast to unsigned 8-bit integer""" -_convert_to_uint16: Elemwise = _conversion(Elemwise(aes.convert_to_uint16), "uint16") +_convert_to_uint16: Elemwise = _conversion(Elemwise(ps.convert_to_uint16), "uint16") """Cast to unsigned 16-bit integer""" -_convert_to_uint32: Elemwise = _conversion(Elemwise(aes.convert_to_uint32), "uint32") +_convert_to_uint32: Elemwise = _conversion(Elemwise(ps.convert_to_uint32), "uint32") """Cast to unsigned 32-bit integer""" -_convert_to_uint64: Elemwise = _conversion(Elemwise(aes.convert_to_uint64), "uint64") +_convert_to_uint64: Elemwise = _conversion(Elemwise(ps.convert_to_uint64), "uint64") """Cast to unsigned 64-bit integer""" -_convert_to_float16: Elemwise = _conversion(Elemwise(aes.convert_to_float16), "float16") +_convert_to_float16: Elemwise = _conversion(Elemwise(ps.convert_to_float16), "float16") """Cast to half-precision floating point""" -_convert_to_float32: Elemwise = _conversion(Elemwise(aes.convert_to_float32), "float32") +_convert_to_float32: Elemwise = _conversion(Elemwise(ps.convert_to_float32), "float32") """Cast to single-precision floating point""" -_convert_to_float64: Elemwise = _conversion(Elemwise(aes.convert_to_float64), "float64") +_convert_to_float64: Elemwise = _conversion(Elemwise(ps.convert_to_float64), "float64") """Cast to double-precision floating point""" _convert_to_complex64: Elemwise = _conversion( - Elemwise(aes.convert_to_complex64), "complex64" + 
Elemwise(ps.convert_to_complex64), "complex64" ) """Cast to single-precision complex""" _convert_to_complex128: Elemwise = _conversion( - Elemwise(aes.convert_to_complex128), "complex128" + Elemwise(ps.convert_to_complex128), "complex128" ) """Cast to double-precision complex""" @@ -1769,7 +1769,7 @@ def make_node(self, *inputs): if not all(a.type.dtype == inputs[0].type.dtype for a in inputs) or ( len(inputs) > 0 and inputs[0].dtype != self.dtype ): - dtype = aes.upcast(self.dtype, *[i.dtype for i in inputs]) + dtype = ps.upcast(self.dtype, *[i.dtype for i in inputs]) inputs = [cast(i, dtype=dtype) for i in inputs] if not all(self.dtype == i.dtype for i in inputs): @@ -1913,7 +1913,7 @@ def register_transfer(fn): """Create a duplicate of `a` (with duplicated storage)""" -tensor_copy = Elemwise(aes.identity) +tensor_copy = Elemwise(ps.identity) pprint.assign(tensor_copy, printing.IgnorePrinter()) @@ -1965,8 +1965,8 @@ def extract_constant(x, elemwise=True, only_process_constants=False): x = get_underlying_scalar_constant_value(x, elemwise, only_process_constants) except NotScalarConstantError: pass - if isinstance(x, aes.ScalarVariable) or isinstance( - x, aes.sharedvar.ScalarSharedVariable + if isinstance(x, ps.ScalarVariable) or isinstance( + x, ps.sharedvar.ScalarSharedVariable ): if x.owner and isinstance(x.owner.op, ScalarFromTensor): x = x.owner.inputs[0] @@ -2314,7 +2314,7 @@ def make_node(self, axis, *tensors): raise ValueError("Cannot join an empty list of tensors") tensors = [as_tensor_variable(x) for x in tensors] - out_dtype = aes.upcast(*[x.type.dtype for x in tensors]) + out_dtype = ps.upcast(*[x.type.dtype for x in tensors]) if not builtins.all(targs.type.ndim for targs in tensors): raise TypeError( @@ -2508,7 +2508,7 @@ def grad(self, axis_and_tensors, grads): rval = [grad_undefined(self, 0, axis)] dtypes = [as_tensor_variable(x).type.dtype for x in tens] - out_dtype = aes.upcast(*dtypes) + out_dtype = ps.upcast(*dtypes) if "float" in out_dtype or "complex" in out_dtype: # assume that this is differentiable @@ -2740,7 +2740,7 @@ def stack(tensors: Sequence["TensorLike"], axis: int = 0): ): # In case there is direct scalar tensors = list(map(as_tensor_variable, tensors)) - dtype = aes.upcast(*[i.dtype for i in tensors]) + dtype = ps.upcast(*[i.dtype for i in tensors]) return MakeVector(dtype)(*tensors) return join(axis, *[shape_padaxis(t, axis) for t in tensors]) @@ -3010,7 +3010,7 @@ def upcast(var): # this give float64. This is safer then checking for # uint64 in case we support [u]int128 or other in the # future. - aes.upcast(var.dtype, "int64") == "int64" + ps.upcast(var.dtype, "int64") == "int64" ): return cast(var, "int64") return var @@ -3085,7 +3085,7 @@ def arange(start, stop=None, step=1, dtype=None): start, stop, step = map(as_tensor_variable, (start, stop, step)) # If dtype is not provided, infer it from the other arguments if dtype is None: - dtype = aes.upcast(start.type.dtype, stop.type.dtype, step.type.dtype) + dtype = ps.upcast(start.type.dtype, stop.type.dtype, step.type.dtype) # don't try to be stingy and byte-optimize, this leads to # overflow problems. 
if dtype in int_dtypes: diff --git a/pytensor/tensor/blas.py b/pytensor/tensor/blas.py index 301cc5d199..06b39df8e7 100644 --- a/pytensor/tensor/blas.py +++ b/pytensor/tensor/blas.py @@ -97,7 +97,7 @@ from pytensor.link.c.params_type import ParamsType from pytensor.printing import FunctionPrinter, pprint from pytensor.scalar import bool as bool_t -from pytensor.tensor import basic as at +from pytensor.tensor import basic as ptb from pytensor.tensor.basic import expand_dims from pytensor.tensor.blas_headers import blas_header_text, blas_header_version from pytensor.tensor.elemwise import DimShuffle @@ -185,11 +185,11 @@ def __str__(self): return "%s{no_inplace}" % self.__class__.__name__ def make_node(self, y, alpha, A, x, beta): - y = at.as_tensor_variable(y) - x = at.as_tensor_variable(x) - A = at.as_tensor_variable(A) - alpha = at.as_tensor_variable(alpha) - beta = at.as_tensor_variable(beta) + y = ptb.as_tensor_variable(y) + x = ptb.as_tensor_variable(x) + A = ptb.as_tensor_variable(A) + alpha = ptb.as_tensor_variable(alpha) + beta = ptb.as_tensor_variable(beta) if y.dtype != A.dtype or y.dtype != x.dtype: raise TypeError( "Gemv requires matching dtypes", (y.dtype, A.dtype, x.dtype) @@ -284,10 +284,10 @@ def __str__(self): return "%s{non-destructive}" % self.__class__.__name__ def make_node(self, A, alpha, x, y): - A = at.as_tensor_variable(A) - y = at.as_tensor_variable(y) - x = at.as_tensor_variable(x) - alpha = at.as_tensor_variable(alpha) + A = ptb.as_tensor_variable(A) + y = ptb.as_tensor_variable(y) + x = ptb.as_tensor_variable(x) + alpha = ptb.as_tensor_variable(alpha) if not (A.dtype == x.dtype == y.dtype == alpha.dtype): raise TypeError( "ger requires matching dtypes", (A.dtype, alpha.dtype, x.dtype, y.dtype) @@ -864,7 +864,7 @@ def __getstate__(self): return rval def make_node(self, *inputs): - inputs = list(map(at.as_tensor_variable, inputs)) + inputs = list(map(ptb.as_tensor_variable, inputs)) if any(not isinstance(i.type, DenseTensorType) for i in inputs): raise NotImplementedError("Only dense tensor types are supported") @@ -1152,7 +1152,7 @@ def _as_scalar(res, dtype=None): # as the cast of the scalar can be done before or after the dot22 # and this will give the same result. if pytensor.scalar.upcast(res.dtype, dtype) == dtype: - return at.cast(rval, dtype) + return ptb.cast(rval, dtype) else: return None @@ -1362,9 +1362,9 @@ def _gemm_from_factored_list(fgraph, lst): # sM can be a tuple of 2 elements or an PyTensor variable. 
if isinstance(sM, tuple): sm0, sm1 = sM - sm0 = at.as_tensor_variable(sm0) + sm0 = ptb.as_tensor_variable(sm0) if pytensor.scalar.upcast(sm0.dtype, sm1.dtype) == sm1.dtype: - lst2.append((at.cast(sm0, sm1.dtype), sM[1])) + lst2.append((ptb.cast(sm0, sm1.dtype), sM[1])) lst = lst2 @@ -1454,8 +1454,8 @@ class Dot22(GemmRelated): check_input = False def make_node(self, x, y): - x = at.as_tensor_variable(x) - y = at.as_tensor_variable(y) + x = ptb.as_tensor_variable(x) + y = ptb.as_tensor_variable(y) if any(not isinstance(i.type, DenseTensorType) for i in (x, y)): raise NotImplementedError("Only dense tensor types are supported") @@ -1647,8 +1647,8 @@ class BatchedDot(COp): gufunc_signature = "(b,m,k),(b,k,n)->(b,m,n)" def make_node(self, x, y): - x = at.as_tensor_variable(x) - y = at.as_tensor_variable(y) + x = ptb.as_tensor_variable(x) + y = ptb.as_tensor_variable(y) if not ( isinstance(x.type, DenseTensorType) and isinstance(y.type, DenseTensorType) @@ -1682,7 +1682,7 @@ def extract_static_dim(dim_x, dim_y): # Change dtype if needed dtype = pytensor.scalar.upcast(x.type.dtype, y.type.dtype) - x, y = at.cast(x, dtype), at.cast(y, dtype) + x, y = ptb.cast(x, dtype), ptb.cast(y, dtype) out = tensor(dtype=dtype, shape=out_shape) return Apply(self, [x, y], [out]) @@ -2069,7 +2069,7 @@ def batched_dot(a, b): dot products in terms of batched matrix-matrix dot products, so it may be possible to further optimize for performance. """ - a, b = at.as_tensor_variable(a), at.as_tensor_variable(b) + a, b = ptb.as_tensor_variable(a), ptb.as_tensor_variable(b) if a.ndim == 0: raise TypeError("a must have at least one (batch) axis") diff --git a/pytensor/tensor/conv/abstract_conv.py b/pytensor/tensor/conv/abstract_conv.py index 24e8e5be47..16120197bc 100644 --- a/pytensor/tensor/conv/abstract_conv.py +++ b/pytensor/tensor/conv/abstract_conv.py @@ -19,7 +19,7 @@ from scipy.signal._sigtools import _convolve2d import pytensor -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.configdefaults import config from pytensor.graph.basic import Apply, Variable from pytensor.graph.op import Op @@ -552,12 +552,12 @@ def assert_conv_shape(shape): assert_shp = Assert( f"The convolution would produce an invalid shape (dim[{int(i)}] < 0)." ) - out_shape.append(assert_shp(n, at.ge(n, 0))) + out_shape.append(assert_shp(n, pt.ge(n, 0))) else: assert_shp = Assert( f"The convolution would produce an invalid shape (dim[{int(i)}] < 0)." ) - out_shape.append(assert_shp(n, at.gt(n, 0))) + out_shape.append(assert_shp(n, pt.gt(n, 0))) return tuple(out_shape) @@ -589,7 +589,7 @@ def assert_shape(x, expected_shape, msg="Unexpected shape."): tests = [] for i in range(x.ndim): if expected_shape[i] is not None: - tests.append(at.eq(shape[i], expected_shape[i])) + tests.append(pt.eq(shape[i], expected_shape[i])) if tests: return Assert(msg)(x, *tests) else: @@ -1800,11 +1800,11 @@ def bilinear_kernel_1D(ratio, normalize=True): by the indicated ratio using bilinear interpolation in one dimension. 
""" - half_kern = at.arange(1, ratio + 1, dtype=config.floatX) - kern = at.concatenate([half_kern, half_kern[-2::-1]]) + half_kern = pt.arange(1, ratio + 1, dtype=config.floatX) + kern = pt.concatenate([half_kern, half_kern[-2::-1]]) if normalize: - kern /= at.cast(ratio, config.floatX) + kern /= pt.cast(ratio, config.floatX) return kern @@ -1863,15 +1863,15 @@ def frac_bilinear_upsampling(input, frac_ratio): subsample = (frac_ratio[1], frac_ratio[1]) # duplicate borders of the input - concat_mat = at.concatenate( + concat_mat = pt.concatenate( (up_input[:, :, :1, :], up_input, up_input[:, :, -1:, :]), axis=2 ) - concat_mat = at.concatenate( + concat_mat = pt.concatenate( (concat_mat[:, :, :, :1], concat_mat, concat_mat[:, :, :, -1:]), axis=3 ) # add padding for the pyramidal kernel - double_pad = (2 * at.as_tensor([row, col]) - 1) * np.array(ratio) + 1 + double_pad = (2 * pt.as_tensor([row, col]) - 1) * np.array(ratio) + 1 pad = double_pad // 2 # build pyramidal kernel @@ -1880,25 +1880,25 @@ def frac_bilinear_upsampling(input, frac_ratio): ) # add corresponding padding - pad_kern = at.concatenate( + pad_kern = pt.concatenate( ( - at.zeros( + pt.zeros( tuple(kern.shape[:2]) + (pad[0], kern.shape[-1]), dtype=config.floatX, ), kern, - at.zeros( + pt.zeros( tuple(kern.shape[:2]) + (double_pad[0] - pad[0], kern.shape[-1]), dtype=config.floatX, ), ), axis=2, ) - pad_kern = at.concatenate( + pad_kern = pt.concatenate( ( - at.zeros(tuple(pad_kern.shape[:3]) + (pad[1],), dtype=config.floatX), + pt.zeros(tuple(pad_kern.shape[:3]) + (pad[1],), dtype=config.floatX), pad_kern, - at.zeros( + pt.zeros( tuple(pad_kern.shape[:3]) + (double_pad[1] - pad[1],), dtype=config.floatX, ), @@ -1992,11 +1992,11 @@ def bilinear_upsampling( # concatenating the first and last row and column # first and last row - concat_mat = at.concatenate( + concat_mat = pt.concatenate( (up_input[:, :, :1, :], up_input, up_input[:, :, -1:, :]), axis=2 ) # first and last col - concat_mat = at.concatenate( + concat_mat = pt.concatenate( (concat_mat[:, :, :, :1], concat_mat, concat_mat[:, :, :, -1:]), axis=3 ) concat_col = col + 2 diff --git a/pytensor/tensor/elemwise.py b/pytensor/tensor/elemwise.py index 869d40faa6..6c01d574d8 100644 --- a/pytensor/tensor/elemwise.py +++ b/pytensor/tensor/elemwise.py @@ -531,7 +531,7 @@ def connection_pattern(self, node): return [[True for output in node.outputs] for ipt in node.inputs] def L_op(self, inputs, outs, ograds): - from pytensor.tensor.math import sum as at_sum + from pytensor.tensor.math import sum as pt_sum # Compute grad with respect to broadcasted input rval = self._bgrad(inputs, outs, ograds) @@ -572,7 +572,7 @@ def L_op(self, inputs, outs, ograds): ] if to_sum: - sr = at_sum(rval[i], axis=to_sum, keepdims=True) + sr = pt_sum(rval[i], axis=to_sum, keepdims=True) rval[i] = sr return rval diff --git a/pytensor/tensor/extra_ops.py b/pytensor/tensor/extra_ops.py index b9bcceb2db..fd1a18f237 100644 --- a/pytensor/tensor/extra_ops.py +++ b/pytensor/tensor/extra_ops.py @@ -5,7 +5,7 @@ from numpy.core.multiarray import normalize_axis_index import pytensor -import pytensor.scalar.basic as aes +import pytensor.scalar.basic as ps from pytensor.gradient import ( DisconnectedType, _float_zeros_like, @@ -22,7 +22,7 @@ from pytensor.scalar import int32 as int_t from pytensor.scalar import upcast from pytensor.tensor import as_tensor_variable -from pytensor.tensor import basic as at +from pytensor.tensor import basic as ptb from pytensor.tensor.basic import alloc, second from 
pytensor.tensor.exceptions import NotScalarConstantError from pytensor.tensor.math import abs as pt_abs @@ -31,7 +31,7 @@ from pytensor.tensor.math import ge, lt from pytensor.tensor.math import max as pt_max from pytensor.tensor.math import maximum, minimum, prod -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import switch from pytensor.tensor.subtensor import advanced_inc_subtensor1, set_subtensor from pytensor.tensor.type import TensorType, dvector, int_dtypes, integer_dtypes, vector @@ -51,7 +51,7 @@ class CpuContiguous(COp): check_input = False def make_node(self, x): - x_ = at.as_tensor_variable(x) + x_ = ptb.as_tensor_variable(x) return Apply(self, [x_], [x_.type()]) def perform(self, node, inputs, output_storage): @@ -65,7 +65,7 @@ def perform(self, node, inputs, output_storage): y[0] = x def grad(self, inputs, dout): - return [at.as_tensor_variable(dout[0])] + return [ptb.as_tensor_variable(dout[0])] def c_code(self, node, name, inames, onames, sub): (x,) = inames @@ -126,13 +126,13 @@ def get_params(self, node): return self.side def make_node(self, x, v, sorter=None): - x = at.as_tensor(x, ndim=1) - v = at.as_tensor(v) + x = ptb.as_tensor(x, ndim=1) + v = ptb.as_tensor(v) out_type = v.type.clone(dtype="int64") if sorter is None: return Apply(self, [x, v], [out_type()]) else: - sorter = at.as_tensor(sorter, ndim=1) + sorter = ptb.as_tensor(sorter, ndim=1) if PYTHON_INT_BITWIDTH == 32 and sorter.dtype == "int64": raise TypeError( "numpy.searchsorted with Python 32bit do not support a" @@ -263,9 +263,9 @@ def searchsorted(x, v, side="left", sorter=None): Examples -------- - >>> from pytensor import tensor as at + >>> from pytensor import tensor as pt >>> from pytensor.tensor import extra_ops - >>> x = at.dvector() + >>> x = ptb.dvector() >>> idx = x.searchsorted(3) >>> idx.eval({x: [1,2,3,4,5]}) array(2) @@ -300,7 +300,7 @@ def __init__(self, axis: Optional[int] = None, mode="add"): c_axis = property(lambda self: np.MAXDIMS if self.axis is None else self.axis) def make_node(self, x): - x = at.as_tensor_variable(x) + x = ptb.as_tensor_variable(x) out_type = x.type() if self.axis is None: @@ -540,7 +540,7 @@ def bincount(x, weights=None, minlength=None, assert_nonneg=False): assert_op = Assert("Input to bincount has negative values!") x = assert_op(x, pt_all(x >= 0)) - max_value = at.cast(x.max() + 1, "int64") + max_value = ptb.cast(x.max() + 1, "int64") if minlength is not None: max_value = maximum(max_value, minlength) @@ -548,10 +548,10 @@ def bincount(x, weights=None, minlength=None, assert_nonneg=False): # Note: we do not use inc_subtensor(out[x], ...) in the following lines, # since out[x] raises an exception if the indices (x) are int8. if weights is None: - out = at.zeros([max_value], dtype=x.dtype) + out = ptb.zeros([max_value], dtype=x.dtype) out = advanced_inc_subtensor1(out, 1, x) else: - out = at.zeros([max_value], dtype=weights.dtype) + out = ptb.zeros([max_value], dtype=weights.dtype) out = advanced_inc_subtensor1(out, weights, x) return out @@ -589,7 +589,7 @@ def squeeze(x, axis=None): `x` without `axis` dimensions. """ - _x = at.as_tensor_variable(x) + _x = ptb.as_tensor_variable(x) if axis is None: # By default exclude all broadcastable (length=1) axes @@ -635,8 +635,8 @@ def compress(condition, x, axis=None): `x` with selected slices. 
""" - _x = at.as_tensor_variable(x) - indices = at.flatnonzero(condition) + _x = ptb.as_tensor_variable(x) + indices = ptb.flatnonzero(condition) return _x.take(indices, axis=axis) @@ -649,8 +649,8 @@ def __init__(self, axis=None): self.axis = axis def make_node(self, x, repeats): - x = at.as_tensor_variable(x) - repeats = at.as_tensor_variable(repeats) + x = ptb.as_tensor_variable(x) + repeats = ptb.as_tensor_variable(repeats) if repeats.dtype not in integer_dtypes: raise TypeError("repeats.dtype must be an integer.") @@ -677,7 +677,7 @@ def make_node(self, x, repeats): out_shape = [None] else: try: - const_reps = at.get_underlying_scalar_constant_value(repeats) + const_reps = ptb.get_underlying_scalar_constant_value(repeats) except NotScalarConstantError: const_reps = None if const_reps == 1: @@ -747,12 +747,12 @@ def infer_shape(self, fgraph, node, ins_shapes): res = res * d out_shape = (res * repeats,) else: - out_shape = [at_sum(repeats, dtype=dtype)] + out_shape = [pt_sum(repeats, dtype=dtype)] else: if repeats.ndim == 0: out_shape[self.axis] = out_shape[self.axis] * repeats else: - out_shape[self.axis] = at_sum(repeats, dtype=dtype) + out_shape[self.axis] = pt_sum(repeats, dtype=dtype) return [out_shape] @@ -781,7 +781,7 @@ def repeat(x, repeats, axis=None): .. versionadded:: 0.6 """ - repeats = at.as_tensor_variable(repeats, dtype=np.int64) + repeats = ptb.as_tensor_variable(repeats, dtype=np.int64) if repeats.ndim > 1: raise ValueError("The dimension of repeats should not exceed 1.") @@ -824,7 +824,7 @@ def repeat(x, repeats, axis=None): # After the original tensor is duplicated along the additional # dimension, we reshape it to the expected output shape, and # return the output z. - z = at.alloc(x.dimshuffle(*dims_), *shape_).reshape(shape) + z = ptb.alloc(x.dimshuffle(*dims_), *shape_).reshape(shape) return z @@ -833,7 +833,7 @@ class Bartlett(Op): __props__ = () def make_node(self, M): - M = at.as_tensor_variable(M) + M = ptb.as_tensor_variable(M) if M.ndim != 0: raise TypeError(f"{self.__class__.__name__} only works on scalar input") elif M.dtype not in integer_dtypes: @@ -848,7 +848,7 @@ def perform(self, node, inputs, out_): def infer_shape(self, fgraph, node, in_shapes): temp = node.inputs[0] - M = at.switch(lt(temp, 0), at.cast(0, temp.dtype), temp) + M = ptb.switch(lt(temp, 0), ptb.cast(0, temp.dtype), temp) return [[M]] def grad(self, inputs, output_grads): @@ -893,8 +893,8 @@ def infer_shape(self, fgraph, node, in_shapes): return [in_shapes[0]] def make_node(self, a, val): - a = at.as_tensor_variable(a) - val = at.as_tensor_variable(val) + a = ptb.as_tensor_variable(a) + val = ptb.as_tensor_variable(val) if a.ndim < 2: raise TypeError( "%s: first parameter must have at least" @@ -904,7 +904,7 @@ def make_node(self, a, val): raise TypeError( f"{self.__class__.__name__}: second parameter must be a scalar" ) - val = at.cast(val, dtype=upcast(a.dtype, val.dtype)) + val = ptb.cast(val, dtype=upcast(a.dtype, val.dtype)) if val.dtype != a.dtype: raise TypeError( "%s: type of second parameter must be the same as" @@ -946,7 +946,7 @@ def grad(self, inp, cost_grad): ) wr_a = fill_diagonal(grad, 0) # valid for any number of dimensions # diag is only valid for matrices - wr_val = at.diag(grad).sum() + wr_val = ptb.diag(grad).sum() return [wr_a, wr_val] @@ -994,9 +994,9 @@ def infer_shape(self, fgraph, node, in_shapes): return [in_shapes[0]] def make_node(self, a, val, offset): - a = at.as_tensor_variable(a) - val = at.as_tensor_variable(val) - offset = at.as_tensor_variable(offset) 
+ a = ptb.as_tensor_variable(a) + val = ptb.as_tensor_variable(val) + offset = ptb.as_tensor_variable(offset) if a.ndim != 2: raise TypeError( "%s: first parameter must have exactly" @@ -1010,7 +1010,7 @@ def make_node(self, a, val, offset): raise TypeError( f"{self.__class__.__name__}: third parameter must be a scalar" ) - val = at.cast(val, dtype=upcast(a.dtype, val.dtype)) + val = ptb.cast(val, dtype=upcast(a.dtype, val.dtype)) if val.dtype != a.dtype: raise TypeError( "%s: type of second parameter must be the same" @@ -1085,9 +1085,9 @@ def grad(self, inp, cost_grad): end = start + step * num_of_step # input of slice should be integer - start = at.cast(start, "int32") - step = at.cast(step, "int32") - end = at.cast(end, "int32") + start = ptb.cast(start, "int32") + step = ptb.cast(step, "int32") + end = ptb.cast(end, "int32") wr_val = grad.flatten()[start:end:step].sum() @@ -1153,8 +1153,8 @@ def to_one_hot(y, nb_class, dtype=None): the one hot encoding of the corresponding ``y[i]`` value. """ - ret = at.zeros((y.shape[0], nb_class), dtype=dtype) - ret = set_subtensor(ret[at.arange(y.shape[0]), y], 1) + ret = ptb.zeros((y.shape[0], nb_class), dtype=dtype) + ret = set_subtensor(ret[ptb.arange(y.shape[0]), y], 1) return ret @@ -1190,7 +1190,7 @@ def __init__( self.axis = axis def make_node(self, x): - x = at.as_tensor_variable(x) + x = ptb.as_tensor_variable(x) self_axis = self.axis if self_axis is None: out_shape = (None,) @@ -1297,8 +1297,8 @@ def __init__(self, order="C"): self.order = order def make_node(self, indices, dims): - indices = at.as_tensor_variable(indices) - dims = at.as_tensor_variable(dims) + indices = ptb.as_tensor_variable(indices) + dims = ptb.as_tensor_variable(dims) if indices.dtype not in int_dtypes: raise TypeError( @@ -1314,7 +1314,7 @@ def make_node(self, indices, dims): [indices, dims], [ TensorType(dtype="int64", shape=(None,) * indices.type.ndim)() - for i in range(at.get_vector_length(dims)) + for i in range(ptb.get_vector_length(dims)) ], ) @@ -1378,8 +1378,8 @@ def __init__(self, mode="raise", order="C"): self.order = order def make_node(self, *inp): - multi_index = [at.as_tensor_variable(i) for i in inp[:-1]] - dims = at.as_tensor_variable(inp[-1]) + multi_index = [ptb.as_tensor_variable(i) for i in inp[:-1]] + dims = ptb.as_tensor_variable(inp[-1]) for i in multi_index: if i.dtype not in int_dtypes: @@ -1453,7 +1453,7 @@ def ravel_multi_index(multi_index, dims, mode="raise", order="C"): _runtime_broadcast_assert = Assert("Could not broadcast dimensions.") -def broadcast_shape(*arrays, **kwargs) -> tuple[aes.ScalarVariable, ...]: +def broadcast_shape(*arrays, **kwargs) -> tuple[ps.ScalarVariable, ...]: """Compute the shape resulting from broadcasting arrays. Parameters @@ -1475,7 +1475,7 @@ def broadcast_shape_iter( arrays: Iterable[Union[TensorVariable, tuple[TensorVariable, ...]]], arrays_are_shapes: bool = False, allow_runtime_broadcast: bool = False, -) -> tuple[aes.ScalarVariable, ...]: +) -> tuple[ps.ScalarVariable, ...]: r"""Compute the shape resulting from broadcasting arrays. 
@@ -1509,7 +1509,7 @@ def broadcast_shape_iter( + tuple( one if sh == 1 or isinstance(sh, Constant) and sh.value == 1 - else (aes.as_scalar(sh) if not isinstance(sh, Variable) else sh) + else (ps.as_scalar(sh) if not isinstance(sh, Variable) else sh) for sh in a ) for a in arrays @@ -1517,7 +1517,7 @@ def broadcast_shape_iter( else: max_dims = max(a.ndim for a in arrays) - _arrays = tuple(at.as_tensor_variable(a) for a in arrays) + _arrays = tuple(ptb.as_tensor_variable(a) for a in arrays) array_shapes = [ (one,) * (max_dims - a.ndim) @@ -1556,7 +1556,7 @@ def broadcast_shape_iter( if len(const_nb_shapes) == 1: (first_length,) = const_nb_shapes other_lengths = nonconst_nb_shapes - first_length = aes.as_scalar(first_length) + first_length = ps.as_scalar(first_length) else: first_length, *other_lengths = nonconst_nb_shapes @@ -1591,22 +1591,22 @@ def broadcast_shape_iter( def geomspace(start, end, steps, base=10.0): from pytensor.tensor.math import log - start = at.as_tensor_variable(start) - end = at.as_tensor_variable(end) + start = ptb.as_tensor_variable(start) + end = ptb.as_tensor_variable(end) return base ** linspace(log(start) / log(base), log(end) / log(base), steps) def logspace(start, end, steps, base=10.0): - start = at.as_tensor_variable(start) - end = at.as_tensor_variable(end) + start = ptb.as_tensor_variable(start) + end = ptb.as_tensor_variable(end) return base ** linspace(start, end, steps) def linspace(start, end, steps): - start = at.as_tensor_variable(start) - end = at.as_tensor_variable(end) - arr = at.arange(steps) - arr = at.shape_padright(arr, max(start.ndim, end.ndim)) + start = ptb.as_tensor_variable(start) + end = ptb.as_tensor_variable(end) + arr = ptb.arange(steps) + arr = ptb.shape_padright(arr, max(start.ndim, end.ndim)) multiplier = (end - start) / (steps - 1) return start + arr * multiplier diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py index 6fa0065ce2..6ab92d086d 100644 --- a/pytensor/tensor/math.py +++ b/pytensor/tensor/math.py @@ -5,7 +5,7 @@ import numpy as np from pytensor import config, printing -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor.gradient import DisconnectedType from pytensor.graph.basic import Apply, Variable from pytensor.graph.op import Op @@ -340,7 +340,7 @@ class Argmax(COp): __props__ = ("axis",) _f16_ok = True - params_type = ParamsType(c_axis=aes.int64) + params_type = ParamsType(c_axis=ps.int64) def __init__(self, axis): if axis is not None: @@ -624,7 +624,7 @@ class Max(NonZeroDimsCAReduce): nfunc_spec = ("max", 1, 1) def __init__(self, axis): - super().__init__(aes.scalar_maximum, axis) + super().__init__(ps.scalar_maximum, axis) def clone(self, **kwargs): axis = kwargs.get("axis", self.axis) @@ -635,7 +635,7 @@ class Min(NonZeroDimsCAReduce): nfunc_spec = ("min", 1, 1) def __init__(self, axis): - super().__init__(aes.scalar_minimum, axis) + super().__init__(ps.scalar_minimum, axis) def clone(self, **kwargs): axis = kwargs.get("axis", self.axis) @@ -1517,7 +1517,7 @@ class Mean(FixedOpCAReduce): nfunc_spec = ("mean", 1, 1) def __init__(self, axis=None): - super().__init__(aes.mean, axis) + super().__init__(ps.mean, axis) assert self.axis is None or len(self.axis) == 1 def __str__(self): @@ -1839,13 +1839,13 @@ def ceil_intdiv(a, b): # is faster or not. But this is not safe for int64, because the cast will # lose precision. 
For example: # cast(cast(a, scalar.upcast(a.type.dtype, 'float32')) / b, - # aes.upcast(a.type.dtype, b.type.dtype)) + # ps.upcast(a.type.dtype, b.type.dtype)) # We cast for the case when a and b are uint*; otherwise, neq will # force their upcast to int. div = int_div(a, b) ret = cast(neq(a % b, 0), div.dtype) + div - assert ret.dtype == aes.upcast( + assert ret.dtype == ps.upcast( div.owner.inputs[0].type.dtype, div.owner.inputs[1].type.dtype ) return ret @@ -1858,7 +1858,7 @@ def mod_check(x, y): or as_tensor_variable(y).dtype in complex_dtypes ): # Currently forbidden. - raise aes.Mod.complex_error + raise ps.Mod.complex_error else: return mod(x, y) @@ -1947,7 +1947,7 @@ def make_node(self, *inputs): sz = sx[:-1] i_dtypes = [input.type.dtype for input in inputs] - outputs = [tensor(dtype=aes.upcast(*i_dtypes), shape=sz)] + outputs = [tensor(dtype=ps.upcast(*i_dtypes), shape=sz)] return Apply(self, inputs, outputs) def perform(self, node, inp, out): @@ -2381,7 +2381,7 @@ class All(FixedOpCAReduce): nfunc_spec = ("all", 1, 1) def __init__(self, axis=None): - super().__init__(aes.and_, axis) + super().__init__(ps.and_, axis) def _output_dtype(self, idtype): return "bool" @@ -2411,7 +2411,7 @@ class Any(FixedOpCAReduce): nfunc_spec = ("any", 1, 1) def __init__(self, axis=None): - super().__init__(aes.or_, axis) + super().__init__(ps.or_, axis) def _output_dtype(self, idtype): return "bool" @@ -2446,7 +2446,7 @@ class Sum(FixedOpCAReduce): def __init__(self, axis=None, dtype=None, acc_dtype=None): super().__init__( - aes.add, + ps.add, axis=axis, dtype=dtype, acc_dtype=acc_dtype, @@ -2475,7 +2475,7 @@ def L_op(self, inp, out, grads): new_dims.append(i) i += 1 ds_op = DimShuffle(gz.type.broadcastable, new_dims) - gx = Elemwise(aes.second)(x, ds_op(gz)) + gx = Elemwise(ps.second)(x, ds_op(gz)) return [gx] def R_op(self, inputs, eval_points): @@ -2537,7 +2537,7 @@ class Prod(FixedOpCAReduce): def __init__(self, axis=None, dtype=None, acc_dtype=None, no_zeros_in_input=False): super().__init__( - aes.mul, + ps.mul, axis=axis, dtype=dtype, acc_dtype=acc_dtype, @@ -2751,7 +2751,7 @@ def c_code_cache_version(self): return (1,) -mul_without_zeros = MulWithoutZeros(aes.upcast_out, name="mul_without_zeros") +mul_without_zeros = MulWithoutZeros(ps.upcast_out, name="mul_without_zeros") class ProdWithoutZeros(FixedOpCAReduce): diff --git a/pytensor/tensor/nlinalg.py b/pytensor/tensor/nlinalg.py index 17d551f8c3..c8805c38e1 100644 --- a/pytensor/tensor/nlinalg.py +++ b/pytensor/tensor/nlinalg.py @@ -3,12 +3,12 @@ import numpy as np -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor.gradient import DisconnectedType from pytensor.graph.basic import Apply from pytensor.graph.op import Op -from pytensor.tensor import basic as at -from pytensor.tensor import math as tm +from pytensor.tensor import basic as ptb +from pytensor.tensor import math as ptm from pytensor.tensor.basic import as_tensor_variable, diagonal from pytensor.tensor.blockwise import Blockwise from pytensor.tensor.type import dvector, lscalar, matrix, scalar, vector @@ -47,13 +47,13 @@ def L_op(self, inputs, outputs, g_outputs): (z,) = outputs (gz,) = g_outputs - x_dot_z = tm.dot(x, z) - z_dot_x = tm.dot(z, x) + x_dot_z = ptm.dot(x, z) + z_dot_x = ptm.dot(z, x) grad = ( -matrix_dot(z, gz.T, z) - + matrix_dot(z, z.T, gz, (at.identity_like(x_dot_z) - x_dot_z)) - + matrix_dot((at.identity_like(z_dot_x) - z_dot_x), gz, z.T, z) + + matrix_dot(z, z.T, gz, (ptb.identity_like(x_dot_z) - x_dot_z)) + + 
matrix_dot((ptb.identity_like(z_dot_x) - z_dot_x), gz, z.T, z) ).T return [grad] @@ -127,7 +127,7 @@ def grad(self, inputs, g_outputs): (x,) = inputs xi = self(x) (gz,) = g_outputs - # tm.dot(gz.T,xi) + # ptm.dot(gz.T,xi) return [-matrix_dot(xi, gz.T, xi).T] def R_op(self, inputs, eval_points): @@ -167,7 +167,7 @@ def matrix_dot(*args): """ rval = args[0] for a in args[1:]: - rval = tm.dot(rval, a) + rval = ptm.dot(rval, a) return rval @@ -383,7 +383,7 @@ def make_node(self, x, w, v, gw, gv): assert v.ndim == 2 assert gw.ndim == 1 assert gv.ndim == 2 - out_dtype = aes.upcast(x.dtype, w.dtype, v.dtype, gw.dtype, gv.dtype) + out_dtype = ps.upcast(x.dtype, w.dtype, v.dtype, gw.dtype, gv.dtype) out = matrix(dtype=out_dtype) return Apply(self, [x, w, v, gw, gv], [out]) @@ -573,7 +573,7 @@ def perform(self, node, inputs, outputs): def infer_shape(self, fgraph, node, shapes): (x_shape,) = shapes M, N = x_shape - K = tm.minimum(M, N) + K = ptm.minimum(M, N) s_shape = (K,) if self.compute_uv: u_shape = (M, M) if self.full_matrices else (M, K) @@ -655,24 +655,24 @@ def matrix_power(M, n): # Shortcuts when 0 < n <= 3 if n == 0: - return at.eye(M.shape[-2]) + return ptb.eye(M.shape[-2]) elif n == 1: return M elif n == 2: - return tm.dot(M, M) + return ptm.dot(M, M) elif n == 3: - return tm.dot(tm.dot(M, M), M) + return ptm.dot(ptm.dot(M, M), M) result = z = None while n > 0: - z = M if z is None else tm.dot(z, z) + z = M if z is None else ptm.dot(z, z) n, bit = divmod(n, 2) if bit: - result = z if result is None else tm.dot(result, z) + result = z if result is None else ptm.dot(result, z) return result @@ -684,30 +684,30 @@ def norm(x, ord): raise ValueError("'axis' entry is out of bounds.") elif ndim == 1: if ord is None: - return tm.sum(x**2) ** 0.5 + return ptm.sum(x**2) ** 0.5 elif ord == "inf": - return tm.max(abs(x)) + return ptm.max(abs(x)) elif ord == "-inf": - return tm.min(abs(x)) + return ptm.min(abs(x)) elif ord == 0: return x[x.nonzero()].shape[0] else: try: - z = tm.sum(abs(x**ord)) ** (1.0 / ord) + z = ptm.sum(abs(x**ord)) ** (1.0 / ord) except TypeError: raise ValueError("Invalid norm order for vectors.") return z elif ndim == 2: if ord is None or ord == "fro": - return tm.sum(abs(x**2)) ** (0.5) + return ptm.sum(abs(x**2)) ** (0.5) elif ord == "inf": - return tm.max(tm.sum(abs(x), 1)) + return ptm.max(ptm.sum(abs(x), 1)) elif ord == "-inf": - return tm.min(tm.sum(abs(x), 1)) + return ptm.min(ptm.sum(abs(x), 1)) elif ord == 1: - return tm.max(tm.sum(abs(x), 0)) + return ptm.max(ptm.sum(abs(x), 0)) elif ord == -1: - return tm.min(tm.sum(abs(x), 0)) + return ptm.min(ptm.sum(abs(x), 0)) else: raise ValueError(0) elif ndim > 2: @@ -787,7 +787,7 @@ def __init__(self, axes=None): def make_node(self, a, b): a = as_tensor_variable(a) b = as_tensor_variable(b) - out_dtype = aes.upcast(a.dtype, b.dtype) + out_dtype = ps.upcast(a.dtype, b.dtype) x = matrix(dtype=out_dtype) return Apply(self, [a, b], [x]) diff --git a/pytensor/tensor/rewriting/basic.py b/pytensor/tensor/rewriting/basic.py index 98f6d68dab..a2a4ccc2f7 100644 --- a/pytensor/tensor/rewriting/basic.py +++ b/pytensor/tensor/rewriting/basic.py @@ -27,7 +27,7 @@ import numpy as np -import pytensor.scalar.basic as aes +import pytensor.scalar.basic as ps from pytensor import compile from pytensor.compile.ops import ViewOp from pytensor.graph import FunctionGraph @@ -551,7 +551,7 @@ def local_useless_elemwise(fgraph, node): # cleaner graph. 
dtype = node.outputs[0].dtype - if node.op.scalar_op == aes.eq and len(node.inputs) == 2: + if node.op.scalar_op == ps.eq and len(node.inputs) == 2: if node.inputs[0] == node.inputs[1]: # it is the same var in the graph. That will always be true ret = ones_like(node.inputs[0], dtype=dtype, opt=True) @@ -559,7 +559,7 @@ def local_useless_elemwise(fgraph, node): # Copy stack trace from input to constant output copy_stack_trace(node.outputs[0], ret) return [ret] - elif node.op.scalar_op == aes.neq and len(node.inputs) == 2: + elif node.op.scalar_op == ps.neq and len(node.inputs) == 2: if node.inputs[0] == node.inputs[1]: # it is the same var in the graph. That will always be false ret = zeros_like(node.inputs[0], dtype=dtype, opt=True) @@ -568,17 +568,17 @@ def local_useless_elemwise(fgraph, node): copy_stack_trace(node.outputs[0], ret) return [ret] - elif node.op.scalar_op == aes.mul and len(node.inputs) == 1: + elif node.op.scalar_op == ps.mul and len(node.inputs) == 1: # No need to copy over any stack trace return [node.inputs[0]] - elif node.op.scalar_op == aes.add and len(node.inputs) == 1: + elif node.op.scalar_op == ps.add and len(node.inputs) == 1: # No need to copy over any stack trace return [node.inputs[0]] - elif node.op.scalar_op == aes.identity and len(node.inputs) == 1: + elif node.op.scalar_op == ps.identity and len(node.inputs) == 1: return [node.inputs[0]] - elif isinstance(node.op.scalar_op, aes.AND) and len(node.inputs) == 2: + elif isinstance(node.op.scalar_op, ps.AND) and len(node.inputs) == 2: if isinstance(node.inputs[0], TensorConstant): const_val = extract_constant( node.inputs[0], only_process_constants=True @@ -603,7 +603,7 @@ def local_useless_elemwise(fgraph, node): # and this rewrite would be wrong return [node.inputs[0].astype(node.outputs[0].dtype)] - elif isinstance(node.op.scalar_op, aes.OR) and len(node.inputs) == 2: + elif isinstance(node.op.scalar_op, ps.OR) and len(node.inputs) == 2: if isinstance(node.inputs[0], TensorConstant): const_val = extract_constant( node.inputs[0], only_process_constants=True @@ -628,7 +628,7 @@ def local_useless_elemwise(fgraph, node): # and this rewrite would be wrong return [ones_like(node.inputs[0], dtype=dtype, opt=True)] - elif isinstance(node.op.scalar_op, aes.XOR) and len(node.inputs) == 2: + elif isinstance(node.op.scalar_op, ps.XOR) and len(node.inputs) == 2: if node.inputs[0] is node.inputs[1]: return [zeros_like(node.inputs[0], dtype=dtype, opt=True)] @@ -666,13 +666,13 @@ def local_cast_cast(fgraph, node): and the first cast cause an upcast. 
""" - if not isinstance(node.op, Elemwise) or not isinstance(node.op.scalar_op, aes.Cast): + if not isinstance(node.op, Elemwise) or not isinstance(node.op.scalar_op, ps.Cast): return x = node.inputs[0] if ( not x.owner or not isinstance(x.owner.op, Elemwise) - or not isinstance(x.owner.op.scalar_op, aes.Cast) + or not isinstance(x.owner.op.scalar_op, ps.Cast) ): return @@ -1016,7 +1016,7 @@ def local_useless_switch(fgraph, node): if ( cond_var.owner and isinstance(cond_var.owner.op, Elemwise) - and isinstance(cond_var.owner.op.scalar_op, aes.LE) + and isinstance(cond_var.owner.op.scalar_op, ps.LE) and cond_var.owner.inputs[0].owner and isinstance(cond_var.owner.inputs[0].owner.op, Shape_i) and extract_constant(cond_var.owner.inputs[1], only_process_constants=True) == 0 @@ -1039,14 +1039,14 @@ def local_merge_switch_same_cond(fgraph, node): """ # node must be binary elemwise or add or mul if not isinstance(node.op, Elemwise) or not isinstance( - node.op.scalar_op, (aes.BinaryScalarOp, aes.Add, aes.Mul) + node.op.scalar_op, (ps.BinaryScalarOp, ps.Add, ps.Mul) ): return # all inputs must be switch if not all( s.owner and isinstance(s.owner.op, Elemwise) - and isinstance(s.owner.op.scalar_op, aes.Switch) + and isinstance(s.owner.op.scalar_op, ps.Switch) for s in node.inputs ): return diff --git a/pytensor/tensor/rewriting/blas.py b/pytensor/tensor/rewriting/blas.py index 7434fd7e1c..bac08755c7 100644 --- a/pytensor/tensor/rewriting/blas.py +++ b/pytensor/tensor/rewriting/blas.py @@ -81,7 +81,7 @@ ) from pytensor.graph.rewriting.db import SequenceDB from pytensor.graph.utils import InconsistencyError -from pytensor.tensor import basic as at +from pytensor.tensor import basic as ptb from pytensor.tensor.blas import ( Dot22, _dot22, @@ -137,7 +137,7 @@ def _as_scalar(res, dtype=None): # as the cast of the scalar can be done before or after the dot22 # and this will give the same result. if pytensor.scalar.upcast(res.dtype, dtype) == dtype: - return at.cast(rval, dtype) + return ptb.cast(rval, dtype) else: return None @@ -347,9 +347,9 @@ def _gemm_from_factored_list(fgraph, lst): # sM can be a tuple of 2 elements or an PyTensor variable. 
if isinstance(sM, tuple): sm0, sm1 = sM - sm0 = at.as_tensor_variable(sm0) + sm0 = ptb.as_tensor_variable(sm0) if pytensor.scalar.upcast(sm0.dtype, sm1.dtype) == sm1.dtype: - lst2.append((at.cast(sm0, sm1.dtype), sM[1])) + lst2.append((ptb.cast(sm0, sm1.dtype), sM[1])) lst = lst2 @@ -657,7 +657,7 @@ def local_gemm_to_ger(fgraph, node): xv = x.dimshuffle(0) yv = y.dimshuffle(1) try: - bval = at.get_underlying_scalar_constant_value(b) + bval = ptb.get_underlying_scalar_constant_value(b) except NotScalarConstantError: # b isn't a constant, GEMM is doing useful pre-scaling return @@ -666,7 +666,7 @@ def local_gemm_to_ger(fgraph, node): rval = ger(z, a, xv, yv) new_out = [rval] elif bval == 0: # GER on zeros_like should be faster than GEMM - zeros = at.zeros([x.shape[0], y.shape[1]], x.dtype) + zeros = ptb.zeros([x.shape[0], y.shape[1]], x.dtype) rval = ger(zeros, a, xv, yv) new_out = [rval] else: @@ -686,32 +686,32 @@ def local_dot22_to_ger_or_gemv(fgraph, node): x, y = node.inputs xb = x.broadcastable yb = y.broadcastable - one = at.as_tensor_variable(np.asarray(1, dtype=x.dtype)) - zero = at.as_tensor_variable(np.asarray(0, dtype=x.dtype)) + one = ptb.as_tensor_variable(np.asarray(1, dtype=x.dtype)) + zero = ptb.as_tensor_variable(np.asarray(0, dtype=x.dtype)) if xb[1] and yb[0]: # x and y are both vectors so this might qualifies for a GER xv = x.dimshuffle(0) yv = y.dimshuffle(1) - zeros = at.zeros([x.shape[0], y.shape[1]], dtype=x.dtype) + zeros = ptb.zeros([x.shape[0], y.shape[1]], dtype=x.dtype) rval = ger(zeros, one, xv, yv) new_out = [rval] elif xb[0] and yb[1]: # x and y are both vectors so this qualifies for a sdot / ddot # TODO: PyTensor doesn't have a sdot, but gemv is better than _dot22 xv = x.dimshuffle(1) - zeros = at.AllocEmpty(x.dtype)(1) + zeros = ptb.AllocEmpty(x.dtype)(1) rval = gemv_no_inplace(zeros, one, y.T, xv, zero) new_out = [rval.dimshuffle("x", 0)] elif xb[0] and not yb[0] and not yb[1]: # x is vector, y is matrix so try gemv xv = x.dimshuffle(1) - zeros = at.AllocEmpty(x.dtype)(y.shape[1]) + zeros = ptb.AllocEmpty(x.dtype)(y.shape[1]) rval = gemv_no_inplace(zeros, one, y.T, xv, zero) new_out = [rval.dimshuffle("x", 0)] elif not xb[0] and not xb[1] and yb[1]: # x is matrix, y is vector, try gemv yv = y.dimshuffle(0) - zeros = at.AllocEmpty(x.dtype)(x.shape[0]) + zeros = ptb.AllocEmpty(x.dtype)(x.shape[0]) rval = gemv_no_inplace(zeros, one, x, yv, zero) new_out = [rval.dimshuffle(0, "x")] else: @@ -845,7 +845,9 @@ def local_dot22_to_dot22scalar(fgraph, node): " matrix type" ) return False - a = at.cast(_as_scalar(m.owner.inputs[scalar_idx], dtype=d.dtype), d.type.dtype) + a = ptb.cast( + _as_scalar(m.owner.inputs[scalar_idx], dtype=d.dtype), d.type.dtype + ) assert not a.type.ndim dot = _dot22scalar(d.owner.inputs[0], d.owner.inputs[1], a) @@ -883,7 +885,7 @@ def local_dot22_to_dot22scalar(fgraph, node): o.remove(d) o.remove(s) - a = at.cast(i_scalar[scalar_idx], d.type.dtype) + a = ptb.cast(i_scalar[scalar_idx], d.type.dtype) assert not a.type.ndim if len(o) == 0: return [_dot22scalar(d.owner.inputs[0], d.owner.inputs[1], a)] diff --git a/pytensor/tensor/rewriting/blas_c.py b/pytensor/tensor/rewriting/blas_c.py index 77629dccca..1723cf36f8 100644 --- a/pytensor/tensor/rewriting/blas_c.py +++ b/pytensor/tensor/rewriting/blas_c.py @@ -1,6 +1,6 @@ from pytensor.configdefaults import config from pytensor.graph.rewriting.basic import in2out -from pytensor.tensor import basic as at +from pytensor.tensor import basic as ptb from pytensor.tensor.blas import gemv_inplace, 
gemv_no_inplace, ger, ger_destructive from pytensor.tensor.blas_c import ( CGemv, @@ -47,10 +47,10 @@ def make_c_gemv_destructive(fgraph, node): dest = inputs[0] if ( dest.owner - and isinstance(dest.owner.op, at.AllocEmpty) + and isinstance(dest.owner.op, ptb.AllocEmpty) and len(fgraph.clients[dest]) > 1 ): - inputs[0] = at.AllocEmpty(dest.dtype)(*dest.owner.inputs) + inputs[0] = ptb.AllocEmpty(dest.dtype)(*dest.owner.inputs) return [cgemv_inplace(*inputs)] diff --git a/pytensor/tensor/rewriting/elemwise.py b/pytensor/tensor/rewriting/elemwise.py index 6f2fcd2302..43aecb8816 100644 --- a/pytensor/tensor/rewriting/elemwise.py +++ b/pytensor/tensor/rewriting/elemwise.py @@ -6,7 +6,7 @@ from warnings import warn import pytensor -import pytensor.scalar.basic as aes +import pytensor.scalar.basic as ps from pytensor import clone_replace, compile from pytensor.compile.mode import get_target_language from pytensor.configdefaults import config @@ -79,7 +79,7 @@ def candidate_input_idxs(self, node): # and ScalarLoops if isinstance(node.op.scalar_op, ScalarLoop): return [] - if isinstance(node.op.scalar_op, aes.Composite) and (len(node.outputs) > 1): + if isinstance(node.op.scalar_op, ps.Composite) and (len(node.outputs) > 1): return [] else: return range(len(node.outputs)) @@ -278,7 +278,7 @@ def apply(self, fgraph): try: if hasattr(op.scalar_op, "make_new_inplace"): new_scal = op.scalar_op.make_new_inplace( - aes.transfer_type( + ps.transfer_type( *[ inplace_pattern.get(i, o.dtype) for i, o in enumerate(node.outputs) @@ -287,7 +287,7 @@ def apply(self, fgraph): ) else: new_scal = op.scalar_op.__class__( - aes.transfer_type( + ps.transfer_type( *[ inplace_pattern.get(i, None) for i in range(len(node.outputs)) @@ -503,8 +503,8 @@ def local_upcast_elemwise_constant_inputs(fgraph, node): scalar_op = node.op.scalar_op # print "aa", scalar_op.output_types_preference if getattr(scalar_op, "output_types_preference", None) in ( - aes.upgrade_to_float, - aes.upcast_out, + ps.upgrade_to_float, + ps.upcast_out, ): # this is the kind of op that we can screw with the input # dtypes by upcasting explicitly @@ -570,7 +570,7 @@ def local_add_mul_fusion(fgraph, node): but it catches a few edge cases that are not canonicalized by it """ if not isinstance(node.op, Elemwise) or not isinstance( - node.op.scalar_op, (aes.Add, aes.Mul) + node.op.scalar_op, (ps.Add, ps.Mul) ): return False @@ -634,7 +634,7 @@ def elemwise_to_scalar(inputs, outputs): middle_inputs = [] scalar_inputs = [ - aes.get_scalar_type(inp.type.dtype).make_variable() for inp in inputs + ps.get_scalar_type(inp.type.dtype).make_variable() for inp in inputs ] middle_scalar_inputs = [] @@ -648,7 +648,7 @@ def elemwise_to_scalar(inputs, outputs): middle_scalar_inputs[middle_inputs.index(inp)] ) else: - new_scalar_input = aes.get_scalar_type( + new_scalar_input = ps.get_scalar_type( inp.type.dtype ).make_variable() node_scalar_inputs.append(new_scalar_input) @@ -721,7 +721,7 @@ def elemwise_scalar_op_has_c_code(node: Apply) -> bool: out_maybe_fuseable = ( out.owner and isinstance(out.owner.op, Elemwise) - # and not isinstance(out.owner.op.scalar_op, aes.Composite) + # and not isinstance(out.owner.op.scalar_op, ps.Composite) and len(out.owner.outputs) == 1 and elemwise_scalar_op_has_c_code(out.owner) ) @@ -730,7 +730,7 @@ def elemwise_scalar_op_has_c_code(node: Apply) -> bool: out_maybe_fuseable and not isinstance(client, str) # "output" and isinstance(client.op, Elemwise) - # and not isinstance(client.op.scalar_op, aes.Composite) + # and not 
isinstance(client.op.scalar_op, ps.Composite) and len(client.outputs) == 1 and out.type.broadcastable == client.outputs[0].type.broadcastable @@ -1033,7 +1033,7 @@ def update_fuseable_mappings_after_fg_replace( break scalar_inputs, scalar_outputs = self.elemwise_to_scalar(inputs, outputs) - composite_outputs = Elemwise(aes.Composite(scalar_inputs, scalar_outputs))( + composite_outputs = Elemwise(ps.Composite(scalar_inputs, scalar_outputs))( *inputs ) if not isinstance(composite_outputs, list): @@ -1096,7 +1096,7 @@ def print_profile(stream, prof, level=0): def local_useless_composite_outputs(fgraph, node): """Remove inputs and outputs of Composite Ops that are not used anywhere.""" if not isinstance(node.op, Elemwise) or not isinstance( - node.op.scalar_op, aes.Composite + node.op.scalar_op, ps.Composite ): return comp = node.op.scalar_op @@ -1117,7 +1117,7 @@ def local_useless_composite_outputs(fgraph, node): node.outputs ): used_inputs = [node.inputs[i] for i in used_inputs_idxs] - c = aes.Composite(inputs=used_inner_inputs, outputs=used_inner_outputs) + c = ps.Composite(inputs=used_inner_inputs, outputs=used_inner_outputs) e = Elemwise(scalar_op=c)(*used_inputs, return_list=True) return dict(zip([node.outputs[i] for i in used_outputs_idxs], e)) @@ -1131,7 +1131,7 @@ def local_careduce_fusion(fgraph, node): # FIXME: This check is needed because of the faulty logic in the FIXME below! # Right now, rewrite only works for `Sum`/`Prod` - if not isinstance(car_scalar_op, (aes.Add, aes.Mul)): + if not isinstance(car_scalar_op, (ps.Add, ps.Mul)): return None elm_node = car_input.owner @@ -1175,19 +1175,19 @@ def local_careduce_fusion(fgraph, node): car_acc_dtype = node.op.acc_dtype scalar_elm_inputs = [ - aes.get_scalar_type(inp.type.dtype).make_variable() for inp in elm_inputs + ps.get_scalar_type(inp.type.dtype).make_variable() for inp in elm_inputs ] elm_output = elm_scalar_op(*scalar_elm_inputs) # This input represents the previous value in the `CAReduce` binary reduction - carried_car_input = aes.get_scalar_type(car_acc_dtype).make_variable() + carried_car_input = ps.get_scalar_type(car_acc_dtype).make_variable() scalar_fused_output = car_scalar_op(carried_car_input, elm_output) if scalar_fused_output.type.dtype != car_acc_dtype: - scalar_fused_output = aes.cast(scalar_fused_output, car_acc_dtype) + scalar_fused_output = ps.cast(scalar_fused_output, car_acc_dtype) - fused_scalar_op = aes.Composite( + fused_scalar_op = ps.Composite( inputs=[carried_car_input] + scalar_elm_inputs, outputs=[scalar_fused_output] ) @@ -1213,7 +1213,7 @@ def local_inline_composite_constants(fgraph, node): """Inline scalar constants in Composite graphs.""" composite_op = node.op.scalar_op - if not isinstance(composite_op, aes.Composite): + if not isinstance(composite_op, ps.Composite): return None new_outer_inputs = [] @@ -1224,7 +1224,7 @@ def local_inline_composite_constants(fgraph, node): if "complex" not in outer_inp.type.dtype: unique_value = get_unique_constant_value(outer_inp) if unique_value is not None: - inner_replacements[inner_inp] = aes.constant( + inner_replacements[inner_inp] = ps.constant( unique_value, dtype=inner_inp.dtype ) continue @@ -1237,7 +1237,7 @@ def local_inline_composite_constants(fgraph, node): new_inner_outs = clone_replace( composite_op.fgraph.outputs, replace=inner_replacements ) - new_composite_op = aes.Composite(new_inner_inputs, new_inner_outs) + new_composite_op = ps.Composite(new_inner_inputs, new_inner_outs) new_outputs = 
Elemwise(new_composite_op).make_node(*new_outer_inputs).outputs # Some of the inlined constants were broadcasting the output shape diff --git a/pytensor/tensor/rewriting/extra_ops.py b/pytensor/tensor/rewriting/extra_ops.py index 945433f2a4..92d1f04e9a 100644 --- a/pytensor/tensor/rewriting/extra_ops.py +++ b/pytensor/tensor/rewriting/extra_ops.py @@ -1,4 +1,4 @@ -import pytensor.scalar.basic as aes +import pytensor.scalar.basic as ps from pytensor.graph.rewriting.basic import node_rewriter from pytensor.tensor.basic import Alloc, as_tensor_variable from pytensor.tensor.elemwise import Elemwise @@ -117,7 +117,7 @@ def local_Unique_second(fgraph, node): if not ( second_var.owner and isinstance(second_var.owner.op, Elemwise) - and isinstance(second_var.owner.op.scalar_op, aes.Second) + and isinstance(second_var.owner.op.scalar_op, ps.Second) ): return False diff --git a/pytensor/tensor/rewriting/jax.py b/pytensor/tensor/rewriting/jax.py index 138355cdd1..59e701d328 100644 --- a/pytensor/tensor/rewriting/jax.py +++ b/pytensor/tensor/rewriting/jax.py @@ -1,4 +1,4 @@ -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile import optdb from pytensor.graph.rewriting.basic import in2out, node_rewriter from pytensor.tensor.basic import MakeVector @@ -37,10 +37,10 @@ def boolean_indexing_set_or_inc(fgraph, node): return if op.set_instead_of_inc: - out = at.where(cond, y, x) + out = pt.where(cond, y, x) return out.owner.outputs else: - out = at.where(cond, x + y, x) + out = pt.where(cond, x + y, x) return out.owner.outputs @@ -91,7 +91,7 @@ def boolean_indexing_sum(fgraph, node): # Output must be a scalar, since pure boolean indexing returns a vector # No need to worry about axis - out = at.sum(at.where(cond, x, 0)) + out = pt.sum(pt.where(cond, x, 0)) return out.owner.outputs diff --git a/pytensor/tensor/rewriting/math.py b/pytensor/tensor/rewriting/math.py index 67dc8eedeb..5309abe882 100644 --- a/pytensor/tensor/rewriting/math.py +++ b/pytensor/tensor/rewriting/math.py @@ -7,8 +7,8 @@ import numpy as np -import pytensor.scalar.basic as aes -import pytensor.scalar.math as aes_math +import pytensor.scalar.basic as ps +import pytensor.scalar.math as ps_math from pytensor.graph.basic import Constant, Variable from pytensor.graph.rewriting.basic import ( NodeRewriter, @@ -50,7 +50,7 @@ Sum, _conj, ) -from pytensor.tensor.math import abs as at_abs +from pytensor.tensor.math import abs as pt_abs from pytensor.tensor.math import ( add, digamma, @@ -69,9 +69,9 @@ log1p, makeKeepDims, ) -from pytensor.tensor.math import max as at_max +from pytensor.tensor.math import max as pt_max from pytensor.tensor.math import maximum, mul, neg, polygamma -from pytensor.tensor.math import pow as at_pow +from pytensor.tensor.math import pow as pt_pow from pytensor.tensor.math import ( prod, reciprocal, @@ -82,7 +82,7 @@ sqrt, sub, ) -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import tri_gamma, true_div from pytensor.tensor.rewriting.basic import ( alloc_like, @@ -241,13 +241,13 @@ def local_func_inv(fgraph, node): """ inv_pairs = ( - (aes.Deg2Rad, aes.Rad2Deg), - (aes.Cosh, aes.ArcCosh), - (aes.Tanh, aes.ArcTanh), - (aes.Sinh, aes.ArcSinh), - (aes.Conj, aes.Conj), - (aes.Neg, aes.Neg), - (aes.Reciprocal, aes.Reciprocal), + (ps.Deg2Rad, ps.Rad2Deg), + (ps.Cosh, ps.ArcCosh), + (ps.Tanh, ps.ArcTanh), + (ps.Sinh, ps.ArcSinh), + (ps.Conj, ps.Conj), + (ps.Neg, ps.Neg), + (ps.Reciprocal, ps.Reciprocal), ) x = node.inputs[0] @@ 
-288,7 +288,7 @@ def local_exp_log(fgraph, node): node_op = node.op.scalar_op # Case for log(exp(x)) -> x - if isinstance(prev_op, aes.Exp) and isinstance(node_op, aes.Log): + if isinstance(prev_op, ps.Exp) and isinstance(node_op, ps.Log): new_out = x.owner.inputs[0] old_out = node.outputs[0] # Exp may have cast integer input to float @@ -297,7 +297,7 @@ def local_exp_log(fgraph, node): return [new_out] # Case for log1p(expm1(x)) -> x - if isinstance(prev_op, aes.Expm1) and isinstance(node_op, aes.Log1p): + if isinstance(prev_op, ps.Expm1) and isinstance(node_op, ps.Log1p): new_out = x.owner.inputs[0] old_out = node.outputs[0] # Expm1 may have cast integer input to float @@ -306,12 +306,12 @@ def local_exp_log(fgraph, node): return [new_out] # Case for exp(softplus(x)) aka exp(log1pexp) -> 1 + exp(x) - if isinstance(prev_op, aes_math.Softplus) and isinstance(node_op, aes.Exp): + if isinstance(prev_op, ps_math.Softplus) and isinstance(node_op, ps.Exp): x = x.owner.inputs[0] return [add(1, exp(x))] # Case for expm1(softplus(x)) aka expm1(log1pexp) -> exp(x) - if isinstance(prev_op, aes_math.Softplus) and isinstance(node_op, aes.Expm1): + if isinstance(prev_op, ps_math.Softplus) and isinstance(node_op, ps.Expm1): x = x.owner.inputs[0] return [exp(x)] @@ -331,42 +331,42 @@ def local_exp_log_nan_switch(fgraph, node): node_op = node.op.scalar_op # Case for exp(log(x)) -> x - if isinstance(prev_op, aes.Log) and isinstance(node_op, aes.Exp): + if isinstance(prev_op, ps.Log) and isinstance(node_op, ps.Exp): x = x.owner.inputs[0] old_out = node.outputs[0] new_out = switch(ge(x, 0), x, np.asarray(np.nan, old_out.dtype)) return [new_out] # Case for exp(log1p(x)) -> x + 1 - if isinstance(prev_op, aes.Log1p) and isinstance(node_op, aes.Exp): + if isinstance(prev_op, ps.Log1p) and isinstance(node_op, ps.Exp): x = x.owner.inputs[0] old_out = node.outputs[0] new_out = switch(ge(x, -1), add(1, x), np.asarray(np.nan, old_out.dtype)) return [new_out] # Case for expm1(log(x)) -> x - 1 - if isinstance(prev_op, aes.Log) and isinstance(node_op, aes.Expm1): + if isinstance(prev_op, ps.Log) and isinstance(node_op, ps.Expm1): x = x.owner.inputs[0] old_out = node.outputs[0] new_out = switch(ge(x, 0), sub(x, 1), np.asarray(np.nan, old_out.dtype)) return [new_out] # Case for expm1(log1p(x)) -> x - if isinstance(prev_op, aes.Log1p) and isinstance(node_op, aes.Expm1): + if isinstance(prev_op, ps.Log1p) and isinstance(node_op, ps.Expm1): x = x.owner.inputs[0] old_out = node.outputs[0] new_out = switch(ge(x, -1), x, np.asarray(np.nan, old_out.dtype)) return [new_out] # Case for exp(log1mexp(x)) -> 1 - exp(x) - if isinstance(prev_op, aes_math.Log1mexp) and isinstance(node_op, aes.Exp): + if isinstance(prev_op, ps_math.Log1mexp) and isinstance(node_op, ps.Exp): x = x.owner.inputs[0] old_out = node.outputs[0] new_out = switch(le(x, 0), sub(1, exp(x)), np.asarray(np.nan, old_out.dtype)) return [new_out] # Case for expm1(log1mexp(x)) -> -exp(x) - if isinstance(prev_op, aes_math.Log1mexp) and isinstance(node_op, aes.Expm1): + if isinstance(prev_op, ps_math.Log1mexp) and isinstance(node_op, ps.Expm1): x = x.owner.inputs[0] old_out = node.outputs[0] new_out = switch(le(x, 0), neg(exp(x)), np.asarray(np.nan, old_out.dtype)) @@ -379,12 +379,12 @@ def local_exp_log_nan_switch(fgraph, node): def local_sumsqr2dot(fgraph, node): """ This rewrite detects - ``at.sqr(W.dimshuffle("x", 0, 1) * G.dimshuffle(0, "x", 1) ).sum(axis=(1, 2))`` - and converts it to ``at.dot(at.sqr(G), at.sqr(W).sum(axis=0))``. 
+ ``pt.sqr(W.dimshuffle("x", 0, 1) * G.dimshuffle(0, "x", 1) ).sum(axis=(1, 2))`` + and converts it to ``pt.dot(pt.sqr(G), pt.sqr(W).sum(axis=0))``. """ if ( isinstance(node.op, Sum) - and isinstance(node.op.scalar_op, aes.Add) + and isinstance(node.op.scalar_op, ps.Add) and node.op.axis == (1, 2) ): in1 = node.inputs[0] @@ -393,13 +393,13 @@ def local_sumsqr2dot(fgraph, node): if ( in1.owner and isinstance(in1.owner.op, Elemwise) - and isinstance(in1.owner.op.scalar_op, aes.Sqr) + and isinstance(in1.owner.op.scalar_op, ps.Sqr) ): in_sqr = in1.owner.inputs[0] if ( in_sqr.owner and isinstance(in_sqr.owner.op, Elemwise) - and isinstance(in_sqr.owner.op.scalar_op, aes.Mul) + and isinstance(in_sqr.owner.op.scalar_op, ps.Mul) and len(in_sqr.owner.inputs) == 2 ): in_mul1, in_mul2 = in_sqr.owner.inputs @@ -431,13 +431,13 @@ def local_mul_exp_to_exp_add(fgraph, node): for n in node.inputs if n.owner and hasattr(n.owner.op, "scalar_op") - and isinstance(n.owner.op.scalar_op, aes.Exp) + and isinstance(n.owner.op.scalar_op, ps.Exp) ] # Can only do any rewrite if there are at least two exp-s if len(exps) >= 2: # Mul -> add; TrueDiv -> sub orig_op, new_op = mul, add - if isinstance(node.op.scalar_op, aes.TrueDiv): + if isinstance(node.op.scalar_op, ps.TrueDiv): orig_op, new_op = true_div, sub new_out = exp(new_op(*exps)) if new_out.dtype != node.outputs[0].dtype: @@ -450,7 +450,7 @@ def local_mul_exp_to_exp_add(fgraph, node): for n in node.inputs if not n.owner or not hasattr(n.owner.op, "scalar_op") - or not isinstance(n.owner.op.scalar_op, aes.Exp) + or not isinstance(n.owner.op.scalar_op, ps.Exp) ] if len(rest) > 0: new_out = orig_op(new_out, *rest) @@ -473,7 +473,7 @@ def local_mul_pow_to_pow_add(fgraph, node): if ( n.owner and hasattr(n.owner.op, "scalar_op") - and isinstance(n.owner.op.scalar_op, aes.Pow) + and isinstance(n.owner.op.scalar_op, ps.Pow) ): base_node = n.owner.inputs[0] # exponent is at n.owner.inputs[1], but we need to store the full node @@ -487,7 +487,7 @@ def local_mul_pow_to_pow_add(fgraph, node): if len(can_rewrite) >= 1: # Mul -> add; TrueDiv -> sub orig_op, new_op = mul, add - if isinstance(node.op.scalar_op, aes.TrueDiv): + if isinstance(node.op.scalar_op, ps.TrueDiv): orig_op, new_op = true_div, sub pow_factors = [] # Rewrite pow-s having the same base for each different base @@ -519,14 +519,14 @@ def local_mul_pow_to_pow_add(fgraph, node): @node_rewriter([Elemwise]) def local_expm1(fgraph, node): """Detect ``exp(a) - 1`` and convert them to ``expm1(a)``.""" - if isinstance(node.op, Elemwise) and isinstance(node.op.scalar_op, aes.Sub): + if isinstance(node.op, Elemwise) and isinstance(node.op.scalar_op, ps.Sub): in1, in2 = node.inputs out = node.outputs[0] if ( in1.owner and isinstance(in1.owner.op, Elemwise) - and isinstance(in1.owner.op.scalar_op, aes.Exp) + and isinstance(in1.owner.op.scalar_op, ps.Exp) and extract_constant(in2, only_process_constants=False) == 1 ): in11 = in1.owner.inputs[0] @@ -547,28 +547,28 @@ def local_mul_switch_sink(fgraph, node): """ This rewrite makes the following changes in the graph: - at.mul(A, at.switch(cond, 0, iff), B) -> at.switch(cond, 0, at.mul(A, B, iff)) - at.mul(A, at.switch(cond, ift, 0), B) -> at.switch(cond, at.mul(A, B, ift), 0) + pt.mul(A, pt.switch(cond, 0, iff), B) -> pt.switch(cond, 0, pt.mul(A, B, iff)) + pt.mul(A, pt.switch(cond, ift, 0), B) -> pt.switch(cond, pt.mul(A, B, ift), 0) ``A`` and ``B`` being several (or none) symbolic variables. 
This is useful because ``A`` and ``B`` may not be numerically stable and give NaN or inf values for cases where the switch returns 0. - With this rewrite ``at.grad(at.switch(...))`` has the right behavior. + With this rewrite ``pt.grad(pt.switch(...))`` has the right behavior. Examples -------- x -> f(x) x -> g(x) - y = at.switch(cond, f(x), g(x)) + y = pt.switch(cond, f(x), g(x)) without the rewrite: - at.grad(y, x) -> grad(f(x), x) * grad(y, f(x)) + grad(g(x), x) * grad(y, g(x)) + pt.grad(y, x) -> grad(f(x), x) * grad(y, f(x)) + grad(g(x), x) * grad(y, g(x)) with the rewrite - at.grad(y, x) -> switch(cond, grad(f(x), x), 0) + switch(cond, 0, grad(g(x), x)) + pt.grad(y, x) -> switch(cond, grad(f(x), x), 0) + switch(cond, 0, grad(g(x), x)) This will be particularly useful for the lazy ``if`` because we skip an entire part of the graph. @@ -643,8 +643,8 @@ def local_div_switch_sink(fgraph, node): """ This rewrite makes the following changes in the graph: - at.div(at.switch(cond, 0, iff), A) -> at.switch(cond, 0, at.div(iff, A)) - at.div(at.switch(cond, ift, 0), A) -> at.switch(cond, at.div(ift, A), 0) + pt.div(pt.switch(cond, 0, iff), A) -> pt.switch(cond, 0, pt.div(iff, A)) + pt.div(pt.switch(cond, ift, 0), A) -> pt.switch(cond, pt.div(ift, A), 0) where ``A`` is a symbolic variable. @@ -745,7 +745,7 @@ class AlgebraicCanonizer(NodeRewriter): Examples -------- - >>> import pytensor.tensor as at + >>> import pytensor.tensor as pt >>> from pytensor.tensor.rewriting.math import AlgebraicCanonizer >>> add_canonizer = AlgebraicCanonizer(add, sub, neg, \\ ... lambda n, d: sum(n) - sum(d)) @@ -1161,7 +1161,7 @@ def mul_calculate(num, denum, aslist=False, out_type=None): # Make sure we do not accidentally upcast data types. if out_type is None: - out_dtype = aes.upcast(*[v.dtype for v in (num + denum)]) + out_dtype = ps.upcast(*[v.dtype for v in (num + denum)]) else: out_dtype = out_type.dtype one = _asarray(1, dtype=out_dtype) @@ -1304,7 +1304,7 @@ def local_elemwise_sub_zeros(fgraph, node): if ( isinstance(node.op, Elemwise) and node.op.scalar_op.nin == 2 - and node.op.scalar_op == aes.sub + and node.op.scalar_op == ps.sub and node.inputs[0] == node.inputs[1] ): res = zeros_like(node.inputs[0]) @@ -1360,7 +1360,7 @@ def local_useless_elemwise_comparison(fgraph, node): # Elemwise[{LT,GT}](X, X) -> Elemwise[zeros](X) if ( - isinstance(node.op.scalar_op, (aes.LT, aes.GT)) + isinstance(node.op.scalar_op, (ps.LT, ps.GT)) and node.inputs[0] is node.inputs[1] ): res = zeros_like(node.inputs[0], dtype=dtype, opt=True) @@ -1369,7 +1369,7 @@ def local_useless_elemwise_comparison(fgraph, node): return [res] # Elemwise[{LE,GE}](X, X) -> Elemwise[ones](X) if ( - isinstance(node.op.scalar_op, (aes.LE, aes.GE)) + isinstance(node.op.scalar_op, (ps.LE, ps.GE)) and node.inputs[0] is node.inputs[1] ): res = ones_like(node.inputs[0], dtype=dtype, opt=True) @@ -1379,7 +1379,7 @@ def local_useless_elemwise_comparison(fgraph, node): return [res] # Elemwise[{minimum,maximum}](X, X) -> X if ( - isinstance(node.op.scalar_op, (aes.ScalarMinimum, aes.ScalarMaximum)) + isinstance(node.op.scalar_op, (ps.ScalarMinimum, ps.ScalarMaximum)) and node.inputs[0] is node.inputs[1] ): res = node.inputs[0] @@ -1389,7 +1389,7 @@ def local_useless_elemwise_comparison(fgraph, node): # Elemwise[LT](X.shape[i], 0) -> Elemwise[zeros](X) if ( - isinstance(node.op.scalar_op, aes.LT) + isinstance(node.op.scalar_op, ps.LT) and node.inputs[0].owner and isinstance(node.inputs[0].owner.op, Shape_i) and extract_constant(node.inputs[1], 
only_process_constants=True) == 0 @@ -1400,7 +1400,7 @@ def local_useless_elemwise_comparison(fgraph, node): return [res] # Elemwise[GE](X.shape[i], 0) -> Elemwise[ones](X) if ( - isinstance(node.op.scalar_op, aes.GE) + isinstance(node.op.scalar_op, ps.GE) and node.inputs[0].owner and isinstance(node.inputs[0].owner.op, Shape_i) and extract_constant(node.inputs[1], only_process_constants=True) == 0 @@ -1411,7 +1411,7 @@ def local_useless_elemwise_comparison(fgraph, node): return [res] # Elemwise[maximum](X.shape[i], 0) -> X.shape[i] if ( - isinstance(node.op.scalar_op, aes.ScalarMaximum) + isinstance(node.op.scalar_op, ps.ScalarMaximum) and node.inputs[0].owner and isinstance(node.inputs[0].owner.op, Shape_i) and extract_constant(node.inputs[1], only_process_constants=True) == 0 @@ -1420,7 +1420,7 @@ def local_useless_elemwise_comparison(fgraph, node): return [node.inputs[0]] # Elemwise[maximum](0, X.shape[i]) -> X.shape[i] if ( - isinstance(node.op.scalar_op, aes.ScalarMaximum) + isinstance(node.op.scalar_op, ps.ScalarMaximum) and extract_constant(node.inputs[0], only_process_constants=True) == 0 and node.inputs[1].owner and isinstance(node.inputs[1].owner.op, Shape_i) @@ -1429,7 +1429,7 @@ def local_useless_elemwise_comparison(fgraph, node): return [node.inputs[1]] # Elemwise[minimum](X.shape[i], 0) -> 0 if ( - isinstance(node.op.scalar_op, aes.ScalarMinimum) + isinstance(node.op.scalar_op, ps.ScalarMinimum) and node.inputs[0].owner and isinstance(node.inputs[0].owner.op, Shape_i) and extract_constant(node.inputs[1], only_process_constants=True) == 0 @@ -1441,7 +1441,7 @@ def local_useless_elemwise_comparison(fgraph, node): # Elemwise[minimum](0, X.shape[i]) -> 0 if ( - isinstance(node.op.scalar_op, aes.ScalarMinimum) + isinstance(node.op.scalar_op, ps.ScalarMinimum) and extract_constant(node.inputs[0], only_process_constants=True) == 0 and node.inputs[1].owner and isinstance(node.inputs[1].owner.op, Shape_i) @@ -1453,10 +1453,10 @@ def local_useless_elemwise_comparison(fgraph, node): # Elemwise[LT](add([anything that is shapes]), 0) -> Elemwise[zeros](X) if ( - isinstance(node.op.scalar_op, aes.LT) + isinstance(node.op.scalar_op, ps.LT) and node.inputs[0].owner and isinstance(node.inputs[0].owner.op, Elemwise) - and isinstance(node.inputs[0].owner.op.scalar_op, aes.Add) + and isinstance(node.inputs[0].owner.op.scalar_op, ps.Add) and all( isinstance(var.owner and var.owner.op, Shape_i) for var in node.inputs[0].owner.inputs @@ -1469,10 +1469,10 @@ def local_useless_elemwise_comparison(fgraph, node): return [res] # Elemwise[GE](add([anything that is shapes]), 0) -> Elemwise[ones](X) if ( - isinstance(node.op.scalar_op, aes.GE) + isinstance(node.op.scalar_op, ps.GE) and node.inputs[0].owner and isinstance(node.inputs[0].owner.op, Elemwise) - and isinstance(node.inputs[0].owner.op.scalar_op, aes.Add) + and isinstance(node.inputs[0].owner.op.scalar_op, ps.Add) and all( isinstance(var.owner and var.owner.op, Shape_i) for var in node.inputs[0].owner.inputs @@ -1509,7 +1509,7 @@ def investigate(node): return all(v.owner and investigate(v.owner) for v in node.inputs) if ( - isinstance(node.op.scalar_op, aes.EQ) + isinstance(node.op.scalar_op, ps.EQ) and node.inputs[0].owner and investigate(node.inputs[0].owner) ): @@ -1629,11 +1629,11 @@ def local_reduce_join(fgraph, node): if extract_constant(join_node.inputs[0], only_process_constants=True) != 0: return - if isinstance(node.op.scalar_op, (aes.ScalarMaximum, aes.ScalarMinimum)): + if isinstance(node.op.scalar_op, (ps.ScalarMaximum, 
ps.ScalarMinimum)): # Support only 2 inputs for now if len(join_node.inputs) != 3: return - elif not isinstance(node.op.scalar_op, (aes.Add, aes.Mul)): + elif not isinstance(node.op.scalar_op, (ps.Add, ps.Mul)): return elif len(join_node.inputs) <= 2: # This is a useless join that should get removed by another rewrite? @@ -1915,15 +1915,15 @@ def local_div_to_reciprocal(fgraph, node): @node_rewriter([reciprocal]) def local_reciprocal_canon(fgraph, node): if node.op == reciprocal: - return [at_pow(node.inputs[0], -1.0)] + return [pt_pow(node.inputs[0], -1.0)] else: return False @register_canonicalize -@node_rewriter([at_pow]) +@node_rewriter([pt_pow]) def local_pow_canonicalize(fgraph, node): - if node.op == at_pow: + if node.op == pt_pow: cst = get_constant(node.inputs[1]) if cst == 0: return [alloc_like(1, node.outputs[0], fgraph)] @@ -1962,7 +1962,7 @@ def local_intdiv_by_one(fgraph, node): def local_zero_div(fgraph, node): """0 / x -> 0""" if isinstance(node.op, Elemwise) and isinstance( - node.op.scalar_op, (aes.IntDiv, aes.TrueDiv) + node.op.scalar_op, (ps.IntDiv, ps.TrueDiv) ): if get_constant(node.inputs[0]) == 0: ret = alloc_like(0, node.outputs[0], fgraph) @@ -1971,9 +1971,9 @@ def local_zero_div(fgraph, node): @register_specialize -@node_rewriter([at_pow]) +@node_rewriter([pt_pow]) def local_pow_specialize(fgraph, node): - if node.op == at_pow: + if node.op == pt_pow: # the idea here is that we have pow(x, y) odtype = node.outputs[0].dtype xsym = node.inputs[0] @@ -2007,7 +2007,7 @@ def local_pow_specialize(fgraph, node): @register_specialize -@node_rewriter([at_pow]) +@node_rewriter([pt_pow]) def local_pow_to_nested_squaring(fgraph, node): """Convert a large power exponent to multiple squaring operations. @@ -2039,11 +2039,11 @@ def local_pow_to_nested_squaring(fgraph, node): # 512 is too small for the cpu and too big for some gpu! if abs(y) == int(abs(y)) and abs(y) <= 512: pow2 = [xsym] - pow2_scal = [aes.get_scalar_type(xsym.dtype)()] + pow2_scal = [ps.get_scalar_type(xsym.dtype)()] y_to_do = abs(y) for i in range(int(np.log2(y_to_do))): pow2.append(sqr(pow2[i])) - pow2_scal.append(aes.sqr(pow2_scal[i])) + pow2_scal.append(ps.sqr(pow2_scal[i])) rval1 = None rval1_scal = None while y_to_do > 0: @@ -2059,7 +2059,7 @@ def local_pow_to_nested_squaring(fgraph, node): if abs(y) > 2: # We fuse all the pow together here to make # compilation faster - rval1 = Elemwise(aes.Composite([pow2_scal[0]], [rval1_scal])).make_node( + rval1 = Elemwise(ps.Composite([pow2_scal[0]], [rval1_scal])).make_node( xsym ) if y < 0: @@ -2202,7 +2202,7 @@ def check_for_x_over_absX(numerators, denominators): # TODO: this function should dig/search through dimshuffles # This won't catch a dimshuffled absolute value for den in list(denominators): - if den.owner and den.owner.op == at_abs and den.owner.inputs[0] in numerators: + if den.owner and den.owner.op == pt_abs and den.owner.inputs[0] in numerators: if den.owner.inputs[0].type.dtype.startswith("complex"): # TODO: Make an Op that projects a complex number to # have unit length but projects 0 to 0. That @@ -2222,7 +2222,7 @@ def check_for_x_over_absX(numerators, denominators): @register_canonicalize -@node_rewriter([at_abs]) +@node_rewriter([pt_abs]) def local_abs_lift(fgraph, node): """ Move the abs toward the input. @@ -2230,13 +2230,13 @@ def local_abs_lift(fgraph, node): This is needed for check_for_x_over_absX to apply in more case. 
""" - if node.op == at_abs and node.inputs[0].owner: + if node.op == pt_abs and node.inputs[0].owner: assert node.nin == 1 if node.inputs[0].owner.op == mul: - return [mul(*[at_abs(i) for i in node.inputs[0].owner.inputs])] + return [mul(*[pt_abs(i) for i in node.inputs[0].owner.inputs])] if node.inputs[0].owner.op == true_div: i = node.inputs[0].owner.inputs - return [true_div(at_abs(i[0]), at_abs(i[1]))] + return [true_div(pt_abs(i[0]), pt_abs(i[1]))] @register_specialize @@ -2247,10 +2247,10 @@ def local_abs_merge(fgraph, node): need it anymore """ - if node.op == mul and sum(i.owner.op == at_abs for i in node.inputs if i.owner) > 1: + if node.op == mul and sum(i.owner.op == pt_abs for i in node.inputs if i.owner) > 1: inputs = [] for i in node.inputs: - if i.owner and i.owner.op == at_abs: + if i.owner and i.owner.op == pt_abs: inputs.append(i.owner.inputs[0]) elif isinstance(i, Constant): try: @@ -2264,13 +2264,13 @@ def local_abs_merge(fgraph, node): inputs.append(i) else: return False - return [at_abs(mul(*inputs))] + return [pt_abs(mul(*inputs))] if ( node.op == true_div - and sum(i.owner.op == at_abs for i in node.inputs if i.owner) == 2 + and sum(i.owner.op == pt_abs for i in node.inputs if i.owner) == 2 ): return [ - at_abs( + pt_abs( true_div(node.inputs[0].owner.inputs[0], node.inputs[1].owner.inputs[0]) ) ] @@ -2363,19 +2363,19 @@ def local_log_sum_exp(fgraph, node): exp_node, axis = sum_node.inputs[0].owner, sum_node.op.axis if not exp_node or not ( - isinstance(exp_node.op, Elemwise) and isinstance(exp_node.op.scalar_op, aes.Exp) + isinstance(exp_node.op, Elemwise) and isinstance(exp_node.op.scalar_op, ps.Exp) ): return pre_exp = exp_node.inputs[0] - max_pre_exp = at_max(pre_exp, axis=axis) + max_pre_exp = pt_max(pre_exp, axis=axis) max_pre_exp_keepdims = makeKeepDims(pre_exp, max_pre_exp, axis) # Do not offset when max_pre = -np.inf, to avoid nan in the output # Switch statement is placed directly inside sum to break the self-symmetry # of the returned output (otherwise the rewrite would not stabilize) ret = max_pre_exp + log( - at_sum( + pt_sum( switch( isinf(max_pre_exp_keepdims), exp(max_pre_exp_keepdims), @@ -2869,7 +2869,7 @@ def check_input(inputs): # aaron value stab_value = ( x - * at_pow(1 - 1 / (2 * (x**2)) + 3 / (4 * (x**4)) - 15 / (8 * (x**6)), -1) + * pt_pow(1 - 1 / (2 * (x**2)) + 3 / (4 * (x**4)) - 15 / (8 * (x**6)), -1) * cast(sqrt(np.pi), dtype=x.dtype) ) diff --git a/pytensor/tensor/rewriting/special.py b/pytensor/tensor/rewriting/special.py index c893439e4d..48ae400082 100644 --- a/pytensor/tensor/rewriting/special.py +++ b/pytensor/tensor/rewriting/special.py @@ -1,7 +1,7 @@ from pytensor.graph.rewriting.basic import copy_stack_trace, node_rewriter from pytensor.tensor.elemwise import DimShuffle from pytensor.tensor.math import Sum, exp, log -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import true_div from pytensor.tensor.rewriting.basic import register_stabilize from pytensor.tensor.rewriting.math import local_mul_canonizer @@ -102,7 +102,7 @@ def local_logsoftmax_grad(fgraph, node): ): # get parameters from unoptimized op grads, sm = node.inputs[0].owner.inputs - ret = grads - at_sum(grads, axis=sm.owner.op.axis, keepdims=True) * sm + ret = grads - pt_sum(grads, axis=sm.owner.op.axis, keepdims=True) * sm ret.tag.values_eq_approx = values_eq_approx_remove_nan copy_stack_trace(node.outputs[0], ret) return [ret] diff --git a/pytensor/tensor/rewriting/subtensor.py 
b/pytensor/tensor/rewriting/subtensor.py index e860034235..a574b772fb 100644 --- a/pytensor/tensor/rewriting/subtensor.py +++ b/pytensor/tensor/rewriting/subtensor.py @@ -4,7 +4,7 @@ import numpy as np import pytensor -import pytensor.scalar.basic as aes +import pytensor.scalar.basic as ps from pytensor import compile from pytensor.graph.basic import Constant, Variable from pytensor.graph.rewriting.basic import ( @@ -33,7 +33,7 @@ from pytensor.tensor.elemwise import Elemwise from pytensor.tensor.exceptions import NotScalarConstantError from pytensor.tensor.math import Dot, add -from pytensor.tensor.math import all as at_all +from pytensor.tensor.math import all as pt_all from pytensor.tensor.math import ( and_, ceil_intdiv, @@ -585,11 +585,11 @@ def local_subtensor_remove_broadcastable_index(fgraph, node): remove_dim = [] node_inputs_idx = 1 for dim, elem in enumerate(idx): - if isinstance(elem, (aes.ScalarType)): + if isinstance(elem, (ps.ScalarType)): # The idx is a ScalarType, ie a Type. This means the actual index # is contained in node.inputs[1] dim_index = node.inputs[node_inputs_idx] - if isinstance(dim_index, aes.ScalarConstant): + if isinstance(dim_index, ps.ScalarConstant): dim_index = dim_index.value if dim_index in (0, -1) and node.inputs[0].broadcastable[dim]: remove_dim.append(dim) @@ -767,7 +767,7 @@ def local_subtensor_make_vector(fgraph, node): (idx,) = idxs - if isinstance(idx, (aes.ScalarType, TensorType)): + if isinstance(idx, (ps.ScalarType, TensorType)): old_idx, idx = idx, node.inputs[1] assert idx.type.is_super(old_idx) elif isinstance(node.op, AdvancedSubtensor1): @@ -889,7 +889,7 @@ def local_set_to_inc_subtensor(fgraph, node): and node.op.set_instead_of_inc and node.inputs[1].owner and isinstance(node.inputs[1].owner.op, Elemwise) - and isinstance(node.inputs[1].owner.op.scalar_op, aes.Add) + and isinstance(node.inputs[1].owner.op.scalar_op, ps.Add) ): addn = node.inputs[1].owner subn = None @@ -1467,7 +1467,7 @@ def local_adv_sub1_adv_inc_sub1(fgraph, node): if not inp.owner.op.set_instead_of_inc: return - cond = [at_all(and_(lt(idx, x.shape[0]), ge(idx, -x.shape[0])))] + cond = [pt_all(and_(lt(idx, x.shape[0]), ge(idx, -x.shape[0])))] if not fgraph.shape_feature.same_shape(idx, y, 0, 0): cond.append(eq(idx.shape[0], y.shape[0])) r = Assert( @@ -1858,7 +1858,7 @@ def local_uint_constant_indices(fgraph, node): index_val.astype(dtype), dtype=dtype ) else: - new_index = aes.constant(index_val.astype(dtype), dtype=dtype) + new_index = ps.constant(index_val.astype(dtype), dtype=dtype) new_indices[i] = new_index has_new_index = True diff --git a/pytensor/tensor/rewriting/uncanonicalize.py b/pytensor/tensor/rewriting/uncanonicalize.py index 0acb65da7c..15a316c5a0 100644 --- a/pytensor/tensor/rewriting/uncanonicalize.py +++ b/pytensor/tensor/rewriting/uncanonicalize.py @@ -31,7 +31,7 @@ """ -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor.graph.rewriting.basic import copy_stack_trace, node_rewriter from pytensor.tensor.basic import Alloc, alloc, constant from pytensor.tensor.elemwise import CAReduce, DimShuffle @@ -79,7 +79,7 @@ def local_max_to_min(fgraph, node): if ( max.owner and isinstance(max.owner.op, CAReduce) - and max.owner.op.scalar_op == aes.scalar_maximum + and max.owner.op.scalar_op == ps.scalar_maximum ): neg_node = max.owner.inputs[0] if neg_node.owner and neg_node.owner.op == neg: diff --git a/pytensor/tensor/shape.py b/pytensor/tensor/shape.py index 0d8dea8a2e..1a83a41122 100644 --- a/pytensor/tensor/shape.py +++ 
b/pytensor/tensor/shape.py @@ -16,7 +16,7 @@ from pytensor.misc.safe_asarray import _asarray from pytensor.scalar import int32 from pytensor.tensor import _get_vector_length, as_tensor_variable -from pytensor.tensor import basic as at +from pytensor.tensor import basic as ptb from pytensor.tensor import get_vector_length from pytensor.tensor.exceptions import NotScalarConstantError from pytensor.tensor.type import DenseTensorType, TensorType, int_dtypes, tensor @@ -68,7 +68,7 @@ class Shape(COp): def make_node(self, x): if not isinstance(x, Variable): - x = at.as_tensor_variable(x) + x = ptb.as_tensor_variable(x) if isinstance(x.type, TensorType): out_var = TensorType("int64", (x.type.ndim,))() @@ -146,7 +146,7 @@ def c_code_cache_version(self): def shape(x: Union[np.ndarray, Number, Variable]) -> Variable: """Return the shape of `x`.""" if not isinstance(x, Variable): - x = at.as_tensor_variable(x) # type: ignore + x = ptb.as_tensor_variable(x) # type: ignore return cast(Variable, _shape(x)) @@ -411,12 +411,12 @@ class SpecifyShape(COp): def make_node(self, x, *shape): from pytensor.tensor.basic import get_underlying_scalar_constant_value - x = at.as_tensor_variable(x) + x = ptb.as_tensor_variable(x) shape = tuple( NoneConst if (s is None or NoneConst.equals(s)) - else at.as_tensor_variable(s, ndim=0) + else ptb.as_tensor_variable(s, ndim=0) for s in shape ) @@ -469,7 +469,7 @@ def infer_shape(self, fgraph, node, shapes): for dim in range(node.inputs[0].type.ndim): s = shape[dim] try: - s = at.get_underlying_scalar_constant_value(s) + s = ptb.get_underlying_scalar_constant_value(s) # We assume that `None` shapes are always retrieved by # `get_underlying_scalar_constant_value`, and only in that case do we default to # the shape of the input variable @@ -477,7 +477,7 @@ def infer_shape(self, fgraph, node, shapes): s = xshape[dim] except NotScalarConstantError: pass - new_shape.append(at.as_tensor_variable(s)) + new_shape.append(ptb.as_tensor_variable(s)) assert len(new_shape) == len(xshape) return [new_shape] @@ -570,7 +570,7 @@ def specify_shape( # If shape is a symbolic 1d vector of fixed length, we separate the items into a # tuple with one entry per shape dimension if len(shape) == 1 and shape[0] is not None: - shape_vector = at.as_tensor_variable(shape[0]) + shape_vector = ptb.as_tensor_variable(shape[0]) if shape_vector.ndim == 1: try: shape = tuple(shape_vector) @@ -579,7 +579,7 @@ def specify_shape( # If the specified shape is already encoded in the input static shape, do nothing # This ignores PyTensor constants in shape - x = at.as_tensor_variable(x) # type: ignore + x = ptb.as_tensor_variable(x) # type: ignore new_shape_info = any( s != xts for (s, xts) in zip(shape, x.type.shape) if s is not None ) @@ -593,7 +593,7 @@ def specify_shape( @_get_vector_length.register(SpecifyShape) # type: ignore def _get_vector_length_SpecifyShape(op: Op, var: TensorVariable) -> int: try: - return int(at.get_underlying_scalar_constant_value(var.owner.inputs[1]).item()) + return int(ptb.get_underlying_scalar_constant_value(var.owner.inputs[1]).item()) except NotScalarConstantError: raise ValueError(f"Length of {var} cannot be determined") @@ -649,9 +649,9 @@ def __str__(self): return f"{self.__class__.__name__}{{{self.ndim}}}" def make_node(self, x, shp): - x = at.as_tensor_variable(x) + x = ptb.as_tensor_variable(x) shp_orig = shp - shp = at.as_tensor_variable(shp, ndim=1) + shp = ptb.as_tensor_variable(shp, ndim=1) if not ( shp.dtype in int_dtypes or (isinstance(shp, TensorConstant) and 
shp.data.size == 0) @@ -672,9 +672,9 @@ def make_node(self, x, shp): shp_list = [shp_orig] for index in range(self.ndim): y = shp_list[index] - y = at.as_tensor_variable(y) + y = ptb.as_tensor_variable(y) try: - s_val = at.get_underlying_scalar_constant_value(y).item() + s_val = ptb.get_underlying_scalar_constant_value(y).item() if s_val >= 0: out_shape[index] = s_val except NotScalarConstantError: @@ -773,7 +773,7 @@ def infer_shape(self, fgraph, node, ishapes): return [ tuple( [ - at.switch(eq(requ[i], -1), rest_size, requ[i]) + ptb.switch(eq(requ[i], -1), rest_size, requ[i]) for i in range(self.ndim) ] ) @@ -836,7 +836,7 @@ def _vectorize_reshape(op, node, x, shape): def reshape(x, newshape, ndim=None): if ndim is None: - newshape = at.as_tensor_variable(newshape) + newshape = ptb.as_tensor_variable(newshape) if newshape.type.ndim != 1: raise TypeError( "New shape in reshape must be a vector or a list/tuple of" @@ -867,7 +867,7 @@ def shape_padleft(t, n_ones=1): Dimshuffle """ - _t = at.as_tensor_variable(t) + _t = ptb.as_tensor_variable(t) if n_ones == 0: return _t pattern = ["x"] * n_ones + list(range(_t.type.ndim)) @@ -884,7 +884,7 @@ def shape_padright(t, n_ones=1): Dimshuffle """ - _t = at.as_tensor_variable(t) + _t = ptb.as_tensor_variable(t) if n_ones == 0: return _t pattern = list(range(_t.type.ndim)) + ["x"] * n_ones @@ -913,7 +913,7 @@ def shape_padaxis(t, axis): Dimshuffle """ - _t = at.as_tensor_variable(t) + _t = ptb.as_tensor_variable(t) ndim = _t.ndim + 1 if not -ndim <= axis < ndim: diff --git a/pytensor/tensor/slinalg.py b/pytensor/tensor/slinalg.py index 6e24c56f55..f96dec5a35 100644 --- a/pytensor/tensor/slinalg.py +++ b/pytensor/tensor/slinalg.py @@ -11,8 +11,8 @@ from pytensor.graph.basic import Apply from pytensor.graph.op import Op from pytensor.tensor import as_tensor_variable -from pytensor.tensor import basic as at -from pytensor.tensor import math as atm +from pytensor.tensor import basic as ptb +from pytensor.tensor import math as ptm from pytensor.tensor.blockwise import Blockwise from pytensor.tensor.nlinalg import matrix_dot from pytensor.tensor.shape import reshape @@ -96,9 +96,9 @@ def L_op(self, inputs, outputs, gradients): # Replace the cholesky decomposition with 1 if there are nans # or solve_upper_triangular will throw a ValueError. 
if self.on_error == "nan": - ok = ~atm.any(atm.isnan(chol_x)) - chol_x = at.switch(ok, chol_x, 1) - dz = at.switch(ok, dz, 1) + ok = ~ptm.any(ptm.isnan(chol_x)) + chol_x = ptb.switch(ok, chol_x, 1) + dz = ptb.switch(ok, dz, 1) # deal with upper triangular by converting to lower triangular if not self.lower: @@ -107,7 +107,7 @@ def L_op(self, inputs, outputs, gradients): def tril_and_halve_diagonal(mtx): """Extracts lower triangle of square matrix and halves diagonal.""" - return at.tril(mtx) - at.diag(at.diagonal(mtx) / 2.0) + return ptb.tril(mtx) - ptb.diag(ptb.diagonal(mtx) / 2.0) def conjugate_solve_triangular(outer, inner): """Computes L^{-T} P L^{-1} for lower-triangular L.""" @@ -119,12 +119,12 @@ def conjugate_solve_triangular(outer, inner): ) if self.lower: - grad = at.tril(s + s.T) - at.diag(at.diagonal(s)) + grad = ptb.tril(s + s.T) - ptb.diag(ptb.diagonal(s)) else: - grad = at.triu(s + s.T) - at.diag(at.diagonal(s)) + grad = ptb.triu(s + s.T) - ptb.diag(ptb.diagonal(s)) if self.on_error == "nan": - return [at.switch(ok, grad, np.nan)] + return [ptb.switch(ok, grad, np.nan)] else: return [grad] @@ -214,7 +214,7 @@ def L_op(self, inputs, outputs, output_gradients): ) b_bar = trans_solve_op(A.T, c_bar) # force outer product if vector second input - A_bar = -atm.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T) + A_bar = -ptm.outer(b_bar, c) if c.ndim == 1 else -b_bar.dot(c.T) return [A_bar, b_bar] @@ -303,9 +303,9 @@ def L_op(self, inputs, outputs, output_gradients): res = super().L_op(inputs, outputs, output_gradients) if self.lower: - res[0] = at.tril(res[0]) + res[0] = ptb.tril(res[0]) else: - res[0] = at.triu(res[0]) + res[0] = ptb.triu(res[0]) return res @@ -582,8 +582,8 @@ def kron(a, b): "kron: inputs dimensions must sum to 3 or more. " f"You passed {int(a.ndim)} and {int(b.ndim)}." 
) - o = atm.outer(a, b) - o = o.reshape(at.concatenate((a.shape, b.shape)), ndim=a.ndim + b.ndim) + o = ptm.outer(a, b) + o = o.reshape(ptb.concatenate((a.shape, b.shape)), ndim=a.ndim + b.ndim) shf = o.dimshuffle(0, 2, 1, *list(range(3, o.ndim))) if shf.ndim == 3: shf = o.dimshuffle(1, 0, 2) diff --git a/pytensor/tensor/subtensor.py b/pytensor/tensor/subtensor.py index de0862f443..7e47898c0a 100644 --- a/pytensor/tensor/subtensor.py +++ b/pytensor/tensor/subtensor.py @@ -8,7 +8,7 @@ import numpy as np import pytensor -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor.configdefaults import config from pytensor.gradient import DisconnectedType from pytensor.graph.basic import Apply, Constant, Variable @@ -52,16 +52,16 @@ _logger = logging.getLogger("pytensor.tensor.subtensor") -invalid_scal_types = (aes.float64, aes.float32, aes.float16) +invalid_scal_types = (ps.float64, ps.float32, ps.float16) scal_types = ( - aes.int64, - aes.int32, - aes.int16, - aes.int8, - aes.uint64, - aes.uint32, - aes.uint16, - aes.uint8, + ps.int64, + ps.int32, + ps.int16, + ps.int8, + ps.uint64, + ps.uint32, + ps.uint16, + ps.uint8, ) tensor_types = ( lscalar, @@ -144,7 +144,7 @@ def as_index_constant( as_index_constant(a.step), ) elif isinstance(a, (int, np.integer)): - return aes.ScalarConstant(aes.int64, a) + return ps.ScalarConstant(ps.int64, a) elif not isinstance(a, Variable): return as_tensor_variable(a) else: @@ -382,7 +382,7 @@ def range_len(slc): switch( and_(lt(step, 0), gt(start, stop)), 1 + (start - 1 - stop) // (-step), - aes.ScalarConstant(aes.int64, 0), + ps.ScalarConstant(ps.int64, 0), ), ) @@ -437,9 +437,9 @@ def basic_shape(shape, indices): idx_inputs = (None,) res_shape += (slice_len(slice(*idx_inputs), n),) elif idx is None: - res_shape += (aes.ScalarConstant(aes.int64, 1),) + res_shape += (ps.ScalarConstant(ps.int64, 1),) elif isinstance(getattr(idx, "type", None), NoneTypeT): - res_shape += (aes.ScalarConstant(aes.int64, 1),) + res_shape += (ps.ScalarConstant(ps.int64, 1),) else: raise ValueError(f"Invalid index type: {idx}") return res_shape @@ -595,9 +595,9 @@ def index_vars_to_types(entry, slice_ok=True): and entry.type in tensor_types and all(entry.type.broadcastable) ): - return aes.get_scalar_type(entry.type.dtype) + return ps.get_scalar_type(entry.type.dtype) elif isinstance(entry, Type) and entry in tensor_types and all(entry.broadcastable): - return aes.get_scalar_type(entry.dtype) + return ps.get_scalar_type(entry.dtype) elif slice_ok and isinstance(entry, slice): a = entry.start b = entry.stop @@ -683,15 +683,15 @@ def conv(val): return list(map(conv, real_idx)) -def as_nontensor_scalar(a: Variable) -> aes.ScalarVariable: +def as_nontensor_scalar(a: Variable) -> ps.ScalarVariable: """Convert a value to a `ScalarType` variable.""" - # Since aes.as_scalar does not know about tensor types (it would + # Since ps.as_scalar does not know about tensor types (it would # create a circular import) , this method converts either a # TensorVariable or a ScalarVariable to a scalar. 
if isinstance(a, Variable) and isinstance(a.type, TensorType): return pytensor.tensor.scalar_from_tensor(a) else: - return aes.as_scalar(a) + return ps.as_scalar(a) class Subtensor(COp): @@ -1255,7 +1255,7 @@ def _process(self, idxs, op_inputs, pstate): sidxs = [] getattr(pstate, "precedence", None) for entry in idxs: - if isinstance(entry, aes.ScalarType): + if isinstance(entry, ps.ScalarType): with set_precedence(pstate): sidxs.append(pstate.pprinter.process(inputs.pop())) elif isinstance(entry, slice): @@ -2181,7 +2181,7 @@ class AdvancedIncSubtensor1(COp): __props__ = ("inplace", "set_instead_of_inc") check_input = False - params_type = ParamsType(inplace=aes.bool, set_instead_of_inc=aes.bool) + params_type = ParamsType(inplace=ps.bool, set_instead_of_inc=ps.bool) def __init__(self, inplace=False, set_instead_of_inc=False): self.inplace = bool(inplace) diff --git a/pytensor/tensor/type.py b/pytensor/tensor/type.py index 7392d3f421..153879b77e 100644 --- a/pytensor/tensor/type.py +++ b/pytensor/tensor/type.py @@ -6,7 +6,7 @@ import numpy as np import pytensor -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor.configdefaults import config from pytensor.graph.basic import Variable from pytensor.graph.type import HasDataType, HasShape @@ -26,14 +26,14 @@ # Define common subsets of dtypes (as strings). -complex_dtypes = list(map(str, aes.complex_types)) -continuous_dtypes = list(map(str, aes.continuous_types)) -float_dtypes = list(map(str, aes.float_types)) -integer_dtypes = list(map(str, aes.integer_types)) -discrete_dtypes = list(map(str, aes.discrete_types)) -all_dtypes = list(map(str, aes.all_types)) -int_dtypes = list(map(str, aes.int_types)) -uint_dtypes = list(map(str, aes.uint_types)) +complex_dtypes = list(map(str, ps.complex_types)) +continuous_dtypes = list(map(str, ps.continuous_types)) +float_dtypes = list(map(str, ps.float_types)) +integer_dtypes = list(map(str, ps.integer_types)) +discrete_dtypes = list(map(str, ps.discrete_types)) +all_dtypes = list(map(str, ps.all_types)) +int_dtypes = list(map(str, ps.int_types)) +uint_dtypes = list(map(str, ps.uint_types)) # TODO: add more type correspondences for e.g. int32, int64, float32, # complex64, etc. @@ -185,7 +185,7 @@ def filter(self, data, strict=False, allow_downcast=None): if isinstance(data, np.ndarray): # Check if self.dtype can accurately represent data # (do not try to convert the data) - up_dtype = aes.upcast(self.dtype, data.dtype) + up_dtype = ps.upcast(self.dtype, data.dtype) if up_dtype == self.dtype: # Bug in the following line when data is a # scalar array, see @@ -297,7 +297,7 @@ def dtype_specs(self): ) def to_scalar_type(self): - return aes.get_scalar_type(dtype=self.dtype) + return ps.get_scalar_type(dtype=self.dtype) def in_same_class(self, otype): r"""Determine if `otype` is in the same class of fixed broadcastable types as `self`. 
@@ -615,22 +615,22 @@ def c_sync(self, name, sub): ) def c_headers(self, **kwargs): - return aes.get_scalar_type(self.dtype).c_headers(**kwargs) + return ps.get_scalar_type(self.dtype).c_headers(**kwargs) def c_libraries(self, **kwargs): - return aes.get_scalar_type(self.dtype).c_libraries(**kwargs) + return ps.get_scalar_type(self.dtype).c_libraries(**kwargs) def c_compile_args(self, **kwargs): - return aes.get_scalar_type(self.dtype).c_compile_args(**kwargs) + return ps.get_scalar_type(self.dtype).c_compile_args(**kwargs) def c_support_code(self, **kwargs): - return aes.get_scalar_type(self.dtype).c_support_code(**kwargs) + return ps.get_scalar_type(self.dtype).c_support_code(**kwargs) def c_init_code(self, **kwargs): - return aes.get_scalar_type(self.dtype).c_init_code(**kwargs) + return ps.get_scalar_type(self.dtype).c_init_code(**kwargs) def c_code_cache_version(self): - scalar_version = aes.get_scalar_type(self.dtype).c_code_cache_version() + scalar_version = ps.get_scalar_type(self.dtype).c_code_cache_version() if scalar_version: return (11,) + scalar_version else: diff --git a/pytensor/tensor/variable.py b/pytensor/tensor/variable.py index 770ea66d8e..d4b3df6975 100644 --- a/pytensor/tensor/variable.py +++ b/pytensor/tensor/variable.py @@ -7,7 +7,7 @@ import numpy as np -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.configdefaults import config from pytensor.graph.basic import Constant, OptionalApplyType, Variable from pytensor.graph.utils import MetaType @@ -24,10 +24,10 @@ class _tensor_py_operators: def __abs__(self): - return at.math.abs(self) + return pt.math.abs(self) def __neg__(self): - return at.math.neg(self) + return pt.math.neg(self) # These won't work because Python requires an int return value # def __int__(self): return convert_to_int32(self) @@ -37,22 +37,22 @@ def __neg__(self): _is_nonzero = True def __lt__(self, other): - rval = at.math.lt(self, other) + rval = pt.math.lt(self, other) rval._is_nonzero = False return rval def __le__(self, other): - rval = at.math.le(self, other) + rval = pt.math.le(self, other) rval._is_nonzero = False return rval def __gt__(self, other): - rval = at.math.gt(self, other) + rval = pt.math.gt(self, other) rval._is_nonzero = False return rval def __ge__(self, other): - rval = at.math.ge(self, other) + rval = pt.math.ge(self, other) rval._is_nonzero = False return rval @@ -72,25 +72,25 @@ def __bool__(self): raise TypeError("Variables do not support boolean operations.") def __invert__(self): - return at.math.invert(self) + return pt.math.invert(self) def __and__(self, other): - return at.math.and_(self, other) + return pt.math.and_(self, other) def __or__(self, other): - return at.math.or_(self, other) + return pt.math.or_(self, other) def __xor__(self, other): - return at.math.xor(self, other) + return pt.math.xor(self, other) def __rand__(self, other): - return at.math.and_(other, self) + return pt.math.and_(other, self) def __ror__(self, other): - return at.math.or_(other, self) + return pt.math.or_(other, self) def __rxor__(self, other): - return at.math.xor(other, self) + return pt.math.xor(other, self) # def __iand__(self, other): # return _and_inplace(self, other) @@ -103,7 +103,7 @@ def __rxor__(self, other): def __add__(self, other): try: - return at.math.add(self, other) + return pt.math.add(self, other) # We should catch the minimum number of exception here. 
# Otherwise this will convert error when PyTensor flags # compute_test_value is used @@ -122,7 +122,7 @@ def __sub__(self, other): # See explanation in __add__ for the error caught # and the return value in that case try: - return at.math.sub(self, other) + return pt.math.sub(self, other) except (NotImplementedError, TypeError): return NotImplemented @@ -130,7 +130,7 @@ def __mul__(self, other): # See explanation in __add__ for the error caught # and the return value in that case try: - return at.math.mul(self, other) + return pt.math.mul(self, other) except (NotImplementedError, TypeError): return NotImplemented @@ -138,7 +138,7 @@ def __div__(self, other): # See explanation in __add__ for the error caught # and the return value in that case try: - return at.math.div_proxy(self, other) + return pt.math.div_proxy(self, other) except IntegerDivisionError: # This is to raise the exception that occurs when trying to divide # two integer arrays (currently forbidden). @@ -150,7 +150,7 @@ def __pow__(self, other): # See explanation in __add__ for the error caught # and the return value in that case try: - return at.math.pow(self, other) + return pt.math.pow(self, other) except (NotImplementedError, TypeError): return NotImplemented @@ -158,7 +158,7 @@ def __mod__(self, other): # See explanation in __add__ for the error caught # and the return value in that case try: - return at.math.mod_check(self, other) + return pt.math.mod_check(self, other) except ComplexError: # This is to raise the exception that occurs when trying to compute # x % y with either x or y a complex number. @@ -167,19 +167,19 @@ def __mod__(self, other): return NotImplemented def __divmod__(self, other): - return at.math.divmod(self, other) + return pt.math.divmod(self, other) def __truediv__(self, other): - return at.math.true_div(self, other) + return pt.math.true_div(self, other) def __floordiv__(self, other): - return at.math.floor_div(self, other) + return pt.math.floor_div(self, other) def __rtruediv__(self, other): - return at.math.true_div(other, self) + return pt.math.true_div(other, self) def __rfloordiv__(self, other): - return at.math.floor_div(other, self) + return pt.math.floor_div(other, self) # Do not use these; in-place `Op`s should be inserted by optimizations # only! @@ -198,39 +198,39 @@ def __rfloordiv__(self, other): # return _pow_inplace(self, other) def __radd__(self, other): - return at.math.add(other, self) + return pt.math.add(other, self) def __rsub__(self, other): - return at.math.sub(other, self) + return pt.math.sub(other, self) def __rmul__(self, other): - return at.math.mul(other, self) + return pt.math.mul(other, self) def __rdiv__(self, other): - return at.math.div_proxy(other, self) + return pt.math.div_proxy(other, self) def __rmod__(self, other): - return at.math.mod(other, self) + return pt.math.mod(other, self) def __rdivmod__(self, other): - return at.math.divmod(other, self) + return pt.math.divmod(other, self) def __rpow__(self, other): - return at.math.pow(other, self) + return pt.math.pow(other, self) def __ceil__(self): - return at.math.ceil(self) + return pt.math.ceil(self) def __floor__(self): - return at.math.floor(self) + return pt.math.floor(self) def __trunc__(self): - return at.math.trunc(self) + return pt.math.trunc(self) # NumPy-like transpose property @property def T(self): - return at.basic.transpose(self) + return pt.basic.transpose(self) def transpose(self, *axes): """Transpose this array. 
@@ -246,33 +246,33 @@ def transpose(self, *axes): """ if len(axes) == 0: - return at.basic.transpose(self) + return pt.basic.transpose(self) try: iter(axes[0]) iterable = True except TypeError: iterable = False if len(axes) == 1 and iterable: - return at.basic.transpose(self, axes[0]) + return pt.basic.transpose(self, axes[0]) else: - return at.basic.transpose(self, axes) + return pt.basic.transpose(self, axes) @property def shape(self): - return at.shape(self) + return pt.shape(self) @property def size(self): if self.ndim == 1: return self.shape[0] else: - return at.math.prod(self.shape) + return pt.math.prod(self.shape) def any(self, axis=None, keepdims=False): - return at.math.any(self, axis=axis, keepdims=keepdims) + return pt.math.any(self, axis=axis, keepdims=keepdims) def all(self, axis=None, keepdims=False): - return at.math.all(self, axis=axis, keepdims=keepdims) + return pt.math.all(self, axis=axis, keepdims=keepdims) # Old note: "We can't implement this because Python requests that this # function returns an integer." @@ -307,7 +307,7 @@ def reshape(self, shape, *, ndim=None): "Expected ndim to be an integer, is " + str(type(ndim)) ) - return at.reshape(self, shape, ndim=ndim) + return pt.reshape(self, shape, ndim=ndim) def dimshuffle(self, *pattern): """ @@ -340,17 +340,17 @@ def dimshuffle(self, *pattern): """ if (len(pattern) == 1) and (isinstance(pattern[0], (list, tuple))): pattern = pattern[0] - op = at.elemwise.DimShuffle(list(self.type.broadcastable), pattern) + op = pt.elemwise.DimShuffle(list(self.type.broadcastable), pattern) return op(self) def flatten(self, ndim=1): - return at.basic.flatten(self, ndim) + return pt.basic.flatten(self, ndim) def ravel(self): - return at.basic.flatten(self) + return pt.basic.flatten(self) def diagonal(self, offset=0, axis1=0, axis2=1): - return at.basic.diagonal(self, offset, axis1, axis2) + return pt.basic.diagonal(self, offset, axis1, axis2) def transfer(self, target): """Transfer this this array's data to another device. 
@@ -363,85 +363,85 @@ def transfer(self, target): target : str The desired location of the output variable """ - return at.basic.transfer(self, target) + return pt.basic.transfer(self, target) def arccos(self): - return at.math.arccos(self) + return pt.math.arccos(self) def arccosh(self): - return at.math.arccosh(self) + return pt.math.arccosh(self) def arcsin(self): - return at.math.arcsin(self) + return pt.math.arcsin(self) def arcsinh(self): - return at.math.arcsinh(self) + return pt.math.arcsinh(self) def arctan(self): - return at.math.arctan(self) + return pt.math.arctan(self) def arctanh(self): - return at.math.arctanh(self) + return pt.math.arctanh(self) def ceil(self): - return at.math.ceil(self) + return pt.math.ceil(self) def cos(self): - return at.math.cos(self) + return pt.math.cos(self) def cosh(self): - return at.math.cosh(self) + return pt.math.cosh(self) def deg2rad(self): - return at.math.deg2rad(self) + return pt.math.deg2rad(self) def exp(self): - return at.math.exp(self) + return pt.math.exp(self) def exp2(self): - return at.math.exp2(self) + return pt.math.exp2(self) def expm1(self): - return at.math.expm1(self) + return pt.math.expm1(self) def floor(self): - return at.math.floor(self) + return pt.math.floor(self) def log(self): - return at.math.log(self) + return pt.math.log(self) def log10(self): - return at.math.log10(self) + return pt.math.log10(self) def log1p(self): - return at.math.log1p(self) + return pt.math.log1p(self) def log2(self): - return at.math.log2(self) + return pt.math.log2(self) def rad2deg(self): - return at.math.rad2deg(self) + return pt.math.rad2deg(self) def sin(self): - return at.math.sin(self) + return pt.math.sin(self) def sinh(self): - return at.math.sinh(self) + return pt.math.sinh(self) def sqrt(self): - return at.math.sqrt(self) + return pt.math.sqrt(self) def tan(self): - return at.math.tan(self) + return pt.math.tan(self) def tanh(self): - return at.math.tanh(self) + return pt.math.tanh(self) def trunc(self): - return at.math.trunc(self) + return pt.math.trunc(self) def astype(self, dtype): - return at.basic.cast(self, dtype) + return pt.basic.cast(self, dtype) def __getitem__(self, args): def includes_bool(args_el): @@ -502,9 +502,9 @@ def includes_bool(args_el): if len(ellipses) > 1: raise IndexError("an index can only have a single Ellipsis (`...`)") elif len(ellipses) == 1: - ellipsis_at = ellipses[0] + ellipsis_pt = ellipses[0] args = list(args) - args[ellipsis_at : ellipsis_at + 1] = [slice(None)] * ( + args[ellipsis_pt : ellipsis_pt + 1] = [slice(None)] * ( self.ndim - index_dim_count ) @@ -518,7 +518,7 @@ def is_empty_array(val): # Convert python literals to pytensor constants args = tuple( [ - at.subtensor.as_index_constant( + pt.subtensor.as_index_constant( np.array(inp, dtype=np.uint8) if is_empty_array(inp) else inp ) for inp in args @@ -537,7 +537,7 @@ def is_empty_array(val): if arg is not np.newaxis and arg is not NoneConst: try: - at.subtensor.index_vars_to_types(arg) + pt.subtensor.index_vars_to_types(arg) except AdvancedIndexingError: if advanced: break @@ -545,7 +545,7 @@ def is_empty_array(val): advanced = True if advanced: - return at.subtensor.advanced_subtensor(self, *args) + return pt.subtensor.advanced_subtensor(self, *args) else: if np.newaxis in args or NoneConst in args: # `np.newaxis` (i.e. 
`None`) in NumPy indexing mean "add a new @@ -587,9 +587,9 @@ def is_empty_array(val): else: return view.__getitem__(tuple(new_args)) else: - return at.subtensor.Subtensor(args)( + return pt.subtensor.Subtensor(args)( self, - *at.subtensor.get_slice_elements( + *pt.subtensor.get_slice_elements( args, lambda entry: isinstance(entry, Variable) ), ) @@ -600,20 +600,20 @@ def __setitem__(self, key, value): ) def take(self, indices, axis=None, mode="raise"): - return at.subtensor.take(self, indices, axis, mode) + return pt.subtensor.take(self, indices, axis, mode) def copy(self, name=None): """Return a symbolic copy and optionally assign a name. Does not copy the tags. """ - copied_variable = at.basic.tensor_copy(self) + copied_variable = pt.basic.tensor_copy(self) copied_variable.name = name return copied_variable def __iter__(self): try: - for i in range(at.basic.get_vector_length(self)): + for i in range(pt.basic.get_vector_length(self)): yield self[i] except TypeError: # This prevents accidental iteration via sum(self) @@ -646,28 +646,28 @@ def dtype(self): return self.type.dtype def __dot__(left, right): - return at.math.dense_dot(left, right) + return pt.math.dense_dot(left, right) def __rdot__(right, left): - return at.math.dense_dot(left, right) + return pt.math.dense_dot(left, right) dot = __dot__ def __matmul__(left, right): - return at.math.matmul(left, right) + return pt.math.matmul(left, right) def __rmatmul__(right, left): - return at.math.matmul(left, right) + return pt.math.matmul(left, right) def sum(self, axis=None, dtype=None, keepdims=False, acc_dtype=None): """See :func:`pytensor.tensor.math.sum`.""" - return at.math.sum( + return pt.math.sum( self, axis=axis, dtype=dtype, keepdims=keepdims, acc_dtype=acc_dtype ) def prod(self, axis=None, dtype=None, keepdims=False, acc_dtype=None): """See :func:`pytensor.tensor.math.prod`.""" - return at.math.prod( + return pt.math.prod( self, axis=axis, dtype=dtype, keepdims=keepdims, acc_dtype=acc_dtype ) @@ -677,60 +677,60 @@ def norm(self, L, axis=None, keepdims=False): if np.isinf(L): raise NotImplementedError() # optimizations will/should catch cases like L=1, L=2 - y = at.math.pow( - at.math.pow(at.math.abs(self), L).sum(axis=axis), + y = pt.math.pow( + pt.math.pow(pt.math.abs(self), L).sum(axis=axis), 1.0 / L, ) if keepdims: - return at.math.makeKeepDims(self, y, axis) + return pt.math.makeKeepDims(self, y, axis) else: return y def mean(self, axis=None, dtype=None, keepdims=False, acc_dtype=None): """See :func:`pytensor.tensor.math.mean`.""" - return at.math.mean( + return pt.math.mean( self, axis=axis, dtype=dtype, keepdims=keepdims, acc_dtype=acc_dtype ) def var(self, axis=None, ddof=0, keepdims=False, corrected=False): """See :func:`pytensor.tensor.math.var`.""" - return at.math.var( + return pt.math.var( self, axis=axis, ddof=ddof, keepdims=keepdims, corrected=corrected ) def std(self, axis=None, ddof=0, keepdims=False, corrected=False): """See :func:`pytensor.tensor.math.std`.""" - return at.math.std( + return pt.math.std( self, axis=axis, ddof=ddof, keepdims=keepdims, corrected=corrected ) def min(self, axis=None, keepdims=False): """See :func:`pytensor.tensor.math.min`.""" - return at.math.min(self, axis, keepdims=keepdims) + return pt.math.min(self, axis, keepdims=keepdims) def max(self, axis=None, keepdims=False): """See :func:`pytensor.tensor.math.max`.""" - return at.math.max(self, axis, keepdims=keepdims) + return pt.math.max(self, axis, keepdims=keepdims) def argmin(self, axis=None, keepdims=False): """See 
:func:`pytensor.tensor.math.argmin`.""" - return at.math.argmin(self, axis, keepdims=keepdims) + return pt.math.argmin(self, axis, keepdims=keepdims) def argmax(self, axis=None, keepdims=False): """See :func:`pytensor.tensor.math.argmax`.""" - return at.math.argmax(self, axis, keepdims=keepdims) + return pt.math.argmax(self, axis, keepdims=keepdims) def nonzero(self, return_matrix=False): """See :func:`pytensor.tensor.basic.nonzero`.""" - return at.nonzero(self, return_matrix=return_matrix) + return pt.nonzero(self, return_matrix=return_matrix) def nonzero_values(self): """See :func:`pytensor.tensor.basic.nonzero_values`.""" - return at.nonzero_values(self) + return pt.nonzero_values(self) def sort(self, axis=-1, kind="quicksort", order=None): """See :func:`pytensor.tensor.sort.sort`.""" - return at.sort(self, axis, kind, order) + return pt.sort(self, axis, kind, order) def argsort(self, axis=-1, kind="quicksort", order=None): """See :func:`pytensor.tensor.sort.argsort`.""" @@ -740,50 +740,50 @@ def argsort(self, axis=-1, kind="quicksort", order=None): def clip(self, a_min, a_max): "See :func:`pytensor.tensor.math.clip`." - return at.math.clip(self, a_min, a_max) + return pt.math.clip(self, a_min, a_max) def conj(self): """See :func:`pytensor.tensor.math.conj`.""" - return at.math.conj(self) + return pt.math.conj(self) conjugate = conj def repeat(self, repeats, axis=None): """See :func:`pytensor.tensor.basic.repeat`.""" - return at.extra_ops.repeat(self, repeats, axis) + return pt.extra_ops.repeat(self, repeats, axis) def round(self, mode=None): """See :func:`pytensor.tensor.math.round`.""" - return at.math.round(self, mode) + return pt.math.round(self, mode) def trace(self): - return at.linalg.trace(self) + return pt.linalg.trace(self) # This value is set so that PyTensor arrays will trump NumPy operators. __array_priority__ = 1000 def get_underlying_scalar_constant(self): - return at.basic.get_underlying_scalar_constant_value(self) + return pt.basic.get_underlying_scalar_constant_value(self) def zeros_like(model, dtype=None): - return at.basic.zeros_like(model, dtype=dtype) + return pt.basic.zeros_like(model, dtype=dtype) def ones_like(model, dtype=None): - return at.basic.ones_like(model, dtype=dtype) + return pt.basic.ones_like(model, dtype=dtype) def cumsum(self, axis=None): - return at.extra_ops.cumsum(self, axis) + return pt.extra_ops.cumsum(self, axis) def cumprod(self, axis=None): - return at.extra_ops.cumprod(self, axis) + return pt.extra_ops.cumprod(self, axis) def searchsorted(self, v, side="left", sorter=None): - return at.extra_ops.searchsorted(self, v, side, sorter) + return pt.extra_ops.searchsorted(self, v, side, sorter) def ptp(self, axis=None): """See :func:`pytensor.tensor.math.ptp`.""" - return at.math.ptp(self, axis) + return pt.math.ptp(self, axis) def swapaxes(self, axis1, axis2): """See :func:`pytensor.tensor.basic.swapaxes`. @@ -792,11 +792,11 @@ def swapaxes(self, axis1, axis2): will be returned. """ - return at.basic.swapaxes(self, axis1, axis2) + return pt.basic.swapaxes(self, axis1, axis2) def fill(self, value): """Fill inputted tensor with the assigned value.""" - return at.basic.fill(self, value) + return pt.basic.fill(self, value) def choose(self, choices, mode="raise"): """ @@ -804,7 +804,7 @@ def choose(self, choices, mode="raise"): from. """ - return at.basic.choose(self, choices, mode="raise") + return pt.basic.choose(self, choices, mode="raise") def squeeze(self, axis=None): """ @@ -814,11 +814,11 @@ def squeeze(self, axis=None): removed. 
This is always `x` itself or a view into `x`. """ - return at.extra_ops.squeeze(self, axis=axis) + return pt.extra_ops.squeeze(self, axis=axis) def compress(self, a, axis=None): """Return selected slices only.""" - return at.extra_ops.compress(self, a, axis=axis) + return pt.extra_ops.compress(self, a, axis=axis) def set(self, idx, y, **kwargs): """Return a copy of self with the indexed values set to y. @@ -833,7 +833,7 @@ def set(self, idx, y, **kwargs): >>> out = x.set(1, 2) >>> out.eval() # array([1., 2., 1.]) """ - return at.subtensor.set_subtensor(self[idx], y, **kwargs) + return pt.subtensor.set_subtensor(self[idx], y, **kwargs) def inc(self, idx, y, **kwargs): """Return a copy of self with the indexed values incremented by y. @@ -849,7 +849,7 @@ def inc(self, idx, y, **kwargs): >>> out = x.inc(1, 2) >>> out.eval() # array([1., 3., 1.]) """ - return at.inc_subtensor(self[idx], y, **kwargs) + return pt.inc_subtensor(self[idx], y, **kwargs) class TensorVariable( @@ -1068,7 +1068,7 @@ def equals(self, other): # numpy.ndarray, and python type. if isinstance(other, (np.ndarray, int, float)): # Make a TensorConstant to be able to compare - other = at.basic.constant(other) + other = pt.basic.constant(other) return ( isinstance(other, TensorConstant) and self.signature() == other.signature() ) diff --git a/pytensor/tensor/xlogx.py b/pytensor/tensor/xlogx.py index f09111b2cf..8cc27de9fb 100644 --- a/pytensor/tensor/xlogx.py +++ b/pytensor/tensor/xlogx.py @@ -1,10 +1,10 @@ import numpy as np -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor.tensor.elemwise import Elemwise -class XlogX(aes.UnaryScalarOp): +class XlogX(ps.UnaryScalarOp): """ Compute X * log(X), with special case 0 log(0) = 0. @@ -22,12 +22,12 @@ def impl(self, x): def grad(self, inputs, grads): (x,) = inputs (gz,) = grads - return [gz * (1 + aes.log(x))] + return [gz * (1 + ps.log(x))] def c_code(self, node, name, inputs, outputs, sub): (x,) = inputs (z,) = outputs - if node.inputs[0].type in [aes.float32, aes.float64]: + if node.inputs[0].type in [ps.float32, ps.float64]: return f"""{z} = {x} == 0.0 ? 0.0 @@ -35,11 +35,11 @@ def c_code(self, node, name, inputs, outputs, sub): raise NotImplementedError("only floatingpoint is implemented") -scalar_xlogx = XlogX(aes.upgrade_to_float, name="scalar_xlogx") +scalar_xlogx = XlogX(ps.upgrade_to_float, name="scalar_xlogx") xlogx = Elemwise(scalar_xlogx, name="xlogx") -class XlogY0(aes.BinaryScalarOp): +class XlogY0(ps.BinaryScalarOp): """ Compute X * log(Y), with special case 0 log(0) = 0. @@ -57,12 +57,12 @@ def impl(self, x, y): def grad(self, inputs, grads): x, y = inputs (gz,) = grads - return [gz * aes.log(y), gz * x / y] + return [gz * ps.log(y), gz * x / y] def c_code(self, node, name, inputs, outputs, sub): x, y = inputs (z,) = outputs - if node.inputs[0].type in [aes.float32, aes.float64]: + if node.inputs[0].type in [ps.float32, ps.float64]: return f"""{z} = {x} == 0.0 ? 
0.0 @@ -70,5 +70,5 @@ def c_code(self, node, name, inputs, outputs, sub): raise NotImplementedError("only floatingpoint is implemented") -scalar_xlogy0 = XlogY0(aes.upgrade_to_float, name="scalar_xlogy0") +scalar_xlogy0 = XlogY0(ps.upgrade_to_float, name="scalar_xlogy0") xlogy0 = Elemwise(scalar_xlogy0, name="xlogy0") diff --git a/pytensor/typed_list/basic.py b/pytensor/typed_list/basic.py index 54e41124b0..d3e0c24223 100644 --- a/pytensor/typed_list/basic.py +++ b/pytensor/typed_list/basic.py @@ -1,6 +1,6 @@ import numpy as np -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile.debugmode import _lessbroken_deepcopy from pytensor.configdefaults import config from pytensor.graph.basic import Apply, Constant, Variable @@ -78,7 +78,7 @@ def make_node(self, x, index): index = Constant(SliceType(), index) return Apply(self, [x, index], [x.type()]) else: - index = at.constant(index, ndim=0, dtype="int64") + index = pt.constant(index, ndim=0, dtype="int64") return Apply(self, [x, index], [x.ttype()]) if isinstance(index.type, SliceType): return Apply(self, [x, index], [x.type()]) @@ -323,7 +323,7 @@ def make_node(self, x, index, toInsert): assert isinstance(x.type, TypedListType) assert x.ttype == toInsert.type if not isinstance(index, Variable): - index = at.constant(index, ndim=0, dtype="int64") + index = pt.constant(index, ndim=0, dtype="int64") else: assert index.dtype == "int64" assert isinstance(index, TensorVariable) and index.ndim == 0 @@ -650,7 +650,7 @@ def make_node(self, a): a2 = [] for elem in a: if not isinstance(elem, Variable): - elem = at.as_tensor_variable(elem) + elem = pt.as_tensor_variable(elem) a2.append(elem) if not all(a2[0].type.is_super(elem.type) for elem in a2): raise TypeError("MakeList need all input variable to be of the same type.") diff --git a/tests/compile/function/test_pfunc.py b/tests/compile/function/test_pfunc.py index 7a13bc9994..b5cfaba5f0 100644 --- a/tests/compile/function/test_pfunc.py +++ b/tests/compile/function/test_pfunc.py @@ -1,7 +1,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile import UnusedInputError, get_mode from pytensor.compile.function import function, pfunc from pytensor.compile.function.pfunc import rebuild_collect_shared @@ -10,7 +10,7 @@ from pytensor.configdefaults import config from pytensor.graph.utils import MissingInputError from pytensor.misc.safe_asarray import _asarray -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.type import ( bscalar, bvector, @@ -110,7 +110,7 @@ def test_no_shared_as_input(self): with pytest.raises( TypeError, match=r"^Cannot use a shared variable \(w\) as explicit input" ): - pfunc([w], at_sum(w * w)) + pfunc([w], pt_sum(w * w)) def test_default_container(self): # Ensure it is possible to (implicitly) use a shared variable in a @@ -119,7 +119,7 @@ def test_default_container(self): rng = np.random.default_rng(1827) w_init = rng.random(5) w = shared(w_init.copy(), "w") - reg = at_sum(w * w) + reg = pt_sum(w * w) f = pfunc([], reg) assert f() == np.sum(w_init * w_init) @@ -826,7 +826,7 @@ def test_input_aliasing_affecting_inplace_operations(self): In(m1, mutable=True), In(m2, mutable=True), ], - at.dot((x * 2), m1) + at.dot((y * 3), m2), + pt.dot((x * 2), m1) + pt.dot((y * 3), m2), ) # Test 1. 
If the same variable is given twice @@ -889,7 +889,7 @@ def test_partial_input_aliasing_affecting_inplace_operations(self): In(m2, mutable=True), In(m3, mutable=True), ], - (at.dot((x * 2), m1) + at.dot((y * 3), m2) + at.dot((z * 4), m3)), + (pt.dot((x * 2), m1) + pt.dot((y * 3), m2) + pt.dot((z * 4), m3)), ) # Compute bogus values diff --git a/tests/compile/function/test_types.py b/tests/compile/function/test_types.py index 59472b9cda..c07c79ee15 100644 --- a/tests/compile/function/test_types.py +++ b/tests/compile/function/test_types.py @@ -4,7 +4,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile import shared from pytensor.compile.debugmode import DebugMode, InvalidValueError from pytensor.compile.function import function @@ -17,7 +17,7 @@ from pytensor.graph.utils import MissingInputError from pytensor.link.vm import VMLinker from pytensor.tensor.math import dot -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import tanh from pytensor.tensor.type import ( dmatrix, @@ -455,7 +455,7 @@ def test_swap_SharedVariable_with_given(self): x = vector("x") y = vector("y") # this formular has no sense but for a test - out = (at_sum(x) - y) ** 2 + out = (pt_sum(x) - y) ** 2 train = function( [i], out, @@ -598,7 +598,7 @@ def test_shared_state_not_implicit(self): def test_constant_output(self): # Test that if the output is a constant, we respect the pytensor memory interface - f = function([], at.constant([4])) + f = function([], pt.constant([4])) # print f.maker.fgraph.toposort() out = f() assert (out == 4).all() @@ -609,7 +609,7 @@ def test_constant_output(self): assert (out2 == 4).all() # Test that if the output is a constant and borrow, we respect the pytensor memory interface - f = function([], Out(at.constant([4]), borrow=True)) + f = function([], Out(pt.constant([4]), borrow=True)) # print f.maker.fgraph.toposort() out = f() assert (out == 4).all() diff --git a/tests/compile/test_builders.py b/tests/compile/test_builders.py index 8fdeb18470..c07f97e0fe 100644 --- a/tests/compile/test_builders.py +++ b/tests/compile/test_builders.py @@ -3,7 +3,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile import shared from pytensor.compile.builders import OpFromGraph from pytensor.compile.function import function @@ -17,9 +17,9 @@ from pytensor.printing import debugprint from pytensor.tensor.basic import as_tensor from pytensor.tensor.math import dot, exp -from pytensor.tensor.math import round as at_round +from pytensor.tensor.math import round as pt_round from pytensor.tensor.math import sigmoid -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.random.utils import RandomStream from pytensor.tensor.rewriting.shape import ShapeOptimizer from pytensor.tensor.shape import specify_shape @@ -108,7 +108,7 @@ def test_grad(self, cls_ofg): e = x + y * z op = cls_ofg([x, y, z], [e]) f = op(x, y, z) - f = f - grad(at_sum(f), y) + f = f - grad(pt_sum(f), y) fn = function([x, y, z], f) xv = np.ones((2, 2), dtype=config.floatX) yv = np.ones((2, 2), dtype=config.floatX) * 3 @@ -123,8 +123,8 @@ def test_grad_grad(self, cls_ofg): e = x + y * z op = cls_ofg([x, y, z], [e]) f = op(x, y, z) - f = f - grad(at_sum(f), y) - f = f - grad(at_sum(f), y) + f = f - grad(pt_sum(f), y) + f = f - grad(pt_sum(f), y) fn = function([x, y, z], f) xv = 
np.ones((2, 2), dtype=config.floatX) yv = np.ones((2, 2), dtype=config.floatX) * 3 @@ -160,7 +160,7 @@ def test_shared_grad(self, cls_ofg): e = x + y * z + s op = cls_ofg([x, y, z], [e]) f = op(x, y, z) - f = f - grad(at_sum(f), y) + f = f - grad(pt_sum(f), y) fn = function([x, y, z], f) xv = np.ones((2, 2), dtype=config.floatX) yv = np.ones((2, 2), dtype=config.floatX) * 3 @@ -169,7 +169,7 @@ def test_shared_grad(self, cls_ofg): # grad again the shared variable f = op(x, y, z) - f = f - grad(at_sum(f), s) + f = f - grad(pt_sum(f), s) fn = function([x, y, z], f) np.testing.assert_array_almost_equal(15.0 + s.get_value(), fn(xv, yv, zv), 4) @@ -193,7 +193,7 @@ def go(inps, gs): # single override case (function or OfG instance) xx, yy = vector("xx"), vector("yy") for op in [op_mul, op_mul2]: - zz = at_sum(op(xx, yy)) + zz = pt_sum(op(xx, yy)) dx, dy = grad(zz, [xx, yy]) fn = function([xx, yy], [dx, dy]) xv = np.random.random((16,)).astype(config.floatX) @@ -219,7 +219,7 @@ def go2(inps, gs): [x, w, b], [x * w + b], grad_overrides=[go1, go2, "default"] ) xx, ww, bb = vector("xx"), vector("yy"), vector("bb") - zz = at_sum(op_linear(xx, ww, bb)) + zz = pt_sum(op_linear(xx, ww, bb)) dx, dw, db = grad(zz, [xx, ww, bb]) fn = function([xx, ww, bb], [dx, dw, db]) xv = np.random.random((16,)).astype(config.floatX) @@ -236,7 +236,7 @@ def go2(inps, gs): [x * w + b], grad_overrides=[go1, NullType()(), DisconnectedType()()], ) - zz2 = at_sum(op_linear2(xx, ww, bb)) + zz2 = pt_sum(op_linear2(xx, ww, bb)) dx2, dw2, db2 = grad( zz2, [xx, ww, bb], @@ -265,12 +265,12 @@ def lop_ov(inps, outs, grads): op_lop_ov = cls_ofg([x, y_, dedy], [2.0 * y_ * (1.0 - y_) * dedy]) xx = vector() - yy1 = at_sum(sigmoid(xx)) + yy1 = pt_sum(sigmoid(xx)) gyy1 = 2.0 * grad(yy1, xx) for ov in [lop_ov, op_lop_ov]: op = cls_ofg([x], [y], lop_overrides=ov) - yy2 = at_sum(op(xx)) + yy2 = pt_sum(op(xx)) gyy2 = grad(yy2, xx) fn = function([xx], [gyy1, gyy2]) @@ -340,7 +340,7 @@ def f1(x, y): del x # but we know how to backpropagate for x for some reasons # and we don't care about the gradient wrt y. 
- return y + at_round(y) + return y + pt_round(y) def f1_back(inputs, output_gradients): return [output_gradients[0], disconnected_type()] @@ -474,7 +474,7 @@ def test_compute_test_value(self): def test_make_node_shared(self): """Make sure we can provide `OpFromGraph.make_node` new shared inputs and get a valid `OpFromGraph`.""" - x = at.scalar("x") + x = pt.scalar("x") y = shared(1.0, name="y") test_ofg = OpFromGraph([x], [x + y], on_unused_input="ignore") @@ -503,26 +503,26 @@ def test_make_node_shared(self): def test_shared_with_constant_input(self): """Make sure that a constant input can be given to an `OpFromGraph` instance.""" - x = at.scalar("x") + x = pt.scalar("x") y = shared(1.0, name="y") test_ofg = OpFromGraph([x], [x + y]) assert test_ofg.shared_inputs == [y] - out = test_ofg(at.as_tensor(1.0, dtype=config.floatX)) + out = test_ofg(pt.as_tensor(1.0, dtype=config.floatX)) out_fn = function([], out) assert np.array_equal(out_fn(), 2.0) def test_missing_input(self): - x = at.lscalar("x") + x = pt.lscalar("x") with pytest.raises(MissingInputError): OpFromGraph([], [x]) def test_shared_to_nonshared_input(self): """Make sure that shared variables can be replaced with non-shared variables.""" - x = at.scalar("x") + x = pt.scalar("x") y = shared(1.0, name="y") test_ofg = OpFromGraph([], [y]) diff --git a/tests/compile/test_debugmode.py b/tests/compile/test_debugmode.py index 0719f093be..b1f8954fef 100644 --- a/tests/compile/test_debugmode.py +++ b/tests/compile/test_debugmode.py @@ -3,7 +3,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile.debugmode import ( BadDestroyMap, BadThunkOutput, @@ -40,8 +40,8 @@ def __init__(self, py_offset): self.py_offset = py_offset def make_node(self, a, b): - a = at.as_tensor_variable(a) - b = at.as_tensor_variable(b) + a = pt.as_tensor_variable(a) + b = pt.as_tensor_variable(b) assert a.type.dtype == "float64" assert a.type.dtype == b.type.dtype assert a.type.ndim == 1 @@ -124,7 +124,7 @@ def __init__(self, behaviour): self.behaviour = behaviour def make_node(self, a): - a_ = at.as_tensor_variable(a) + a_ = pt.as_tensor_variable(a) r = Apply(self, [a_], [a_.type()]) return r @@ -267,7 +267,7 @@ def insert_bigger_b_add(fgraph, node): if node.op == add: inputs = list(node.inputs) if inputs[-1].owner is None: - inputs[-1] = at.concatenate((inputs[-1], inputs[-1])) + inputs[-1] = pt.concatenate((inputs[-1], inputs[-1])) return [node.op(*inputs)] return False @@ -611,8 +611,8 @@ class BrokenCImplementationAdd(COp): __props__ = () def make_node(self, a, b): - a = at.as_tensor_variable(a) - b = at.as_tensor_variable(b) + a = pt.as_tensor_variable(a) + b = pt.as_tensor_variable(b) assert a.type.dtype == "float32" assert a.type.dtype == b.type.dtype assert a.type.ndim == 2 @@ -709,7 +709,7 @@ class VecAsRowAndCol(Op): def make_node(self, v): if not isinstance(v, Variable): - v = at.as_tensor_variable(v) + v = pt.as_tensor_variable(v) assert v.type.ndim == 1 type_class = type(v.type) out_r_type = type_class(dtype=v.dtype, shape=(1, None)) diff --git a/tests/compile/test_misc.py b/tests/compile/test_misc.py index f84454ede6..b9b1d4a8fc 100644 --- a/tests/compile/test_misc.py +++ b/tests/compile/test_misc.py @@ -4,7 +4,7 @@ from pytensor.compile.sharedvalue import shared from pytensor.gradient import grad from pytensor.tensor.math import dot, sigmoid -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.type import dvector @@ -35,7 
+35,7 @@ def __init__( self.hidden = sigmoid(dot(self.w1, self.input)) self.output = dot(self.w2, self.hidden) - self.cost = at_sum((self.output - self.target) ** 2) + self.cost = pt_sum((self.output - self.target) ** 2) self.sgd_updates = { self.w1: self.w1 - self.lr * grad(self.cost, self.w1), diff --git a/tests/compile/test_nanguardmode.py b/tests/compile/test_nanguardmode.py index 812da9fed5..1070dca0d5 100644 --- a/tests/compile/test_nanguardmode.py +++ b/tests/compile/test_nanguardmode.py @@ -7,7 +7,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile import shared from pytensor.compile.function import function from pytensor.compile.nanguardmode import NanGuardMode @@ -61,7 +61,7 @@ def test_NanGuardMode(): biga = np.tile(np.asarray(1e20).astype(config.floatX), (3, 4, 5)) x = tensor3() - y = x[:, at.arange(2), at.arange(2), None] + y = x[:, pt.arange(2), pt.arange(2), None] fun = function([x], y, mode=NanGuardMode(nan_is_error=True, inf_is_error=True)) fun(a) # normal values try: diff --git a/tests/compile/test_profiling.py b/tests/compile/test_profiling.py index 7c63909e8d..fc2fc8efb1 100644 --- a/tests/compile/test_profiling.py +++ b/tests/compile/test_profiling.py @@ -5,7 +5,7 @@ import numpy as np -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile import ProfileStats from pytensor.compile.function import function from pytensor.configdefaults import config @@ -28,7 +28,7 @@ def test_profiling(self): x = [fvector("val%i" % i) for i in range(3)] z = [] - z += [at.outer(x[i], x[i + 1]).sum(axis=1) for i in range(len(x) - 1)] + z += [pt.outer(x[i], x[i + 1]).sum(axis=1) for i in range(len(x) - 1)] z += [x[i] + x[i + 1] for i in range(len(x) - 1)] p = ProfileStats(False, gpu_checks=False) @@ -84,7 +84,7 @@ def test_ifelse(self): a, b = scalars("a", "b") x, y = scalars("x", "y") - z = ifelse(at.lt(a, b), x * 2, y * 2) + z = ifelse(pt.lt(a, b), x * 2, y * 2) p = ProfileStats(False, gpu_checks=False) diff --git a/tests/d3viz/models.py b/tests/d3viz/models.py index 3329242ab1..334220d417 100644 --- a/tests/d3viz/models.py +++ b/tests/d3viz/models.py @@ -1,6 +1,6 @@ import numpy as np -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import shared from pytensor.compile.builders import OpFromGraph from pytensor.tensor.special import softmax @@ -21,11 +21,11 @@ def __init__(self, nfeatures=100, noutputs=10, nhiddens=50, rng=None): x = dmatrix("x") wh = shared(self.rng.normal(0, 1, (nfeatures, nhiddens)), borrow=True) bh = shared(np.zeros(nhiddens), borrow=True) - h = at.sigmoid(at.dot(x, wh) + bh) + h = pt.sigmoid(pt.dot(x, wh) + bh) wy = shared(self.rng.normal(0, 1, (nhiddens, noutputs))) by = shared(np.zeros(noutputs), borrow=True) - y = softmax(at.dot(h, wy) + by, axis=-1) + y = softmax(pt.dot(h, wy) + by, axis=-1) self.inputs = [x] self.outputs = [y] @@ -46,7 +46,7 @@ def __init__(self): class Ofg: def __init__(self): x, y, z = scalars("xyz") - e = at.sigmoid((x + y + z) ** 2) + e = pt.sigmoid((x + y + z) ** 2) op = OpFromGraph([x, y, z], [e]) e2 = op(x, y, z) + op(z, y, x) @@ -57,7 +57,7 @@ def __init__(self): class OfgSimple: def __init__(self): x, y, z = scalars("xyz") - e = at.sigmoid((x + y + z) ** 2) + e = pt.sigmoid((x + y + z) ** 2) op = OpFromGraph([x, y, z], [e]) e2 = op(x, y, z) diff --git a/tests/graph/rewriting/test_kanren.py b/tests/graph/rewriting/test_kanren.py index 7c2bced1de..1b5ffb1564 100644 --- a/tests/graph/rewriting/test_kanren.py +++ 
b/tests/graph/rewriting/test_kanren.py @@ -8,7 +8,7 @@ from kanren.core import lall from unification import var, vars -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.graph.basic import Apply from pytensor.graph.fg import FunctionGraph from pytensor.graph.op import Op @@ -36,37 +36,37 @@ def clear_assoccomm(): def test_kanren_basic(): - A_at = at.matrix("A") - x_at = at.vector("x") + A_pt = pt.matrix("A") + x_pt = pt.vector("x") - y_at = at.dot(A_at, x_at) + y_pt = pt.dot(A_pt, x_pt) q = var() - res = list(run(None, q, eq(y_at, etuple(_dot, q, x_at)))) + res = list(run(None, q, eq(y_pt, etuple(_dot, q, x_pt)))) - assert res == [A_at] + assert res == [A_pt] def test_KanrenRelationSub_filters(): - x_at = at.vector("x") - y_at = at.vector("y") - z_at = at.vector("z") - A_at = at.matrix("A") + x_pt = pt.vector("x") + y_pt = pt.vector("y") + z_pt = pt.vector("z") + A_pt = pt.matrix("A") fact(commutative, _dot) - fact(commutative, at.add) - fact(associative, at.add) + fact(commutative, pt.add) + fact(associative, pt.add) - Z_at = A_at.dot((x_at + y_at) + z_at) + Z_pt = A_pt.dot((x_pt + y_pt) + z_pt) - fgraph = FunctionGraph(outputs=[Z_at], clone=False) + fgraph = FunctionGraph(outputs=[Z_pt], clone=False) def distributes(in_lv, out_lv): A_lv, x_lv, y_lv, z_lv = vars(4) return lall( # lhs == A * (x + y + z) eq_assoccomm( - etuple(_dot, A_lv, etuple(at.add, x_lv, etuple(at.add, y_lv, z_lv))), + etuple(_dot, A_lv, etuple(pt.add, x_lv, etuple(pt.add, y_lv, z_lv))), in_lv, ), # This relation does nothing but provide us with a means of @@ -79,9 +79,9 @@ def results_filter(results): _results = [eval_if_etuple(v) for v in results] # Make sure that at least a couple permutations are present - assert (A_at, x_at, y_at, z_at) in _results - assert (A_at, y_at, x_at, z_at) in _results - assert (A_at, z_at, x_at, y_at) in _results + assert (A_pt, x_pt, y_pt, z_pt) in _results + assert (A_pt, y_pt, x_pt, z_pt) in _results + assert (A_pt, z_pt, x_pt, y_pt) in _results return None @@ -121,15 +121,15 @@ def relation(in_lv, out_lv): def test_KanrenRelationSub_dot(): """Make sure we can run miniKanren "optimizations" over a graph until a fixed-point/normal-form is reached.""" - x_at = at.vector("x") - c_at = at.vector("c") - d_at = at.vector("d") - A_at = at.matrix("A") - B_at = at.matrix("B") + x_pt = pt.vector("x") + c_pt = pt.vector("c") + d_pt = pt.vector("d") + A_pt = pt.matrix("A") + B_pt = pt.matrix("B") - Z_at = A_at.dot(x_at + B_at.dot(c_at + d_at)) + Z_pt = A_pt.dot(x_pt + B_pt.dot(c_pt + d_pt)) - fgraph = FunctionGraph(outputs=[Z_at], clone=False) + fgraph = FunctionGraph(outputs=[Z_pt], clone=False) assert isinstance(fgraph.outputs[0].owner.op, Dot) @@ -137,13 +137,13 @@ def distributes(in_lv, out_lv): return lall( # lhs == A * (x + b) eq( - etuple(_dot, var("A"), etuple(at.add, var("x"), var("b"))), + etuple(_dot, var("A"), etuple(pt.add, var("x"), var("b"))), in_lv, ), # rhs == A * x + A * b eq( etuple( - at.add, + pt.add, etuple(_dot, var("A"), var("x")), etuple(_dot, var("A"), var("b")), ), @@ -158,10 +158,10 @@ def distributes(in_lv, out_lv): fgraph_opt = rewrite_graph(fgraph, custom_rewrite=distribute_opt) (expr_opt,) = fgraph_opt.outputs - assert expr_opt.owner.op == at.add + assert expr_opt.owner.op == pt.add assert isinstance(expr_opt.owner.inputs[0].owner.op, Dot) - assert fgraph_opt.inputs[0] is A_at + assert fgraph_opt.inputs[0] is A_pt assert expr_opt.owner.inputs[0].owner.inputs[0].name == "A" - assert expr_opt.owner.inputs[1].owner.op == at.add + assert 
expr_opt.owner.inputs[1].owner.op == pt.add assert isinstance(expr_opt.owner.inputs[1].owner.inputs[0].owner.op, Dot) assert isinstance(expr_opt.owner.inputs[1].owner.inputs[1].owner.op, Dot) diff --git a/tests/graph/rewriting/test_unify.py b/tests/graph/rewriting/test_unify.py index 088939d0e7..a152fcee17 100644 --- a/tests/graph/rewriting/test_unify.py +++ b/tests/graph/rewriting/test_unify.py @@ -7,8 +7,8 @@ from unification import reify, unify, var from unification.variable import Var -import pytensor.scalar as aes -import pytensor.tensor as at +import pytensor.scalar as ps +import pytensor.tensor as pt from pytensor.graph.basic import Apply, Constant, equal_computations from pytensor.graph.op import Op from pytensor.graph.rewriting.unify import ConstrainedVar, convert_strs_to_vars @@ -23,7 +23,7 @@ def __init__(self, a): self.a = a def make_node(self, *inputs): - return Apply(self, list(inputs), [at.vector()]) + return Apply(self, list(inputs), [pt.vector()]) def perform(self, node, inputs, outputs): raise NotImplementedError() @@ -34,7 +34,7 @@ def __init__(self, a): self.a = a def make_node(self, *inputs): - return Apply(self, list(inputs), [at.vector()]) + return Apply(self, list(inputs), [pt.vector()]) def perform(self, node, inputs, outputs): raise NotImplementedError() @@ -49,22 +49,22 @@ def __hash__(self): def test_cons(): - x_at = at.vector("x") - y_at = at.vector("y") + x_pt = pt.vector("x") + y_pt = pt.vector("y") - z_at = x_at + y_at + z_pt = x_pt + y_pt - res = car(z_at) - assert res == z_at.owner.op + res = car(z_pt) + assert res == z_pt.owner.op - res = cdr(z_at) - assert res == [x_at, y_at] + res = cdr(z_pt) + assert res == [x_pt, y_pt] with pytest.raises(ConsError): - car(x_at) + car(x_pt) with pytest.raises(ConsError): - cdr(x_at) + cdr(x_pt) op1 = CustomOp(1) @@ -84,44 +84,44 @@ def test_cons(): with pytest.raises(ConsError): cdr(op1_np) - atype_at = aes.float64 - car_res = car(atype_at) - cdr_res = cdr(atype_at) - assert car_res is type(atype_at) - assert cdr_res == [atype_at.dtype] + atype_pt = ps.float64 + car_res = car(atype_pt) + cdr_res = cdr(atype_pt) + assert car_res is type(atype_pt) + assert cdr_res == [atype_pt.dtype] - atype_at = at.lvector - car_res = car(atype_at) - cdr_res = cdr(atype_at) - assert car_res is type(atype_at) - assert cdr_res == [atype_at.dtype, atype_at.shape] + atype_pt = pt.lvector + car_res = car(atype_pt) + cdr_res = cdr(atype_pt) + assert car_res is type(atype_pt) + assert cdr_res == [atype_pt.dtype, atype_pt.shape] def test_etuples(): - x_at = at.vector("x") - y_at = at.vector("y") + x_pt = pt.vector("x") + y_pt = pt.vector("y") - z_at = etuple(x_at, y_at) + z_pt = etuple(x_pt, y_pt) - res = apply(at.add, z_at) + res = apply(pt.add, z_pt) - assert res.owner.op == at.add - assert res.owner.inputs == [x_at, y_at] + assert res.owner.op == pt.add + assert res.owner.inputs == [x_pt, y_pt] - w_at = etuple(at.add, x_at, y_at) + w_pt = etuple(pt.add, x_pt, y_pt) - res = w_at.evaled_obj - assert res.owner.op == at.add - assert res.owner.inputs == [x_at, y_at] + res = w_pt.evaled_obj + assert res.owner.op == pt.add + assert res.owner.inputs == [x_pt, y_pt] # This `Op` doesn't expand into an `etuple` (i.e. 
it's "atomic") op1_np = CustomOpNoProps(1) - res = apply(op1_np, z_at) + res = apply(op1_np, z_pt) assert res.owner.op == op1_np - q_at = op1_np(x_at, y_at) - res = etuplize(q_at) + q_pt = op1_np(x_pt, y_pt) + res = etuplize(q_pt) assert res[0] == op1_np with pytest.raises(TypeError): @@ -136,22 +136,22 @@ def perform(self, node, inputs, outputs): outputs[0] = np.array(inputs[0]) outputs[1] = np.array(inputs[0]) - x_at = at.vector("x") + x_pt = pt.vector("x") op1_np = MyMultiOutOp() - res = apply(op1_np, etuple(x_at)) + res = apply(op1_np, etuple(x_pt)) assert len(res) == 2 assert res[0].owner.op == op1_np assert res[1].owner.op == op1_np def test_unify_Variable(): - x_at = at.vector("x") - y_at = at.vector("y") + x_pt = pt.vector("x") + y_pt = pt.vector("y") - z_at = x_at + y_at + z_pt = x_pt + y_pt # `Variable`, `Variable` - s = unify(z_at, z_at) + s = unify(z_pt, z_pt) assert s == {} # These `Variable`s have no owners @@ -164,48 +164,48 @@ def test_unify_Variable(): assert s is False op_lv = var() - z_pat_et = etuple(op_lv, x_at, y_at) + z_ppt_et = etuple(op_lv, x_pt, y_pt) # `Variable`, `ExpressionTuple` - s = unify(z_at, z_pat_et, {}) + s = unify(z_pt, z_ppt_et, {}) assert op_lv in s - assert s[op_lv] == z_at.owner.op + assert s[op_lv] == z_pt.owner.op - res = reify(z_pat_et, s) + res = reify(z_ppt_et, s) assert isinstance(res, ExpressionTuple) - assert equal_computations([res.evaled_obj], [z_at]) + assert equal_computations([res.evaled_obj], [z_pt]) - z_et = etuple(at.add, x_at, y_at) + z_et = etuple(pt.add, x_pt, y_pt) # `ExpressionTuple`, `ExpressionTuple` - s = unify(z_et, z_pat_et, {}) + s = unify(z_et, z_ppt_et, {}) assert op_lv in s assert s[op_lv] == z_et[0] - res = reify(z_pat_et, s) + res = reify(z_ppt_et, s) assert isinstance(res, ExpressionTuple) assert equal_computations([res.evaled_obj], [z_et.evaled_obj]) # `ExpressionTuple`, `Variable` - s = unify(z_et, x_at, {}) + s = unify(z_et, x_pt, {}) assert s is False # This `Op` doesn't expand into an `ExpressionTuple` op1_np = CustomOpNoProps(1) - q_at = op1_np(x_at, y_at) + q_pt = op1_np(x_pt, y_pt) a_lv = var() b_lv = var() # `Variable`, `ExpressionTuple` - s = unify(q_at, etuple(op1_np, a_lv, b_lv)) + s = unify(q_pt, etuple(op1_np, a_lv, b_lv)) - assert s[a_lv] == x_at - assert s[b_lv] == y_at + assert s[a_lv] == x_pt + assert s[b_lv] == y_pt def test_unify_Op(): @@ -237,11 +237,11 @@ def test_unify_Op(): def test_unify_Constant(): # Make sure `Constant` unification works - c1_at = at.as_tensor(np.r_[1, 2]) - c2_at = at.as_tensor(np.r_[1, 2]) + c1_pt = pt.as_tensor(np.r_[1, 2]) + c2_pt = pt.as_tensor(np.r_[1, 2]) # `Constant`, `Constant` - s = unify(c1_at, c2_at) + s = unify(c1_pt, c2_pt) assert s == {} @@ -302,18 +302,18 @@ def test_ConstrainedVar(): s = unify(x_lv, cvar, s_orig) assert s is False - x_at = at.vector("x") - y_at = at.vector("y") + x_pt = pt.vector("x") + y_pt = pt.vector("y") op1_np = CustomOpNoProps(1) - r_at = etuple(op1_np, x_at, y_at) + r_pt = etuple(op1_np, x_pt, y_pt) def constraint(x): return isinstance(x, tuple) a_lv = ConstrainedVar(constraint) - res = reify(etuple(op1_np, a_lv), {a_lv: r_at}) + res = reify(etuple(op1_np, a_lv), {a_lv: r_pt}) - assert res[1] == r_at + assert res[1] == r_pt def test_convert_strs_to_vars(): @@ -321,18 +321,18 @@ def test_convert_strs_to_vars(): assert isinstance(res, Var) assert res.token == "a" - x_at = at.vector() - y_at = at.vector() - res = convert_strs_to_vars((("a", x_at), y_at)) - assert res == etuple(etuple(var("a"), x_at), y_at) + x_pt = pt.vector() + y_pt = 
pt.vector() + res = convert_strs_to_vars((("a", x_pt), y_pt)) + assert res == etuple(etuple(var("a"), x_pt), y_pt) def constraint(x): return isinstance(x, str) res = convert_strs_to_vars( - (({"pattern": "a", "constraint": constraint}, x_at), y_at) + (({"pattern": "a", "constraint": constraint}, x_pt), y_pt) ) - assert res == etuple(etuple(ConstrainedVar(constraint, "a"), x_at), y_at) + assert res == etuple(etuple(ConstrainedVar(constraint, "a"), x_pt), y_pt) # Make sure constrained logic variables are the same across distinct uses # of their string names diff --git a/tests/graph/test_basic.py b/tests/graph/test_basic.py index 830def5355..857880048b 100644 --- a/tests/graph/test_basic.py +++ b/tests/graph/test_basic.py @@ -5,7 +5,7 @@ import pytest from pytensor import shared -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.graph.basic import ( Apply, NominalVariable, @@ -164,7 +164,7 @@ def test_constant(self): _, new = clone([r1, r2, r5], node.outputs, False) new_node = new[0].owner new_node.inputs = [MyVariable(7), MyVariable(8)] - c1 = at.constant(1.5) + c1 = pt.constant(1.5) i, o = clone([c1], [c1]) assert i[0] is c1 and o[0] is c1 @@ -334,13 +334,13 @@ def test_constant(self): # Get counter value autoname_id = next(Variable.__count__) Variable.__count__ = count(autoname_id) - r1 = at.constant(1.5) + r1 = pt.constant(1.5) assert r1.auto_name == "auto_" + str(autoname_id), ( r1.auto_name, "auto_" + str(autoname_id), ) - r3 = at.constant(1.6) + r3 = pt.constant(1.6) assert r3.auto_name == "auto_" + str(autoname_id + 1) def test_tensorvariable(self): @@ -377,14 +377,14 @@ def test_equal_computations(): equal_computations([a], [a, b]) assert equal_computations([a], [a]) - assert equal_computations([at.as_tensor(1)], [at.as_tensor(1)]) + assert equal_computations([pt.as_tensor(1)], [pt.as_tensor(1)]) assert not equal_computations([b], [a]) - assert not equal_computations([at.as_tensor(1)], [at.as_tensor(2)]) + assert not equal_computations([pt.as_tensor(1)], [pt.as_tensor(2)]) assert equal_computations([2], [2]) assert equal_computations([np.r_[2, 1]], [np.r_[2, 1]]) - assert equal_computations([np.r_[2, 1]], [at.as_tensor(np.r_[2, 1])]) - assert equal_computations([at.as_tensor(np.r_[2, 1])], [np.r_[2, 1]]) + assert equal_computations([np.r_[2, 1]], [pt.as_tensor(np.r_[2, 1])]) + assert equal_computations([pt.as_tensor(np.r_[2, 1])], [np.r_[2, 1]]) assert not equal_computations([2], [a]) assert not equal_computations([np.r_[2, 1]], [a]) @@ -564,13 +564,13 @@ def test_get_var_by_name(): def test_clone_new_inputs(): """Make sure that `Apply.clone_with_new_inputs` properly handles `Type` changes.""" - x = at.tensor(dtype=np.float64, shape=(None,)) - y = at.tensor(dtype=np.float64, shape=(1,)) + x = pt.tensor(dtype=np.float64, shape=(None,)) + y = pt.tensor(dtype=np.float64, shape=(1,)) - z = at.add(x, y) + z = pt.add(x, y) assert z.type.shape == (None,) - x_new = at.tensor(dtype=np.float64, shape=(1,)) + x_new = pt.tensor(dtype=np.float64, shape=(1,)) # The output nodes should be reconstructed, because the input types' static # shape information increased in specificity @@ -583,7 +583,7 @@ def test_clone_new_inputs(): # Now, attempt to decrease the specificity of the first input's static # shape information, but, because we're using strict conversion, we # shouldn't lose any information - z = at.add(x_new, y) + z = pt.add(x_new, y) assert z.type.shape == (1,) z_node_new = z.owner.clone_with_new_inputs([x, y], strict=True) @@ -800,7 +800,7 @@ def 
test_single_pass_per_node(self, mocker): import pytensor.graph.basic inspect = mocker.spy(pytensor.graph.basic, "variable_depends_on") - x = at.dmatrix("x") + x = pt.dmatrix("x") m = x.shape[0][None, None] f = x / m diff --git a/tests/graph/test_compute_test_value.py b/tests/graph/test_compute_test_value.py index c04a4b1722..ea59ff68f8 100644 --- a/tests/graph/test_compute_test_value.py +++ b/tests/graph/test_compute_test_value.py @@ -2,8 +2,8 @@ import pytest import pytensor -import pytensor.tensor as at -from pytensor import scalar as aes +import pytensor.tensor as pt +from pytensor import scalar as ps from pytensor.configdefaults import config from pytensor.graph import utils from pytensor.graph.basic import Apply @@ -28,7 +28,7 @@ class IncOneC(COp): __props__ = () def make_node(self, input): - input = aes.as_scalar(input) + input = ps.as_scalar(input) output = input.type() return Apply(self, [input], [output]) @@ -173,7 +173,7 @@ def test_empty_elemwise(self): assert _allclose(f(), z.tag.test_value) def test_constant(self): - x = at.constant(np.random.random((2, 3)), dtype=config.floatX) + x = pt.constant(np.random.random((2, 3)), dtype=config.floatX) y = pytensor.shared(np.random.random((3, 6)).astype(config.floatX), "y") # should work @@ -183,7 +183,7 @@ def test_constant(self): assert _allclose(f(), z.tag.test_value) # this test should fail - x = at.constant(np.random.random((2, 4)), dtype=config.floatX) + x = pt.constant(np.random.random((2, 4)), dtype=config.floatX) with pytest.raises(ValueError): dot(x, y) @@ -220,7 +220,7 @@ def fx(prior_result, A): # Symbolic description of the result result, updates = pytensor.scan( - fn=fx, outputs_info=at.ones_like(A), non_sequences=A, n_steps=k + fn=fx, outputs_info=pt.ones_like(A), non_sequences=A, n_steps=k ) # We only care about A**k, but scan has provided us with A**1 through A**k. @@ -241,7 +241,7 @@ def fx(prior_result, A): with pytest.raises(ValueError) as e: pytensor.scan( - fn=fx, outputs_info=at.ones_like(A), non_sequences=A, n_steps=k + fn=fx, outputs_info=pt.ones_like(A), non_sequences=A, n_steps=k ) assert str(e.traceback[0].path).endswith("test_compute_test_value.py") @@ -261,12 +261,12 @@ def fx(prior_result, A): with pytest.raises(ValueError): pytensor.scan( - fn=fx, outputs_info=at.ones_like(A.T), non_sequences=A, n_steps=k + fn=fx, outputs_info=pt.ones_like(A.T), non_sequences=A, n_steps=k ) with pytest.raises(ValueError, match="^could not broadcast input"): pytensor.scan( - fn=fx, outputs_info=at.ones_like(A.T), non_sequences=A, n_steps=k + fn=fx, outputs_info=pt.ones_like(A.T), non_sequences=A, n_steps=k ) def test_no_c_code(self): @@ -278,7 +278,7 @@ class IncOnePython(COp): __props__ = () def make_node(self, input): - input = aes.as_scalar(input) + input = ps.as_scalar(input) output = input.type() return Apply(self, [input], [output]) @@ -287,7 +287,7 @@ def perform(self, node, inputs, outputs): (output,) = outputs output[0] = input + 1 - i = aes.int32("i") + i = ps.int32("i") i.tag.test_value = 3 o = IncOnePython()(i) @@ -303,7 +303,7 @@ def perform(self, node, inputs, outputs): not config.cxx, reason="G++ not available, so we need to skip this test." 
) def test_no_perform(self): - i = aes.int32("i") + i = ps.int32("i") i.tag.test_value = 3 # Class IncOneC is defined outside of the TestComputeTestValue diff --git a/tests/graph/test_op.py b/tests/graph/test_op.py index 40a8c0661c..59d81ad59e 100644 --- a/tests/graph/test_op.py +++ b/tests/graph/test_op.py @@ -3,7 +3,7 @@ import pytensor import pytensor.graph.op as op -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import shared from pytensor.configdefaults import config from pytensor.graph.basic import Apply, Variable @@ -142,7 +142,7 @@ def test_test_value_ndarray(): def test_test_value_constant(): - x = at.as_tensor_variable(np.zeros((5, 5))) + x = pt.as_tensor_variable(np.zeros((5, 5))) v = op.get_test_value(x) assert np.all(v == np.zeros((5, 5))) @@ -224,11 +224,11 @@ def perform(self, node, inputs, outputs): def test_op_input_broadcastable(): # Test that we can create an op with a broadcastable subtype as input class SomeOp(pytensor.tensor.Op): - itypes = [at.dvector] - otypes = [at.dvector] + itypes = [pt.dvector] + otypes = [pt.dvector] def perform(self, *_): raise NotImplementedError() - x = at.TensorType(dtype="float64", shape=(1,))("x") - assert SomeOp()(x).type == at.dvector + x = pt.TensorType(dtype="float64", shape=(1,))("x") + assert SomeOp()(x).type == pt.dvector diff --git a/tests/link/c/test_cmodule.py b/tests/link/c/test_cmodule.py index 574e224368..0e99fb0820 100644 --- a/tests/link/c/test_cmodule.py +++ b/tests/link/c/test_cmodule.py @@ -15,7 +15,7 @@ import pytest import pytensor -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile.function import function from pytensor.compile.ops import DeepCopyOp from pytensor.configdefaults import config @@ -360,7 +360,7 @@ def f_build(factor): # Some of the caching issues arise during constant folding within the # optimization passes, so we need these config changes to prevent the # exceptions from being caught - a = at.vector() + a = pt.vector() f = pytensor.function([a], factor * a) return f(np.array([1], dtype=config.floatX)) diff --git a/tests/link/c/test_op.py b/tests/link/c/test_op.py index c9f40bbb72..35e81d8280 100644 --- a/tests/link/c/test_op.py +++ b/tests/link/c/test_op.py @@ -8,7 +8,7 @@ import pytest import pytensor -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor.configdefaults import config from pytensor.graph.basic import Apply from pytensor.graph.utils import MethodNotDefined @@ -18,7 +18,7 @@ test_dir = Path(__file__).parent.absolute() externalcop_test_code = f""" -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.graph.basic import Apply from pytensor.link.c.params_type import ParamsType from pytensor.link.c.op import ExternalCOp @@ -44,7 +44,7 @@ def __init__(self, a, b, c): self.c = c def make_node(self, x): - x = at.as_tensor_variable(x) + x = pt.as_tensor_variable(x) return Apply(self, [x], [x.type()]) def perform(self, node, inputs, output_storage, coefficients): @@ -70,7 +70,7 @@ def do_constant_folding(self, fgraph, node): # The input only serves to distinguish thunks def make_node(self, i): - return Apply(self, [i], [aes.uint64()]) + return Apply(self, [i], [ps.uint64()]) def c_support_code_struct(self, node, name): return f"npy_uint64 counter{name};" @@ -123,7 +123,7 @@ class IncOnePython(COp): __props__ = () def make_node(self, input): - input = aes.as_scalar(input) + input = ps.as_scalar(input) output = input.type() return Apply(self, [input], [output]) @@ -132,7 +132,7 @@ def 
perform(self, node, inputs, outputs): (output,) = outputs output[0] = input + 1 - i = aes.int32("i") + i = ps.int32("i") o = IncOnePython()(i) # Check that the c_code function is not implemented @@ -159,7 +159,7 @@ class IncOneC(COp): __props__ = () def make_node(self, input): - input = aes.as_scalar(input) + input = ps.as_scalar(input) output = input.type() return Apply(self, [input], [output]) @@ -171,7 +171,7 @@ def c_code(self, node, name, inputs, outputs, sub): def perform(self, *args, **kwargs): raise NotImplementedError("No Python implementation available.") - i = aes.int32("i") + i = ps.int32("i") o = IncOneC()(i) # Check that the perform function is not implemented diff --git a/tests/link/c/test_params_type.py b/tests/link/c/test_params_type.py index 6ca9506f4f..a7aa76a3a7 100644 --- a/tests/link/c/test_params_type.py +++ b/tests/link/c/test_params_type.py @@ -2,7 +2,7 @@ import pytest import pytensor -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.graph.basic import Apply from pytensor.link.c.op import COp, ExternalCOp from pytensor.link.c.params_type import Params, ParamsType @@ -28,7 +28,7 @@ def __init__(self, a, b, c): self.c = c def make_node(self, x): - x = at.as_tensor_variable(x) + x = pt.as_tensor_variable(x) return Apply(self, [x], [x.type()]) def perform(self, node, inputs, output_storage): @@ -115,7 +115,7 @@ def __init__(self, a, b, c): self.c = c def make_node(self, x): - x = at.as_tensor_variable(x) + x = pt.as_tensor_variable(x) return Apply(self, [x], [x.type()]) def perform(self, node, inputs, output_storage): diff --git a/tests/link/c/test_type.py b/tests/link/c/test_type.py index d12570351f..6ff7af39fd 100644 --- a/tests/link/c/test_type.py +++ b/tests/link/c/test_type.py @@ -4,7 +4,7 @@ import pytest import pytensor -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor.graph.basic import Apply from pytensor.link.c.op import COp from pytensor.link.c.type import CDataType, CEnumType, EnumList, EnumType @@ -115,7 +115,7 @@ def get_params(self, node): return self.op_chosen def make_node(self, a, b): - return Apply(self, [aes.as_scalar(a), aes.as_scalar(b)], [aes.float64()]) + return Apply(self, [ps.as_scalar(a), ps.as_scalar(b)], [ps.float64()]) def perform(self, node, inputs, outputs): op = self.params_type.filter(self.get_params(node)) @@ -190,7 +190,7 @@ def get_params(self, node): return self.python_value def make_node(self): - return Apply(self, [], [aes.uint32()]) + return Apply(self, [], [ps.uint32()]) def perform(self, *args, **kwargs): raise NotImplementedError() @@ -263,8 +263,8 @@ def test_enum_class(self): ) def test_op_with_enumlist(self): - a = aes.int32() - b = aes.int32() + a = ps.int32() + b = ps.int32() c_add = MyOpEnumList("+")(a, b) c_sub = MyOpEnumList("-")(a, b) c_multiply = MyOpEnumList("*")(a, b) diff --git a/tests/link/jax/test_elemwise.py b/tests/link/jax/test_elemwise.py index e0002d3873..0f08944814 100644 --- a/tests/link/jax/test_elemwise.py +++ b/tests/link/jax/test_elemwise.py @@ -3,15 +3,15 @@ import scipy.special import pytensor -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile import get_mode from pytensor.configdefaults import config from pytensor.graph.fg import FunctionGraph from pytensor.graph.op import get_test_value -from pytensor.tensor import elemwise as at_elemwise -from pytensor.tensor.math import all as at_all +from pytensor.tensor import elemwise as pt_elemwise +from pytensor.tensor.math import all as pt_all from 
pytensor.tensor.math import prod -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.special import SoftmaxGrad, log_softmax, softmax from pytensor.tensor.type import matrix, tensor, vector, vectors from tests.link.jax.test_basic import compare_jax_and_py @@ -23,59 +23,59 @@ def test_elemwise_runtime_broadcast(): def test_jax_Dimshuffle(): - a_at = matrix("a") + a_pt = matrix("a") - x = a_at.T - x_fg = FunctionGraph([a_at], [x]) + x = a_pt.T + x_fg = FunctionGraph([a_pt], [x]) compare_jax_and_py(x_fg, [np.c_[[1.0, 2.0], [3.0, 4.0]].astype(config.floatX)]) - x = a_at.dimshuffle([0, 1, "x"]) - x_fg = FunctionGraph([a_at], [x]) + x = a_pt.dimshuffle([0, 1, "x"]) + x_fg = FunctionGraph([a_pt], [x]) compare_jax_and_py(x_fg, [np.c_[[1.0, 2.0], [3.0, 4.0]].astype(config.floatX)]) - a_at = tensor(dtype=config.floatX, shape=(None, 1)) - x = a_at.dimshuffle((0,)) - x_fg = FunctionGraph([a_at], [x]) + a_pt = tensor(dtype=config.floatX, shape=(None, 1)) + x = a_pt.dimshuffle((0,)) + x_fg = FunctionGraph([a_pt], [x]) compare_jax_and_py(x_fg, [np.c_[[1.0, 2.0, 3.0, 4.0]].astype(config.floatX)]) - a_at = tensor(dtype=config.floatX, shape=(None, 1)) - x = at_elemwise.DimShuffle([False, True], (0,))(a_at) - x_fg = FunctionGraph([a_at], [x]) + a_pt = tensor(dtype=config.floatX, shape=(None, 1)) + x = pt_elemwise.DimShuffle([False, True], (0,))(a_pt) + x_fg = FunctionGraph([a_pt], [x]) compare_jax_and_py(x_fg, [np.c_[[1.0, 2.0, 3.0, 4.0]].astype(config.floatX)]) def test_jax_CAReduce(): - a_at = vector("a") - a_at.tag.test_value = np.r_[1, 2, 3].astype(config.floatX) + a_pt = vector("a") + a_pt.tag.test_value = np.r_[1, 2, 3].astype(config.floatX) - x = at_sum(a_at, axis=None) - x_fg = FunctionGraph([a_at], [x]) + x = pt_sum(a_pt, axis=None) + x_fg = FunctionGraph([a_pt], [x]) compare_jax_and_py(x_fg, [np.r_[1, 2, 3].astype(config.floatX)]) - a_at = matrix("a") - a_at.tag.test_value = np.c_[[1, 2, 3], [1, 2, 3]].astype(config.floatX) + a_pt = matrix("a") + a_pt.tag.test_value = np.c_[[1, 2, 3], [1, 2, 3]].astype(config.floatX) - x = at_sum(a_at, axis=0) - x_fg = FunctionGraph([a_at], [x]) + x = pt_sum(a_pt, axis=0) + x_fg = FunctionGraph([a_pt], [x]) compare_jax_and_py(x_fg, [np.c_[[1, 2, 3], [1, 2, 3]].astype(config.floatX)]) - x = at_sum(a_at, axis=1) - x_fg = FunctionGraph([a_at], [x]) + x = pt_sum(a_pt, axis=1) + x_fg = FunctionGraph([a_pt], [x]) compare_jax_and_py(x_fg, [np.c_[[1, 2, 3], [1, 2, 3]].astype(config.floatX)]) - a_at = matrix("a") - a_at.tag.test_value = np.c_[[1, 2, 3], [1, 2, 3]].astype(config.floatX) + a_pt = matrix("a") + a_pt.tag.test_value = np.c_[[1, 2, 3], [1, 2, 3]].astype(config.floatX) - x = prod(a_at, axis=0) - x_fg = FunctionGraph([a_at], [x]) + x = prod(a_pt, axis=0) + x_fg = FunctionGraph([a_pt], [x]) compare_jax_and_py(x_fg, [np.c_[[1, 2, 3], [1, 2, 3]].astype(config.floatX)]) - x = at_all(a_at) - x_fg = FunctionGraph([a_at], [x]) + x = pt_all(a_pt) + x_fg = FunctionGraph([a_pt], [x]) compare_jax_and_py(x_fg, [np.c_[[1, 2, 3], [1, 2, 3]].astype(config.floatX)]) @@ -112,10 +112,10 @@ def test_softmax_grad(axis): @pytest.mark.parametrize("size", [(10, 10), (1000, 1000), (10000, 10000)]) @pytest.mark.parametrize("axis", [0, 1]) def test_logsumexp_benchmark(size, axis, benchmark): - X = at.matrix("X") - X_max = at.max(X, axis=axis, keepdims=True) - X_max = at.switch(at.isinf(X_max), 0, X_max) - X_lse = at.log(at.sum(at.exp(X - X_max), axis=axis, keepdims=True)) + X_max + X = pt.matrix("X") + X_max = pt.max(X, 
axis=axis, keepdims=True) + X_max = pt.switch(pt.isinf(X_max), 0, X_max) + X_lse = pt.log(pt.sum(pt.exp(X - X_max), axis=axis, keepdims=True)) + X_max rng = np.random.default_rng(23920) X_val = rng.normal(size=size) @@ -133,7 +133,7 @@ def test_logsumexp_benchmark(size, axis, benchmark): def test_multiple_input_multiply(): x, y, z = vectors("xyz") - out = at.mul(x, y, z) + out = pt.mul(x, y, z) fg = FunctionGraph(outputs=[out], clone=False) compare_jax_and_py(fg, [[1.5], [2.5], [3.5]]) diff --git a/tests/link/jax/test_extra_ops.py b/tests/link/jax/test_extra_ops.py index 78abd671b8..c9920b31cc 100644 --- a/tests/link/jax/test_extra_ops.py +++ b/tests/link/jax/test_extra_ops.py @@ -2,11 +2,11 @@ import pytest from packaging.version import parse as version_parse -import pytensor.tensor.basic as at +import pytensor.tensor.basic as ptb from pytensor.configdefaults import config from pytensor.graph.fg import FunctionGraph from pytensor.graph.op import get_test_value -from pytensor.tensor import extra_ops as at_extra_ops +from pytensor.tensor import extra_ops as pt_extra_ops from pytensor.tensor.type import matrix from tests.link.jax.test_basic import compare_jax_and_py @@ -23,40 +23,40 @@ def test_extra_ops(): a = matrix("a") a.tag.test_value = np.arange(6, dtype=config.floatX).reshape((3, 2)) - out = at_extra_ops.cumsum(a, axis=0) + out = pt_extra_ops.cumsum(a, axis=0) fgraph = FunctionGraph([a], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) - out = at_extra_ops.cumprod(a, axis=1) + out = pt_extra_ops.cumprod(a, axis=1) fgraph = FunctionGraph([a], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) - out = at_extra_ops.diff(a, n=2, axis=1) + out = pt_extra_ops.diff(a, n=2, axis=1) fgraph = FunctionGraph([a], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) - out = at_extra_ops.repeat(a, (3, 3), axis=1) + out = pt_extra_ops.repeat(a, (3, 3), axis=1) fgraph = FunctionGraph([a], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) - c = at.as_tensor(5) + c = ptb.as_tensor(5) - out = at_extra_ops.fill_diagonal(a, c) + out = pt_extra_ops.fill_diagonal(a, c) fgraph = FunctionGraph([a], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) with pytest.raises(NotImplementedError): - out = at_extra_ops.fill_diagonal_offset(a, c, c) + out = pt_extra_ops.fill_diagonal_offset(a, c, c) fgraph = FunctionGraph([a], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) with pytest.raises(NotImplementedError): - out = at_extra_ops.Unique(axis=1)(a) + out = pt_extra_ops.Unique(axis=1)(a) fgraph = FunctionGraph([a], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) indices = np.arange(np.prod((3, 4))) - out = at_extra_ops.unravel_index(indices, (3, 4), order="C") + out = pt_extra_ops.unravel_index(indices, (3, 4), order="C") fgraph = FunctionGraph([], out) compare_jax_and_py( fgraph, [get_test_value(i) for i in fgraph.inputs], must_be_device_array=False @@ -72,21 +72,21 @@ def test_extra_ops_omni(): a.tag.test_value = np.arange(6, dtype=config.floatX).reshape((3, 2)) # This function also cannot take symbolic input. 
- c = at.as_tensor(5) - out = at_extra_ops.bartlett(c) + c = ptb.as_tensor(5) + out = pt_extra_ops.bartlett(c) fgraph = FunctionGraph([], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) multi_index = np.unravel_index(np.arange(np.prod((3, 4))), (3, 4)) - out = at_extra_ops.ravel_multi_index(multi_index, (3, 4)) + out = pt_extra_ops.ravel_multi_index(multi_index, (3, 4)) fgraph = FunctionGraph([], [out]) compare_jax_and_py( fgraph, [get_test_value(i) for i in fgraph.inputs], must_be_device_array=False ) # The inputs are "concrete", yet it still has problems? - out = at_extra_ops.Unique()( - at.as_tensor(np.arange(6, dtype=config.floatX).reshape((3, 2))) + out = pt_extra_ops.Unique()( + ptb.as_tensor(np.arange(6, dtype=config.floatX).reshape((3, 2))) ) fgraph = FunctionGraph([], [out]) compare_jax_and_py(fgraph, []) @@ -97,6 +97,6 @@ def test_unique_nonconcrete(): a = matrix("a") a.tag.test_value = np.arange(6, dtype=config.floatX).reshape((3, 2)) - out = at_extra_ops.Unique()(a) + out = pt_extra_ops.Unique()(a) fgraph = FunctionGraph([a], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) diff --git a/tests/link/jax/test_nlinalg.py b/tests/link/jax/test_nlinalg.py index 5aec8f88c2..18247abac1 100644 --- a/tests/link/jax/test_nlinalg.py +++ b/tests/link/jax/test_nlinalg.py @@ -9,10 +9,10 @@ from pytensor.graph.op import get_test_value from pytensor.graph.rewriting.db import RewriteDatabaseQuery from pytensor.link.jax import JAXLinker -from pytensor.tensor import blas as at_blas -from pytensor.tensor import nlinalg as at_nlinalg +from pytensor.tensor import blas as pt_blas +from pytensor.tensor import nlinalg as pt_nlinalg from pytensor.tensor.math import MaxAndArgmax -from pytensor.tensor.math import max as at_max +from pytensor.tensor.math import max as pt_max from pytensor.tensor.math import maximum from pytensor.tensor.type import dvector, matrix, scalar, tensor3, vector from tests.link.jax.test_basic import compare_jax_and_py @@ -31,7 +31,7 @@ def test_jax_BatchedDot(): b.tag.test_value = ( np.linspace(1, -1, 10 * 3 * 2).astype(config.floatX).reshape((10, 3, 2)) ) - out = at_blas.BatchedDot()(a, b) + out = pt_blas.BatchedDot()(a, b) fgraph = FunctionGraph([a, b], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) @@ -52,7 +52,7 @@ def test_jax_basic_multiout(): x = matrix("x") - outs = at_nlinalg.eig(x) + outs = pt_nlinalg.eig(x) out_fg = FunctionGraph([x], outs) def assert_fn(x, y): @@ -60,23 +60,23 @@ def assert_fn(x, y): compare_jax_and_py(out_fg, [X.astype(config.floatX)], assert_fn=assert_fn) - outs = at_nlinalg.eigh(x) + outs = pt_nlinalg.eigh(x) out_fg = FunctionGraph([x], outs) compare_jax_and_py(out_fg, [X.astype(config.floatX)], assert_fn=assert_fn) - outs = at_nlinalg.qr(x, mode="full") + outs = pt_nlinalg.qr(x, mode="full") out_fg = FunctionGraph([x], outs) compare_jax_and_py(out_fg, [X.astype(config.floatX)], assert_fn=assert_fn) - outs = at_nlinalg.qr(x, mode="reduced") + outs = pt_nlinalg.qr(x, mode="reduced") out_fg = FunctionGraph([x], outs) compare_jax_and_py(out_fg, [X.astype(config.floatX)], assert_fn=assert_fn) - outs = at_nlinalg.svd(x) + outs = pt_nlinalg.svd(x) out_fg = FunctionGraph([x], outs) compare_jax_and_py(out_fg, [X.astype(config.floatX)], assert_fn=assert_fn) - outs = at_nlinalg.slogdet(x) + outs = pt_nlinalg.slogdet(x) out_fg = FunctionGraph([x], outs) compare_jax_and_py(out_fg, [X.astype(config.floatX)], assert_fn=assert_fn) @@ -122,14 +122,14 @@ def 
test_tensor_basics(): fgraph = FunctionGraph([y, x], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) - out = at_max(y) + out = pt_max(y) fgraph = FunctionGraph([y], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) def test_pinv(): x = matrix("x") - x_inv = at_nlinalg.pinv(x) + x_inv = pt_nlinalg.pinv(x) fgraph = FunctionGraph([x], [x_inv]) x_np = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=config.floatX) @@ -141,7 +141,7 @@ def test_pinv_hermitian(): A_h_test = np.c_[[3, 3 + 2j], [3 - 2j, 2]] A_not_h_test = A_h_test + 0 + 1j - A_inv = at_nlinalg.pinv(A, hermitian=False) + A_inv = pt_nlinalg.pinv(A, hermitian=False) jax_fn = function([A], A_inv, mode="JAX") assert np.allclose(jax_fn(A_h_test), np.linalg.pinv(A_h_test, hermitian=False)) @@ -153,7 +153,7 @@ def test_pinv_hermitian(): jax_fn(A_not_h_test), np.linalg.pinv(A_not_h_test, hermitian=True) ) - A_inv = at_nlinalg.pinv(A, hermitian=True) + A_inv = pt_nlinalg.pinv(A, hermitian=True) jax_fn = function([A], A_inv, mode="JAX") assert np.allclose(jax_fn(A_h_test), np.linalg.pinv(A_h_test, hermitian=False)) diff --git a/tests/link/jax/test_random.py b/tests/link/jax/test_random.py index b43469c182..811e8122de 100644 --- a/tests/link/jax/test_random.py +++ b/tests/link/jax/test_random.py @@ -3,8 +3,8 @@ import scipy.stats as stats import pytensor -import pytensor.tensor as at -import pytensor.tensor.random.basic as aer +import pytensor.tensor as pt +import pytensor.tensor.random.basic as ptr from pytensor.compile.function import function from pytensor.compile.sharedvalue import SharedVariable, shared from pytensor.graph.basic import Constant @@ -46,7 +46,7 @@ def test_random_RandomStream(): def test_random_updates(rng_ctor): original_value = rng_ctor(seed=98) rng = shared(original_value, name="original_rng", borrow=False) - next_rng, x = at.random.normal(name="x", rng=rng).owner.outputs + next_rng, x = pt.random.normal(name="x", rng=rng).owner.outputs f = random_function([], [x], updates={rng: next_rng}, mode=jax_mode) assert f() != f() @@ -73,7 +73,7 @@ def test_random_updates_input_storage_order(): np.zeros(batchshape, dtype="float64"), name="inp_shared" ) - inp = at.tensor4(dtype="float64", name="inp") + inp = pt.tensor4(dtype="float64", name="inp") inp_update = inp + pt_rng.normal(size=inp.shape, loc=5, scale=1e-5) # This function replaces inp by input_shared in the update expression @@ -96,14 +96,14 @@ def test_random_updates_input_storage_order(): "rv_op, dist_params, base_size, cdf_name, params_conv", [ ( - aer.beta, + ptr.beta, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], @@ -112,14 +112,14 @@ def test_random_updates_input_storage_order(): lambda *args: args, ), ( - aer.cauchy, + ptr.cauchy, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], @@ -128,10 +128,10 @@ def test_random_updates_input_storage_order(): lambda *args: args, ), ( - aer.exponential, + ptr.exponential, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), ], @@ -140,14 +140,14 @@ def test_random_updates_input_storage_order(): lambda *args: (0, args[0]), ), ( - aer._gamma, + ptr._gamma, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dvector(), + 
pt.dvector(), np.array([0.5, 3.0], dtype=np.float64), ), ], @@ -156,14 +156,14 @@ def test_random_updates_input_storage_order(): lambda a, b: (a, 0.0, b), ), ( - aer.gumbel, + ptr.gumbel, [ set_test_value( - at.lvector(), + pt.lvector(), np.array([1, 2], dtype=np.int64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], @@ -172,24 +172,24 @@ def test_random_updates_input_storage_order(): lambda *args: args, ), ( - aer.laplace, + ptr.laplace, [ - set_test_value(at.dvector(), np.array([1.0, 2.0], dtype=np.float64)), - set_test_value(at.dscalar(), np.array(1.0, dtype=np.float64)), + set_test_value(pt.dvector(), np.array([1.0, 2.0], dtype=np.float64)), + set_test_value(pt.dscalar(), np.array(1.0, dtype=np.float64)), ], (2,), "laplace", lambda *args: args, ), ( - aer.logistic, + ptr.logistic, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], @@ -198,14 +198,14 @@ def test_random_updates_input_storage_order(): lambda *args: args, ), ( - aer.lognormal, + ptr.lognormal, [ set_test_value( - at.lvector(), + pt.lvector(), np.array([0, 0], dtype=np.int64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], @@ -214,14 +214,14 @@ def test_random_updates_input_storage_order(): lambda mu, sigma: (sigma, 0, np.exp(mu)), ), ( - aer.normal, + ptr.normal, [ set_test_value( - at.lvector(), + pt.lvector(), np.array([1, 2], dtype=np.int64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], @@ -230,14 +230,14 @@ def test_random_updates_input_storage_order(): lambda *args: args, ), ( - aer.pareto, + ptr.pareto, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dvector(), + pt.dvector(), np.array([2.0, 10.0], dtype=np.float64), ), ], @@ -246,10 +246,10 @@ def test_random_updates_input_storage_order(): lambda shape, scale: (shape, 0.0, scale), ), ( - aer.poisson, + ptr.poisson, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([100000.0, 200000.0], dtype=np.float64), ), ], @@ -258,14 +258,14 @@ def test_random_updates_input_storage_order(): lambda *args: args, ), ( - aer.randint, + ptr.randint, [ set_test_value( - at.lscalar(), + pt.lscalar(), np.array(0, dtype=np.int64), ), set_test_value( # high-value necessary since test on cdf - at.lscalar(), + pt.lscalar(), np.array(1000, dtype=np.int64), ), ], @@ -274,14 +274,14 @@ def test_random_updates_input_storage_order(): lambda *args: args, ), ( - aer.integers, + ptr.integers, [ set_test_value( - at.lscalar(), + pt.lscalar(), np.array(0, dtype=np.int64), ), set_test_value( # high-value necessary since test on cdf - at.lscalar(), + pt.lscalar(), np.array(1000, dtype=np.int64), ), ], @@ -290,25 +290,25 @@ def test_random_updates_input_storage_order(): lambda *args: args, ), ( - aer.standard_normal, + ptr.standard_normal, [], (2,), "norm", lambda *args: args, ), ( - aer.t, + ptr.t, [ set_test_value( - at.dscalar(), + pt.dscalar(), np.array(2.0, dtype=np.float64), ), set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], @@ -317,14 +317,14 @@ def test_random_updates_input_storage_order(): lambda *args: args, ), ( - aer.uniform, + ptr.uniform, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), 
set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1000.0, dtype=np.float64), ), ], @@ -333,14 +333,14 @@ def test_random_updates_input_storage_order(): lambda *args: args, ), ( - aer.halfnormal, + ptr.halfnormal, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([-1.0, 200.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1000.0, dtype=np.float64), ), ], @@ -349,14 +349,14 @@ def test_random_updates_input_storage_order(): lambda *args: args, ), ( - aer.invgamma, + ptr.invgamma, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([10.4, 2.8], dtype=np.float64), ), set_test_value( - at.dvector(), + pt.dvector(), np.array([3.4, 7.3], dtype=np.float64), ), ], @@ -365,10 +365,10 @@ def test_random_updates_input_storage_order(): lambda a, b: (a, 0, b), ), ( - aer.chisquare, + ptr.chisquare, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([2.4, 4.9], dtype=np.float64), ), ], @@ -377,18 +377,18 @@ def test_random_updates_input_storage_order(): lambda *args: args, ), ( - aer.gengamma, + ptr.gengamma, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([10.4, 2.8], dtype=np.float64), ), set_test_value( - at.dvector(), + pt.dvector(), np.array([3.4, 7.3], dtype=np.float64), ), set_test_value( - at.dvector(), + pt.dvector(), np.array([0.9, 2.0], dtype=np.float64), ), ], @@ -397,14 +397,14 @@ def test_random_updates_input_storage_order(): lambda alpha, p, lambd: (alpha / p, p, 0, lambd), ), ( - aer.wald, + ptr.wald, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([10.4, 2.8], dtype=np.float64), ), set_test_value( - at.dvector(), + pt.dvector(), np.array([4.5, 2.0], dtype=np.float64), ), ], @@ -414,14 +414,14 @@ def test_random_updates_input_storage_order(): lambda mean, scale: (mean / scale, 0, scale), ), pytest.param( - aer.vonmises, + ptr.vonmises, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([-0.5, 1.3], dtype=np.float64), ), set_test_value( - at.dvector(), + pt.dvector(), np.array([5.5, 13.0], dtype=np.float64), ), ], @@ -447,7 +447,7 @@ def test_random_RandomVariable(rv_op, dist_params, base_size, cdf_name, params_c The parameters passed to the op. 
""" - if rv_op is aer.integers: + if rv_op is ptr.integers: # Integers only accepts Generator, not RandomState rng = shared(np.random.default_rng(29402)) else: @@ -476,7 +476,7 @@ def test_random_RandomVariable(rv_op, dist_params, base_size, cdf_name, params_c @pytest.mark.parametrize("size", [(), (4,)]) def test_random_bernoulli(size): rng = shared(np.random.RandomState(123)) - g = at.random.bernoulli(0.5, size=(1000,) + size, rng=rng) + g = pt.random.bernoulli(0.5, size=(1000,) + size, rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() np.testing.assert_allclose(samples.mean(axis=0), 0.5, 1) @@ -487,7 +487,7 @@ def test_random_mvnormal(): mu = np.ones(4) cov = np.eye(4) - g = at.random.multivariate_normal(mu, cov, size=(10000,), rng=rng) + g = pt.random.multivariate_normal(mu, cov, size=(10000,), rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() np.testing.assert_allclose(samples.mean(axis=0), mu, atol=0.1) @@ -502,7 +502,7 @@ def test_random_mvnormal(): ) def test_random_dirichlet(parameter, size): rng = shared(np.random.RandomState(123)) - g = at.random.dirichlet(parameter, size=(1000,) + size, rng=rng) + g = pt.random.dirichlet(parameter, size=(1000,) + size, rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() np.testing.assert_allclose(samples.mean(axis=0), 0.5, 1) @@ -512,21 +512,21 @@ def test_random_choice(): # Elements are picked at equal frequency num_samples = 10000 rng = shared(np.random.RandomState(123)) - g = at.random.choice(np.arange(4), size=num_samples, rng=rng) + g = pt.random.choice(np.arange(4), size=num_samples, rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() np.testing.assert_allclose(np.sum(samples == 3) / num_samples, 0.25, 2) # `replace=False` produces unique results rng = shared(np.random.RandomState(123)) - g = at.random.choice(np.arange(100), replace=False, size=99, rng=rng) + g = pt.random.choice(np.arange(100), replace=False, size=99, rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() assert len(np.unique(samples)) == 99 # We can pass an array with probabilities rng = shared(np.random.RandomState(123)) - g = at.random.choice(np.arange(3), p=np.array([1.0, 0.0, 0.0]), size=10, rng=rng) + g = pt.random.choice(np.arange(3), p=np.array([1.0, 0.0, 0.0]), size=10, rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() np.testing.assert_allclose(samples, np.zeros(10)) @@ -534,7 +534,7 @@ def test_random_choice(): def test_random_categorical(): rng = shared(np.random.RandomState(123)) - g = at.random.categorical(0.25 * np.ones(4), size=(10000, 4), rng=rng) + g = pt.random.categorical(0.25 * np.ones(4), size=(10000, 4), rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() np.testing.assert_allclose(samples.mean(axis=0), 6 / 4, 1) @@ -543,7 +543,7 @@ def test_random_categorical(): def test_random_permutation(): array = np.arange(4) rng = shared(np.random.RandomState(123)) - g = at.random.permutation(array, rng=rng) + g = pt.random.permutation(array, rng=rng) g_fn = random_function([], g, mode=jax_mode) permuted = g_fn() with pytest.raises(AssertionError): @@ -553,7 +553,7 @@ def test_random_permutation(): def test_random_geometric(): rng = shared(np.random.RandomState(123)) p = np.array([0.3, 0.7]) - g = at.random.geometric(p, size=(10_000, 2), rng=rng) + g = pt.random.geometric(p, size=(10_000, 2), rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() np.testing.assert_allclose(samples.mean(axis=0), 1 / p, rtol=0.1) 
@@ -564,7 +564,7 @@ def test_negative_binomial(): rng = shared(np.random.RandomState(123)) n = np.array([10, 40]) p = np.array([0.3, 0.7]) - g = at.random.negative_binomial(n, p, size=(10_000, 2), rng=rng) + g = pt.random.negative_binomial(n, p, size=(10_000, 2), rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() np.testing.assert_allclose(samples.mean(axis=0), n * (1 - p) / p, rtol=0.1) @@ -578,7 +578,7 @@ def test_binomial(): rng = shared(np.random.RandomState(123)) n = np.array([10, 40]) p = np.array([0.3, 0.7]) - g = at.random.binomial(n, p, size=(10_000, 2), rng=rng) + g = pt.random.binomial(n, p, size=(10_000, 2), rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() np.testing.assert_allclose(samples.mean(axis=0), n * p, rtol=0.1) @@ -593,7 +593,7 @@ def test_beta_binomial(): n = np.array([10, 40]) a = np.array([1.5, 13]) b = np.array([0.5, 9]) - g = at.random.betabinom(n, a, b, size=(10_000, 2), rng=rng) + g = pt.random.betabinom(n, a, b, size=(10_000, 2), rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() np.testing.assert_allclose(samples.mean(axis=0), n * a / (a + b), rtol=0.1) @@ -611,7 +611,7 @@ def test_multinomial(): rng = shared(np.random.RandomState(123)) n = np.array([10, 40]) p = np.array([[0.3, 0.7, 0.0], [0.1, 0.4, 0.5]]) - g = at.random.multinomial(n, p, size=(10_000, 2), rng=rng) + g = pt.random.multinomial(n, p, size=(10_000, 2), rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() np.testing.assert_allclose(samples.mean(axis=0), n[..., None] * p, rtol=0.1) @@ -627,7 +627,7 @@ def test_vonmises_mu_outside_circle(): rng = shared(np.random.RandomState(123)) mu = np.array([-30, 40]) kappa = np.array([100, 10]) - g = at.random.vonmises(mu, kappa, size=(10_000, 2), rng=rng) + g = pt.random.vonmises(mu, kappa, size=(10_000, 2), rng=rng) g_fn = random_function([], g, mode=jax_mode) samples = g_fn() np.testing.assert_allclose( @@ -726,17 +726,17 @@ def test_random_concrete_shape(): """ rng = shared(np.random.RandomState(123)) - x_at = at.dmatrix() - out = at.random.normal(0, 1, size=x_at.shape, rng=rng) - jax_fn = random_function([x_at], out, mode=jax_mode) + x_pt = pt.dmatrix() + out = pt.random.normal(0, 1, size=x_pt.shape, rng=rng) + jax_fn = random_function([x_pt], out, mode=jax_mode) assert jax_fn(np.ones((2, 3))).shape == (2, 3) def test_random_concrete_shape_from_param(): rng = shared(np.random.RandomState(123)) - x_at = at.dmatrix() - out = at.random.normal(x_at, 1, rng=rng) - jax_fn = random_function([x_at], out, mode=jax_mode) + x_pt = pt.dmatrix() + out = pt.random.normal(x_pt, 1, rng=rng) + jax_fn = random_function([x_pt], out, mode=jax_mode) assert jax_fn(np.ones((2, 3))).shape == (2, 3) @@ -753,9 +753,9 @@ def test_random_concrete_shape_subtensor(): """ rng = shared(np.random.RandomState(123)) - x_at = at.dmatrix() - out = at.random.normal(0, 1, size=x_at.shape[1], rng=rng) - jax_fn = random_function([x_at], out, mode=jax_mode) + x_pt = pt.dmatrix() + out = pt.random.normal(0, 1, size=x_pt.shape[1], rng=rng) + jax_fn = random_function([x_pt], out, mode=jax_mode) assert jax_fn(np.ones((2, 3))).shape == (3,) @@ -769,18 +769,18 @@ def test_random_concrete_shape_subtensor_tuple(): """ rng = shared(np.random.RandomState(123)) - x_at = at.dmatrix() - out = at.random.normal(0, 1, size=(x_at.shape[0],), rng=rng) - jax_fn = random_function([x_at], out, mode=jax_mode) + x_pt = pt.dmatrix() + out = pt.random.normal(0, 1, size=(x_pt.shape[0],), rng=rng) + jax_fn = random_function([x_pt], out, 
mode=jax_mode) assert jax_fn(np.ones((2, 3))).shape == (2,) @pytest.mark.xfail( - reason="`size_at` should be specified as a static argument", strict=True + reason="`size_pt` should be specified as a static argument", strict=True ) def test_random_concrete_shape_graph_input(): rng = shared(np.random.RandomState(123)) - size_at = at.scalar() - out = at.random.normal(0, 1, size=size_at, rng=rng) - jax_fn = random_function([size_at], out, mode=jax_mode) + size_pt = pt.scalar() + out = pt.random.normal(0, 1, size=size_pt, rng=rng) + jax_fn = random_function([size_pt], out, mode=jax_mode) assert jax_fn(10).shape == (10,) diff --git a/tests/link/jax/test_scalar.py b/tests/link/jax/test_scalar.py index 18877a496c..68f5a0bd6c 100644 --- a/tests/link/jax/test_scalar.py +++ b/tests/link/jax/test_scalar.py @@ -1,15 +1,15 @@ import numpy as np import pytest -import pytensor.scalar.basic as aes -import pytensor.tensor as at +import pytensor.scalar.basic as ps +import pytensor.tensor as pt from pytensor.configdefaults import config from pytensor.graph.fg import FunctionGraph from pytensor.graph.op import get_test_value from pytensor.scalar.basic import Composite from pytensor.tensor import as_tensor from pytensor.tensor.elemwise import Elemwise -from pytensor.tensor.math import all as at_all +from pytensor.tensor.math import all as pt_all from pytensor.tensor.math import ( cosh, erf, @@ -46,18 +46,18 @@ def test_second(): a0 = scalar("a0") b = scalar("b") - out = aes.second(a0, b) + out = ps.second(a0, b) fgraph = FunctionGraph([a0, b], [out]) compare_jax_and_py(fgraph, [10.0, 5.0]) a1 = vector("a1") - out = at.second(a1, b) + out = pt.second(a1, b) fgraph = FunctionGraph([a1, b], [out]) compare_jax_and_py(fgraph, [np.zeros([5], dtype=config.floatX), 5.0]) a2 = matrix("a2", shape=(1, None), dtype="float64") b2 = matrix("b2", shape=(None, 1), dtype="int32") - out = at.second(a2, b2) + out = pt.second(a2, b2) fgraph = FunctionGraph([a2, b2], [out]) compare_jax_and_py( fgraph, [np.zeros((1, 3), dtype="float64"), np.ones((5, 1), dtype="int32")] @@ -66,7 +66,7 @@ def test_second(): def test_second_constant_scalar(): b = scalar("b", dtype="int") - out = at.second(0.0, b) + out = pt.second(0.0, b) fgraph = FunctionGraph([b], [out]) # Test dispatch directly as useless second is removed during compilation fn = jax_funcify(fgraph) @@ -79,7 +79,7 @@ def test_identity(): a = scalar("a") a.tag.test_value = 10 - out = aes.identity(a) + out = ps.identity(a) fgraph = FunctionGraph([a], [out]) compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs]) @@ -98,10 +98,10 @@ def test_identity(): ], ) def test_jax_Composite_singe_output(x, y, x_val, y_val): - x_s = aes.float64("x") - y_s = aes.float64("y") + x_s = ps.float64("x") + y_s = ps.float64("y") - comp_op = Elemwise(Composite([x_s, y_s], [x_s + y_s * 2 + aes.exp(x_s - y_s)])) + comp_op = Elemwise(Composite([x_s, y_s], [x_s + y_s * 2 + ps.exp(x_s - y_s)])) out = comp_op(x, y) @@ -117,7 +117,7 @@ def test_jax_Composite_singe_output(x, y, x_val, y_val): def test_jax_Composite_multi_output(): x = vector("x") - x_s = aes.float64("xs") + x_s = ps.float64("xs") outs = Elemwise(Composite(inputs=[x_s], outputs=[x_s + 1, x_s - 1]))(x) fgraph = FunctionGraph([x], outs) @@ -234,36 +234,36 @@ def test_jax_variadic_Scalar(): def test_add_scalars(): - x = at.matrix("x") + x = pt.matrix("x") size = x.shape[0] + x.shape[0] + x.shape[1] - out = at.ones(size).astype(config.floatX) + out = pt.ones(size).astype(config.floatX) out_fg = FunctionGraph([x], [out]) 
compare_jax_and_py(out_fg, [np.ones((2, 3)).astype(config.floatX)]) def test_mul_scalars(): - x = at.matrix("x") + x = pt.matrix("x") size = x.shape[0] * x.shape[0] * x.shape[1] - out = at.ones(size).astype(config.floatX) + out = pt.ones(size).astype(config.floatX) out_fg = FunctionGraph([x], [out]) compare_jax_and_py(out_fg, [np.ones((2, 3)).astype(config.floatX)]) def test_div_scalars(): - x = at.matrix("x") + x = pt.matrix("x") size = x.shape[0] // x.shape[1] - out = at.ones(size).astype(config.floatX) + out = pt.ones(size).astype(config.floatX) out_fg = FunctionGraph([x], [out]) compare_jax_and_py(out_fg, [np.ones((12, 3)).astype(config.floatX)]) def test_mod_scalars(): - x = at.matrix("x") + x = pt.matrix("x") size = x.shape[0] % x.shape[1] - out = at.ones(size).astype(config.floatX) + out = pt.ones(size).astype(config.floatX) out_fg = FunctionGraph([x], [out]) compare_jax_and_py(out_fg, [np.ones((12, 3)).astype(config.floatX)]) @@ -295,8 +295,8 @@ def test_jax_logp(): logp = (-tau * (value - mu) ** 2 + log(tau / np.pi / 2.0)) / 2.0 conditions = [sigma > 0] - alltrue = at_all([at_all(1 * val) for val in conditions]) - normal_logp = at.switch(alltrue, logp, -np.inf) + alltrue = pt_all([pt_all(1 * val) for val in conditions]) + normal_logp = pt.switch(alltrue, logp, -np.inf) fgraph = FunctionGraph([mu, tau, sigma, value], [normal_logp]) diff --git a/tests/link/jax/test_scan.py b/tests/link/jax/test_scan.py index e2f9b9425b..72725dac3d 100644 --- a/tests/link/jax/test_scan.py +++ b/tests/link/jax/test_scan.py @@ -3,7 +3,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import function, shared from pytensor.compile import get_mode from pytensor.configdefaults import config @@ -22,7 +22,7 @@ @pytest.mark.parametrize("view", [None, (-1,), slice(-2, None, None)]) def test_scan_sit_sot(view): - x0 = at.scalar("x0", dtype="float64") + x0 = pt.scalar("x0", dtype="float64") xs, _ = scan( lambda xtm1: xtm1 + 1, outputs_info=[x0], @@ -37,7 +37,7 @@ def test_scan_sit_sot(view): @pytest.mark.parametrize("view", [None, (-1,), slice(-4, -1, None)]) def test_scan_mit_sot(view): - x0 = at.vector("x0", dtype="float64", shape=(3,)) + x0 = pt.vector("x0", dtype="float64", shape=(3,)) xs, _ = scan( lambda xtm3, xtm1: xtm3 + xtm1 + 1, outputs_info=[{"initial": x0, "taps": [-3, -1]}], @@ -53,8 +53,8 @@ def test_scan_mit_sot(view): @pytest.mark.parametrize("view_x", [None, (-1,), slice(-4, -1, None)]) @pytest.mark.parametrize("view_y", [None, (-1,), slice(-4, -1, None)]) def test_scan_multiple_mit_sot(view_x, view_y): - x0 = at.vector("x0", dtype="float64", shape=(3,)) - y0 = at.vector("y0", dtype="float64", shape=(4,)) + x0 = pt.vector("x0", dtype="float64", shape=(3,)) + y0 = pt.vector("y0", dtype="float64", shape=(4,)) def step(xtm3, xtm1, ytm4, ytm2): return xtm3 + ytm4 + 1, xtm1 + ytm2 + 2 @@ -81,10 +81,10 @@ def step(xtm3, xtm1, ytm4, ytm2): def test_scan_nit_sot(view): rng = np.random.default_rng(seed=49) - xs = at.vector("x0", dtype="float64", shape=(10,)) + xs = pt.vector("x0", dtype="float64", shape=(10,)) ys, _ = scan( - lambda x: at.exp(x), + lambda x: pt.exp(x), outputs_info=[None], sequences=[xs], ) @@ -105,13 +105,13 @@ def test_scan_nit_sot(view): @pytest.mark.xfail(raises=NotImplementedError) def test_scan_mit_mot(): - xs = at.vector("xs", shape=(10,)) + xs = pt.vector("xs", shape=(10,)) ys, _ = scan( lambda xtm2, xtm1: (xtm2 + xtm1), outputs_info=[{"initial": xs, "taps": [-2, -1]}], n_steps=10, ) - grads_wrt_xs = at.grad(ys.sum(), 
wrt=xs) + grads_wrt_xs = pt.grad(ys.sum(), wrt=xs) fg = FunctionGraph([xs], [grads_wrt_xs]) compare_jax_and_py(fg, [np.arange(10)]) @@ -188,7 +188,7 @@ def update_fn(rng): def test_scan_while(): xs, _ = scan( lambda x: (x + 1, until(x < 10)), - outputs_info=[at.zeros(())], + outputs_info=[pt.zeros(())], n_steps=100, ) @@ -287,7 +287,7 @@ def seir_one_step(ct0, dt0, st0, et0, it0, logp_c, logp_d, beta, gamma, delta): def test_scan_mitsot_with_nonseq(): - a_at = scalar("a") + a_pt = scalar("a") def input_step_fn(y_tm1, y_tm3, a): y_tm1.name = "y_tm1" @@ -296,24 +296,24 @@ def input_step_fn(y_tm1, y_tm3, a): res.name = "y_t" return res - y_scan_at, _ = scan( + y_scan_pt, _ = scan( fn=input_step_fn, outputs_info=[ { - "initial": at.as_tensor_variable( + "initial": pt.as_tensor_variable( np.r_[-1.0, 1.3, 0.0].astype(config.floatX) ), "taps": [-1, -3], }, ], - non_sequences=[a_at], + non_sequences=[a_pt], n_steps=10, name="y_scan", ) - y_scan_at.name = "y" - y_scan_at.owner.inputs[0].name = "y_all" + y_scan_pt.name = "y" + y_scan_pt.owner.inputs[0].name = "y_all" - out_fg = FunctionGraph([a_at], [y_scan_at]) + out_fg = FunctionGraph([a_pt], [y_scan_pt]) test_input_vals = [np.array(10.0).astype(config.floatX)] compare_jax_and_py(out_fg, test_input_vals) @@ -353,8 +353,8 @@ def test_nd_scan_sit_sot_with_seq(): n_steps = 3 k = 3 - x = at.matrix("x0", shape=(n_steps, k)) - A = at.matrix("A", shape=(k, k)) + x = pt.matrix("x0", shape=(n_steps, k)) + A = pt.matrix("A", shape=(k, k)) # Must specify mode = JAX for the inner func to avoid a GEMM Op in the JAX graph xs, _ = scan( @@ -374,9 +374,9 @@ def test_nd_scan_sit_sot_with_seq(): def test_nd_scan_mit_sot(): - x0 = at.matrix("x0", shape=(3, 3)) - A = at.matrix("A", shape=(3, 3)) - B = at.matrix("B", shape=(3, 3)) + x0 = pt.matrix("x0", shape=(3, 3)) + A = pt.matrix("A", shape=(3, 3)) + B = pt.matrix("B", shape=(3, 3)) # Must specify mode = JAX for the inner func to avoid a GEMM Op in the JAX graph xs, _ = scan( @@ -397,8 +397,8 @@ def test_nd_scan_mit_sot(): def test_nd_scan_sit_sot_with_carry(): - x0 = at.vector("x0", shape=(3,)) - A = at.matrix("A", shape=(3, 3)) + x0 = pt.vector("x0", shape=(3,)) + A = pt.matrix("A", shape=(3, 3)) def step(x, A): return A @ x, x.sum() diff --git a/tests/link/jax/test_shape.py b/tests/link/jax/test_shape.py index 4775a5027e..6eec401578 100644 --- a/tests/link/jax/test_shape.py +++ b/tests/link/jax/test_shape.py @@ -1,7 +1,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile.ops import DeepCopyOp, ViewOp from pytensor.configdefaults import config from pytensor.graph.fg import FunctionGraph @@ -12,28 +12,28 @@ def test_jax_shape_ops(): x_np = np.zeros((20, 3)) - x = Shape()(at.as_tensor_variable(x_np)) + x = Shape()(pt.as_tensor_variable(x_np)) x_fg = FunctionGraph([], [x]) compare_jax_and_py(x_fg, [], must_be_device_array=False) - x = Shape_i(1)(at.as_tensor_variable(x_np)) + x = Shape_i(1)(pt.as_tensor_variable(x_np)) x_fg = FunctionGraph([], [x]) compare_jax_and_py(x_fg, [], must_be_device_array=False) def test_jax_specify_shape(): - in_at = at.matrix("in") - x = at.specify_shape(in_at, (4, None)) - x_fg = FunctionGraph([in_at], [x]) + in_pt = pt.matrix("in") + x = pt.specify_shape(in_pt, (4, None)) + x_fg = FunctionGraph([in_pt], [x]) compare_jax_and_py(x_fg, [np.ones((4, 5)).astype(config.floatX)]) # When used to assert two arrays have similar shapes - in_at = at.matrix("in") - shape_at = at.matrix("shape") - x = at.specify_shape(in_at, 
shape_at.shape) - x_fg = FunctionGraph([in_at, shape_at], [x]) + in_pt = pt.matrix("in") + shape_pt = pt.matrix("shape") + x = pt.specify_shape(in_pt, shape_pt.shape) + x_fg = FunctionGraph([in_pt, shape_pt], [x]) compare_jax_and_py( x_fg, [np.ones((4, 5)).astype(config.floatX), np.ones((4, 5)).astype(config.floatX)], @@ -60,29 +60,29 @@ def test_jax_Reshape_concrete_shape(): @pytest.mark.xfail( - reason="`shape_at` should be specified as a static argument", strict=True + reason="`shape_pt` should be specified as a static argument", strict=True ) def test_jax_Reshape_shape_graph_input(): a = vector("a") - shape_at = iscalar("b") - x = reshape(a, (shape_at, shape_at)) - x_fg = FunctionGraph([a, shape_at], [x]) + shape_pt = iscalar("b") + x = reshape(a, (shape_pt, shape_pt)) + x_fg = FunctionGraph([a, shape_pt], [x]) compare_jax_and_py(x_fg, [np.r_[1.0, 2.0, 3.0, 4.0].astype(config.floatX), 2]) def test_jax_compile_ops(): - x = DeepCopyOp()(at.as_tensor_variable(1.1)) + x = DeepCopyOp()(pt.as_tensor_variable(1.1)) x_fg = FunctionGraph([], [x]) compare_jax_and_py(x_fg, []) x_np = np.zeros((20, 1, 1)) - x = Unbroadcast(0, 2)(at.as_tensor_variable(x_np)) + x = Unbroadcast(0, 2)(pt.as_tensor_variable(x_np)) x_fg = FunctionGraph([], [x]) compare_jax_and_py(x_fg, []) - x = ViewOp()(at.as_tensor_variable(x_np)) + x = ViewOp()(pt.as_tensor_variable(x_np)) x_fg = FunctionGraph([], [x]) compare_jax_and_py(x_fg, []) diff --git a/tests/link/jax/test_slinalg.py b/tests/link/jax/test_slinalg.py index dbe755c592..4ae9531f9b 100644 --- a/tests/link/jax/test_slinalg.py +++ b/tests/link/jax/test_slinalg.py @@ -1,12 +1,12 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.configdefaults import config from pytensor.graph.fg import FunctionGraph -from pytensor.tensor import nlinalg as at_nlinalg -from pytensor.tensor import slinalg as at_slinalg -from pytensor.tensor import subtensor as at_subtensor +from pytensor.tensor import nlinalg as pt_nlinalg +from pytensor.tensor import slinalg as pt_slinalg +from pytensor.tensor import subtensor as pt_subtensor from pytensor.tensor.math import clip, cosh from pytensor.tensor.type import matrix, vector from tests.link.jax.test_basic import compare_jax_and_py @@ -23,8 +23,8 @@ def test_jax_basic(): z = cosh(x**2 + y / 3.0) # `[Inc]Subtensor` - out = at_subtensor.set_subtensor(z[0], -10.0) - out = at_subtensor.inc_subtensor(out[0, 1], 2.0) + out = pt_subtensor.set_subtensor(z[0], -10.0) + out = pt_subtensor.inc_subtensor(out[0, 1], 2.0) out = out[:5, :3] out_fg = FunctionGraph([x, y], [out]) @@ -46,13 +46,13 @@ def test_jax_basic(): out_fg = FunctionGraph([x, y], [out]) compare_jax_and_py(out_fg, test_input_vals) - out = at.diagonal(x, 0) + out = pt.diagonal(x, 0) out_fg = FunctionGraph([x], [out]) compare_jax_and_py( out_fg, [np.arange(10 * 10).reshape((10, 10)).astype(config.floatX)] ) - out = at_slinalg.cholesky(x) + out = pt_slinalg.cholesky(x) out_fg = FunctionGraph([x], [out]) compare_jax_and_py( out_fg, @@ -64,7 +64,7 @@ def test_jax_basic(): ) # not sure why this isn't working yet with lower=False - out = at_slinalg.Cholesky(lower=False)(x) + out = pt_slinalg.Cholesky(lower=False)(x) out_fg = FunctionGraph([x], [out]) compare_jax_and_py( out_fg, @@ -75,7 +75,7 @@ def test_jax_basic(): ], ) - out = at_slinalg.solve(x, b) + out = pt_slinalg.solve(x, b) out_fg = FunctionGraph([x, b], [out]) compare_jax_and_py( out_fg, @@ -85,17 +85,17 @@ def test_jax_basic(): ], ) - out = at.diag(b) + out = pt.diag(b) out_fg = 
FunctionGraph([b], [out]) compare_jax_and_py(out_fg, [np.arange(10).astype(config.floatX)]) - out = at_nlinalg.det(x) + out = pt_nlinalg.det(x) out_fg = FunctionGraph([x], [out]) compare_jax_and_py( out_fg, [np.arange(10 * 10).reshape((10, 10)).astype(config.floatX)] ) - out = at_nlinalg.matrix_inverse(x) + out = pt_nlinalg.matrix_inverse(x) out_fg = FunctionGraph([x], [out]) compare_jax_and_py( out_fg, @@ -114,7 +114,7 @@ def test_jax_SolveTriangular(trans, lower, check_finite): x = matrix("x") b = vector("b") - out = at_slinalg.solve_triangular( + out = pt_slinalg.solve_triangular( x, b, trans=trans, diff --git a/tests/link/jax/test_subtensor.py b/tests/link/jax/test_subtensor.py index 578af4cfc5..d8679bc430 100644 --- a/tests/link/jax/test_subtensor.py +++ b/tests/link/jax/test_subtensor.py @@ -1,10 +1,10 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.configdefaults import config from pytensor.graph.fg import FunctionGraph -from pytensor.tensor import subtensor as at_subtensor +from pytensor.tensor import subtensor as pt_subtensor from pytensor.tensor.rewriting.jax import ( boolean_indexing_set_or_inc, boolean_indexing_sum, @@ -14,76 +14,76 @@ def test_jax_Subtensor_constant(): # Basic indices - x_at = at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) - out_at = x_at[1, 2, 0] - assert isinstance(out_at.owner.op, at_subtensor.Subtensor) - out_fg = FunctionGraph([], [out_at]) + x_pt = pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))) + out_pt = x_pt[1, 2, 0] + assert isinstance(out_pt.owner.op, pt_subtensor.Subtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) - out_at = x_at[1:, 1, :] - assert isinstance(out_at.owner.op, at_subtensor.Subtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = x_pt[1:, 1, :] + assert isinstance(out_pt.owner.op, pt_subtensor.Subtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) - out_at = x_at[:2, 1, :] - assert isinstance(out_at.owner.op, at_subtensor.Subtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = x_pt[:2, 1, :] + assert isinstance(out_pt.owner.op, pt_subtensor.Subtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) - out_at = x_at[1:2, 1, :] - assert isinstance(out_at.owner.op, at_subtensor.Subtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = x_pt[1:2, 1, :] + assert isinstance(out_pt.owner.op, pt_subtensor.Subtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) # Advanced indexing - out_at = at_subtensor.advanced_subtensor1(x_at, [1, 2]) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor1) - out_fg = FunctionGraph([], [out_at]) + out_pt = pt_subtensor.advanced_subtensor1(x_pt, [1, 2]) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedSubtensor1) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) - out_at = x_at[[1, 2], [2, 3]] - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = x_pt[[1, 2], [2, 3]] + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) # Advanced and basic indexing - out_at = x_at[[1, 2], :] - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = x_pt[[1, 2], :] + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedSubtensor) + out_fg = FunctionGraph([], [out_pt]) 
compare_jax_and_py(out_fg, []) - out_at = x_at[[1, 2], :, [3, 4]] - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = x_pt[[1, 2], :, [3, 4]] + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) # Flipping - out_at = x_at[::-1] - out_fg = FunctionGraph([], [out_at]) + out_pt = x_pt[::-1] + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) @pytest.mark.xfail(reason="`a` should be specified as static when JIT-compiling") def test_jax_Subtensor_dynamic(): - a = at.iscalar("a") - x = at.arange(3) - out_at = x[:a] - assert isinstance(out_at.owner.op, at_subtensor.Subtensor) - out_fg = FunctionGraph([a], [out_at]) + a = pt.iscalar("a") + x = pt.arange(3) + out_pt = x[:a] + assert isinstance(out_pt.owner.op, pt_subtensor.Subtensor) + out_fg = FunctionGraph([a], [out_pt]) compare_jax_and_py(out_fg, [1]) def test_jax_Subtensor_boolean_mask(): """JAX does not support resizing arrays with boolean masks.""" - x_at = at.vector("x", dtype="float64") - out_at = x_at[x_at < 0] - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor) + x_pt = pt.vector("x", dtype="float64") + out_pt = x_pt[x_pt < 0] + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedSubtensor) - out_fg = FunctionGraph([x_at], [out_at]) + out_fg = FunctionGraph([x_pt], [out_pt]) - x_at_test = np.arange(-5, 5) + x_pt_test = np.arange(-5, 5) with pytest.raises(NotImplementedError, match="resizing arrays with boolean"): - compare_jax_and_py(out_fg, [x_at_test]) + compare_jax_and_py(out_fg, [x_pt_test]) def test_jax_Subtensor_boolean_mask_reexpressible(): @@ -97,15 +97,15 @@ def test_jax_Subtensor_boolean_mask_reexpressible(): improvement over its user interface. 
""" - x_at = at.matrix("x") - out_at = x_at[x_at < 0].sum() - out_fg = FunctionGraph([x_at], [out_at]) + x_pt = pt.matrix("x") + out_pt = x_pt[x_pt < 0].sum() + out_fg = FunctionGraph([x_pt], [out_pt]) compare_jax_and_py(out_fg, [np.arange(25).reshape(5, 5).astype(config.floatX)]) def test_boolean_indexing_sum_not_applicable(): """Test that boolean_indexing_sum does not return an invalid replacement in cases where it doesn't apply.""" - x = at.matrix("x") + x = pt.matrix("x") out = x[x[:, 0] < 0, :].sum(axis=-1) fg = FunctionGraph([x], [out]) assert boolean_indexing_sum.transform(fg, fg.outputs[0].owner) is None @@ -119,98 +119,98 @@ def test_jax_IncSubtensor(): rng = np.random.default_rng(213234) x_np = rng.uniform(-1, 1, size=(3, 4, 5)).astype(config.floatX) - x_at = at.constant(np.arange(3 * 4 * 5).reshape((3, 4, 5)).astype(config.floatX)) + x_pt = pt.constant(np.arange(3 * 4 * 5).reshape((3, 4, 5)).astype(config.floatX)) # "Set" basic indices - st_at = at.as_tensor_variable(np.array(-10.0, dtype=config.floatX)) - out_at = at_subtensor.set_subtensor(x_at[1, 2, 3], st_at) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) + st_pt = pt.as_tensor_variable(np.array(-10.0, dtype=config.floatX)) + out_pt = pt_subtensor.set_subtensor(x_pt[1, 2, 3], st_pt) + assert isinstance(out_pt.owner.op, pt_subtensor.IncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) - st_at = at.as_tensor_variable(np.r_[-1.0, 0.0].astype(config.floatX)) - out_at = at_subtensor.set_subtensor(x_at[:2, 0, 0], st_at) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) + st_pt = pt.as_tensor_variable(np.r_[-1.0, 0.0].astype(config.floatX)) + out_pt = pt_subtensor.set_subtensor(x_pt[:2, 0, 0], st_pt) + assert isinstance(out_pt.owner.op, pt_subtensor.IncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) - out_at = at_subtensor.set_subtensor(x_at[0, 1:3, 0], st_at) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = pt_subtensor.set_subtensor(x_pt[0, 1:3, 0], st_pt) + assert isinstance(out_pt.owner.op, pt_subtensor.IncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) # "Set" advanced indices - st_at = at.as_tensor_variable( + st_pt = pt.as_tensor_variable( rng.uniform(-1, 1, size=(2, 4, 5)).astype(config.floatX) ) - out_at = at_subtensor.set_subtensor(x_at[np.r_[0, 2]], st_at) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = pt_subtensor.set_subtensor(x_pt[np.r_[0, 2]], st_pt) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) - st_at = at.as_tensor_variable(np.r_[-1.0, 0.0].astype(config.floatX)) - out_at = at_subtensor.set_subtensor(x_at[[0, 2], 0, 0], st_at) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) + st_pt = pt.as_tensor_variable(np.r_[-1.0, 0.0].astype(config.floatX)) + out_pt = pt_subtensor.set_subtensor(x_pt[[0, 2], 0, 0], st_pt) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) # "Set" boolean indices - mask_at = at.constant(x_np > 0) - out_at = at_subtensor.set_subtensor(x_at[mask_at], 0.0) - assert isinstance(out_at.owner.op, 
at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) + mask_pt = pt.constant(x_np > 0) + out_pt = pt_subtensor.set_subtensor(x_pt[mask_pt], 0.0) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) # "Increment" basic indices - st_at = at.as_tensor_variable(np.array(-10.0, dtype=config.floatX)) - out_at = at_subtensor.inc_subtensor(x_at[1, 2, 3], st_at) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) + st_pt = pt.as_tensor_variable(np.array(-10.0, dtype=config.floatX)) + out_pt = pt_subtensor.inc_subtensor(x_pt[1, 2, 3], st_pt) + assert isinstance(out_pt.owner.op, pt_subtensor.IncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) - st_at = at.as_tensor_variable(np.r_[-1.0, 0.0].astype(config.floatX)) - out_at = at_subtensor.inc_subtensor(x_at[:2, 0, 0], st_at) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) + st_pt = pt.as_tensor_variable(np.r_[-1.0, 0.0].astype(config.floatX)) + out_pt = pt_subtensor.inc_subtensor(x_pt[:2, 0, 0], st_pt) + assert isinstance(out_pt.owner.op, pt_subtensor.IncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) - out_at = at_subtensor.set_subtensor(x_at[0, 1:3, 0], st_at) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = pt_subtensor.set_subtensor(x_pt[0, 1:3, 0], st_pt) + assert isinstance(out_pt.owner.op, pt_subtensor.IncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) # "Increment" advanced indices - st_at = at.as_tensor_variable( + st_pt = pt.as_tensor_variable( rng.uniform(-1, 1, size=(2, 4, 5)).astype(config.floatX) ) - out_at = at_subtensor.inc_subtensor(x_at[np.r_[0, 2]], st_at) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = pt_subtensor.inc_subtensor(x_pt[np.r_[0, 2]], st_pt) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) - st_at = at.as_tensor_variable(np.r_[-1.0, 0.0].astype(config.floatX)) - out_at = at_subtensor.inc_subtensor(x_at[[0, 2], 0, 0], st_at) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) + st_pt = pt.as_tensor_variable(np.r_[-1.0, 0.0].astype(config.floatX)) + out_pt = pt_subtensor.inc_subtensor(x_pt[[0, 2], 0, 0], st_pt) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) # "Increment" boolean indices - mask_at = at.constant(x_np > 0) - out_at = at_subtensor.set_subtensor(x_at[mask_at], 1.0) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) + mask_pt = pt.constant(x_np > 0) + out_pt = pt_subtensor.set_subtensor(x_pt[mask_pt], 1.0) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) - st_at = at.as_tensor_variable(x_np[[0, 2], 0, :3]) - out_at = at_subtensor.set_subtensor(x_at[[0, 2], 0, :3], st_at) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) + st_pt = pt.as_tensor_variable(x_np[[0, 2], 0, :3]) + out_pt = 
pt_subtensor.set_subtensor(x_pt[[0, 2], 0, :3], st_pt) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) - st_at = at.as_tensor_variable(x_np[[0, 2], 0, :3]) - out_at = at_subtensor.inc_subtensor(x_at[[0, 2], 0, :3], st_at) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) + st_pt = pt.as_tensor_variable(x_np[[0, 2], 0, :3]) + out_pt = pt_subtensor.inc_subtensor(x_pt[[0, 2], 0, :3], st_pt) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_jax_and_py(out_fg, []) @@ -228,24 +228,24 @@ def test_jax_IncSubtensor_boolean_indexing_reexpressible(): rng = np.random.default_rng(213234) x_np = rng.uniform(-1, 1, size=(4, 5)).astype(config.floatX) - x_at = at.matrix("x") - mask_at = at.as_tensor(x_at) > 0 - out_at = at_subtensor.set_subtensor(x_at[mask_at], 0.0) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([x_at], [out_at]) + x_pt = pt.matrix("x") + mask_pt = pt.as_tensor(x_pt) > 0 + out_pt = pt_subtensor.set_subtensor(x_pt[mask_pt], 0.0) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([x_pt], [out_pt]) compare_jax_and_py(out_fg, [x_np]) - mask_at = at.as_tensor(x_at) > 0 - out_at = at_subtensor.inc_subtensor(x_at[mask_at], 1.0) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([x_at], [out_at]) + mask_pt = pt.as_tensor(x_pt) > 0 + out_pt = pt_subtensor.inc_subtensor(x_pt[mask_pt], 1.0) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([x_pt], [out_pt]) compare_jax_and_py(out_fg, [x_np]) def test_boolean_indexing_set_or_inc_not_applicable(): """Test that `boolean_indexing_set_or_inc` does not return an invalid replacement in cases where it doesn't apply.""" - x = at.vector("x") - mask = at.as_tensor(x) > 0 - out = at_subtensor.set_subtensor(x[mask], [0, 1, 2]) + x = pt.vector("x") + mask = pt.as_tensor(x) > 0 + out = pt_subtensor.set_subtensor(x[mask], [0, 1, 2]) fg = FunctionGraph([x], [out]) assert boolean_indexing_set_or_inc.transform(fg, fg.outputs[0].owner) is None diff --git a/tests/link/jax/test_tensor_basic.py b/tests/link/jax/test_tensor_basic.py index a9a4ccc261..94755ddf2c 100644 --- a/tests/link/jax/test_tensor_basic.py +++ b/tests/link/jax/test_tensor_basic.py @@ -8,7 +8,7 @@ import jax.errors import pytensor -import pytensor.tensor.basic as at +import pytensor.tensor.basic as ptb from pytensor.configdefaults import config from pytensor.graph.fg import FunctionGraph from pytensor.graph.op import get_test_value @@ -18,19 +18,19 @@ def test_jax_Alloc(): - x = at.alloc(0.0, 2, 3) + x = ptb.alloc(0.0, 2, 3) x_fg = FunctionGraph([], [x]) _, [jax_res] = compare_jax_and_py(x_fg, []) assert jax_res.shape == (2, 3) - x = at.alloc(1.1, 2, 3) + x = ptb.alloc(1.1, 2, 3) x_fg = FunctionGraph([], [x]) compare_jax_and_py(x_fg, []) - x = at.AllocEmpty("float32")(2, 3) + x = ptb.AllocEmpty("float32")(2, 3) x_fg = FunctionGraph([], [x]) def compare_shape_dtype(x, y): @@ -41,13 +41,13 @@ def compare_shape_dtype(x, y): compare_jax_and_py(x_fg, [], assert_fn=compare_shape_dtype) a = scalar("a") - x = at.alloc(a, 20) + x = ptb.alloc(a, 20) x_fg = FunctionGraph([a], [x]) compare_jax_and_py(x_fg, [10.0]) a = vector("a") - x = at.alloc(a, 20, 10) + x = ptb.alloc(a, 20, 10) x_fg = FunctionGraph([a], [x]) 
compare_jax_and_py(x_fg, [np.ones(10, dtype=config.floatX)]) @@ -58,21 +58,21 @@ def test_alloc_runtime_broadcast(): def test_jax_MakeVector(): - x = at.make_vector(1, 2, 3) + x = ptb.make_vector(1, 2, 3) x_fg = FunctionGraph([], [x]) compare_jax_and_py(x_fg, []) def test_arange(): - out = at.arange(1, 10, 2) + out = ptb.arange(1, 10, 2) fgraph = FunctionGraph([], [out]) compare_jax_and_py(fgraph, []) def test_arange_of_shape(): x = vector("x") - out = at.arange(1, x.shape[-1], 2) + out = ptb.arange(1, x.shape[-1], 2) fgraph = FunctionGraph([x], [out]) compare_jax_and_py(fgraph, [np.zeros((5,))]) @@ -82,7 +82,7 @@ def test_arange_nonconcrete(): a = scalar("a") a.tag.test_value = 10 - out = at.arange(a) + out = ptb.arange(a) with pytest.raises(NotImplementedError): fgraph = FunctionGraph([a], [out]) @@ -93,7 +93,7 @@ def test_jax_Join(): a = matrix("a") b = matrix("b") - x = at.join(0, a, b) + x = ptb.join(0, a, b) x_fg = FunctionGraph([a, b], [x]) compare_jax_and_py( x_fg, @@ -110,7 +110,7 @@ def test_jax_Join(): ], ) - x = at.join(1, a, b) + x = ptb.join(1, a, b) x_fg = FunctionGraph([a, b], [x]) compare_jax_and_py( x_fg, @@ -131,7 +131,7 @@ def test_jax_Join(): class TestJaxSplit: def test_basic(self): a = matrix("a") - a_splits = at.split(a, splits_size=[1, 2, 3], n_splits=3, axis=0) + a_splits = ptb.split(a, splits_size=[1, 2, 3], n_splits=3, axis=0) fg = FunctionGraph([a], a_splits) compare_jax_and_py( fg, @@ -141,7 +141,7 @@ def test_basic(self): ) a = matrix("a", shape=(6, None)) - a_splits = at.split(a, splits_size=[2, a.shape[0] - 2], n_splits=2, axis=0) + a_splits = ptb.split(a, splits_size=[2, a.shape[0] - 2], n_splits=2, axis=0) fg = FunctionGraph([a], a_splits) compare_jax_and_py( fg, @@ -153,28 +153,28 @@ def test_basic(self): def test_runtime_errors(self): a = matrix("a") - a_splits = at.split(a, splits_size=[2, 2, 2], n_splits=2, axis=0) + a_splits = ptb.split(a, splits_size=[2, 2, 2], n_splits=2, axis=0) fn = pytensor.function([a], a_splits, mode="JAX") with pytest.raises( ValueError, match="Length of splits is not equal to n_splits" ): fn(np.zeros((6, 4), dtype=pytensor.config.floatX)) - a_splits = at.split(a, splits_size=[2, 4], n_splits=3, axis=0) + a_splits = ptb.split(a, splits_size=[2, 4], n_splits=3, axis=0) fn = pytensor.function([a], a_splits, mode="JAX") with pytest.raises( ValueError, match="Length of splits is not equal to n_splits" ): fn(np.zeros((6, 4), dtype=pytensor.config.floatX)) - a_splits = at.split(a, splits_size=[2, 4], n_splits=2, axis=0) + a_splits = ptb.split(a, splits_size=[2, 4], n_splits=2, axis=0) fn = pytensor.function([a], a_splits, mode="JAX") with pytest.raises( ValueError, match="Split sizes do not sum up to input length along axis: 7" ): fn(np.zeros((7, 4), dtype=pytensor.config.floatX)) - a_splits = at.split(a, splits_size=[2, -4, 8], n_splits=3, axis=0) + a_splits = ptb.split(a, splits_size=[2, -4, 8], n_splits=3, axis=0) fn = pytensor.function([a], a_splits, mode="JAX") with pytest.raises( ValueError, @@ -185,7 +185,7 @@ def test_runtime_errors(self): def test_jax_split_not_supported(self): a = matrix("a", shape=(6, None)) - a_splits = at.split(a, splits_size=[2, a.shape[1] - 2], n_splits=2, axis=1) + a_splits = ptb.split(a, splits_size=[2, a.shape[1] - 2], n_splits=2, axis=1) with pytest.warns( UserWarning, match="Split node does not have constant split positions." 
): @@ -195,7 +195,7 @@ def test_jax_split_not_supported(self): fn(np.zeros((6, 4), dtype=pytensor.config.floatX)) split_axis = iscalar("split_axis") - a_splits = at.split(a, splits_size=[2, 4], n_splits=2, axis=split_axis) + a_splits = ptb.split(a, splits_size=[2, 4], n_splits=2, axis=split_axis) with pytest.warns(UserWarning, match="Split node does not have constant axis."): fn = pytensor.function([a, split_axis], a_splits, mode="JAX") # Same as above, an AttributeError surpasses the `TracerIntegerConversionError` @@ -206,14 +206,14 @@ def test_jax_split_not_supported(self): def test_jax_eye(): """Tests jaxification of the Eye operator""" - out = at.eye(3) + out = ptb.eye(3) out_fg = FunctionGraph([], [out]) compare_jax_and_py(out_fg, []) def test_tri(): - out = at.tri(10, 10, 0) + out = ptb.tri(10, 10, 0) fgraph = FunctionGraph([], [out]) compare_jax_and_py(fgraph, []) @@ -230,7 +230,7 @@ def test_tri_nonconcrete(): n.tag.test_value = 10 k.tag.test_value = 0 - out = at.tri(m, n, k) + out = ptb.tri(m, n, k) # The actual error the user will see should be jax.errors.ConcretizationTypeError, but # the error handler raises an Attribute error first, so that's what this test needs to pass diff --git a/tests/link/numba/test_basic.py b/tests/link/numba/test_basic.py index 92ab879e5c..7a9eedfd9a 100644 --- a/tests/link/numba/test_basic.py +++ b/tests/link/numba/test_basic.py @@ -10,10 +10,10 @@ numba = pytest.importorskip("numba") -import pytensor.scalar as aes -import pytensor.scalar.math as aesm -import pytensor.tensor as at -import pytensor.tensor.math as aem +import pytensor.scalar as ps +import pytensor.scalar.math as psm +import pytensor.tensor as pt +import pytensor.tensor.math as ptm from pytensor import config, shared from pytensor.compile.builders import OpFromGraph from pytensor.compile.function import function @@ -32,7 +32,7 @@ from pytensor.raise_op import assert_op from pytensor.scalar.basic import ScalarOp, as_scalar from pytensor.tensor import blas -from pytensor.tensor import subtensor as at_subtensor +from pytensor.tensor import subtensor as pt_subtensor from pytensor.tensor.elemwise import Elemwise from pytensor.tensor.shape import Reshape, Shape, Shape_i, SpecifyShape @@ -296,12 +296,12 @@ def assert_fn(x, y): "v, expected, force_scalar, not_implemented", [ (MyType(), None, False, True), - (aes.float32, numba.types.float32, False, False), - (at.fscalar, numba.types.Array(numba.types.float32, 0, "A"), False, False), - (at.fscalar, numba.types.float32, True, False), - (at.lvector, numba.types.int64[:], False, False), - (at.dmatrix, numba.types.float64[:, :], False, False), - (at.dmatrix, numba.types.float64, True, False), + (ps.float32, numba.types.float32, False, False), + (pt.fscalar, numba.types.Array(numba.types.float32, 0, "A"), False, False), + (pt.fscalar, numba.types.float32, True, False), + (pt.lvector, numba.types.int64[:], False, False), + (pt.dmatrix, numba.types.float64[:, :], False, False), + (pt.dmatrix, numba.types.float64, True, False), ], ) def test_get_numba_type(v, expected, force_scalar, not_implemented): @@ -321,30 +321,30 @@ def test_get_numba_type(v, expected, force_scalar, not_implemented): (Apply(MyOp(), [], []), numba.types.void(), False), (Apply(MyOp(), [], []), numba.types.void(), True), ( - Apply(MyOp(), [at.lvector()], []), + Apply(MyOp(), [pt.lvector()], []), numba.types.void(numba.types.int64[:]), False, ), - (Apply(MyOp(), [at.lvector()], []), numba.types.void(numba.types.int64), True), + (Apply(MyOp(), [pt.lvector()], []), 
numba.types.void(numba.types.int64), True), ( - Apply(MyOp(), [at.dmatrix(), aes.float32()], [at.dmatrix()]), + Apply(MyOp(), [pt.dmatrix(), ps.float32()], [pt.dmatrix()]), numba.types.float64[:, :](numba.types.float64[:, :], numba.types.float32), False, ), ( - Apply(MyOp(), [at.dmatrix(), aes.float32()], [at.dmatrix()]), + Apply(MyOp(), [pt.dmatrix(), ps.float32()], [pt.dmatrix()]), numba.types.float64(numba.types.float64, numba.types.float32), True, ), ( - Apply(MyOp(), [at.dmatrix(), aes.float32()], [at.dmatrix(), aes.int32()]), + Apply(MyOp(), [pt.dmatrix(), ps.float32()], [pt.dmatrix(), ps.int32()]), numba.types.Tuple([numba.types.float64[:, :], numba.types.int32])( numba.types.float64[:, :], numba.types.float32 ), False, ), ( - Apply(MyOp(), [at.dmatrix(), aes.float32()], [at.dmatrix(), aes.int32()]), + Apply(MyOp(), [pt.dmatrix(), ps.float32()], [pt.dmatrix(), ps.int32()]), numba.types.Tuple([numba.types.float64, numba.types.int32])( numba.types.float64, numba.types.float32 ), @@ -380,44 +380,44 @@ def test_box_unbox(input, wrapper_fn, check_fn): @pytest.mark.parametrize( "x, indices", [ - (at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), (1,)), + (pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), (1,)), ( - at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), + pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), (slice(None)), ), - (at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), (1, 2, 0)), + (pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), (1, 2, 0)), ( - at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), + pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), (slice(1, 2), 1, slice(None)), ), ], ) def test_Subtensor(x, indices): """Test NumPy's basic indexing.""" - out_at = x[indices] - assert isinstance(out_at.owner.op, at_subtensor.Subtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = x[indices] + assert isinstance(out_pt.owner.op, pt_subtensor.Subtensor) + out_fg = FunctionGraph([], [out_pt]) compare_numba_and_py(out_fg, []) @pytest.mark.parametrize( "x, indices", [ - (at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), ([1, 2],)), + (pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), ([1, 2],)), ], ) def test_AdvancedSubtensor1(x, indices): """Test NumPy's advanced indexing in one dimension.""" - out_at = at_subtensor.advanced_subtensor1(x, *indices) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor1) - out_fg = FunctionGraph([], [out_at]) + out_pt = pt_subtensor.advanced_subtensor1(x, *indices) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedSubtensor1) + out_fg = FunctionGraph([], [out_pt]) compare_numba_and_py(out_fg, []) def test_AdvancedSubtensor1_out_of_bounds(): - out_at = at_subtensor.advanced_subtensor1(np.arange(3), [4]) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor1) - out_fg = FunctionGraph([], [out_at]) + out_pt = pt_subtensor.advanced_subtensor1(np.arange(3), [4]) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedSubtensor1) + out_fg = FunctionGraph([], [out_pt]) with pytest.raises(IndexError): compare_numba_and_py(out_fg, []) @@ -425,18 +425,18 @@ def test_AdvancedSubtensor1_out_of_bounds(): @pytest.mark.parametrize( "x, indices", [ - (at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), ([1, 2], [2, 3])), + (pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), ([1, 2], [2, 3])), ( - at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), + pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), ([1, 2], slice(None), [3, 4]), ), ], ) def 
test_AdvancedSubtensor(x, indices): """Test NumPy's advanced indexing in more than one dimension.""" - out_at = x[indices] - assert isinstance(out_at.owner.op, at_subtensor.AdvancedSubtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = x[indices] + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_numba_and_py(out_fg, []) @@ -444,42 +444,42 @@ def test_AdvancedSubtensor(x, indices): "x, y, indices", [ ( - at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), - at.as_tensor(np.array(10)), + pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), + pt.as_tensor(np.array(10)), (1,), ), ( - at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), - at.as_tensor(rng.poisson(size=(4, 5))), + pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), + pt.as_tensor(rng.poisson(size=(4, 5))), (slice(None)), ), ( - at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), - at.as_tensor(np.array(10)), + pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), + pt.as_tensor(np.array(10)), (1, 2, 0), ), ( - at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), - at.as_tensor(rng.poisson(size=(1, 5))), + pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), + pt.as_tensor(rng.poisson(size=(1, 5))), (slice(1, 2), 1, slice(None)), ), ], ) def test_IncSubtensor(x, y, indices): - out_at = at.set_subtensor(x[indices], y) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = pt.set_subtensor(x[indices], y) + assert isinstance(out_pt.owner.op, pt_subtensor.IncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_numba_and_py(out_fg, []) - out_at = at.inc_subtensor(x[indices], y) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = pt.inc_subtensor(x[indices], y) + assert isinstance(out_pt.owner.op, pt_subtensor.IncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_numba_and_py(out_fg, []) - x_at = x.type() - out_at = at.set_subtensor(x_at[indices], y, inplace=True) - assert isinstance(out_at.owner.op, at_subtensor.IncSubtensor) - out_fg = FunctionGraph([x_at], [out_at]) + x_pt = x.type() + out_pt = pt.set_subtensor(x_pt[indices], y, inplace=True) + assert isinstance(out_pt.owner.op, pt_subtensor.IncSubtensor) + out_fg = FunctionGraph([x_pt], [out_pt]) compare_numba_and_py(out_fg, [x.data]) @@ -487,32 +487,32 @@ def test_IncSubtensor(x, y, indices): "x, y, indices", [ ( - at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), - at.as_tensor(rng.poisson(size=(2, 4, 5))), + pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), + pt.as_tensor(rng.poisson(size=(2, 4, 5))), ([1, 2],), ), ( - at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), - at.as_tensor(rng.poisson(size=(2, 4, 5))), + pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), + pt.as_tensor(rng.poisson(size=(2, 4, 5))), ([1, 1],), ), ], ) def test_AdvancedIncSubtensor1(x, y, indices): - out_at = at_subtensor.advanced_set_subtensor1(x, y, *indices) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor1) - out_fg = FunctionGraph([], [out_at]) + out_pt = pt_subtensor.advanced_set_subtensor1(x, y, *indices) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor1) + out_fg = FunctionGraph([], [out_pt]) compare_numba_and_py(out_fg, []) - out_at = at_subtensor.advanced_inc_subtensor1(x, y, *indices) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor1) - out_fg = FunctionGraph([], [out_at]) + out_pt = 
pt_subtensor.advanced_inc_subtensor1(x, y, *indices) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor1) + out_fg = FunctionGraph([], [out_pt]) compare_numba_and_py(out_fg, []) - x_at = x.type() - out_at = at_subtensor.AdvancedIncSubtensor1(inplace=True)(x_at, y, *indices) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor1) - out_fg = FunctionGraph([x_at], [out_at]) + x_pt = x.type() + out_pt = pt_subtensor.AdvancedIncSubtensor1(inplace=True)(x_pt, y, *indices) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor1) + out_fg = FunctionGraph([x_pt], [out_pt]) compare_numba_and_py(out_fg, [x.data]) @@ -520,40 +520,40 @@ def test_AdvancedIncSubtensor1(x, y, indices): "x, y, indices", [ ( - at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), - at.as_tensor(rng.poisson(size=(2, 5))), + pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), + pt.as_tensor(rng.poisson(size=(2, 5))), ([1, 2], [2, 3]), ), ( - at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), - at.as_tensor(rng.poisson(size=(2, 4))), + pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), + pt.as_tensor(rng.poisson(size=(2, 4))), ([1, 2], slice(None), [3, 4]), ), pytest.param( - at.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), - at.as_tensor(rng.poisson(size=(2, 5))), + pt.as_tensor(np.arange(3 * 4 * 5).reshape((3, 4, 5))), + pt.as_tensor(rng.poisson(size=(2, 5))), ([1, 1], [2, 2]), ), ], ) def test_AdvancedIncSubtensor(x, y, indices): - out_at = at.set_subtensor(x[indices], y) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = pt.set_subtensor(x[indices], y) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_numba_and_py(out_fg, []) - out_at = at.inc_subtensor(x[indices], y) - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([], [out_at]) + out_pt = pt.inc_subtensor(x[indices], y) + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([], [out_pt]) compare_numba_and_py(out_fg, []) - x_at = x.type() - out_at = at.set_subtensor(x_at[indices], y) + x_pt = x.type() + out_pt = pt.set_subtensor(x_pt[indices], y) # Inplace isn't really implemented for `AdvancedIncSubtensor`, so we just # hack it on here - out_at.owner.op.inplace = True - assert isinstance(out_at.owner.op, at_subtensor.AdvancedIncSubtensor) - out_fg = FunctionGraph([x_at], [out_at]) + out_pt.owner.op.inplace = True + assert isinstance(out_pt.owner.op, pt_subtensor.AdvancedIncSubtensor) + out_fg = FunctionGraph([x_pt], [out_pt]) compare_numba_and_py(out_fg, [x.data]) @@ -564,12 +564,12 @@ def test_AdvancedIncSubtensor(x, y, indices): ], ) def test_Shape(x, i): - g = Shape()(at.as_tensor_variable(x)) + g = Shape()(pt.as_tensor_variable(x)) g_fg = FunctionGraph([], [g]) compare_numba_and_py(g_fg, []) - g = Shape_i(i)(at.as_tensor_variable(x)) + g = Shape_i(i)(pt.as_tensor_variable(x)) g_fg = FunctionGraph([], [g]) compare_numba_and_py(g_fg, []) @@ -578,11 +578,11 @@ def test_Shape(x, i): @pytest.mark.parametrize( "v, shape, ndim", [ - (set_test_value(at.vector(), np.array([4], dtype=config.floatX)), (), 0), - (set_test_value(at.vector(), np.arange(4, dtype=config.floatX)), (2, 2), 2), + (set_test_value(pt.vector(), np.array([4], dtype=config.floatX)), (), 0), + (set_test_value(pt.vector(), np.arange(4, dtype=config.floatX)), (2, 2), 2), ( - set_test_value(at.vector(), np.arange(4, 
dtype=config.floatX)), - set_test_value(at.lvector(), np.array([2, 2], dtype="int64")), + set_test_value(pt.vector(), np.arange(4, dtype=config.floatX)), + set_test_value(pt.lvector(), np.array([2, 2], dtype="int64")), 2, ), ], @@ -601,7 +601,7 @@ def test_Reshape(v, shape, ndim): def test_Reshape_scalar(): - v = at.vector() + v = pt.vector() v.tag.test_value = np.array([1.0], dtype=config.floatX) g = Reshape(1)(v[0], (1,)) g_fg = FunctionGraph(outputs=[g]) @@ -619,17 +619,17 @@ def test_Reshape_scalar(): "v, shape, fails", [ ( - set_test_value(at.matrix(), np.array([[1.0]], dtype=config.floatX)), + set_test_value(pt.matrix(), np.array([[1.0]], dtype=config.floatX)), (1, 1), False, ), ( - set_test_value(at.matrix(), np.array([[1.0, 2.0]], dtype=config.floatX)), + set_test_value(pt.matrix(), np.array([[1.0, 2.0]], dtype=config.floatX)), (1, 1), True, ), ( - set_test_value(at.matrix(), np.array([[1.0, 2.0]], dtype=config.floatX)), + set_test_value(pt.matrix(), np.array([[1.0, 2.0]], dtype=config.floatX)), (1, None), False, ), @@ -653,7 +653,7 @@ def test_SpecifyShape(v, shape, fails): @pytest.mark.parametrize( "v", [ - set_test_value(at.vector(), np.arange(4, dtype=config.floatX)), + set_test_value(pt.vector(), np.arange(4, dtype=config.floatX)), ], ) def test_ViewOp(v): @@ -675,9 +675,9 @@ def test_ViewOp(v): ( [ set_test_value( - at.matrix(), rng.random(size=(2, 3)).astype(config.floatX) + pt.matrix(), rng.random(size=(2, 3)).astype(config.floatX) ), - set_test_value(at.lmatrix(), rng.poisson(size=(2, 3))), + set_test_value(pt.lmatrix(), rng.poisson(size=(2, 3))), ], MySingleOut, UserWarning, @@ -685,9 +685,9 @@ def test_ViewOp(v): ( [ set_test_value( - at.matrix(), rng.random(size=(2, 3)).astype(config.floatX) + pt.matrix(), rng.random(size=(2, 3)).astype(config.floatX) ), - set_test_value(at.lmatrix(), rng.poisson(size=(2, 3))), + set_test_value(pt.lmatrix(), rng.poisson(size=(2, 3))), ], MyMultiOut, UserWarning, @@ -717,7 +717,7 @@ def test_perform(inputs, op, exc): def test_perform_params(): """This tests for `Op.perform` implementations that require the `params` arguments.""" - x = at.vector() + x = pt.vector() x.tag.test_value = np.array([1.0, 2.0], dtype=config.floatX) out = assert_op(x, np.array(True)) @@ -736,7 +736,7 @@ def test_perform_type_convert(): native scalar and it's supposed to return an `np.ndarray`. 
""" - x = at.vector() + x = pt.vector() x.tag.test_value = np.array([1.0, 2.0], dtype=config.floatX) out = assert_op(x.sum(), np.array(True)) @@ -752,33 +752,33 @@ def test_perform_type_convert(): "x, y, exc", [ ( - set_test_value(at.matrix(), rng.random(size=(3, 2)).astype(config.floatX)), - set_test_value(at.vector(), rng.random(size=(2,)).astype(config.floatX)), + set_test_value(pt.matrix(), rng.random(size=(3, 2)).astype(config.floatX)), + set_test_value(pt.vector(), rng.random(size=(2,)).astype(config.floatX)), None, ), ( set_test_value( - at.matrix(dtype="float64"), rng.random(size=(3, 2)).astype("float64") + pt.matrix(dtype="float64"), rng.random(size=(3, 2)).astype("float64") ), set_test_value( - at.vector(dtype="float32"), rng.random(size=(2,)).astype("float32") + pt.vector(dtype="float32"), rng.random(size=(2,)).astype("float32") ), None, ), ( - set_test_value(at.lmatrix(), rng.poisson(size=(3, 2))), - set_test_value(at.fvector(), rng.random(size=(2,)).astype("float32")), + set_test_value(pt.lmatrix(), rng.poisson(size=(3, 2))), + set_test_value(pt.fvector(), rng.random(size=(2,)).astype("float32")), None, ), ( - set_test_value(at.lvector(), rng.random(size=(2,)).astype(np.int64)), - set_test_value(at.lvector(), rng.random(size=(2,)).astype(np.int64)), + set_test_value(pt.lvector(), rng.random(size=(2,)).astype(np.int64)), + set_test_value(pt.lvector(), rng.random(size=(2,)).astype(np.int64)), None, ), ], ) def test_Dot(x, y, exc): - g = aem.Dot()(x, y) + g = ptm.Dot()(x, y) g_fg = FunctionGraph(outputs=[g]) cm = contextlib.suppress() if exc is None else pytest.warns(exc) @@ -797,33 +797,33 @@ def test_Dot(x, y, exc): "x, exc", [ ( - set_test_value(aes.float64(), np.array(0.0, dtype="float64")), + set_test_value(ps.float64(), np.array(0.0, dtype="float64")), None, ), ( - set_test_value(aes.float64(), np.array(-32.0, dtype="float64")), + set_test_value(ps.float64(), np.array(-32.0, dtype="float64")), None, ), ( - set_test_value(aes.float64(), np.array(-40.0, dtype="float64")), + set_test_value(ps.float64(), np.array(-40.0, dtype="float64")), None, ), ( - set_test_value(aes.float64(), np.array(32.0, dtype="float64")), + set_test_value(ps.float64(), np.array(32.0, dtype="float64")), None, ), ( - set_test_value(aes.float64(), np.array(40.0, dtype="float64")), + set_test_value(ps.float64(), np.array(40.0, dtype="float64")), None, ), ( - set_test_value(aes.int64(), np.array(32, dtype="int64")), + set_test_value(ps.int64(), np.array(32, dtype="int64")), None, ), ], ) def test_Softplus(x, exc): - g = aesm.Softplus(aes.upgrade_to_float)(x) + g = psm.Softplus(ps.upgrade_to_float)(x) g_fg = FunctionGraph(outputs=[g]) cm = contextlib.suppress() if exc is None else pytest.warns(exc) @@ -843,22 +843,22 @@ def test_Softplus(x, exc): [ ( set_test_value( - at.dtensor3(), + pt.dtensor3(), rng.random(size=(2, 3, 3)).astype("float64"), ), set_test_value( - at.dtensor3(), + pt.dtensor3(), rng.random(size=(2, 3, 3)).astype("float64"), ), None, ), ( set_test_value( - at.dtensor3(), + pt.dtensor3(), rng.random(size=(2, 3, 3)).astype("float64"), ), set_test_value( - at.ltensor3(), + pt.ltensor3(), rng.poisson(size=(2, 3, 3)).astype("int64"), ), None, @@ -952,15 +952,15 @@ def test_shared_updates(): [ ([], lambda: np.array(True), np.r_[1, 2, 3], np.r_[-1, -2, -3]), ( - [set_test_value(at.dscalar(), np.array(0.2, dtype=np.float64))], + [set_test_value(pt.dscalar(), np.array(0.2, dtype=np.float64))], lambda x: x < 0.5, np.r_[1, 2, 3], np.r_[-1, -2, -3], ), ( [ - set_test_value(at.dscalar(), np.array(0.3, 
dtype=np.float64)), - set_test_value(at.dscalar(), np.array(0.5, dtype=np.float64)), + set_test_value(pt.dscalar(), np.array(0.3, dtype=np.float64)), + set_test_value(pt.dscalar(), np.array(0.5, dtype=np.float64)), ], lambda x, y: x > y, x, @@ -968,28 +968,28 @@ def test_shared_updates(): ), ( [ - set_test_value(at.dvector(), np.array([0.3, 0.1], dtype=np.float64)), - set_test_value(at.dvector(), np.array([0.5, 0.9], dtype=np.float64)), + set_test_value(pt.dvector(), np.array([0.3, 0.1], dtype=np.float64)), + set_test_value(pt.dvector(), np.array([0.5, 0.9], dtype=np.float64)), ], - lambda x, y: at.all(x > y), + lambda x, y: pt.all(x > y), x, y, ), ( [ - set_test_value(at.dvector(), np.array([0.3, 0.1], dtype=np.float64)), - set_test_value(at.dvector(), np.array([0.5, 0.9], dtype=np.float64)), + set_test_value(pt.dvector(), np.array([0.3, 0.1], dtype=np.float64)), + set_test_value(pt.dvector(), np.array([0.5, 0.9], dtype=np.float64)), ], - lambda x, y: at.all(x > y), + lambda x, y: pt.all(x > y), [x, 2 * x], [y, 3 * y], ), ( [ - set_test_value(at.dvector(), np.array([0.5, 0.9], dtype=np.float64)), - set_test_value(at.dvector(), np.array([0.3, 0.1], dtype=np.float64)), + set_test_value(pt.dvector(), np.array([0.5, 0.9], dtype=np.float64)), + set_test_value(pt.dvector(), np.array([0.3, 0.1], dtype=np.float64)), ], - lambda x, y: at.all(x > y), + lambda x, y: pt.all(x > y), [x, 2 * x], [y, 3 * y], ), @@ -1008,41 +1008,41 @@ def test_IfElse(inputs, cond_fn, true_vals, false_vals): @pytest.mark.xfail(reason="https://github.com/numba/numba/issues/7409") def test_config_options_parallel(): - x = at.dvector() + x = pt.dvector() with config.change_flags(numba__vectorize_target="parallel"): - pytensor_numba_fn = function([x], at.sum(x), mode=numba_mode) + pytensor_numba_fn = function([x], pt.sum(x), mode=numba_mode) numba_mul_fn = pytensor_numba_fn.vm.jit_fn.py_func.__globals__["impl_sum"] assert numba_mul_fn.targetoptions["parallel"] is True def test_config_options_fastmath(): - x = at.dvector() + x = pt.dvector() with config.change_flags(numba__fastmath=True): - pytensor_numba_fn = function([x], at.sum(x), mode=numba_mode) + pytensor_numba_fn = function([x], pt.sum(x), mode=numba_mode) print(list(pytensor_numba_fn.vm.jit_fn.py_func.__globals__.keys())) numba_mul_fn = pytensor_numba_fn.vm.jit_fn.py_func.__globals__["impl_sum"] assert numba_mul_fn.targetoptions["fastmath"] is True def test_config_options_cached(): - x = at.dvector() + x = pt.dvector() with config.change_flags(numba__cache=True): - pytensor_numba_fn = function([x], at.sum(x), mode=numba_mode) + pytensor_numba_fn = function([x], pt.sum(x), mode=numba_mode) numba_mul_fn = pytensor_numba_fn.vm.jit_fn.py_func.__globals__["impl_sum"] assert not isinstance(numba_mul_fn._cache, numba.core.caching.NullCache) with config.change_flags(numba__cache=False): - pytensor_numba_fn = function([x], at.sum(x), mode=numba_mode) + pytensor_numba_fn = function([x], pt.sum(x), mode=numba_mode) numba_mul_fn = pytensor_numba_fn.vm.jit_fn.py_func.__globals__["impl_sum"] assert isinstance(numba_mul_fn._cache, numba.core.caching.NullCache) def test_scalar_return_value_conversion(): r"""Make sure that we convert \"native\" scalars to `ndarray`\s in the graph outputs.""" - x = at.scalar(name="x") + x = pt.scalar(name="x") x_fn = function( [x], 2 * x, @@ -1052,7 +1052,7 @@ def test_scalar_return_value_conversion(): def test_OpFromGraph(): - x, y, z = at.matrices("xyz") + x, y, z = pt.matrices("xyz") ofg_1 = OpFromGraph([x, y], [x + y], inline=False) ofg_2 = 
OpFromGraph([x, y], [x * y, x - y], inline=False) diff --git a/tests/link/numba/test_elemwise.py b/tests/link/numba/test_elemwise.py index 82d45d233f..f50d585389 100644 --- a/tests/link/numba/test_elemwise.py +++ b/tests/link/numba/test_elemwise.py @@ -5,9 +5,9 @@ import scipy.special import pytensor -import pytensor.tensor as at -import pytensor.tensor.inplace as ati -import pytensor.tensor.math as aem +import pytensor.tensor as pt +import pytensor.tensor.inplace as pti +import pytensor.tensor.math as ptm from pytensor import config, function from pytensor.compile import get_mode from pytensor.compile.ops import deep_copy_op @@ -15,7 +15,7 @@ from pytensor.gradient import grad from pytensor.graph.basic import Constant from pytensor.graph.fg import FunctionGraph -from pytensor.tensor import elemwise as at_elemwise +from pytensor.tensor import elemwise as pt_elemwise from pytensor.tensor.math import All, Any, Max, Mean, Min, Prod, ProdWithoutZeros, Sum from pytensor.tensor.special import LogSoftmax, Softmax, SoftmaxGrad from tests.link.numba.test_basic import ( @@ -33,73 +33,73 @@ "inputs, input_vals, output_fn, exc", [ ( - [at.vector()], + [pt.vector()], [rng.uniform(size=100).astype(config.floatX)], - lambda x: at.gammaln(x), + lambda x: pt.gammaln(x), None, ), ( - [at.vector()], + [pt.vector()], [rng.standard_normal(100).astype(config.floatX)], - lambda x: at.sigmoid(x), + lambda x: pt.sigmoid(x), None, ), ( - [at.vector()], + [pt.vector()], [rng.standard_normal(100).astype(config.floatX)], - lambda x: at.log1mexp(x), + lambda x: pt.log1mexp(x), None, ), ( - [at.vector()], + [pt.vector()], [rng.standard_normal(100).astype(config.floatX)], - lambda x: at.erf(x), + lambda x: pt.erf(x), None, ), ( - [at.vector()], + [pt.vector()], [rng.standard_normal(100).astype(config.floatX)], - lambda x: at.erfc(x), + lambda x: pt.erfc(x), None, ), ( - [at.vector()], + [pt.vector()], [rng.standard_normal(100).astype(config.floatX)], - lambda x: at.erfcx(x), + lambda x: pt.erfcx(x), None, ), ( - [at.vector() for i in range(4)], + [pt.vector() for i in range(4)], [rng.standard_normal(100).astype(config.floatX) for i in range(4)], lambda x, y, x1, y1: (x + y) * (x1 + y1) * y, None, ), ( - [at.matrix(), at.scalar()], + [pt.matrix(), pt.scalar()], [rng.normal(size=(2, 2)).astype(config.floatX), 0.0], - lambda a, b: at.switch(a, b, a), + lambda a, b: pt.switch(a, b, a), None, ), ( - [at.scalar(), at.scalar()], + [pt.scalar(), pt.scalar()], [ np.array(1.0, dtype=config.floatX), np.array(1.0, dtype=config.floatX), ], - lambda x, y: ati.add_inplace(deep_copy_op(x), deep_copy_op(y)), + lambda x, y: pti.add_inplace(deep_copy_op(x), deep_copy_op(y)), None, ), ( - [at.vector(), at.vector()], + [pt.vector(), pt.vector()], [ rng.standard_normal(100).astype(config.floatX), rng.standard_normal(100).astype(config.floatX), ], - lambda x, y: ati.add_inplace(deep_copy_op(x), deep_copy_op(y)), + lambda x, y: pti.add_inplace(deep_copy_op(x), deep_copy_op(y)), None, ), ( - [at.vector(), at.vector()], + [pt.vector(), pt.vector()], [ rng.standard_normal(100).astype(config.floatX), rng.standard_normal(100).astype(config.floatX), @@ -127,8 +127,8 @@ def test_elemwise_runtime_broadcast(): def test_elemwise_speed(benchmark): - x = at.dmatrix("y") - y = at.dvector("z") + x = pt.dmatrix("y") + y = pt.dvector("z") out = np.exp(2 * x * y + y) @@ -151,53 +151,53 @@ def test_elemwise_speed(benchmark): # `{'drop': [], 'shuffle': [], 'augment': [0, 1]}` ( set_test_value( - at.lscalar(name="a"), + pt.lscalar(name="a"), np.array(1, 
dtype=np.int64), ), ("x", "x"), ), - # I.e. `a_at.T` + # I.e. `a_pt.T` # `{'drop': [], 'shuffle': [1, 0], 'augment': []}` ( set_test_value( - at.matrix("a"), np.array([[1.0, 2.0], [3.0, 4.0]], dtype=config.floatX) + pt.matrix("a"), np.array([[1.0, 2.0], [3.0, 4.0]], dtype=config.floatX) ), (1, 0), ), # `{'drop': [], 'shuffle': [0, 1], 'augment': [2]}` ( set_test_value( - at.matrix("a"), np.array([[1.0, 2.0], [3.0, 4.0]], dtype=config.floatX) + pt.matrix("a"), np.array([[1.0, 2.0], [3.0, 4.0]], dtype=config.floatX) ), (1, 0, "x"), ), # `{'drop': [1], 'shuffle': [2, 0], 'augment': [0, 2, 4]}` ( set_test_value( - at.tensor(dtype=config.floatX, shape=(None, 1, None), name="a"), + pt.tensor(dtype=config.floatX, shape=(None, 1, None), name="a"), np.array([[[1.0, 2.0]], [[3.0, 4.0]]], dtype=config.floatX), ), ("x", 2, "x", 0, "x"), ), - # I.e. `a_at.dimshuffle((0,))` + # I.e. `a_pt.dimshuffle((0,))` # `{'drop': [1], 'shuffle': [0], 'augment': []}` ( set_test_value( - at.tensor(dtype=config.floatX, shape=(None, 1), name="a"), + pt.tensor(dtype=config.floatX, shape=(None, 1), name="a"), np.array([[1.0], [2.0], [3.0], [4.0]], dtype=config.floatX), ), (0,), ), ( set_test_value( - at.tensor(dtype=config.floatX, shape=(None, 1), name="a"), + pt.tensor(dtype=config.floatX, shape=(None, 1), name="a"), np.array([[1.0], [2.0], [3.0], [4.0]], dtype=config.floatX), ), (0,), ), ( set_test_value( - at.tensor(dtype=config.floatX, shape=(1, 1, 1), name="a"), + pt.tensor(dtype=config.floatX, shape=(1, 1, 1), name="a"), np.array([[[1.0]]], dtype=config.floatX), ), (), @@ -205,7 +205,7 @@ def test_elemwise_speed(benchmark): ], ) def test_Dimshuffle(v, new_order): - g = at_elemwise.DimShuffle(v.broadcastable, new_order)(v) + g = pt_elemwise.DimShuffle(v.broadcastable, new_order)(v) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( g_fg, @@ -218,8 +218,8 @@ def test_Dimshuffle(v, new_order): def test_Dimshuffle_returns_array(): - x = at.vector("x", shape=(1,)) - y = 2 * at_elemwise.DimShuffle([True], [])(x) + x = pt.vector("x", shape=(1,)) + y = 2 * pt_elemwise.DimShuffle([True], [])(x) func = pytensor.function([x], y, mode="NUMBA") out = func(np.zeros(1, dtype=config.floatX)) assert out.ndim == 0 @@ -227,11 +227,11 @@ def test_Dimshuffle_returns_array(): def test_Dimshuffle_non_contiguous(): """The numba impl of reshape doesn't work with - non-contiguous arrays, make sure we work around that.""" - x = at.dvector() - idx = at.vector(dtype="int64") + non-contiguous arrays, make sure we work around that.""" + x = pt.dvector() + idx = pt.vector(dtype="int64") op = pytensor.tensor.elemwise.DimShuffle([True], []) - out = op(at.specify_shape(x[idx][::2], (1,))) + out = op(pt.specify_shape(x[idx][::2], (1,))) func = pytensor.function([x, idx], out, mode="NUMBA") assert func(np.zeros(3), np.array([1])).ndim == 0 @@ -244,28 +244,28 @@ def test_Dimshuffle_non_contiguous(): axis=axis, dtype=dtype, acc_dtype=acc_dtype )(x), 0, - set_test_value(at.vector(), np.arange(3, dtype=config.floatX)), + set_test_value(pt.vector(), np.arange(3, dtype=config.floatX)), ), ( lambda x, axis=None, dtype=None, acc_dtype=None: All(axis)(x), 0, - set_test_value(at.vector(), np.arange(3, dtype=config.floatX)), + set_test_value(pt.vector(), np.arange(3, dtype=config.floatX)), ), ( lambda x, axis=None, dtype=None, acc_dtype=None: Any(axis)(x), 0, - set_test_value(at.vector(), np.arange(3, dtype=config.floatX)), + set_test_value(pt.vector(), np.arange(3, dtype=config.floatX)), ), ( lambda x, axis=None, dtype=None, acc_dtype=None: Mean(axis)(x),
0, - set_test_value(at.vector(), np.arange(3, dtype=config.floatX)), + set_test_value(pt.vector(), np.arange(3, dtype=config.floatX)), ), ( lambda x, axis=None, dtype=None, acc_dtype=None: Mean(axis)(x), 0, set_test_value( - at.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) ), ), ( @@ -274,7 +274,7 @@ def test_Dimshuffle_non_contiguous(): )(x), 0, set_test_value( - at.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) ), ), ( @@ -283,7 +283,7 @@ def test_Dimshuffle_non_contiguous(): )(x), (0, 1), set_test_value( - at.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) ), ), ( @@ -292,7 +292,7 @@ def test_Dimshuffle_non_contiguous(): )(x), (1, 0), set_test_value( - at.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) ), ), ( @@ -301,7 +301,7 @@ def test_Dimshuffle_non_contiguous(): )(x), None, set_test_value( - at.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) ), ), ( @@ -310,7 +310,7 @@ def test_Dimshuffle_non_contiguous(): )(x), 1, set_test_value( - at.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) ), ), ( @@ -318,14 +318,14 @@ def test_Dimshuffle_non_contiguous(): axis=axis, dtype=dtype, acc_dtype=acc_dtype )(x), 0, - set_test_value(at.vector(), np.arange(3, dtype=config.floatX)), + set_test_value(pt.vector(), np.arange(3, dtype=config.floatX)), ), ( lambda x, axis=None, dtype=None, acc_dtype=None: ProdWithoutZeros( axis=axis, dtype=dtype, acc_dtype=acc_dtype )(x), 0, - set_test_value(at.vector(), np.arange(3, dtype=config.floatX)), + set_test_value(pt.vector(), np.arange(3, dtype=config.floatX)), ), ( lambda x, axis=None, dtype=None, acc_dtype=None: Prod( @@ -333,7 +333,7 @@ def test_Dimshuffle_non_contiguous(): )(x), 0, set_test_value( - at.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) ), ), ( @@ -342,35 +342,35 @@ def test_Dimshuffle_non_contiguous(): )(x), 1, set_test_value( - at.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) ), ), ( lambda x, axis=None, dtype=None, acc_dtype=None: Max(axis)(x), None, set_test_value( - at.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) ), ), ( lambda x, axis=None, dtype=None, acc_dtype=None: Max(axis)(x), None, set_test_value( - at.lmatrix(), np.arange(3 * 2, dtype=np.int64).reshape((3, 2)) + pt.lmatrix(), np.arange(3 * 2, dtype=np.int64).reshape((3, 2)) ), ), ( lambda x, axis=None, dtype=None, acc_dtype=None: Min(axis)(x), None, set_test_value( - at.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(3 * 2, dtype=config.floatX).reshape((3, 2)) ), ), ( lambda x, axis=None, dtype=None, acc_dtype=None: Min(axis)(x), None, set_test_value( - at.lmatrix(), np.arange(3 * 2, dtype=np.int64).reshape((3, 2)) + pt.lmatrix(), np.arange(3 * 2, dtype=np.int64).reshape((3, 2)) ), ), ], @@ -390,11 +390,11 @@ def test_CAReduce(careduce_fn, axis, v): def test_scalar_Elemwise_Clip(): 
- a = at.scalar("a") - b = at.scalar("b") + a = pt.scalar("a") + b = pt.scalar("b") - z = at.switch(1, a, b) - c = at.clip(z, 1, 3) + z = pt.switch(1, a, b) + c = pt.clip(z, 1, 3) c_fg = FunctionGraph(outputs=[c]) compare_numba_and_py(c_fg, [1, 1]) @@ -405,25 +405,25 @@ def test_scalar_Elemwise_Clip(): [ ( set_test_value( - at.matrix(), np.array([[1, 1, 1], [0, 0, 0]], dtype=config.floatX) + pt.matrix(), np.array([[1, 1, 1], [0, 0, 0]], dtype=config.floatX) ), - set_test_value(at.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), + set_test_value(pt.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), None, None, ), ( set_test_value( - at.matrix(), np.array([[1, 1, 1], [0, 0, 0]], dtype=config.floatX) + pt.matrix(), np.array([[1, 1, 1], [0, 0, 0]], dtype=config.floatX) ), - set_test_value(at.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), + set_test_value(pt.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), 0, None, ), ( set_test_value( - at.matrix(), np.array([[1, 1, 1], [0, 0, 0]], dtype=config.floatX) + pt.matrix(), np.array([[1, 1, 1], [0, 0, 0]], dtype=config.floatX) ), - set_test_value(at.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), + set_test_value(pt.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), 1, None, ), @@ -446,8 +446,8 @@ def test_SoftmaxGrad(dy, sm, axis, exc): def test_SoftMaxGrad_constant_dy(): - dy = at.constant(np.zeros((3,), dtype=config.floatX)) - sm = at.vector(shape=(3,)) + dy = pt.constant(np.zeros((3,), dtype=config.floatX)) + sm = pt.vector(shape=(3,)) g = SoftmaxGrad(axis=None)(dy, sm) g_fg = FunctionGraph(outputs=[g]) @@ -459,17 +459,17 @@ def test_SoftMaxGrad_constant_dy(): "x, axis, exc", [ ( - set_test_value(at.vector(), rng.random(size=(2,)).astype(config.floatX)), + set_test_value(pt.vector(), rng.random(size=(2,)).astype(config.floatX)), None, None, ), ( - set_test_value(at.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), + set_test_value(pt.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), None, None, ), ( - set_test_value(at.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), + set_test_value(pt.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), 0, None, ), @@ -495,17 +495,17 @@ def test_Softmax(x, axis, exc): "x, axis, exc", [ ( - set_test_value(at.vector(), rng.random(size=(2,)).astype(config.floatX)), + set_test_value(pt.vector(), rng.random(size=(2,)).astype(config.floatX)), None, None, ), ( - set_test_value(at.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), + set_test_value(pt.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), 0, None, ), ( - set_test_value(at.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), + set_test_value(pt.matrix(), rng.random(size=(2, 3)).astype(config.floatX)), 1, None, ), @@ -531,29 +531,29 @@ def test_LogSoftmax(x, axis, exc): "x, axes, exc", [ ( - set_test_value(at.dscalar(), np.array(0.0, dtype="float64")), + set_test_value(pt.dscalar(), np.array(0.0, dtype="float64")), [], None, ), ( - set_test_value(at.dvector(), rng.random(size=(3,)).astype("float64")), + set_test_value(pt.dvector(), rng.random(size=(3,)).astype("float64")), [0], None, ), ( - set_test_value(at.dmatrix(), rng.random(size=(3, 2)).astype("float64")), + set_test_value(pt.dmatrix(), rng.random(size=(3, 2)).astype("float64")), [0], None, ), ( - set_test_value(at.dmatrix(), rng.random(size=(3, 2)).astype("float64")), + set_test_value(pt.dmatrix(), rng.random(size=(3, 2)).astype("float64")), [0, 1], None, ), ], ) def test_MaxAndArgmax(x, axes, exc): - 
g = aem.MaxAndArgmax(axes)(x) + g = ptm.MaxAndArgmax(axes)(x) if isinstance(g, list): g_fg = FunctionGraph(outputs=g) @@ -575,10 +575,10 @@ def test_MaxAndArgmax(x, axes, exc): @pytest.mark.parametrize("size", [(10, 10), (1000, 1000), (10000, 10000)]) @pytest.mark.parametrize("axis", [0, 1]) def test_logsumexp_benchmark(size, axis, benchmark): - X = at.matrix("X") - X_max = at.max(X, axis=axis, keepdims=True) - X_max = at.switch(at.isinf(X_max), 0, X_max) - X_lse = at.log(at.sum(at.exp(X - X_max), axis=axis, keepdims=True)) + X_max + X = pt.matrix("X") + X_max = pt.max(X, axis=axis, keepdims=True) + X_max = pt.switch(pt.isinf(X_max), 0, X_max) + X_lse = pt.log(pt.sum(pt.exp(X - X_max), axis=axis, keepdims=True)) + X_max rng = np.random.default_rng(23920) X_val = rng.normal(size=size) @@ -610,7 +610,7 @@ def test_fused_elemwise_benchmark(benchmark): def test_elemwise_out_type(): # Create a graph with an elemwise # Ravel failes if the elemwise output type is reported incorrectly - x = at.matrix() + x = pt.matrix() y = (2 * x).ravel() # Pass in the input as mutable, to trigger the inplace rewrites diff --git a/tests/link/numba/test_extra_ops.py b/tests/link/numba/test_extra_ops.py index 4575aa44a9..65383ae28c 100644 --- a/tests/link/numba/test_extra_ops.py +++ b/tests/link/numba/test_extra_ops.py @@ -3,7 +3,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import config from pytensor.compile.sharedvalue import SharedVariable from pytensor.graph.basic import Constant @@ -18,7 +18,7 @@ @pytest.mark.parametrize( "val", [ - set_test_value(at.lscalar(), np.array(6, dtype="int64")), + set_test_value(pt.lscalar(), np.array(6, dtype="int64")), ], ) def test_Bartlett(val): @@ -41,56 +41,56 @@ def test_Bartlett(val): [ ( set_test_value( - at.matrix(), np.arange(3, dtype=config.floatX).reshape((3, 1)) + pt.matrix(), np.arange(3, dtype=config.floatX).reshape((3, 1)) ), 1, "add", ), ( set_test_value( - at.dtensor3(), np.arange(30, dtype=config.floatX).reshape((2, 3, 5)) + pt.dtensor3(), np.arange(30, dtype=config.floatX).reshape((2, 3, 5)) ), -1, "add", ), ( set_test_value( - at.matrix(), np.arange(6, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(6, dtype=config.floatX).reshape((3, 2)) ), 0, "add", ), ( set_test_value( - at.matrix(), np.arange(6, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(6, dtype=config.floatX).reshape((3, 2)) ), 1, "add", ), ( set_test_value( - at.matrix(), np.arange(6, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(6, dtype=config.floatX).reshape((3, 2)) ), None, "add", ), ( set_test_value( - at.matrix(), np.arange(6, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(6, dtype=config.floatX).reshape((3, 2)) ), 0, "mul", ), ( set_test_value( - at.matrix(), np.arange(6, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(6, dtype=config.floatX).reshape((3, 2)) ), 1, "mul", ), ( set_test_value( - at.matrix(), np.arange(6, dtype=config.floatX).reshape((3, 2)) + pt.matrix(), np.arange(6, dtype=config.floatX).reshape((3, 2)) ), None, "mul", @@ -115,8 +115,8 @@ def test_CumOp(val, axis, mode): "a, val", [ ( - set_test_value(at.lmatrix(), np.zeros((10, 2), dtype="int64")), - set_test_value(at.lscalar(), np.array(1, dtype="int64")), + set_test_value(pt.lmatrix(), np.zeros((10, 2), dtype="int64")), + set_test_value(pt.lscalar(), np.array(1, dtype="int64")), ) ], ) @@ -138,19 +138,19 @@ def test_FillDiagonal(a, val): "a, val, offset", [ ( - 
set_test_value(at.lmatrix(), np.zeros((10, 2), dtype="int64")), - set_test_value(at.lscalar(), np.array(1, dtype="int64")), - set_test_value(at.lscalar(), np.array(-1, dtype="int64")), + set_test_value(pt.lmatrix(), np.zeros((10, 2), dtype="int64")), + set_test_value(pt.lscalar(), np.array(1, dtype="int64")), + set_test_value(pt.lscalar(), np.array(-1, dtype="int64")), ), ( - set_test_value(at.lmatrix(), np.zeros((10, 2), dtype="int64")), - set_test_value(at.lscalar(), np.array(1, dtype="int64")), - set_test_value(at.lscalar(), np.array(0, dtype="int64")), + set_test_value(pt.lmatrix(), np.zeros((10, 2), dtype="int64")), + set_test_value(pt.lscalar(), np.array(1, dtype="int64")), + set_test_value(pt.lscalar(), np.array(0, dtype="int64")), ), ( - set_test_value(at.lmatrix(), np.zeros((10, 3), dtype="int64")), - set_test_value(at.lscalar(), np.array(1, dtype="int64")), - set_test_value(at.lscalar(), np.array(1, dtype="int64")), + set_test_value(pt.lmatrix(), np.zeros((10, 3), dtype="int64")), + set_test_value(pt.lscalar(), np.array(1, dtype="int64")), + set_test_value(pt.lscalar(), np.array(1, dtype="int64")), ), ], ) @@ -172,65 +172,65 @@ def test_FillDiagonalOffset(a, val, offset): "arr, shape, mode, order, exc", [ ( - tuple(set_test_value(at.lscalar(), v) for v in np.array([0])), - set_test_value(at.lvector(), np.array([2])), + tuple(set_test_value(pt.lscalar(), v) for v in np.array([0])), + set_test_value(pt.lvector(), np.array([2])), "raise", "C", None, ), ( - tuple(set_test_value(at.lscalar(), v) for v in np.array([0, 0, 3])), - set_test_value(at.lvector(), np.array([2, 3, 4])), + tuple(set_test_value(pt.lscalar(), v) for v in np.array([0, 0, 3])), + set_test_value(pt.lvector(), np.array([2, 3, 4])), "raise", "C", None, ), ( tuple( - set_test_value(at.lvector(), v) + set_test_value(pt.lvector(), v) for v in np.array([[0, 1], [2, 0], [1, 3]]) ), - set_test_value(at.lvector(), np.array([2, 3, 4])), + set_test_value(pt.lvector(), np.array([2, 3, 4])), "raise", "C", None, ), ( tuple( - set_test_value(at.lvector(), v) + set_test_value(pt.lvector(), v) for v in np.array([[0, 1], [2, 0], [1, 3]]) ), - set_test_value(at.lvector(), np.array([2, 3, 4])), + set_test_value(pt.lvector(), np.array([2, 3, 4])), "raise", "F", NotImplementedError, ), ( tuple( - set_test_value(at.lvector(), v) + set_test_value(pt.lvector(), v) for v in np.array([[0, 1, 2], [2, 0, 3], [1, 3, 5]]) ), - set_test_value(at.lvector(), np.array([2, 3, 4])), + set_test_value(pt.lvector(), np.array([2, 3, 4])), "raise", "C", ValueError, ), ( tuple( - set_test_value(at.lvector(), v) + set_test_value(pt.lvector(), v) for v in np.array([[0, 1, 2], [2, 0, 3], [1, 3, 5]]) ), - set_test_value(at.lvector(), np.array([2, 3, 4])), + set_test_value(pt.lvector(), np.array([2, 3, 4])), "wrap", "C", None, ), ( tuple( - set_test_value(at.lvector(), v) + set_test_value(pt.lvector(), v) for v in np.array([[0, 1, 2], [2, 0, 3], [1, 3, 5]]) ), - set_test_value(at.lvector(), np.array([2, 3, 4])), + set_test_value(pt.lvector(), np.array([2, 3, 4])), "clip", "C", None, @@ -257,26 +257,26 @@ def test_RavelMultiIndex(arr, shape, mode, order, exc): "x, repeats, axis, exc", [ ( - set_test_value(at.lscalar(), np.array(1, dtype="int64")), - set_test_value(at.lscalar(), np.array(0, dtype="int64")), + set_test_value(pt.lscalar(), np.array(1, dtype="int64")), + set_test_value(pt.lscalar(), np.array(0, dtype="int64")), None, None, ), ( - set_test_value(at.lmatrix(), np.zeros((2, 2), dtype="int64")), - set_test_value(at.lscalar(), np.array(1, dtype="int64")), 
+ set_test_value(pt.lmatrix(), np.zeros((2, 2), dtype="int64")), + set_test_value(pt.lscalar(), np.array(1, dtype="int64")), None, None, ), ( - set_test_value(at.lvector(), np.arange(2, dtype="int64")), - set_test_value(at.lvector(), np.array([1, 1], dtype="int64")), + set_test_value(pt.lvector(), np.arange(2, dtype="int64")), + set_test_value(pt.lvector(), np.array([1, 1], dtype="int64")), None, None, ), ( - set_test_value(at.lmatrix(), np.zeros((2, 2), dtype="int64")), - set_test_value(at.lscalar(), np.array(1, dtype="int64")), + set_test_value(pt.lmatrix(), np.zeros((2, 2), dtype="int64")), + set_test_value(pt.lscalar(), np.array(1, dtype="int64")), 0, UserWarning, ), @@ -302,7 +302,7 @@ def test_Repeat(x, repeats, axis, exc): "x, axis, return_index, return_inverse, return_counts, exc", [ ( - set_test_value(at.lscalar(), np.array(1, dtype="int64")), + set_test_value(pt.lscalar(), np.array(1, dtype="int64")), None, False, False, @@ -310,7 +310,7 @@ def test_Repeat(x, repeats, axis, exc): None, ), ( - set_test_value(at.lvector(), np.array([1, 1, 2], dtype="int64")), + set_test_value(pt.lvector(), np.array([1, 1, 2], dtype="int64")), None, False, False, @@ -318,7 +318,7 @@ def test_Repeat(x, repeats, axis, exc): None, ), ( - set_test_value(at.lmatrix(), np.array([[1, 1], [2, 2]], dtype="int64")), + set_test_value(pt.lmatrix(), np.array([[1, 1], [2, 2]], dtype="int64")), None, False, False, @@ -327,7 +327,7 @@ def test_Repeat(x, repeats, axis, exc): ), ( set_test_value( - at.lmatrix(), np.array([[1, 1], [1, 1], [2, 2]], dtype="int64") + pt.lmatrix(), np.array([[1, 1], [1, 1], [2, 2]], dtype="int64") ), 0, False, @@ -337,7 +337,7 @@ def test_Repeat(x, repeats, axis, exc): ), ( set_test_value( - at.lmatrix(), np.array([[1, 1], [1, 1], [2, 2]], dtype="int64") + pt.lmatrix(), np.array([[1, 1], [1, 1], [2, 2]], dtype="int64") ), 0, True, @@ -371,20 +371,20 @@ def test_Unique(x, axis, return_index, return_inverse, return_counts, exc): "arr, shape, order, exc", [ ( - set_test_value(at.lvector(), np.array([9, 15, 1], dtype="int64")), - at.as_tensor([2, 3, 4]), + set_test_value(pt.lvector(), np.array([9, 15, 1], dtype="int64")), + pt.as_tensor([2, 3, 4]), "C", None, ), ( - set_test_value(at.lvector(), np.array([1, 0], dtype="int64")), - at.as_tensor([2]), + set_test_value(pt.lvector(), np.array([1, 0], dtype="int64")), + pt.as_tensor([2]), "C", None, ), ( - set_test_value(at.lvector(), np.array([9, 15, 1], dtype="int64")), - at.as_tensor([2, 3, 4]), + set_test_value(pt.lvector(), np.array([9, 15, 1], dtype="int64")), + pt.as_tensor([2, 3, 4]), "F", NotImplementedError, ), @@ -414,19 +414,19 @@ def test_UnravelIndex(arr, shape, order, exc): "a, v, side, sorter, exc", [ ( - set_test_value(at.vector(), np.array([1.0, 2.0, 3.0], dtype=config.floatX)), - set_test_value(at.matrix(), rng.random((3, 2)).astype(config.floatX)), + set_test_value(pt.vector(), np.array([1.0, 2.0, 3.0], dtype=config.floatX)), + set_test_value(pt.matrix(), rng.random((3, 2)).astype(config.floatX)), "left", None, None, ), pytest.param( set_test_value( - at.vector(), + pt.vector(), np.array([0.29769574, 0.71649186, 0.20475563]).astype(config.floatX), ), set_test_value( - at.matrix(), + pt.matrix(), np.array( [ [0.18847123, 0.39659508], @@ -440,10 +440,10 @@ def test_UnravelIndex(arr, shape, order, exc): None, ), ( - set_test_value(at.vector(), np.array([1.0, 2.0, 3.0], dtype=config.floatX)), - set_test_value(at.matrix(), rng.random((3, 2)).astype(config.floatX)), + set_test_value(pt.vector(), np.array([1.0, 2.0, 3.0], 
dtype=config.floatX)), + set_test_value(pt.matrix(), rng.random((3, 2)).astype(config.floatX)), "right", - set_test_value(at.lvector(), np.array([0, 2, 1])), + set_test_value(pt.lvector(), np.array([0, 2, 1])), UserWarning, ), ], diff --git a/tests/link/numba/test_nlinalg.py b/tests/link/numba/test_nlinalg.py index 71a2fb7998..4732a8f3d0 100644 --- a/tests/link/numba/test_nlinalg.py +++ b/tests/link/numba/test_nlinalg.py @@ -3,7 +3,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile.sharedvalue import SharedVariable from pytensor.graph.basic import Constant from pytensor.graph.fg import FunctionGraph @@ -19,7 +19,7 @@ [ ( set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(rng.random(size=(3, 3)).astype("float64")), ), True, @@ -27,7 +27,7 @@ ), ( set_test_value( - at.lmatrix(), + pt.lmatrix(), (lambda x: x.T.dot(x))( rng.integers(1, 10, size=(3, 3)).astype("int64") ), @@ -37,7 +37,7 @@ ), ( set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(rng.random(size=(3, 3)).astype("float64")), ), False, @@ -70,21 +70,21 @@ def test_Cholesky(x, lower, exc): [ ( set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(rng.random(size=(3, 3)).astype("float64")), ), - set_test_value(at.dvector(), rng.random(size=(3,)).astype("float64")), + set_test_value(pt.dvector(), rng.random(size=(3,)).astype("float64")), "gen", None, ), ( set_test_value( - at.lmatrix(), + pt.lmatrix(), (lambda x: x.T.dot(x))( rng.integers(1, 10, size=(3, 3)).astype("int64") ), ), - set_test_value(at.dvector(), rng.random(size=(3,)).astype("float64")), + set_test_value(pt.dvector(), rng.random(size=(3,)).astype("float64")), "gen", None, ), @@ -115,14 +115,14 @@ def test_Solve(A, x, lower, exc): [ ( set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(rng.random(size=(3, 3)).astype("float64")), ), None, ), ( set_test_value( - at.lmatrix(), + pt.lmatrix(), (lambda x: x.T.dot(x))(rng.poisson(size=(3, 3)).astype("int64")), ), None, @@ -150,14 +150,14 @@ def test_Det(x, exc): [ ( set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(rng.random(size=(3, 3)).astype("float64")), ), None, ), ( set_test_value( - at.lmatrix(), + pt.lmatrix(), (lambda x: x.T.dot(x))(rng.poisson(size=(3, 3)).astype("int64")), ), None, @@ -209,21 +209,21 @@ def test_SLogDet(x, exc): [ ( set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(x), ), None, ), ( set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(y), ), None, ), ( set_test_value( - at.lmatrix(), + pt.lmatrix(), (lambda x: x.T.dot(x))( rng.integers(1, 10, size=(3, 3)).astype("int64") ), @@ -257,7 +257,7 @@ def test_Eig(x, exc): [ ( set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(rng.random(size=(3, 3)).astype("float64")), ), "L", @@ -265,7 +265,7 @@ def test_Eig(x, exc): ), ( set_test_value( - at.lmatrix(), + pt.lmatrix(), (lambda x: x.T.dot(x))( rng.integers(1, 10, size=(3, 3)).astype("int64") ), @@ -301,7 +301,7 @@ def test_Eigh(x, uplo, exc): ( nlinalg.MatrixInverse, set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(rng.random(size=(3, 3)).astype("float64")), ), None, @@ -310,7 +310,7 @@ def test_Eigh(x, uplo, exc): ( nlinalg.MatrixInverse, set_test_value( - at.lmatrix(), + pt.lmatrix(), (lambda x: x.T.dot(x))( rng.integers(1, 10, size=(3, 3)).astype("int64") ), @@ -321,7 +321,7 @@ def test_Eigh(x, uplo, exc): ( nlinalg.MatrixPinv, set_test_value( - at.dmatrix(), + 
pt.dmatrix(), (lambda x: x.T.dot(x))(rng.random(size=(3, 3)).astype("float64")), ), None, @@ -330,7 +330,7 @@ def test_Eigh(x, uplo, exc): ( nlinalg.MatrixPinv, set_test_value( - at.lmatrix(), + pt.lmatrix(), (lambda x: x.T.dot(x))( rng.integers(1, 10, size=(3, 3)).astype("int64") ), @@ -361,7 +361,7 @@ def test_matrix_inverses(op, x, exc, op_args): [ ( set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(rng.random(size=(3, 3)).astype("float64")), ), "reduced", @@ -369,7 +369,7 @@ def test_matrix_inverses(op, x, exc, op_args): ), ( set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(rng.random(size=(3, 3)).astype("float64")), ), "r", @@ -377,7 +377,7 @@ def test_matrix_inverses(op, x, exc, op_args): ), ( set_test_value( - at.lmatrix(), + pt.lmatrix(), (lambda x: x.T.dot(x))( rng.integers(1, 10, size=(3, 3)).astype("int64") ), @@ -387,7 +387,7 @@ def test_matrix_inverses(op, x, exc, op_args): ), ( set_test_value( - at.lmatrix(), + pt.lmatrix(), (lambda x: x.T.dot(x))( rng.integers(1, 10, size=(3, 3)).astype("int64") ), @@ -422,7 +422,7 @@ def test_QRFull(x, mode, exc): [ ( set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(rng.random(size=(3, 3)).astype("float64")), ), True, @@ -431,7 +431,7 @@ def test_QRFull(x, mode, exc): ), ( set_test_value( - at.dmatrix(), + pt.dmatrix(), (lambda x: x.T.dot(x))(rng.random(size=(3, 3)).astype("float64")), ), False, @@ -440,7 +440,7 @@ def test_QRFull(x, mode, exc): ), ( set_test_value( - at.lmatrix(), + pt.lmatrix(), (lambda x: x.T.dot(x))( rng.integers(1, 10, size=(3, 3)).astype("int64") ), @@ -451,7 +451,7 @@ def test_QRFull(x, mode, exc): ), ( set_test_value( - at.lmatrix(), + pt.lmatrix(), (lambda x: x.T.dot(x))( rng.integers(1, 10, size=(3, 3)).astype("int64") ), diff --git a/tests/link/numba/test_random.py b/tests/link/numba/test_random.py index de6fa5ea6f..35d9bb24af 100644 --- a/tests/link/numba/test_random.py +++ b/tests/link/numba/test_random.py @@ -4,8 +4,8 @@ import pytest import scipy.stats as stats -import pytensor.tensor as at -import pytensor.tensor.random.basic as aer +import pytensor.tensor as pt +import pytensor.tensor.random.basic as ptr from pytensor import shared from pytensor.compile.function import function from pytensor.compile.sharedvalue import SharedVariable @@ -26,259 +26,259 @@ "rv_op, dist_args, size", [ ( - aer.normal, + ptr.normal, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), ( - aer.uniform, + ptr.uniform, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), ( - aer.triangular, + ptr.triangular, [ set_test_value( - at.dscalar(), + pt.dscalar(), np.array(-5.0, dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(5.0, dtype=np.float64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), ( - aer.lognormal, + ptr.lognormal, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), pytest.param( - aer.pareto, + ptr.pareto, [ 
set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dvector(), + pt.dvector(), np.array([2.0, 10.0], dtype=np.float64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), marks=pytest.mark.xfail(reason="Not implemented"), ), ( - aer.exponential, + ptr.exponential, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), ( - aer.weibull, + ptr.weibull, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), ( - aer.logistic, + ptr.logistic, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), ( - aer.geometric, + ptr.geometric, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([0.3, 0.4], dtype=np.float64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), ( - aer.hypergeometric, + ptr.hypergeometric, [ set_test_value( - at.lscalar(), + pt.lscalar(), np.array(7, dtype=np.int64), ), set_test_value( - at.lscalar(), + pt.lscalar(), np.array(8, dtype=np.int64), ), set_test_value( - at.lscalar(), + pt.lscalar(), np.array(15, dtype=np.int64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), ( - aer.wald, + ptr.wald, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), ( - aer.laplace, + ptr.laplace, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), ( - aer.binomial, + ptr.binomial, [ set_test_value( - at.lvector(), + pt.lvector(), np.array([1, 2], dtype=np.int64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(0.9, dtype=np.float64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), ( - aer.normal, + ptr.normal, [ set_test_value( - at.lvector(), + pt.lvector(), np.array([1, 2], dtype=np.int64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], - at.as_tensor(tuple(set_test_value(at.lscalar(), v) for v in [3, 2])), + pt.as_tensor(tuple(set_test_value(pt.lscalar(), v) for v in [3, 2])), ), ( - aer.poisson, + ptr.poisson, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), ], None, ), ( - aer.halfnormal, + ptr.halfnormal, [ set_test_value( - at.lvector(), + pt.lvector(), np.array([1, 2], dtype=np.int64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], None, ), ( - aer.bernoulli, + ptr.bernoulli, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([0.1, 0.9], dtype=np.float64), ), ], None, ), ( - aer.randint, + ptr.randint, [ set_test_value( - at.lscalar(), + pt.lscalar(), np.array(0, dtype=np.int64), ), set_test_value( - at.lscalar(), + pt.lscalar(), np.array(5, dtype=np.int64), ), ], - at.as_tensor([3, 2]), + pt.as_tensor([3, 2]), ), pytest.param( - aer.multivariate_normal, + ptr.multivariate_normal, [ set_test_value( - at.dmatrix(), + pt.dmatrix(), np.array([[1, 2], [3, 4]], dtype=np.float64), ), set_test_value( - at.tensor(dtype="float64", shape=(1, None, 
None)), + pt.tensor(dtype="float64", shape=(1, None, None)), np.eye(2)[None, ...], ), ], - at.as_tensor(tuple(set_test_value(at.lscalar(), v) for v in [4, 3, 2])), + pt.as_tensor(tuple(set_test_value(pt.lscalar(), v) for v in [4, 3, 2])), marks=pytest.mark.xfail(reason="Not implemented"), ), ], @@ -304,14 +304,14 @@ def test_aligned_RandomVariable(rv_op, dist_args, size): "rv_op, dist_args, base_size, cdf_name, params_conv", [ ( - aer.beta, + ptr.beta, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], @@ -320,14 +320,14 @@ def test_aligned_RandomVariable(rv_op, dist_args, size): lambda *args: args, ), ( - aer._gamma, + ptr._gamma, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dvector(), + pt.dvector(), np.array([0.5, 3.0], dtype=np.float64), ), ], @@ -336,14 +336,14 @@ def test_aligned_RandomVariable(rv_op, dist_args, size): lambda a, b: (a, 0.0, b), ), ( - aer.cauchy, + ptr.cauchy, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], @@ -352,10 +352,10 @@ def test_aligned_RandomVariable(rv_op, dist_args, size): lambda *args: args, ), ( - aer.chisquare, + ptr.chisquare, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ) ], @@ -364,14 +364,14 @@ def test_aligned_RandomVariable(rv_op, dist_args, size): lambda *args: args, ), ( - aer.gumbel, + ptr.gumbel, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([1.0, 2.0], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], @@ -380,14 +380,14 @@ def test_aligned_RandomVariable(rv_op, dist_args, size): lambda *args: args, ), ( - aer.negative_binomial, + ptr.negative_binomial, [ set_test_value( - at.lvector(), + pt.lvector(), np.array([100, 200], dtype=np.int64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(0.09, dtype=np.float64), ), ], @@ -396,14 +396,14 @@ def test_aligned_RandomVariable(rv_op, dist_args, size): lambda *args: args, ), pytest.param( - aer.vonmises, + ptr.vonmises, [ set_test_value( - at.dvector(), + pt.dvector(), np.array([-0.5, 0.5], dtype=np.float64), ), set_test_value( - at.dscalar(), + pt.dscalar(), np.array(1.0, dtype=np.float64), ), ], @@ -448,7 +448,7 @@ def test_unaligned_RandomVariable(rv_op, dist_args, base_size, cdf_name, params_ pytest.param( [ set_test_value( - at.dvector(), + pt.dvector(), np.array([100000, 1, 1], dtype=np.float64), ), ], @@ -458,7 +458,7 @@ def test_unaligned_RandomVariable(rv_op, dist_args, base_size, cdf_name, params_ pytest.param( [ set_test_value( - at.dmatrix(), + pt.dmatrix(), np.array( [[100000, 1, 1], [1, 100000, 1], [1, 1, 100000]], dtype=np.float64, @@ -471,7 +471,7 @@ def test_unaligned_RandomVariable(rv_op, dist_args, base_size, cdf_name, params_ pytest.param( [ set_test_value( - at.dmatrix(), + pt.dmatrix(), np.array( [[100000, 1, 1]], dtype=np.float64, @@ -484,7 +484,7 @@ def test_unaligned_RandomVariable(rv_op, dist_args, base_size, cdf_name, params_ pytest.param( [ set_test_value( - at.dmatrix(), + pt.dmatrix(), np.array( [[100000, 1, 1], [1, 100000, 1], [1, 1, 100000]], dtype=np.float64, @@ -500,7 +500,7 @@ def test_unaligned_RandomVariable(rv_op, dist_args, base_size, cdf_name, params_ ) def test_CategoricalRV(dist_args, size, cm): rng = 
shared(np.random.RandomState(29402)) - g = aer.categorical(*dist_args, size=size, rng=rng) + g = ptr.categorical(*dist_args, size=size, rng=rng) g_fg = FunctionGraph(outputs=[g]) with cm: @@ -519,7 +519,7 @@ def test_CategoricalRV(dist_args, size, cm): [ pytest.param( set_test_value( - at.dvector(), + pt.dvector(), np.array([100000, 1, 1], dtype=np.float64), ), None, @@ -527,7 +527,7 @@ def test_CategoricalRV(dist_args, size, cm): ), pytest.param( set_test_value( - at.dmatrix(), + pt.dmatrix(), np.array( [[100000, 1, 1], [1, 100000, 1], [1, 1, 100000]], dtype=np.float64, @@ -538,7 +538,7 @@ def test_CategoricalRV(dist_args, size, cm): ), pytest.param( set_test_value( - at.dmatrix(), + pt.dmatrix(), np.array( [[100000, 1, 1], [1, 100000, 1], [1, 1, 100000]], dtype=np.float64, @@ -551,7 +551,7 @@ def test_CategoricalRV(dist_args, size, cm): ) def test_DirichletRV(a, size, cm): rng = shared(np.random.RandomState(29402)) - g = aer.dirichlet(a, size=size, rng=rng) + g = ptr.dirichlet(a, size=size, rng=rng) g_fn = function([a], g, mode=numba_mode) with cm: @@ -574,7 +574,7 @@ def test_RandomState_updates(): rng = shared(np.random.RandomState(1)) rng_new = shared(np.random.RandomState(2)) - x = at.random.normal(size=10, rng=rng) + x = pt.random.normal(size=10, rng=rng) res = function([], x, updates={rng: rng_new}, mode=numba_mode)() ref = np.random.RandomState(2).normal(size=10) @@ -583,7 +583,7 @@ def test_RandomState_updates(): def test_random_Generator(): rng = shared(np.random.default_rng(29402)) - g = aer.normal(rng=rng) + g = ptr.normal(rng=rng) g_fg = FunctionGraph(outputs=[g]) with pytest.raises(TypeError): diff --git a/tests/link/numba/test_scalar.py b/tests/link/numba/test_scalar.py index 7676b1bf40..cf6d63d516 100644 --- a/tests/link/numba/test_scalar.py +++ b/tests/link/numba/test_scalar.py @@ -1,9 +1,9 @@ import numpy as np import pytest -import pytensor.scalar as aes -import pytensor.scalar.basic as aesb -import pytensor.tensor as at +import pytensor.scalar as ps +import pytensor.scalar.basic as psb +import pytensor.tensor as pt from pytensor import config from pytensor.compile.sharedvalue import SharedVariable from pytensor.graph.basic import Constant @@ -20,18 +20,18 @@ "x, y", [ ( - set_test_value(at.lvector(), np.arange(4, dtype="int64")), - set_test_value(at.dvector(), np.arange(4, dtype="float64")), + set_test_value(pt.lvector(), np.arange(4, dtype="int64")), + set_test_value(pt.dvector(), np.arange(4, dtype="float64")), ), ( - set_test_value(at.dmatrix(), np.arange(4, dtype="float64").reshape((2, 2))), - set_test_value(at.lscalar(), np.array(4, dtype="int64")), + set_test_value(pt.dmatrix(), np.arange(4, dtype="float64").reshape((2, 2))), + set_test_value(pt.lscalar(), np.array(4, dtype="int64")), ), ], ) def test_Second(x, y): # We use the `Elemwise`-wrapped version of `Second` - g = at.second(x, y) + g = pt.second(x, y) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( g_fg, @@ -46,13 +46,13 @@ def test_Second(x, y): @pytest.mark.parametrize( "v, min, max", [ - (set_test_value(at.scalar(), np.array(10, dtype=config.floatX)), 3.0, 7.0), - (set_test_value(at.scalar(), np.array(1, dtype=config.floatX)), 3.0, 7.0), - (set_test_value(at.scalar(), np.array(10, dtype=config.floatX)), 7.0, 3.0), + (set_test_value(pt.scalar(), np.array(10, dtype=config.floatX)), 3.0, 7.0), + (set_test_value(pt.scalar(), np.array(1, dtype=config.floatX)), 3.0, 7.0), + (set_test_value(pt.scalar(), np.array(10, dtype=config.floatX)), 7.0, 3.0), ], ) def test_Clip(v, min, max): - g = aes.clip(v, 
min, max) + g = ps.clip(v, min, max) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( @@ -69,35 +69,35 @@ def test_Clip(v, min, max): "inputs, input_values, scalar_fn", [ ( - [at.scalar("x"), at.scalar("y"), at.scalar("z")], + [pt.scalar("x"), pt.scalar("y"), pt.scalar("z")], [ np.array(10, dtype=config.floatX), np.array(20, dtype=config.floatX), np.array(30, dtype=config.floatX), ], - lambda x, y, z: aes.add(x, y, z), + lambda x, y, z: ps.add(x, y, z), ), ( - [at.scalar("x"), at.scalar("y"), at.scalar("z")], + [pt.scalar("x"), pt.scalar("y"), pt.scalar("z")], [ np.array(10, dtype=config.floatX), np.array(20, dtype=config.floatX), np.array(30, dtype=config.floatX), ], - lambda x, y, z: aes.mul(x, y, z), + lambda x, y, z: ps.mul(x, y, z), ), ( - [at.scalar("x"), at.scalar("y")], + [pt.scalar("x"), pt.scalar("y")], [ np.array(10, dtype=config.floatX), np.array(20, dtype=config.floatX), ], - lambda x, y: x + y * 2 + aes.exp(x - y), + lambda x, y: x + y * 2 + ps.exp(x - y), ), ], ) def test_Composite(inputs, input_values, scalar_fn): - composite_inputs = [aes.ScalarType(config.floatX)(name=i.name) for i in inputs] + composite_inputs = [ps.ScalarType(config.floatX)(name=i.name) for i in inputs] comp_op = Elemwise(Composite(composite_inputs, [scalar_fn(*composite_inputs)])) out_fg = FunctionGraph(inputs, [comp_op(*inputs)]) compare_numba_and_py(out_fg, input_values) @@ -106,12 +106,12 @@ def test_Composite(inputs, input_values, scalar_fn): @pytest.mark.parametrize( "v, dtype", [ - (set_test_value(at.fscalar(), np.array(1.0, dtype="float32")), aesb.float64), - (set_test_value(at.dscalar(), np.array(1.0, dtype="float64")), aesb.float32), + (set_test_value(pt.fscalar(), np.array(1.0, dtype="float32")), psb.float64), + (set_test_value(pt.dscalar(), np.array(1.0, dtype="float64")), psb.float32), ], ) def test_Cast(v, dtype): - g = aesb.Cast(dtype)(v) + g = psb.Cast(dtype)(v) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( g_fg, @@ -126,11 +126,11 @@ def test_Cast(v, dtype): @pytest.mark.parametrize( "v, dtype", [ - (set_test_value(at.iscalar(), np.array(10, dtype="int32")), aesb.float64), + (set_test_value(pt.iscalar(), np.array(10, dtype="int32")), psb.float64), ], ) def test_reciprocal(v, dtype): - g = aesb.reciprocal(v) + g = psb.reciprocal(v) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( g_fg, diff --git a/tests/link/numba/test_scan.py b/tests/link/numba/test_scan.py index 2481fc9a12..5dcd69f57f 100644 --- a/tests/link/numba/test_scan.py +++ b/tests/link/numba/test_scan.py @@ -2,7 +2,7 @@ import pytest import pytensor -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import config, function, grad from pytensor.compile.mode import Mode, get_mode from pytensor.graph.fg import FunctionGraph @@ -23,7 +23,7 @@ # sequences ( lambda a_t: 2 * a_t, - [at.dvector("a")], + [pt.dvector("a")], [{}], [], None, @@ -33,7 +33,7 @@ ), # nit-sot ( - lambda: at.as_tensor(2.0), + lambda: pt.as_tensor(2.0), [], [{}], [], @@ -44,10 +44,10 @@ ), # nit-sot, non_seq ( - lambda c: at.as_tensor(2.0) * c, + lambda c: pt.as_tensor(2.0) * c, [], [{}], - [at.dscalar("c")], + [pt.dscalar("c")], 3, [1.0], None, @@ -57,7 +57,7 @@ ( lambda a_tm1: 2 * a_tm1, [], - [{"initial": at.as_tensor(0.0, dtype="floatX"), "taps": [-1]}], + [{"initial": pt.as_tensor(0.0, dtype="floatX"), "taps": [-1]}], [], 3, [], @@ -68,7 +68,7 @@ ( lambda a_tm1: (a_tm1 + 1, until(a_tm1 > 2)), [], - [{"initial": at.as_tensor(1, dtype=np.int64), "taps": [-1]}], + [{"initial": pt.as_tensor(1, dtype=np.int64), 
"taps": [-1]}], [], 3, [], @@ -92,7 +92,7 @@ ( lambda a_tm1: 2 * a_tm1, [], - [{"initial": at.as_tensor([0.0, 1.0], dtype="floatX"), "taps": [-2]}], + [{"initial": pt.as_tensor([0.0, 1.0], dtype="floatX"), "taps": [-2]}], [], 6, [], @@ -104,8 +104,8 @@ lambda a_tm1, b_tm1: (2 * a_tm1, 2 * b_tm1), [], [ - {"initial": at.as_tensor(0.0, dtype="floatX"), "taps": [-1]}, - {"initial": at.as_tensor(0.0, dtype="floatX"), "taps": [-1]}, + {"initial": pt.as_tensor(0.0, dtype="floatX"), "taps": [-1]}, + {"initial": pt.as_tensor(0.0, dtype="floatX"), "taps": [-1]}, ], [], 10, @@ -176,24 +176,24 @@ def test_scan_multiple_output(benchmark): """ def binomln(n, k): - return at.exp(n + 1) - at.exp(k + 1) - at.exp(n - k + 1) + return pt.exp(n + 1) - pt.exp(k + 1) - pt.exp(n - k + 1) def binom_log_prob(n, p, value): - return binomln(n, value) + value * at.exp(p) + (n - value) * at.exp(1 - p) + return binomln(n, value) + value * pt.exp(p) + (n - value) * pt.exp(1 - p) # sequences - at_C = at.ivector("C_t") - at_D = at.ivector("D_t") + pt_C = pt.ivector("C_t") + pt_D = pt.ivector("D_t") # outputs_info (initial conditions) - st0 = at.lscalar("s_t0") - et0 = at.lscalar("e_t0") - it0 = at.lscalar("i_t0") - logp_c = at.scalar("logp_c") - logp_d = at.scalar("logp_d") + st0 = pt.lscalar("s_t0") + et0 = pt.lscalar("e_t0") + it0 = pt.lscalar("i_t0") + logp_c = pt.scalar("logp_c") + logp_d = pt.scalar("logp_d") # non_sequences - beta = at.scalar("beta") - gamma = at.scalar("gamma") - delta = at.scalar("delta") + beta = pt.scalar("beta") + gamma = pt.scalar("gamma") + delta = pt.scalar("delta") def seir_one_step(ct0, dt0, st0, et0, it0, logp_c, logp_d, beta, gamma, delta): bt0 = st0 * beta @@ -209,7 +209,7 @@ def seir_one_step(ct0, dt0, st0, et0, it0, logp_c, logp_d, beta, gamma, delta): (st, et, it, logp_c_all, logp_d_all), _ = scan( fn=seir_one_step, - sequences=[at_C, at_D], + sequences=[pt_C, pt_D], outputs_info=[st0, et0, it0, logp_c, logp_d], non_sequences=[beta, gamma, delta], ) @@ -220,7 +220,7 @@ def seir_one_step(ct0, dt0, st0, et0, it0, logp_c, logp_d, beta, gamma, delta): logp_d_all.name = "D_t_logp" out_fg = FunctionGraph( - [at_C, at_D, st0, et0, it0, logp_c, logp_d, beta, gamma, delta], + [pt_C, pt_D, st0, et0, it0, logp_c, logp_d, beta, gamma, delta], [st, et, it, logp_c_all, logp_d_all], ) @@ -252,14 +252,14 @@ def seir_one_step(ct0, dt0, st0, et0, it0, logp_c, logp_d, beta, gamma, delta): @config.change_flags(compute_test_value="raise") def test_scan_tap_output(): - a_at = at.scalar("a") - a_at.tag.test_value = 10.0 + a_pt = pt.scalar("a") + a_pt.tag.test_value = 10.0 - b_at = at.arange(11).astype(config.floatX) - b_at.name = "b" + b_pt = pt.arange(11).astype(config.floatX) + b_pt.name = "b" - c_at = at.arange(20, 31, dtype=config.floatX) - c_at.name = "c" + c_pt = pt.arange(20, 31, dtype=config.floatX) + c_pt.name = "c" def input_step_fn(b, b2, c, x_tm1, y_tm1, y_tm3, a): x_tm1.name = "x_tm1" @@ -270,40 +270,40 @@ def input_step_fn(b, b2, c, x_tm1, y_tm1, y_tm3, a): x_t = x_tm1 + 1 x_t.name = "x_t" y_t.name = "y_t" - return x_t, y_t, at.fill((10,), z_t) + return x_t, y_t, pt.fill((10,), z_t) scan_res, _ = scan( fn=input_step_fn, sequences=[ { - "input": b_at, + "input": b_pt, "taps": [-1, -2], }, { - "input": c_at, + "input": c_pt, "taps": [-2], }, ], outputs_info=[ { - "initial": at.as_tensor_variable(0.0, dtype=config.floatX), + "initial": pt.as_tensor_variable(0.0, dtype=config.floatX), "taps": [-1], }, { - "initial": at.as_tensor_variable( + "initial": pt.as_tensor_variable( np.r_[-1.0, 1.3, 
0.0].astype(config.floatX) ), "taps": [-1, -3], }, None, ], - non_sequences=[a_at], + non_sequences=[a_pt], n_steps=5, name="yz_scan", strict=True, ) - out_fg = FunctionGraph([a_at, b_at, c_at], scan_res) + out_fg = FunctionGraph([a_pt, b_pt, c_pt], scan_res) test_input_vals = [ np.array(10.0).astype(config.floatX), @@ -317,10 +317,10 @@ def test_scan_while(): def power_of_2(previous_power, max_value): return previous_power * 2, until(previous_power * 2 > max_value) - max_value = at.scalar() + max_value = pt.scalar() values, _ = scan( power_of_2, - outputs_info=at.constant(1.0), + outputs_info=pt.constant(1.0), non_sequences=max_value, n_steps=1024, ) @@ -334,7 +334,7 @@ def power_of_2(previous_power, max_value): def test_scan_multiple_none_output(): - A = at.dvector("A") + A = pt.dvector("A") def power_step(prior_result, x): return prior_result * x, prior_result * x * x, prior_result * x * x * x @@ -342,7 +342,7 @@ def power_step(prior_result, x): result, _ = scan( power_step, non_sequences=[A], - outputs_info=[at.ones_like(A), None, None], + outputs_info=[pt.ones_like(A), None, None], n_steps=3, ) @@ -359,8 +359,8 @@ def test_scan_save_mem_basic(n_steps_val): def f_pow2(x_tm2, x_tm1): return 2 * x_tm1 + x_tm2 - init_x = at.dvector("init_x") - n_steps = at.iscalar("n_steps") + init_x = pt.dvector("init_x") + n_steps = pt.iscalar("n_steps") output, _ = scan( f_pow2, sequences=[], @@ -397,8 +397,8 @@ def get_sum_of_grad(inp): def test_mitmots_basic(): - init_x = at.dvector() - seq = at.dvector() + init_x = pt.dvector() + seq = pt.dvector() def inner_fct(seq, state_old, state_current): return state_old * 2 + state_current + seq diff --git a/tests/link/numba/test_tensor_basic.py b/tests/link/numba/test_tensor_basic.py index b9d6c9707b..265c98faa0 100644 --- a/tests/link/numba/test_tensor_basic.py +++ b/tests/link/numba/test_tensor_basic.py @@ -1,9 +1,9 @@ import numpy as np import pytest -import pytensor.scalar as aes -import pytensor.tensor as at -import pytensor.tensor.basic as atb +import pytensor.scalar as ps +import pytensor.tensor as pt +import pytensor.tensor.basic as ptb from pytensor import config, function from pytensor.compile import get_mode from pytensor.compile.sharedvalue import SharedVariable @@ -31,12 +31,12 @@ [ (0.0, (2, 3)), (1.1, (2, 3)), - (set_test_value(at.scalar("a"), np.array(10.0, dtype=config.floatX)), (20,)), - (set_test_value(at.vector("a"), np.ones(10, dtype=config.floatX)), (20, 10)), + (set_test_value(pt.scalar("a"), np.array(10.0, dtype=config.floatX)), (20,)), + (set_test_value(pt.vector("a"), np.ones(10, dtype=config.floatX)), (20, 10)), ], ) def test_Alloc(v, shape): - g = at.alloc(v, *shape) + g = pt.alloc(v, *shape) g_fg = FunctionGraph(outputs=[g]) _, (numba_res,) = compare_numba_and_py( @@ -56,7 +56,7 @@ def test_alloc_runtime_broadcast(): def test_AllocEmpty(): - x = at.empty((2, 3), dtype="float32") + x = pt.empty((2, 3), dtype="float32") x_fg = FunctionGraph([], [x]) # We cannot compare the values in the arrays, only the shapes and dtypes @@ -64,10 +64,10 @@ def test_AllocEmpty(): @pytest.mark.parametrize( - "v", [set_test_value(aes.float64(), np.array(1.0, dtype="float64"))] + "v", [set_test_value(ps.float64(), np.array(1.0, dtype="float64"))] ) def test_TensorFromScalar(v): - g = atb.TensorFromScalar()(v) + g = ptb.TensorFromScalar()(v) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( g_fg, @@ -82,11 +82,11 @@ def test_TensorFromScalar(v): @pytest.mark.parametrize( "v", [ - set_test_value(at.scalar(), np.array(1.0, dtype=config.floatX)), + 
set_test_value(pt.scalar(), np.array(1.0, dtype=config.floatX)), ], ) def test_ScalarFromTensor(v): - g = atb.ScalarFromTensor()(v) + g = ptb.ScalarFromTensor()(v) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( g_fg, @@ -99,7 +99,7 @@ def test_ScalarFromTensor(v): def test_Unbroadcast(): - v = set_test_value(at.row(), np.array([[1.0, 2.0]], dtype=config.floatX)) + v = set_test_value(pt.row(), np.array([[1.0, 2.0]], dtype=config.floatX)) g = Unbroadcast(0)(v) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( @@ -117,31 +117,31 @@ def test_Unbroadcast(): [ ( ( - set_test_value(at.scalar(), np.array(1, dtype=config.floatX)), - set_test_value(at.scalar(), np.array(2, dtype=config.floatX)), - set_test_value(at.scalar(), np.array(3, dtype=config.floatX)), + set_test_value(pt.scalar(), np.array(1, dtype=config.floatX)), + set_test_value(pt.scalar(), np.array(2, dtype=config.floatX)), + set_test_value(pt.scalar(), np.array(3, dtype=config.floatX)), ), config.floatX, ), ( ( - set_test_value(at.dscalar(), np.array(1, dtype=np.float64)), - set_test_value(at.lscalar(), np.array(3, dtype=np.int32)), + set_test_value(pt.dscalar(), np.array(1, dtype=np.float64)), + set_test_value(pt.lscalar(), np.array(3, dtype=np.int32)), ), "float64", ), ( - (set_test_value(at.iscalar(), np.array(1, dtype=np.int32)),), + (set_test_value(pt.iscalar(), np.array(1, dtype=np.int32)),), "float64", ), ( - (set_test_value(at.scalar(dtype=bool), True),), + (set_test_value(pt.scalar(dtype=bool), True),), bool, ), ], ) def test_MakeVector(vals, dtype): - g = atb.MakeVector(dtype)(*vals) + g = ptb.MakeVector(dtype)(*vals) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( @@ -158,15 +158,15 @@ def test_MakeVector(vals, dtype): "start, stop, step, dtype", [ ( - set_test_value(at.lscalar(), np.array(1)), - set_test_value(at.lscalar(), np.array(10)), - set_test_value(at.lscalar(), np.array(3)), + set_test_value(pt.lscalar(), np.array(1)), + set_test_value(pt.lscalar(), np.array(10)), + set_test_value(pt.lscalar(), np.array(3)), config.floatX, ), ], ) def test_ARange(start, stop, step, dtype): - g = atb.ARange(dtype)(start, stop, step) + g = ptb.ARange(dtype)(start, stop, step) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( @@ -185,10 +185,10 @@ def test_ARange(start, stop, step, dtype): ( ( set_test_value( - at.matrix(), rng.normal(size=(1, 2)).astype(config.floatX) + pt.matrix(), rng.normal(size=(1, 2)).astype(config.floatX) ), set_test_value( - at.matrix(), rng.normal(size=(1, 2)).astype(config.floatX) + pt.matrix(), rng.normal(size=(1, 2)).astype(config.floatX) ), ), 0, @@ -196,10 +196,10 @@ def test_ARange(start, stop, step, dtype): ( ( set_test_value( - at.matrix(), rng.normal(size=(2, 1)).astype(config.floatX) + pt.matrix(), rng.normal(size=(2, 1)).astype(config.floatX) ), set_test_value( - at.matrix(), rng.normal(size=(3, 1)).astype(config.floatX) + pt.matrix(), rng.normal(size=(3, 1)).astype(config.floatX) ), ), 0, @@ -207,10 +207,10 @@ def test_ARange(start, stop, step, dtype): ( ( set_test_value( - at.matrix(), rng.normal(size=(1, 2)).astype(config.floatX) + pt.matrix(), rng.normal(size=(1, 2)).astype(config.floatX) ), set_test_value( - at.matrix(), rng.normal(size=(1, 2)).astype(config.floatX) + pt.matrix(), rng.normal(size=(1, 2)).astype(config.floatX) ), ), 1, @@ -218,10 +218,10 @@ def test_ARange(start, stop, step, dtype): ( ( set_test_value( - at.matrix(), rng.normal(size=(2, 2)).astype(config.floatX) + pt.matrix(), rng.normal(size=(2, 2)).astype(config.floatX) ), set_test_value( - 
at.matrix(), rng.normal(size=(2, 1)).astype(config.floatX) + pt.matrix(), rng.normal(size=(2, 1)).astype(config.floatX) ), ), 1, @@ -229,7 +229,7 @@ def test_ARange(start, stop, step, dtype): ], ) def test_Join(vals, axis): - g = at.join(axis, *vals) + g = pt.join(axis, *vals) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( @@ -244,10 +244,10 @@ def test_Join(vals, axis): def test_Join_view(): vals = ( - set_test_value(at.matrix(), rng.normal(size=(2, 2)).astype(config.floatX)), - set_test_value(at.matrix(), rng.normal(size=(2, 2)).astype(config.floatX)), + set_test_value(pt.matrix(), rng.normal(size=(2, 2)).astype(config.floatX)), + set_test_value(pt.matrix(), rng.normal(size=(2, 2)).astype(config.floatX)), ) - g = atb.Join(view=1)(1, *vals) + g = ptb.Join(view=1)(1, *vals) g_fg = FunctionGraph(outputs=[g]) with pytest.raises(NotImplementedError): @@ -267,45 +267,45 @@ def test_Join_view(): ( 0, 0, - set_test_value(at.vector(), rng.normal(size=20).astype(config.floatX)), - set_test_value(at.vector(dtype="int64"), []), + set_test_value(pt.vector(), rng.normal(size=20).astype(config.floatX)), + set_test_value(pt.vector(dtype="int64"), []), ), ( 5, 0, - set_test_value(at.vector(), rng.normal(size=5).astype(config.floatX)), + set_test_value(pt.vector(), rng.normal(size=5).astype(config.floatX)), set_test_value( - at.vector(dtype="int64"), rng.multinomial(5, np.ones(5) / 5) + pt.vector(dtype="int64"), rng.multinomial(5, np.ones(5) / 5) ), ), ( 5, 0, - set_test_value(at.vector(), rng.normal(size=10).astype(config.floatX)), + set_test_value(pt.vector(), rng.normal(size=10).astype(config.floatX)), set_test_value( - at.vector(dtype="int64"), rng.multinomial(10, np.ones(5) / 5) + pt.vector(dtype="int64"), rng.multinomial(10, np.ones(5) / 5) ), ), ( 5, -1, - set_test_value(at.matrix(), rng.normal(size=(11, 7)).astype(config.floatX)), + set_test_value(pt.matrix(), rng.normal(size=(11, 7)).astype(config.floatX)), set_test_value( - at.vector(dtype="int64"), rng.multinomial(7, np.ones(5) / 5) + pt.vector(dtype="int64"), rng.multinomial(7, np.ones(5) / 5) ), ), ( 5, -2, - set_test_value(at.matrix(), rng.normal(size=(11, 7)).astype(config.floatX)), + set_test_value(pt.matrix(), rng.normal(size=(11, 7)).astype(config.floatX)), set_test_value( - at.vector(dtype="int64"), rng.multinomial(11, np.ones(5) / 5) + pt.vector(dtype="int64"), rng.multinomial(11, np.ones(5) / 5) ), ), ], ) def test_Split(n_splits, axis, values, sizes): - g = at.split(values, sizes, n_splits, axis=axis) + g = pt.split(values, sizes, n_splits, axis=axis) assert len(g) == n_splits if n_splits == 0: return @@ -323,10 +323,10 @@ def test_Split(n_splits, axis, values, sizes): def test_Split_view(): # https://github.com/pymc-devs/pytensor/issues/343 - x1 = at.matrix("x1") - x2 = at.matrix("x2", shape=(None, 1)) - v = at.vector("v", shape=(2,), dtype=int) - out = at.split(x1, v, n_splits=2, axis=1)[0] + x2 + x1 = pt.matrix("x1") + x2 = pt.matrix("x2", shape=(None, 1)) + v = pt.vector("v", shape=(2,), dtype=int) + out = pt.split(x1, v, n_splits=2, axis=1)[0] + x2 fn = function([x1, x2, v], out, mode="NUMBA") # Check that the addition of split[0] and x2 is not in place @@ -350,24 +350,24 @@ def test_Split_view(): [ ( set_test_value( - at.matrix(), np.arange(10 * 10, dtype=config.floatX).reshape((10, 10)) + pt.matrix(), np.arange(10 * 10, dtype=config.floatX).reshape((10, 10)) ), 0, ), ( set_test_value( - at.matrix(), np.arange(10 * 10, dtype=config.floatX).reshape((10, 10)) + pt.matrix(), np.arange(10 * 10, 
dtype=config.floatX).reshape((10, 10)) ), -1, ), ( - set_test_value(at.vector(), np.arange(10, dtype=config.floatX)), + set_test_value(pt.vector(), np.arange(10, dtype=config.floatX)), 0, ), ], ) def test_ExtractDiag(val, offset): - g = at.diag(val, offset) + g = pt.diag(val, offset) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( @@ -389,10 +389,10 @@ def test_ExtractDiag_exhaustive(k, axis1, axis2, reverse_axis): if reverse_axis: axis1, axis2 = axis2, axis1 - x = at.tensor4("x") + x = pt.tensor4("x") x_shape = (2, 3, 4, 5) x_test = np.arange(np.prod(x_shape)).reshape(x_shape) - out = at.diagonal(x, k, axis1, axis2) + out = pt.diagonal(x, k, axis1, axis2) numba_fn = numba_funcify(out.owner.op, out.owner) np.testing.assert_allclose(numba_fn(x_test), np.diagonal(x_test, k, axis1, axis2)) @@ -400,23 +400,23 @@ def test_ExtractDiag_exhaustive(k, axis1, axis2, reverse_axis): @pytest.mark.parametrize( "n, m, k, dtype", [ - (set_test_value(at.lscalar(), np.array(1, dtype=np.int64)), None, 0, None), + (set_test_value(pt.lscalar(), np.array(1, dtype=np.int64)), None, 0, None), ( - set_test_value(at.lscalar(), np.array(1, dtype=np.int64)), - set_test_value(at.lscalar(), np.array(2, dtype=np.int64)), + set_test_value(pt.lscalar(), np.array(1, dtype=np.int64)), + set_test_value(pt.lscalar(), np.array(2, dtype=np.int64)), 0, "float32", ), ( - set_test_value(at.lscalar(), np.array(1, dtype=np.int64)), - set_test_value(at.lscalar(), np.array(2, dtype=np.int64)), + set_test_value(pt.lscalar(), np.array(1, dtype=np.int64)), + set_test_value(pt.lscalar(), np.array(2, dtype=np.int64)), 1, "int64", ), ], ) def test_Eye(n, m, k, dtype): - g = at.eye(n, m, k, dtype=dtype) + g = pt.eye(n, m, k, dtype=dtype) g_fg = FunctionGraph(outputs=[g]) compare_numba_and_py( diff --git a/tests/scalar/test_basic.py b/tests/scalar/test_basic.py index 9720022f0d..24f3148acb 100644 --- a/tests/scalar/test_basic.py +++ b/tests/scalar/test_basic.py @@ -2,7 +2,7 @@ import pytest import pytensor -import pytensor.tensor as at +import pytensor.tensor as pt import tests.unittest_tools as utt from pytensor.compile.mode import Mode from pytensor.graph.fg import FunctionGraph @@ -204,7 +204,7 @@ def test_non_scalar_error(self): TypeError, match="The fgraph of Composite must be exclusively composed of scalar operations", ): - Composite([x], [(at.zeros((2,)) + x).sum()]) + Composite([x], [(pt.zeros((2,)) + x).sum()]) def test_multi_out_perform(self): from pytensor.graph.basic import Apply diff --git a/tests/scalar/test_math.py b/tests/scalar/test_math.py index 1998ed5fa5..34567d34db 100644 --- a/tests/scalar/test_math.py +++ b/tests/scalar/test_math.py @@ -4,7 +4,7 @@ import pytest import scipy.special as sp -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import function from pytensor.compile.mode import Mode from pytensor.graph import ancestors @@ -24,16 +24,16 @@ def test_gammainc_python(): - x1 = at.dscalar() - x2 = at.dscalar() + x1 = pt.dscalar() + x2 = pt.dscalar() y = gammainc(x1, x2) test_func = function([x1, x2], y, mode=Mode("py")) assert np.isclose(test_func(1, 2), sp.gammainc(1, 2)) def test_gammainc_nan_c(): - x1 = at.dscalar() - x2 = at.dscalar() + x1 = pt.dscalar() + x2 = pt.dscalar() y = gammainc(x1, x2) test_func = make_function(CLinker().accept(FunctionGraph([x1, x2], [y]))) assert np.isnan(test_func(-1, 1)) @@ -42,16 +42,16 @@ def test_gammainc_nan_c(): def test_gammaincc_python(): - x1 = at.dscalar() - x2 = at.dscalar() + x1 = pt.dscalar() + x2 = pt.dscalar() y = gammaincc(x1, x2) 
test_func = function([x1, x2], y, mode=Mode("py")) assert np.isclose(test_func(1, 2), sp.gammaincc(1, 2)) def test_gammaincc_nan_c(): - x1 = at.dscalar() - x2 = at.dscalar() + x1 = pt.dscalar() + x2 = pt.dscalar() y = gammaincc(x1, x2) test_func = make_function(CLinker().accept(FunctionGraph([x1, x2], [y]))) assert np.isnan(test_func(-1, 1)) @@ -60,8 +60,8 @@ def test_gammaincc_nan_c(): def test_gammal_nan_c(): - x1 = at.dscalar() - x2 = at.dscalar() + x1 = pt.dscalar() + x2 = pt.dscalar() y = gammal(x1, x2) test_func = make_function(CLinker().accept(FunctionGraph([x1, x2], [y]))) assert np.isnan(test_func(-1, 1)) @@ -70,8 +70,8 @@ def test_gammal_nan_c(): def test_gammau_nan_c(): - x1 = at.dscalar() - x2 = at.dscalar() + x1 = pt.dscalar() + x2 = pt.dscalar() y = gammau(x1, x2) test_func = make_function(CLinker().accept(FunctionGraph([x1, x2], [y]))) assert np.isnan(test_func(-1, 1)) @@ -80,14 +80,14 @@ def test_gammau_nan_c(): def test_betainc(): - a, b, x = at.scalars("a", "b", "x") + a, b, x = pt.scalars("a", "b", "x") res = betainc(a, b, x) test_func = function([a, b, x], res, mode=Mode("py")) assert np.isclose(test_func(15, 10, 0.7), sp.betainc(15, 10, 0.7)) def test_betainc_derivative_nan(): - a, b, x = at.scalars("a", "b", "x") + a, b, x = pt.scalars("a", "b", "x") res = betainc_grad(a, b, x, True) test_func = function([a, b, x], res, mode=Mode("py")) assert not np.isnan(test_func(1, 1, 1)) @@ -119,7 +119,7 @@ def test_scalarloop_grad_mixed_dtypes(op, scalar_loop_grads): if not wrt: continue # The ScalarLoop in the graph will fail if the input types are different from the updates - grad = at.grad(out, wrt=wrt) + grad = pt.grad(out, wrt=wrt) assert any( (var.owner and isinstance(var.owner.op, ScalarLoop)) for var in ancestors(grad) diff --git a/tests/scan/test_basic.py b/tests/scan/test_basic.py index 698b1ff0b0..968cb13dbc 100644 --- a/tests/scan/test_basic.py +++ b/tests/scan/test_basic.py @@ -19,7 +19,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile.debugmode import DebugMode from pytensor.compile.function import function from pytensor.compile.function.pfunc import rebuild_collect_shared @@ -38,9 +38,9 @@ from pytensor.scan.basic import scan from pytensor.scan.op import Scan from pytensor.scan.utils import until -from pytensor.tensor.math import all as at_all +from pytensor.tensor.math import all as pt_all from pytensor.tensor.math import dot, exp, mean, sigmoid -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import tanh from pytensor.tensor.random import normal from pytensor.tensor.random.utils import RandomStream @@ -277,7 +277,7 @@ def inner_fn(x): out, out_updates = scan( inner_fn, - outputs_info=[at.as_tensor(0.0, dtype=config.floatX), None], + outputs_info=[pt.as_tensor(0.0, dtype=config.floatX), None], n_steps=4, ) @@ -1074,7 +1074,7 @@ def detect_large_outputs(fgraph, i, node, fn): # Symbolic description of the result result, updates = scan( fn=lambda prior_result, A: prior_result * A, - outputs_info=at.ones_like(A), + outputs_info=pt.ones_like(A), non_sequences=A, n_steps=k, mode=mode, @@ -1106,7 +1106,7 @@ def test_inner_grad(self): gy.name = "gy" hy, updates = scan( lambda i, gy, x: grad(gy[i] * fc2, x), - sequences=at.arange(gy.shape[0]), + sequences=pt.arange(gy.shape[0]), non_sequences=[gy, x], ) @@ -1285,9 +1285,9 @@ def test_grad_dtype_change(self): c = iscalar("c") def inner_fn(cond, x, y): - new_cond = 
at.cast(at.switch(cond, x, y), "int32") - new_x = at.switch(cond, sigmoid(y * x), x) - new_y = at.switch(cond, y, sigmoid(x)) + new_cond = pt.cast(pt.switch(cond, x, y), "int32") + new_x = pt.switch(cond, sigmoid(y * x), x) + new_y = pt.switch(cond, y, sigmoid(x)) return new_cond, new_x, new_y values, _ = scan( @@ -1572,7 +1572,7 @@ def test_grad_multiple_outs_some_uncomputable(self): def f_rnn_cmpl(u_t, u2_t, x_tm1, W_in): trng1 = RandomStream(123) x_t = ( - at.cast(u2_t, config.floatX) + pt.cast(u2_t, config.floatX) + dot(u_t, W_in) + x_tm1 + trng1.uniform(low=-1.1, high=1.1, dtype=config.floatX) @@ -1658,7 +1658,7 @@ def f_rnn_cmpl(u_t, x_tm1, W_in): trng1 = RandomStream(123) rnd_nb = trng1.uniform(-0.1, 0.1) x_t = dot(u_t, W_in) + x_tm1 + rnd_nb - x_t = at.cast(x_t, dtype=config.floatX) + x_t = pt.cast(x_t, dtype=config.floatX) return x_t cost, updates = scan_project_sum( @@ -1713,7 +1713,7 @@ def test_grad_wrt_shared(self): x1 = shared(3.0) x1.name = "x1" x2 = vector("x2") - y, updates = scan(lambda v: at.cast(v * x1, config.floatX), sequences=x2) + y, updates = scan(lambda v: pt.cast(v * x1, config.floatX), sequences=x2) m = grad(y.sum(), x1) f = function([x2], m, allow_input_downcast=True) @@ -1726,7 +1726,7 @@ def test_inner_grad_wrt_shared(self): out, updates = scan( lambda i, v: grad(K[i], v), - sequences=at.arange(K.shape[0]), + sequences=pt.arange(K.shape[0]), non_sequences=x1, ) f = function([x1], out, allow_input_downcast=True) @@ -1844,7 +1844,7 @@ def test_grad_multiple_seqs_different_nsteps(self): c = vector("c") x = scalar("x") _max_coefficients_supported = 1000 - full_range = at.arange(_max_coefficients_supported) + full_range = pt.arange(_max_coefficients_supported) components, updates = scan( fn=lambda coeff, power, free_var: coeff * (free_var**power), outputs_info=None, @@ -1864,7 +1864,7 @@ def test_grad_of_grad_of_state(self): c = vector("c") x = scalar("x") _max_coefficients_supported = 1000 - full_range = at.arange(_max_coefficients_supported) + full_range = pt.arange(_max_coefficients_supported) components, updates = scan( fn=lambda coeff, power, free_var: coeff * (free_var**power), outputs_info=None, @@ -1972,21 +1972,21 @@ def rnn_fn(_u, _y, _W): n2o_u, _ = scan( lambda i, o, u, h0, W, eu: (grad(o[i], u) * eu).sum(), - sequences=at.arange(o.shape[0]), + sequences=pt.arange(o.shape[0]), non_sequences=[o, u, h0, W, eu], name="jacobU", ) n2o_h0, _ = scan( lambda i, o, u, h0, W, eh0: (grad(o[i], h0) * eh0).sum(), - sequences=at.arange(o.shape[0]), + sequences=pt.arange(o.shape[0]), non_sequences=[o, u, h0, W, eh0], name="jacobh", ) n2o_W, _ = scan( lambda i, o, u, h0, W, eW: (grad(o[i], W) * eW).sum(), - sequences=at.arange(o.shape[0]), + sequences=pt.arange(o.shape[0]), non_sequences=[o, u, h0, W, eW], name="jacobW", ) @@ -2056,21 +2056,21 @@ def rnn_fn(_u, _y, _W): n2o_u, _ = scan( lambda i, o, u, h0, W, eu: (grad(o[i], u) * eu).sum(), - sequences=at.arange(o.shape[0]), + sequences=pt.arange(o.shape[0]), non_sequences=[o, u, h0, W, eu], name="jacobU", ) n2o_h0, _ = scan( lambda i, o, u, h0, W, eh0: (grad(o[i], h0) * eh0).sum(), - sequences=at.arange(o.shape[0]), + sequences=pt.arange(o.shape[0]), non_sequences=[o, u, h0, W, eh0], name="jacobh", ) n2o_W, _ = scan( lambda i, o, u, h0, W, eW: (grad(o[i], W) * eW).sum(), - sequences=at.arange(o.shape[0]), + sequences=pt.arange(o.shape[0]), non_sequences=[o, u, h0, W, eW], name="jacobW", ) @@ -2123,7 +2123,7 @@ def test_R_op_mitmot(self): hidden_rec, _ = scan( lambda x, h_tm1: transfer(dot(h_tm1, W2) + x), 
sequences=hidden, - outputs_info=[at.zeros_like(hidden[0])], + outputs_info=[pt.zeros_like(hidden[0])], ) hidden_rec.reshape( @@ -2161,7 +2161,7 @@ def perform(self, node, inputs, outputs): myop = MyOp() def scan_fn(): - return myop(at.as_tensor(1)) + return myop(pt.as_tensor(1)) res, _ = scan(scan_fn, n_steps=4, mode=mode) @@ -2192,11 +2192,11 @@ def f_py(): py_res = f_py() - s_r = at.as_tensor_variable(r, dtype=config.floatX) + s_r = pt.as_tensor_variable(r, dtype=config.floatX) s_y, updates = scan( fn=lambda ri, rii, M: ri + M * rii, sequences=[s_r[1:]], - non_sequences=[at.as_tensor_variable(M, dtype=config.floatX)], + non_sequences=[pt.as_tensor_variable(M, dtype=config.floatX)], outputs_info=s_r[0], mode=Mode(linker="cvm", optimizer="fast_run"), ) @@ -2254,11 +2254,11 @@ def test_compute_test_value_grad(): W_flat.tag.test_value = WEIGHT W = W_flat.reshape((2, 2, 3)) - outputs_mi = at.as_tensor_variable(np.asarray(0, dtype="float32")) + outputs_mi = pt.as_tensor_variable(np.asarray(0, dtype="float32")) outputs_mi.tag.test_value = np.asarray(0, dtype="float32") def loss_mi(mi, sum_mi, W): - outputs_ti = at.as_tensor_variable(np.asarray(0, dtype="float32")) + outputs_ti = pt.as_tensor_variable(np.asarray(0, dtype="float32")) outputs_ti.tag.test_value = np.asarray(0, dtype="float32") def loss_ti(ti, sum_ti, mi, W): @@ -2267,7 +2267,7 @@ def loss_ti(ti, sum_ti, mi, W): result_ti, _ = scan( fn=loss_ti, outputs_info=outputs_ti, - sequences=at.arange(W.shape[1], dtype="int32"), + sequences=pt.arange(W.shape[1], dtype="int32"), non_sequences=[mi, W], ) lossmi = result_ti[-1] @@ -2276,7 +2276,7 @@ def loss_ti(ti, sum_ti, mi, W): result_mi, _ = scan( fn=loss_mi, outputs_info=outputs_mi, - sequences=at.arange(W.shape[0], dtype="int32"), + sequences=pt.arange(W.shape[0], dtype="int32"), non_sequences=[W], ) @@ -2316,7 +2316,7 @@ def test_constant_folding_n_steps(): # folding optimization step. res, _ = scan( lambda x: x * 2, - outputs_info=at.ones(()), + outputs_info=pt.ones(()), # The constant `n_steps` was causing the crash. n_steps=10, ) @@ -2342,7 +2342,7 @@ def f(x, y): def test_inconsistent_broadcast_error(): x = tensor3() - initial_x = at.constant(np.zeros((1, 10))) + initial_x = pt.constant(np.zeros((1, 10))) y, updates = scan( fn=lambda x, prev_x: x + prev_x, sequences=x, @@ -2358,7 +2358,7 @@ def test_missing_input_error(): inc = scalar("inc") def count_up(): - return at.zeros(()), {c: c + inc} + return pt.zeros(()), {c: c + inc} with pytest.raises(MissingInputError): _, updates = scan(count_up, n_steps=20) @@ -2394,7 +2394,7 @@ def tile_array(inp): X = matrix(name="x") arr = tile_array(self.seq) r, _ = scan( - lambda x, u: (x * x, until(at_all(x > u))), + lambda x, u: (x * x, until(pt_all(x > u))), sequences=X, non_sequences=[self.threshold], ) @@ -2458,21 +2458,21 @@ def accum(seq_t, prev_sum): def test_inner_get_vector_length(): """Make sure we can handle/preserve fixed shape terms when cloning the body of a `Scan`.""" - rng_at = RandomStream() + rng_pt = RandomStream() s1 = lscalar("s1") s2 = lscalar("s2") - size_at = at.as_tensor([s1, s2]) + size_pt = pt.as_tensor([s1, s2]) def scan_body(size): # `size` will be cloned and replaced with an ownerless `TensorVariable`. # This will cause `RandomVariable.infer_shape` to fail, because it expects # `get_vector_length` to work on all `size` arguments. 
- return rng_at.normal(0, 1, size=size) + return rng_pt.normal(0, 1, size=size) res, _ = scan( scan_body, - non_sequences=[size_at], + non_sequences=[size_pt], n_steps=10, strict=True, ) @@ -2484,23 +2484,23 @@ def scan_body(size): size_clone = res.owner.op.inner_inputs[1] assert size_clone.owner is None - # Make sure the cloned `size` maps to the original `size_at` + # Make sure the cloned `size` maps to the original `size_pt` inner_outer_map = res.owner.op.get_oinp_iinp_iout_oout_mappings() outer_input_idx = inner_outer_map["outer_inp_from_inner_inp"][1] original_size = res.owner.inputs[outer_input_idx] - assert original_size == size_at + assert original_size == size_pt with config.change_flags(on_opt_error="raise", on_shape_error="raise"): - res_fn = function([size_at], res.shape) + res_fn = function([size_pt], res.shape) assert np.array_equal(res_fn((1, 2)), (10, 1, 2)) # Second case has an empty size non-sequence - size_at = at.as_tensor([], dtype=np.int64) + size_pt = pt.as_tensor([], dtype=np.int64) res, _ = scan( scan_body, - non_sequences=[size_at], + non_sequences=[size_pt], n_steps=10, strict=True, ) @@ -2512,11 +2512,11 @@ def scan_body(size): assert np.array_equal(res_fn(), (10,)) # Third case has a constant size non-sequence - size_at = at.as_tensor([3], dtype=np.int64) + size_pt = pt.as_tensor([3], dtype=np.int64) res, _ = scan( scan_body, - non_sequences=[size_at], + non_sequences=[size_pt], n_steps=10, strict=True, ) @@ -2532,7 +2532,7 @@ def scan_body(size): def test_profile_info(): from pytensor.scan.utils import ScanProfileStats - z, updates = scan(fn=lambda u: u + 1, sequences=[at.arange(10)], profile=True) + z, updates = scan(fn=lambda u: u + 1, sequences=[pt.arange(10)], profile=True) assert isinstance(z.owner.op, Scan) fn = z.owner.op.fn @@ -2542,7 +2542,7 @@ def test_profile_info(): # Set the `ScanProfileStats` name z, updates = scan( - fn=lambda u: u + 1, sequences=[at.arange(10)], profile="profile_name" + fn=lambda u: u + 1, sequences=[pt.arange(10)], profile="profile_name" ) assert isinstance(z.owner.op, Scan) @@ -2553,7 +2553,7 @@ def test_profile_info(): # Use an existing profile object profile = fn.profile - z, updates = scan(fn=lambda u: u + 1, sequences=[at.arange(10)], profile=profile) + z, updates = scan(fn=lambda u: u + 1, sequences=[pt.arange(10)], profile=profile) assert isinstance(z.owner.op, Scan) fn = z.owner.op.fn @@ -2609,11 +2609,11 @@ def test_gibbs_chain(self): def f(vsample_tm1): hmean_t = sigmoid(dot(vsample_tm1, W) + bhid) - hsample_t = at.cast( + hsample_t = pt.cast( trng.binomial(1, hmean_t, size=hmean_t.shape), dtype="float32" ) vmean_t = sigmoid(dot(hsample_t, W.T) + bvis) - return at.cast( + return pt.cast( trng.binomial(1, vmean_t, size=vmean_t.shape), dtype="float32" ) @@ -2781,7 +2781,7 @@ def test_use_scan_direct_output(self): x = scalar() seq = vector() - outputs_info = [x, at.zeros_like(x)] + outputs_info = [x, pt.zeros_like(x)] (out1, out2), updates = scan( lambda a, b, c: (a + b, b + c), sequences=seq, @@ -2820,7 +2820,7 @@ def test_use_scan_direct_output2(self): x = dcol() seq = dcol() - outputs_info = [x, at.zeros_like(x)] + outputs_info = [x, pt.zeros_like(x)] (out1, out2), updates = scan( lambda a, b, c: (a + b, a + c), sequences=seq, outputs_info=outputs_info ) @@ -2859,7 +2859,7 @@ def test_same(self): ) def f(inp, mem): - i = at.join(0, inp, mem) + i = pt.join(0, inp, mem) d = dot(i, W) return d, d @@ -3064,7 +3064,7 @@ def loss_inner(sum_inner, W): result_inner, _ = scan( fn=loss_inner, - 
outputs_info=at.as_tensor_variable(np.asarray(0, dtype=np.float32)), + outputs_info=pt.as_tensor_variable(np.asarray(0, dtype=np.float32)), non_sequences=[W], n_steps=1, ) @@ -3073,7 +3073,7 @@ def loss_inner(sum_inner, W): # Also test return_list for that case. result_outer, _ = scan( fn=loss_outer, - outputs_info=at.as_tensor_variable(np.asarray(0, dtype=np.float32)), + outputs_info=pt.as_tensor_variable(np.asarray(0, dtype=np.float32)), non_sequences=[W], n_steps=n_steps, return_list=True, @@ -3110,7 +3110,7 @@ def inner_fn(x_tm1, y_tm1, z_tm1): def test_disconnected_gradient(self): v = vector("v") m = matrix("m") - u0 = at.zeros((7,)) + u0 = pt.zeros((7,)) [u, m2], _ = scan(lambda _, u: [u, v], sequences=m, outputs_info=[u0, None]) # This used to raise an exception with older versions because for a @@ -3120,7 +3120,7 @@ def test_disconnected_gradient(self): def test_disconnected_gradient2(self): v = vector("v") m = matrix("m") - u0 = at.zeros((7,)) + u0 = pt.zeros((7,)) [u, m2], _ = scan( lambda x, u: [x + u, u + v], sequences=m, outputs_info=[u0, None] @@ -3314,7 +3314,7 @@ def _active(x, pre_h): value, scan_updates = scan( _active, sequences=X, - outputs_info=[at.alloc(floatx(0.0), 1, out_size)], + outputs_info=[pt.alloc(floatx(0.0), 1, out_size)], ) cost = mean(value) gW_x = grad(cost, W_x) @@ -3351,7 +3351,7 @@ def test_outputs_info_not_typed(self): max_coefficients_supported = 10000 # Generate the components of the polynomial - full_range = at.arange(max_coefficients_supported) + full_range = pt.arange(max_coefficients_supported) components, updates = scan( fn=lambda coeff, power, free_var: coeff * (free_var**power), sequences=[coefficients, full_range], @@ -3360,7 +3360,7 @@ def test_outputs_info_not_typed(self): polynomial1 = components.sum() polynomial2, updates = scan( fn=lambda coeff, power, prev, free_var: prev + coeff * (free_var**power), - outputs_info=at.constant(0, dtype="floatX"), + outputs_info=pt.constant(0, dtype="floatX"), sequences=[coefficients, full_range], non_sequences=x, ) @@ -3405,7 +3405,7 @@ def test_crash_nonseq_grad(self): rand_stream = RandomStream() inp = matrix() - norm_inp = inp / at_sum(inp, axis=0) + norm_inp = inp / pt_sum(inp, axis=0) def unit_dropout(out_idx): def stochastic_pooling(in_idx): @@ -3416,14 +3416,14 @@ def stochastic_pooling(in_idx): return inp + sample pooled, updates_inner = scan( - fn=stochastic_pooling, sequences=at.arange(inp.shape[0]) + fn=stochastic_pooling, sequences=pt.arange(inp.shape[0]) ) # randomly add stuff to units rand_nums = rand_stream.binomial(1, 0.5, size=pooled.shape) return pooled + rand_nums, updates_inner - out, updates_outer = scan(unit_dropout, sequences=[at.arange(inp.shape[0])]) + out, updates_outer = scan(unit_dropout, sequences=[pt.arange(inp.shape[0])]) with pytest.raises(NullTypeGradError): grad(out.sum(), inp) @@ -3623,7 +3623,7 @@ def step(seq1, sitsot_m1, mitsot_m2, mitsot_m1): diff = mitsot_m1 + seq1 next_mitsot_val = mitsot_m2 + diff next_sitsot_val = sitsot_m1 - diff - nitsot_out = at.alloc( + nitsot_out = pt.alloc( np.asarray(0.0, "float32"), next_mitsot_val + next_sitsot_val ) return next_sitsot_val, next_mitsot_val, nitsot_out @@ -3887,7 +3887,7 @@ def test_grad_multiple_outs_some_disconnected_2(self): # sequences ( lambda a_t: 2 * a_t, - [at.arange(10)], + [pt.arange(10)], [{}], [], None, @@ -3895,7 +3895,7 @@ def test_grad_multiple_outs_some_disconnected_2(self): ), # nit-sot ( - lambda: at.as_tensor(2.0), + lambda: pt.as_tensor(2.0), [], [{}], [], @@ -3904,7 +3904,7 @@ def 
test_grad_multiple_outs_some_disconnected_2(self): ), # nit-sot, non_seq ( - lambda c: at.as_tensor(2.0) * c, + lambda c: pt.as_tensor(2.0) * c, [], [{}], [scalar("c", dtype="floatX")], @@ -3915,7 +3915,7 @@ def test_grad_multiple_outs_some_disconnected_2(self): ( lambda a_tm1: 2 * a_tm1, [], - [{"initial": at.as_tensor(0.0, dtype="floatX"), "taps": [-1]}], + [{"initial": pt.as_tensor(0.0, dtype="floatX"), "taps": [-1]}], [], 3, lambda op: op.info.n_sit_sot > 0, @@ -3924,7 +3924,7 @@ def test_grad_multiple_outs_some_disconnected_2(self): ( lambda a_tm1: (a_tm1 + 1, until(a_tm1 > 2)), [], - [{"initial": at.as_tensor(1, dtype=np.int64), "taps": [-1]}], + [{"initial": pt.as_tensor(1, dtype=np.int64), "taps": [-1]}], [], 3, lambda op: op.info.n_sit_sot > 0, @@ -3942,7 +3942,7 @@ def test_grad_multiple_outs_some_disconnected_2(self): ( lambda a_tm1: 2 * a_tm1, [], - [{"initial": at.as_tensor([0.0, 1.0], dtype="floatX"), "taps": [-2]}], + [{"initial": pt.as_tensor([0.0, 1.0], dtype="floatX"), "taps": [-2]}], [], 6, lambda op: op.info.n_mit_sot > 0, @@ -3952,8 +3952,8 @@ def test_grad_multiple_outs_some_disconnected_2(self): lambda a_tm1, b_tm1: (2 * a_tm1, 2 * b_tm1), [], [ - {"initial": at.as_tensor(0.0, dtype="floatX"), "taps": [-1]}, - {"initial": at.as_tensor(0.0, dtype="floatX"), "taps": [-1]}, + {"initial": pt.as_tensor(0.0, dtype="floatX"), "taps": [-1]}, + {"initial": pt.as_tensor(0.0, dtype="floatX"), "taps": [-1]}, ], [], 10, @@ -4013,7 +4013,7 @@ def fn(n): s_in_y, _ = scan( fn=lambda z: (z + 1, until(z > 2)), outputs_info=[ - {"taps": [-1], "initial": at.as_tensor(0.0, dtype=np.float64)} + {"taps": [-1], "initial": pt.as_tensor(0.0, dtype=np.float64)} ], mode=mode, n_steps=n - 1, @@ -4025,7 +4025,7 @@ def fn(n): s_y, updates = scan( fn=fn, outputs_info=[None], - sequences=[at.as_tensor([3, 2, 1], dtype=np.int64)], + sequences=[pt.as_tensor([3, 2, 1], dtype=np.int64)], mode=mode, allow_gc=False, ) diff --git a/tests/scan/test_printing.py b/tests/scan/test_printing.py index 725a48627d..42d81fbf11 100644 --- a/tests/scan/test_printing.py +++ b/tests/scan/test_printing.py @@ -2,7 +2,7 @@ import pytest import pytensor -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.configdefaults import config from pytensor.graph.fg import FunctionGraph from pytensor.printing import debugprint, pydot_imported, pydotprint @@ -17,7 +17,7 @@ def test_debugprint_sitsot(): # Symbolic description of the result result, updates = pytensor.scan( fn=lambda prior_result, A: prior_result * A, - outputs_info=at.ones_like(A), + outputs_info=pt.ones_like(A), non_sequences=A, n_steps=k, ) @@ -75,7 +75,7 @@ def test_debugprint_sitsot_no_extra_info(): # Symbolic description of the result result, updates = pytensor.scan( fn=lambda prior_result, A: prior_result * A, - outputs_info=at.ones_like(A), + outputs_info=pt.ones_like(A), non_sequences=A, n_steps=k, ) @@ -138,7 +138,7 @@ def test_debugprint_nitsot(): fn=lambda coefficient, power, free_variable: coefficient * (free_variable**power), outputs_info=None, - sequences=[coefficients, at.arange(max_coefficients_supported)], + sequences=[coefficients, pt.arange(max_coefficients_supported)], non_sequences=x, ) # Sum them up @@ -205,7 +205,7 @@ def test_debugprint_nested_scans(): def compute_A_k(A, k): result, updates = pytensor.scan( fn=lambda prior_result, A: prior_result * A, - outputs_info=at.ones_like(A), + outputs_info=pt.ones_like(A), non_sequences=A, n_steps=k, ) @@ -217,7 +217,7 @@ def compute_A_k(A, k): components, updates = pytensor.scan( 
fn=lambda c, power, some_A, some_k: c * (compute_A_k(some_A, some_k) ** power), outputs_info=None, - sequences=[c, at.arange(n)], + sequences=[c, pt.arange(n)], non_sequences=[A, k], ) final_result = components.sum() @@ -491,7 +491,7 @@ def test_debugprint_mitmot(): # Symbolic description of the result result, updates = pytensor.scan( fn=lambda prior_result, A: prior_result * A, - outputs_info=at.ones_like(A), + outputs_info=pt.ones_like(A), non_sequences=A, n_steps=k, ) @@ -620,18 +620,18 @@ def test_debugprint_mitmot(): def test_debugprint_compiled_fn(): - M = at.tensor(dtype=np.float64, shape=(20000, 2, 2)) - one = at.as_tensor(1, dtype=np.int64) - zero = at.as_tensor(0, dtype=np.int64) + M = pt.tensor(dtype=np.float64, shape=(20000, 2, 2)) + one = pt.as_tensor(1, dtype=np.int64) + zero = pt.as_tensor(0, dtype=np.int64) def no_shared_fn(n, x_tm1, M): p = M[n, x_tm1] - return at.switch(at.lt(zero, p[0]), one, zero) + return pt.switch(pt.lt(zero, p[0]), one, zero) out, updates = pytensor.scan( no_shared_fn, outputs_info=[{"initial": zero, "taps": [-1]}], - sequences=[at.arange(M.shape[0])], + sequences=[pt.arange(M.shape[0])], non_sequences=[M], allow_gc=False, mode="FAST_RUN", diff --git a/tests/scan/test_rewriting.py b/tests/scan/test_rewriting.py index 9dc6e698cf..864712a7c5 100644 --- a/tests/scan/test_rewriting.py +++ b/tests/scan/test_rewriting.py @@ -2,7 +2,7 @@ import pytest import pytensor -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import function, scan, shared from pytensor.compile.builders import OpFromGraph from pytensor.compile.io import In @@ -19,7 +19,7 @@ from pytensor.tensor.blas import Dot22 from pytensor.tensor.elemwise import Elemwise from pytensor.tensor.math import Dot, dot, sigmoid -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import tanh from pytensor.tensor.shape import reshape, shape, specify_shape from pytensor.tensor.type import ( @@ -321,14 +321,14 @@ def init_K(i, X, Y): return K.sum() beta, K_updts = scan( - init_K, sequences=at.arange(E), non_sequences=[inputs, targets] + init_K, sequences=pt.arange(E), non_sequences=[inputs, targets] ) # mean def predict_mean_i(i, x_star, s_star, X, beta, h): n, D = shape(X) # rescale every dimension by the corresponding inverse lengthscale - iL = at.diag(h[i, :D]) + iL = pt.diag(h[i, :D]) inp = (X - x_star).dot(iL) # compute the mean @@ -337,12 +337,12 @@ def predict_mean_i(i, x_star, s_star, X, beta, h): lb = (inp * t).sum() + beta.sum() - Mi = at_sum(lb) * h[i, D] + Mi = pt_sum(lb) * h[i, D] return Mi (M), M_updts = scan( predict_mean_i, - sequences=at.arange(E), + sequences=pt.arange(E), non_sequences=[x_star, s_star, inputs, beta, hyp], ) return M @@ -378,7 +378,7 @@ def predict_mean_i(i, x_star, s_star, X, beta, h): # equivalent code for the jacobian using scan dMdm, dMdm_updts = scan( lambda i, M, x: grad(M[i], x), - sequences=at.arange(M.shape[0]), + sequences=pt.arange(M.shape[0]), non_sequences=[M, x_star], ) dfdm = function([inputs, targets, x_star, s_star], [dMdm[0], dMdm[1], dMdm[2]]) @@ -397,7 +397,7 @@ def predict_mean_i(i, x_star, s_star, X, beta, h): def test_pushout_seqs2(self): x = matrix() outputs, updates = scan( - lambda x: [x * x, at.constant(0).copy().copy()], + lambda x: [x * x, pt.constant(0).copy().copy()], n_steps=2, sequences=[], non_sequences=[], @@ -585,7 +585,7 @@ def inner_func(x): out, _ = pytensor.scan(lambda: test_ofg(), n_steps=x) return out - out, _ = pytensor.scan(inner_func, 
sequences=[at.arange(1, 2)]) + out, _ = pytensor.scan(inner_func, sequences=[pt.arange(1, 2)]) _ = pytensor.function([], test_ofg()) @@ -612,7 +612,7 @@ def test_sum_dot(self): S, _ = scan( lambda x1, x2, u: u + dot(x1, x2), sequences=[A.dimshuffle(0, 1, "x"), B.dimshuffle(0, "x", 1)], - outputs_info=[at.zeros_like(A)], + outputs_info=[pt.zeros_like(A)], ) f = function([A, B], S.owner.inputs[0][-1]) rng = np.random.default_rng(utt.fetch_seed()) @@ -621,9 +621,9 @@ def test_sum_dot(self): utt.assert_allclose(f(vA, vB), np.dot(vA.T, vB)) def test_pregreedy_optimizer(self, benchmark): - W = at.zeros((5, 4)) - bv = at.zeros((5,)) - bh = at.zeros((4,)) + W = pt.zeros((5, 4)) + bv = pt.zeros((5,)) + bh = pt.zeros((4,)) v = matrix("v") (bv_t, bh_t), _ = scan( lambda _: [bv, bh], sequences=v, outputs_info=[None, None] @@ -674,7 +674,7 @@ def test_machine_translation(self): zi = tensor3("zi") zi_value = x_value - init = at.alloc(np.cast[config.floatX](0), batch_size, dim) + init = pt.alloc(np.cast[config.floatX](0), batch_size, dim) def rnn_step1( # sequences @@ -758,7 +758,7 @@ def inner_fct(seq1, seq2, seq3, previous_output): dot_output = dot(temp1, temp2) return previous_output + dot_output - init = at.as_tensor_variable(np.random.normal(size=(3, 7))) + init = pt.as_tensor_variable(np.random.normal(size=(3, 7))) # Compile the function twice, once with the optimization and once # without @@ -1011,7 +1011,7 @@ class TestScanInplaceOptimizer: def test_no_inplace(self): """Make sure the rewrite doesn't make unnecessary replacements.""" - x = at.vector("x") + x = pt.vector("x") scan_out, _ = pytensor.scan( lambda x: (x + 1) / 2 + 1, @@ -1031,7 +1031,7 @@ def test_no_inplace(self): def test_inplace_basic(self): scan_out, _ = pytensor.scan( lambda x: x + 1, - outputs_info=[at.zeros(1)], + outputs_info=[pt.zeros(1)], n_steps=3, ) @@ -1197,7 +1197,7 @@ def test_inplace3(self): ) x0 = asarrayX(np.zeros((4,))) x0[0] = vx0 - x0 = at.constant(x0) + x0 = pt.constant(x0) to_replace = outputs[0].owner.inputs[0].owner.inputs[1] outputs = clone_replace(outputs, replace=[(to_replace, x0)]) @@ -1369,7 +1369,7 @@ def f_rnn(u_t, x1_tm1, x1_tm3, x2_tm1, x3tm2, x3_tm1, x4_tm1): utt.assert_allclose(tx5, v_u[-1] + 5.0) def test_savemem_does_not_duplicate_number_of_scan_nodes(self): - var = at.ones(()) + var = pt.ones(()) values, _ = scan( lambda x: ([x], (), until(x)), outputs_info=[var], @@ -1580,7 +1580,7 @@ def test_while_scan_taps_and_map(self): def test_vector_zeros_init(self): ys, _ = pytensor.scan( fn=lambda ytm2, ytm1: ytm1 + ytm2, - outputs_info=[{"initial": at.zeros(2), "taps": range(-2, 0)}], + outputs_info=[{"initial": pt.zeros(2), "taps": range(-2, 0)}], n_steps=100, ) @@ -1610,7 +1610,7 @@ def test_inner_replace_dot(): o, _ = scan( lambda hi, him1, W: (hi, dot(hi + him1, W)), - outputs_info=[at.zeros([h.shape[1]]), None], + outputs_info=[pt.zeros([h.shape[1]]), None], sequences=[h], non_sequences=[W], mode=mode, @@ -1635,7 +1635,7 @@ def lambda_fn(h, W1, W2): o, _ = scan( lambda_fn, outputs_info=h0, - non_sequences=[W1, at.zeros_like(W2)], + non_sequences=[W1, pt.zeros_like(W2)], n_steps=5, ) @@ -1667,9 +1667,9 @@ def lambda_fn(W1, h, W2): o, _ = scan( lambda_fn, - sequences=at.zeros_like(W1), + sequences=pt.zeros_like(W1), outputs_info=h0, - non_sequences=[at.zeros_like(W2)], + non_sequences=[pt.zeros_like(W2)], n_steps=5, ) @@ -1702,9 +1702,9 @@ def lambda_fn(W1, h, W2): o, _ = scan( lambda_fn, - sequences=at.zeros_like(W1), + sequences=pt.zeros_like(W1), outputs_info=h0, - 
non_sequences=[at.zeros_like(W2)], + non_sequences=[pt.zeros_like(W2)], n_steps=5, ) diff --git a/tests/scan/test_utils.py b/tests/scan/test_utils.py index 158e626024..a26c2cbd4b 100644 --- a/tests/scan/test_utils.py +++ b/tests/scan/test_utils.py @@ -4,7 +4,7 @@ import pytest import pytensor -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.scan.utils import ScanArgs @@ -15,23 +15,23 @@ def set_pytensor_flags(): def create_test_hmm(): - srng = at.random.RandomStream() + srng = pt.random.RandomStream() - N_tt = at.iscalar("N") - N_tt.tag.test_value = 10 - M_tt = at.iscalar("M") - M_tt.tag.test_value = 2 + N_pt = pt.iscalar("N") + N_pt.tag.test_value = 10 + M_pt = pt.iscalar("M") + M_pt.tag.test_value = 2 - mus_tt = at.matrix("mus") - mus_tt.tag.test_value = np.stack( + mus_pt = pt.matrix("mus") + mus_pt.tag.test_value = np.stack( [np.arange(0.0, 10), np.arange(0.0, -10, -1)], axis=-1 ).astype(pytensor.config.floatX) - sigmas_tt = at.ones((N_tt,)) - sigmas_tt.name = "sigmas" + sigmas_pt = pt.ones((N_pt,)) + sigmas_pt.name = "sigmas" - pi_0_rv = srng.dirichlet(at.ones((M_tt,)), name="pi_0") - Gamma_rv = srng.dirichlet(at.ones((M_tt, M_tt)), name="Gamma") + pi_0_rv = srng.dirichlet(pt.ones((M_pt,)), name="pi_0") + Gamma_rv = srng.dirichlet(pt.ones((M_pt, M_pt)), name="Gamma") S_0_rv = srng.categorical(pi_0_rv, name="S_0") @@ -42,7 +42,7 @@ def scan_fn(mus_t, sigma_t, S_tm1, Gamma_t): (S_rv, Y_rv), scan_updates = pytensor.scan( fn=scan_fn, - sequences=[mus_tt, sigmas_tt], + sequences=[mus_pt, sigmas_pt], non_sequences=[Gamma_rv], outputs_info=[{"initial": S_0_rv, "taps": [-1]}, {}], strict=True, @@ -75,7 +75,7 @@ def scan_fn(mus_t, sigma_t, S_tm1, Gamma_t): def test_ScanArgs(): with pytest.raises(TypeError): - ScanArgs.from_node(at.ones(2).owner) + ScanArgs.from_node(pt.ones(2).owner) hmm_model_env = create_test_hmm() scan_args = hmm_model_env["scan_args"] @@ -134,23 +134,23 @@ def test_ScanArgs(): def test_ScanArgs_basics_mit_sot(): - srng = at.random.RandomStream() + srng = pt.random.RandomStream() - N_tt = at.iscalar("N") - N_tt.tag.test_value = 10 - M_tt = at.iscalar("M") - M_tt.tag.test_value = 2 + N_pt = pt.iscalar("N") + N_pt.tag.test_value = 10 + M_pt = pt.iscalar("M") + M_pt.tag.test_value = 2 - mus_tt = at.matrix("mus") - mus_tt.tag.test_value = np.stack( + mus_pt = pt.matrix("mus") + mus_pt.tag.test_value = np.stack( [np.arange(0.0, 10), np.arange(0.0, -10, -1)], axis=-1 ).astype(pytensor.config.floatX) - sigmas_tt = at.ones((N_tt,)) - sigmas_tt.name = "sigmas" + sigmas_pt = pt.ones((N_pt,)) + sigmas_pt.name = "sigmas" - pi_0_rv = srng.dirichlet(at.ones((M_tt,)), name="pi_0") - Gamma_rv = srng.dirichlet(at.ones((M_tt, M_tt)), name="Gamma") + pi_0_rv = srng.dirichlet(pt.ones((M_pt,)), name="pi_0") + Gamma_rv = srng.dirichlet(pt.ones((M_pt, M_pt)), name="Gamma") S_0_rv = srng.categorical(pi_0_rv, name="S_0") @@ -161,9 +161,9 @@ def scan_fn(mus_t, sigma_t, S_tm2, S_tm1, Gamma_t): (S_rv, Y_rv), scan_updates = pytensor.scan( fn=scan_fn, - sequences=[mus_tt, sigmas_tt], + sequences=[mus_pt, sigmas_pt], non_sequences=[Gamma_rv], - outputs_info=[{"initial": at.stack([S_0_rv, S_0_rv]), "taps": [-2, -1]}, {}], + outputs_info=[{"initial": pt.stack([S_0_rv, S_0_rv]), "taps": [-2, -1]}, {}], strict=True, name="scan_rv", ) @@ -187,7 +187,7 @@ def scan_fn(mus_t, sigma_t, S_tm2, S_tm1, Gamma_t): assert field_info.inner_index == 1 assert field_info.agg_index == 3 - rm_info = scan_args._remove_from_fields(at.ones(2)) + rm_info = 
scan_args._remove_from_fields(pt.ones(2)) assert rm_info is None rm_info = scan_args._remove_from_fields(test_v) diff --git a/tests/scan/test_views.py b/tests/scan/test_views.py index 35450cbe86..38c9b9cfcd 100644 --- a/tests/scan/test_views.py +++ b/tests/scan/test_views.py @@ -1,11 +1,11 @@ import numpy as np -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import config, function, grad, shared from pytensor.compile.mode import FAST_RUN from pytensor.scan.views import foldl, foldr -from pytensor.scan.views import map as at_map -from pytensor.scan.views import reduce as at_reduce +from pytensor.scan.views import map as pt_map +from pytensor.scan.views import reduce as pt_reduce from pytensor.tensor.type import scalar, vector from tests import unittest_tools as utt from tests.scan.test_basic import clone_optimized_graph, grab_scan_node @@ -14,7 +14,7 @@ def test_reduce(): v = vector("v") s = scalar("s") - result, updates = at_reduce(lambda x, y: x + y, v, s) + result, updates = pt_reduce(lambda x, y: x + y, v, s) f = function([v, s], result, updates=updates, allow_input_downcast=True) rng = np.random.default_rng(utt.fetch_seed()) @@ -24,7 +24,7 @@ def test_reduce(): def test_map(): v = vector("v") - abs_expr, abs_updates = at_map( + abs_expr, abs_updates = pt_map( lambda x: abs(x), v, [], truncate_gradient=-1, go_backwards=False ) @@ -39,10 +39,10 @@ def test_map(): def test_reduce_memory_consumption(): x = shared(np.asarray(np.random.uniform(size=(10,)), dtype=config.floatX)) - o, _ = at_reduce( + o, _ = pt_reduce( lambda v, acc: acc + v, x, - at.constant(np.asarray(0.0, dtype=config.floatX)), + pt.constant(np.asarray(0.0, dtype=config.floatX)), ) mode = FAST_RUN mode = mode.excluding("inplace") @@ -74,7 +74,7 @@ def test_foldl_memory_consumption(): o, _ = foldl( lambda v, acc: acc + v, x, - at.constant(np.asarray(0.0, dtype=config.floatX)), + pt.constant(np.asarray(0.0, dtype=config.floatX)), ) mode = FAST_RUN @@ -107,7 +107,7 @@ def test_foldr_memory_consumption(): o, _ = foldr( lambda v, acc: acc + v, x, - at.constant(np.asarray(0.0, dtype=config.floatX)), + pt.constant(np.asarray(0.0, dtype=config.floatX)), ) mode = FAST_RUN diff --git a/tests/sparse/test_basic.py b/tests/sparse/test_basic.py index 0c6d59b064..16fd5fef04 100644 --- a/tests/sparse/test_basic.py +++ b/tests/sparse/test_basic.py @@ -6,7 +6,7 @@ from packaging import version import pytensor -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import sparse from pytensor.compile.function import function from pytensor.compile.io import In, Out @@ -92,7 +92,7 @@ ) from pytensor.tensor.basic import MakeVector from pytensor.tensor.elemwise import DimShuffle, Elemwise -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.shape import Shape_i from pytensor.tensor.subtensor import ( AdvancedIncSubtensor, @@ -561,12 +561,12 @@ def test_dot_broadcast(self): (vector()[None, :], SparseTensorType("csr", "float32")()), (matrix(), SparseTensorType("csr", "float32")()), ]: - sparse_out = at.dot(x, y) + sparse_out = pt.dot(x, y) if isinstance(x, sparse.SparseVariable): x = matrix() if isinstance(y, sparse.SparseVariable): y = matrix() - dense_out = at.dot(x, y) + dense_out = pt.dot(x, y) assert dense_out.broadcastable == sparse_out.broadcastable def test_structured_dot(self): @@ -801,7 +801,7 @@ def _testSD( for mtype in _mtypes: for a in [ np.array(array1), - at.as_tensor_variable(array1), + pt.as_tensor_variable(array1), 
pytensor.shared(array1), ]: for dtype1, dtype2 in [ @@ -859,7 +859,7 @@ def _testDS( for mtype in _mtypes: for b in [ np.asarray(array2), - at.as_tensor_variable(array2), + pt.as_tensor_variable(array2), pytensor.shared(array2), ]: for dtype1, dtype2 in [ @@ -1020,13 +1020,13 @@ def test_equality_case(self): class TestConversion: def test_basic(self): test_val = np.random.random((5,)).astype(config.floatX) - a = at.as_tensor_variable(test_val) + a = pt.as_tensor_variable(test_val) s = csc_from_dense(a) val = eval_outputs([s]) assert str(val.dtype) == config.floatX assert val.format == "csc" - a = at.as_tensor_variable(test_val) + a = pt.as_tensor_variable(test_val) s = csr_from_dense(a) val = eval_outputs([s]) assert str(val.dtype) == config.floatX @@ -1035,7 +1035,7 @@ def test_basic(self): test_val = np.eye(3).astype(config.floatX) a = sp.sparse.csr_matrix(test_val) s = as_sparse_or_tensor_variable(a) - res = at.as_tensor_variable(s) + res = pt.as_tensor_variable(s) assert isinstance(res, SparseConstant) a = sp.sparse.csr_matrix(test_val) @@ -1043,7 +1043,7 @@ def test_basic(self): from pytensor.tensor.exceptions import NotScalarConstantError with pytest.raises(NotScalarConstantError): - at.get_underlying_scalar_constant_value(s, only_process_constants=True) + pt.get_underlying_scalar_constant_value(s, only_process_constants=True) # TODO: # def test_sparse_as_tensor_variable(self): @@ -1207,10 +1207,10 @@ def test_csm_unsorted(self): assert not a.has_sorted_indices def my_op(x): - y = at.constant(a.indices) - z = at.constant(a.indptr) - s = at.constant(a.shape) - return at_sum(dense_from_sparse(CSM(format)(x, y, z, s) * a)) + y = pt.constant(a.indices) + z = pt.constant(a.indptr) + s = pt.constant(a.shape) + return pt_sum(dense_from_sparse(CSM(format)(x, y, z, s) * a)) verify_grad_sparse(my_op, [a.data]) @@ -1374,7 +1374,7 @@ def test_dot_sparse_sparse(self): for sparse_format_b in ["csc", "csr", "bsr"]: a = SparseTensorType(sparse_format_a, dtype=sparse_dtype)() b = SparseTensorType(sparse_format_b, dtype=sparse_dtype)() - d = at.dot(a, b) + d = pt.dot(a, b) f = pytensor.function([a, b], Out(d, borrow=True)) for M, N, K, nnz in [ (4, 3, 2, 3), @@ -1396,7 +1396,7 @@ def test_csc_correct_output_faster_than_scipy(self): a = SparseTensorType("csc", dtype=sparse_dtype)() b = matrix(dtype=dense_dtype) - d = at.dot(a, b) + d = pt.dot(a, b) f = pytensor.function([a, b], Out(d, borrow=True)) for M, N, K, nnz in [ @@ -1443,7 +1443,7 @@ def test_csr_correct_output_faster_than_scipy(self): a = SparseTensorType("csr", dtype=sparse_dtype)() b = matrix(dtype=dense_dtype) - d = at.dot(a, b) + d = pt.dot(a, b) f = pytensor.function([a, b], d) for M, N, K, nnz in [ @@ -1598,8 +1598,8 @@ def test_int32_dtype(self): I = matrix("I", dtype=intX) fI = I.flatten() - data = at.ones_like(fI) - indptr = at.arange(data.shape[0] + 1, dtype="int32") + data = pt.ones_like(fI) + indptr = pt.arange(data.shape[0] + 1, dtype="int32") m1 = sparse.CSR(data, fI, indptr, (8, size)) m2 = sparse.dot(m1, C) @@ -1614,28 +1614,28 @@ def test_int32_dtype(self): def test_tensor_dot_types(self): x = sparse.csc_matrix("x") - x_d = at.matrix("x_d") + x_d = pt.matrix("x_d") y = sparse.csc_matrix("y") - res = at.dot(x, y) + res = pt.dot(x, y) op_types = {type(n.op) for n in applys_between([x, y], [res])} assert sparse.basic.StructuredDot in op_types - assert at.math.Dot not in op_types + assert pt.math.Dot not in op_types - res = at.dot(x_d, y) + res = pt.dot(x_d, y) op_types = {type(n.op) for n in applys_between([x, y], [res])} assert 
sparse.basic.StructuredDot in op_types - assert at.math.Dot not in op_types + assert pt.math.Dot not in op_types - res = at.dot(x, x_d) + res = pt.dot(x, x_d) op_types = {type(n.op) for n in applys_between([x, y], [res])} assert sparse.basic.StructuredDot in op_types - assert at.math.Dot not in op_types + assert pt.math.Dot not in op_types - res = at.dot(at.second(1, x), y) + res = pt.dot(pt.second(1, x), y) op_types = {type(n.op) for n in applys_between([x, y], [res])} assert sparse.basic.StructuredDot in op_types - assert at.math.Dot not in op_types + assert pt.math.Dot not in op_types def test_csr_dense_grad(self): # shortcut: testing csc in float32, testing csr in float64 diff --git a/tests/sparse/test_rewriting.py b/tests/sparse/test_rewriting.py index 2c500d91d5..2ceb904213 100644 --- a/tests/sparse/test_rewriting.py +++ b/tests/sparse/test_rewriting.py @@ -8,7 +8,7 @@ from pytensor.configdefaults import config from pytensor.sparse.rewriting import SamplingDotCSR, sd_csc from pytensor.tensor.basic import as_tensor_variable -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.type import ivector, matrix, vector from tests import unittest_tools as utt from tests.sparse.test_basic import random_lil @@ -53,7 +53,7 @@ def test_local_csm_grad_c(): (sparse.CSC, sp.sparse.csc_matrix), (sparse.CSR, sp.sparse.csr_matrix), ]: - cost = at_sum(sparse.DenseFromSparse()(CS(data, indices, indptr, shape))) + cost = pt_sum(sparse.DenseFromSparse()(CS(data, indices, indptr, shape))) f = pytensor.function( [data, indices, indptr, shape], pytensor.grad(cost, data), mode=mode ) diff --git a/tests/sparse/test_var.py b/tests/sparse/test_var.py index 92a9701a45..4c897184ee 100644 --- a/tests/sparse/test_var.py +++ b/tests/sparse/test_var.py @@ -6,7 +6,7 @@ import pytensor import pytensor.sparse as sparse -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.sparse.type import SparseTensorType from pytensor.tensor.type import DenseTensorType @@ -62,7 +62,7 @@ class TestSparseVariable: ("nonzero", DenseTensorType, ExitStack(), None), ("nonzero_values", DenseTensorType, None, None), ("argsort", DenseTensorType, ExitStack(), None), - ("conj", SparseTensorType, ExitStack(), at.cmatrix("x")), + ("conj", SparseTensorType, ExitStack(), pt.cmatrix("x")), ("round", DenseTensorType, None, None), ("trace", DenseTensorType, None, None), ("zeros_like", SparseTensorType, ExitStack(), None), @@ -76,7 +76,7 @@ class TestSparseVariable: ) def test_unary(self, method, exp_type, cm, x): if x is None: - x = at.dmatrix("x") + x = pt.dmatrix("x") x = sparse.csr_from_dense(x) @@ -134,8 +134,8 @@ def test_unary(self, method, exp_type, cm, x): ], ) def test_binary(self, method, exp_type): - x = at.lmatrix("x") - y = at.lmatrix("y") + x = pt.lmatrix("x") + y = pt.lmatrix("y") x = sparse.csr_from_dense(x) y = sparse.csr_from_dense(y) @@ -172,7 +172,7 @@ def test_binary(self, method, exp_type): assert all(isinstance(out, exp_res_type) for out in res_outs) def test_reshape(self): - x = at.dmatrix("x") + x = pt.dmatrix("x") x = sparse.csr_from_dense(x) with pytest.warns(UserWarning, match=".*converted to dense.*"): @@ -185,7 +185,7 @@ def test_reshape(self): assert isinstance(exp_res, np.ndarray) def test_dimshuffle(self): - x = at.dmatrix("x") + x = pt.dmatrix("x") x = sparse.csr_from_dense(x) with pytest.warns(UserWarning, match=".*converted to dense.*"): @@ -198,7 +198,7 @@ def test_dimshuffle(self): assert isinstance(exp_res, np.ndarray) def 
test_getitem(self): - x = at.dmatrix("x") + x = pt.dmatrix("x") x = sparse.csr_from_dense(x) z = x[:, :2] @@ -209,8 +209,8 @@ def test_getitem(self): assert isinstance(exp_res, csr_matrix) def test_dot(self): - x = at.lmatrix("x") - y = at.lmatrix("y") + x = pt.lmatrix("x") + y = pt.lmatrix("y") x = sparse.csr_from_dense(x) y = sparse.csr_from_dense(y) @@ -225,7 +225,7 @@ def test_dot(self): assert isinstance(exp_res, csr_matrix) def test_repeat(self): - x = at.dmatrix("x") + x = pt.dmatrix("x") x = sparse.csr_from_dense(x) with pytest.warns(UserWarning, match=".*converted to dense.*"): diff --git a/tests/tensor/conv/test_abstract_conv.py b/tests/tensor/conv/test_abstract_conv.py index 3208f63129..5f6e2afc92 100644 --- a/tests/tensor/conv/test_abstract_conv.py +++ b/tests/tensor/conv/test_abstract_conv.py @@ -2,7 +2,7 @@ import pytest import pytensor -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.compile.mode import Mode from pytensor.configdefaults import config from pytensor.graph.rewriting.basic import check_stack_trace @@ -1219,15 +1219,15 @@ def test_constant_shapes(): # Check that the `imshp` and `kshp` parameters of the AbstractConv Ops # are rejected if not constant or None dummy_t4 = ftensor4() - alloc_dummy_t4 = at.zeros((3, 5, 7, 11), dtype="float32") + alloc_dummy_t4 = pt.zeros((3, 5, 7, 11), dtype="float32") dummy_shape = lvector() - dummy_one_shape = at.ones(4, dtype="int64") - constant_vec_shape = at.constant([3, 5, 7, 11]) + dummy_one_shape = pt.ones(4, dtype="int64") + constant_vec_shape = pt.constant([3, 5, 7, 11]) tuple_shape = (3, 5, 7, 11) list_shape = list(tuple_shape) - constant_list_shape = [at.constant(i, dtype="int64") for i in tuple_shape] + constant_list_shape = [pt.constant(i, dtype="int64") for i in tuple_shape] constant_tuple_shape = tuple(constant_list_shape) bad_shapes = ( @@ -1677,7 +1677,7 @@ def test_fractional_bilinear_upsampling_shape(self): x = np.random.random((1, 1, 200, 200)).astype(config.floatX) resize = (24, 20) z = bilinear_upsampling( - at.as_tensor_variable(x), frac_ratio=resize, use_1D_kernel=False + pt.as_tensor_variable(x), frac_ratio=resize, use_1D_kernel=False ) out = pytensor.function([], z.shape, mode="FAST_RUN")() utt.assert_allclose(out, (1, 1, 240, 240)) @@ -1704,8 +1704,8 @@ def test_interface(self): output = pytensor.function( inputs=[], outputs=conv2d_transpose( - input=at.ones((2, 2, 4, 4)), - filters=at.ones((2, 1, 4, 4)), + input=pt.ones((2, 2, 4, 4)), + filters=pt.ones((2, 1, 4, 4)), output_shape=(2, 1, 10, 10), input_dilation=(2, 2), ), @@ -1980,7 +1980,7 @@ def test_gradweights(self): num_groups=groups, ) grouped_conv_output = grouped_convgrad_op( - img_sym, top_sym, at.as_tensor_variable(kshp[-self.convdim :]) + img_sym, top_sym, pt.as_tensor_variable(kshp[-self.convdim :]) ) grouped_func = pytensor.function( [img_sym, top_sym], grouped_conv_output, mode=self.mode @@ -2014,7 +2014,7 @@ def conv_gradweight(inputs_val, output_val): return grouped_convgrad_op( inputs_val, output_val, - at.as_tensor_variable(kshp[-self.convdim :]), + pt.as_tensor_variable(kshp[-self.convdim :]), ) utt.verify_grad(conv_gradweight, [img, top], mode=self.mode, eps=1) @@ -2041,7 +2041,7 @@ def test_gradinputs(self): num_groups=groups, ) grouped_conv_output = grouped_convgrad_op( - kern_sym, top_sym, at.as_tensor_variable(imshp[-self.convdim :]) + kern_sym, top_sym, pt.as_tensor_variable(imshp[-self.convdim :]) ) grouped_func = pytensor.function( [kern_sym, top_sym], grouped_conv_output, mode=self.mode @@ -2075,7 +2075,7 
@@ def conv_gradinputs(filters_val, output_val): return grouped_convgrad_op( filters_val, output_val, - at.as_tensor_variable(imshp[-self.convdim :]), + pt.as_tensor_variable(imshp[-self.convdim :]), ) utt.verify_grad(conv_gradinputs, [kern, top], mode=self.mode, eps=1) @@ -2437,7 +2437,7 @@ def test_gradweight(self): unshared=True, ) unshared_out_sym = unshared_conv_op( - img_sym, top_sym, at.as_tensor_variable(kshp[-2:]) + img_sym, top_sym, pt.as_tensor_variable(kshp[-2:]) ) unshared_func = pytensor.function( [img_sym, top_sym], unshared_out_sym, mode=self.mode @@ -2458,7 +2458,7 @@ def test_gradweight(self): unshared=False, ) ref_out_sym = ref_conv_op( - img_sym, top_sym, at.as_tensor_variable(single_kshp[-2:]) + img_sym, top_sym, pt.as_tensor_variable(single_kshp[-2:]) ) ref_func = pytensor.function( [img_sym, top_sym], ref_out_sym, mode=self.mode @@ -2473,7 +2473,7 @@ def test_gradweight(self): def conv_gradweight(inputs_val, output_val): return unshared_conv_op( - inputs_val, output_val, at.as_tensor_variable(kshp[-2:]) + inputs_val, output_val, pt.as_tensor_variable(kshp[-2:]) ) if verify: @@ -2507,7 +2507,7 @@ def test_gradinput(self): unshared=True, ) unshared_out_sym = unshared_conv_op( - kern_sym, top_sym, at.as_tensor_variable(imshp[-2:]) + kern_sym, top_sym, pt.as_tensor_variable(imshp[-2:]) ) unshared_func = pytensor.function( [kern_sym, top_sym], unshared_out_sym, mode=self.mode @@ -2526,7 +2526,7 @@ def test_gradinput(self): unshared=False, ) ref_out_sym = ref_conv_op( - ref_kern_sym, top_sym, at.as_tensor_variable(imshp[-2:]) + ref_kern_sym, top_sym, pt.as_tensor_variable(imshp[-2:]) ) ref_func = pytensor.function( [ref_kern_sym, top_sym], ref_out_sym, mode=self.mode @@ -2545,7 +2545,7 @@ def test_gradinput(self): def conv_gradinputs(filters_val, output_val): return unshared_conv_op( - filters_val, output_val, at.as_tensor_variable(imshp[-2:]) + filters_val, output_val, pt.as_tensor_variable(imshp[-2:]) ) if verify: @@ -2669,7 +2669,7 @@ def test_gradweight(self): def conv_gradweight(inputs_val, output_val): return asymmetric_conv_op( - inputs_val, output_val, at.as_tensor_variable(kshp[-2:]) + inputs_val, output_val, pt.as_tensor_variable(kshp[-2:]) ) utt.verify_grad(conv_gradweight, [img, top], mode=self.mode, eps=1) @@ -2723,7 +2723,7 @@ def test_gradinput(self): def conv_gradinputs(filters_val, output_val): return asymmetric_conv_op( - filters_val, output_val, at.as_tensor_variable(imshp[-2:]) + filters_val, output_val, pt.as_tensor_variable(imshp[-2:]) ) utt.verify_grad(conv_gradinputs, [kern, top], mode=self.mode, eps=1) diff --git a/tests/tensor/random/rewriting/test_basic.py b/tests/tensor/random/rewriting/test_basic.py index 337cd6cf91..441f17534e 100644 --- a/tests/tensor/random/rewriting/test_basic.py +++ b/tests/tensor/random/rewriting/test_basic.py @@ -1,7 +1,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import config, shared from pytensor.compile.function import function from pytensor.compile.mode import Mode @@ -37,26 +37,26 @@ def apply_local_rewrite_to_rv( rewrite, op_fn, dist_op, dist_params, size, rng, name=None ): - dist_params_at = [] + dist_params_pt = [] for i, p in enumerate(dist_params): - p_at = at.as_tensor(p).type(f"p_{i}") - p_at.tag.test_value = p - dist_params_at.append(p_at) + p_pt = pt.as_tensor(p).type(f"p_{i}") + p_pt.tag.test_value = p + dist_params_pt.append(p_pt) - size_at = [] + size_pt = [] for s in size: # To test DimShuffle with dropping dims we need that size 
dimension to be constant if s == 1: - s_at = constant(np.array(1, dtype="int32")) + s_pt = constant(np.array(1, dtype="int32")) else: - s_at = iscalar() - s_at.tag.test_value = s - size_at.append(s_at) + s_pt = iscalar() + s_pt.tag.test_value = s + size_pt.append(s_pt) - dist_st = op_fn(dist_op(*dist_params_at, size=size_at, rng=rng, name=name)) + dist_st = op_fn(dist_op(*dist_params_pt, size=size_pt, rng=rng, name=name)) f_inputs = [ - p for p in dist_params_at + size_at if not isinstance(p, (slice, Constant)) + p for p in dist_params_pt + size_pt if not isinstance(p, (slice, Constant)) ] mode = Mode( @@ -202,7 +202,7 @@ def test_local_rv_size_lift(dist_op, dist_params, size): rng, ) - assert at.get_vector_length(new_out.owner.inputs[1]) == 0 + assert pt.get_vector_length(new_out.owner.inputs[1]) == 0 @pytest.mark.parametrize( @@ -796,16 +796,16 @@ def test_Subtensor_lift(indices, lifted, dist_op, dist_params, size): rng = shared(np.random.default_rng(1233532), borrow=False) - indices_at = () + indices_pt = () for i in indices: - i_at = as_index_constant(i) - if not isinstance(i_at, slice): - i_at.tag.test_value = i - indices_at += (i_at,) + i_pt = as_index_constant(i) + if not isinstance(i_pt, slice): + i_pt.tag.test_value = i + indices_pt += (i_pt,) new_out, f_inputs, dist_st, f_rewritten = apply_local_rewrite_to_rv( local_subtensor_rv_lift, - lambda rv: rv[indices_at], + lambda rv: rv[indices_pt], dist_op, dist_params, size, @@ -843,7 +843,7 @@ def test_Subtensor_lift_restrictions(): std = vector("std") std.tag.test_value = np.array([1e-5, 2e-5, 3e-5], dtype=config.floatX) - x = normal(at.arange(2), at.ones(2), rng=rng) + x = normal(pt.arange(2), pt.ones(2), rng=rng) y = x[1] # The non-`Subtensor` client depends on the RNG state, so we can't perform # the lift @@ -857,7 +857,7 @@ def test_Subtensor_lift_restrictions(): assert isinstance(subtensor_node.op, Subtensor) assert subtensor_node.inputs[0].owner.op == normal - z = at.ones(x.shape) - x[1] + z = pt.ones(x.shape) - x[1] # We add `x` as an output to make sure that `is_rv_used_in_graph` handles # `"output"` "nodes" correctly. @@ -881,7 +881,7 @@ def test_Subtensor_lift_restrictions(): def test_Dimshuffle_lift_restrictions(): rng = shared(np.random.default_rng(1233532), borrow=False) - x = normal(at.arange(2).reshape((2,)), 100, size=(2, 2, 2), rng=rng) + x = normal(pt.arange(2).reshape((2,)), 100, size=(2, 2, 2), rng=rng) y = x.dimshuffle(1, 0, 2) # The non-`Dimshuffle` client depends on the RNG state, so we can't # perform the lift @@ -897,7 +897,7 @@ def test_Dimshuffle_lift_restrictions(): assert isinstance(dimshuffle_node.op, DimShuffle) assert dimshuffle_node.inputs[0].owner.op == normal - z = at.ones(x.shape) - y + z = pt.ones(x.shape) - y # We add `x` as an output to make sure that `is_rv_used_in_graph` handles # `"output"` "nodes" correctly. 
diff --git a/tests/tensor/random/test_basic.py b/tests/tensor/random/test_basic.py index 10da939d07..ee3ca6c13d 100644 --- a/tests/tensor/random/test_basic.py +++ b/tests/tensor/random/test_basic.py @@ -6,7 +6,7 @@ import pytest import scipy.stats as stats -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import function, shared from pytensor.compile.mode import Mode from pytensor.compile.sharedvalue import SharedVariable @@ -109,11 +109,11 @@ def test_fn(*args, random_state=None, **kwargs): for k, v in kwargs.items() } - at_rng = shared(rng, borrow=True) + pt_rng = shared(rng, borrow=True) numpy_res = np.asarray(test_fn(*param_vals, random_state=copy(rng), **kwargs_vals)) - pytensor_res = rv(*params, rng=at_rng, **kwargs) + pytensor_res = rv(*params, rng=pt_rng, **kwargs) assert pytensor_res.type.numpy_dtype.kind == numpy_res.dtype.kind @@ -200,31 +200,31 @@ def test_beta_samples(a, b, size): compare_sample_values(beta, a, b, size=size) -M_at = iscalar("M") -M_at.tag.test_value = 3 -sd_at = scalar("sd") -sd_at.tag.test_value = np.array(1.0, dtype=config.floatX) +M_pt = iscalar("M") +M_pt.tag.test_value = 3 +sd_pt = scalar("sd") +sd_pt.tag.test_value = np.array(1.0, dtype=config.floatX) @pytest.mark.parametrize( "M, sd, size", [ - (at.as_tensor_variable(np.array(1.0, dtype=config.floatX)), sd_at, ()), + (pt.as_tensor_variable(np.array(1.0, dtype=config.floatX)), sd_pt, ()), ( - at.as_tensor_variable(np.array(1.0, dtype=config.floatX)), - sd_at, - (M_at,), + pt.as_tensor_variable(np.array(1.0, dtype=config.floatX)), + sd_pt, + (M_pt,), ), ( - at.as_tensor_variable(np.array(1.0, dtype=config.floatX)), - sd_at, - (2, M_at), + pt.as_tensor_variable(np.array(1.0, dtype=config.floatX)), + sd_pt, + (2, M_pt), ), - (at.zeros((M_at,)), sd_at, ()), - (at.zeros((M_at,)), sd_at, (M_at,)), - (at.zeros((M_at,)), sd_at, (2, M_at)), - (at.zeros((M_at,)), at.ones((M_at,)), ()), - (at.zeros((M_at,)), at.ones((M_at,)), (2, M_at)), + (pt.zeros((M_pt,)), sd_pt, ()), + (pt.zeros((M_pt,)), sd_pt, (M_pt,)), + (pt.zeros((M_pt,)), sd_pt, (2, M_pt)), + (pt.zeros((M_pt,)), pt.ones((M_pt,)), ()), + (pt.zeros((M_pt,)), pt.ones((M_pt,)), (2, M_pt)), ( create_pytensor_param( np.array([[-1, 20], [300, -4000]], dtype=config.floatX) @@ -250,7 +250,7 @@ def test_normal_infer_shape(M, sd, size): if not isinstance(i, (Constant, SharedVariable)) ] pytensor_fn = function( - fn_inputs, [at.as_tensor(o) for o in rv_shape + [rv]], mode=py_mode + fn_inputs, [pt.as_tensor(o) for o in rv_shape + [rv]], mode=py_mode ) *rv_shape_val, rv_val = pytensor_fn( @@ -266,12 +266,12 @@ def test_normal_infer_shape(M, sd, size): @config.change_flags(compute_test_value="raise") def test_normal_ShapeFeature(): - M_at = iscalar("M") - M_at.tag.test_value = 3 - sd_at = scalar("sd") - sd_at.tag.test_value = np.array(1.0, dtype=config.floatX) + M_pt = iscalar("M") + M_pt.tag.test_value = 3 + sd_pt = scalar("sd") + sd_pt.tag.test_value = np.array(1.0, dtype=config.floatX) - d_rv = normal(at.ones((M_at,)), sd_at, size=(2, M_at)) + d_rv = normal(pt.ones((M_pt,)), sd_pt, size=(2, M_pt)) d_rv.tag.test_value fg = FunctionGraph( @@ -609,10 +609,10 @@ def test_mvnormal_default_args(): @config.change_flags(compute_test_value="raise") def test_mvnormal_ShapeFeature(): - M_at = iscalar("M") - M_at.tag.test_value = 2 + M_pt = iscalar("M") + M_pt.tag.test_value = 2 - d_rv = multivariate_normal(at.ones((M_at,)), at.eye(M_at), size=2) + d_rv = multivariate_normal(pt.ones((M_pt,)), pt.eye(M_pt), size=2) fg = FunctionGraph( [i for i in 
graph_inputs([d_rv]) if not isinstance(i, Constant)], @@ -624,7 +624,7 @@ def test_mvnormal_ShapeFeature(): s1, s2 = fg.shape_feature.shape_of[d_rv] assert get_test_value(s1) == 2 - assert M_at in graph_inputs([s2]) + assert M_pt in graph_inputs([s2]) # Test broadcasted shapes mean = tensor(dtype=config.floatX, shape=(1, None)) @@ -632,7 +632,7 @@ def test_mvnormal_ShapeFeature(): test_covar = np.diag(np.array([1, 10, 100], dtype=config.floatX)) test_covar = np.stack([test_covar, test_covar * 10.0]) - cov = at.as_tensor(test_covar).type() + cov = pt.as_tensor(test_covar).type() cov.tag.test_value = test_covar d_rv = multivariate_normal(mean, cov, size=[2, 3, 2]) @@ -689,19 +689,19 @@ def test_dirichlet_rng(): dirichlet.rng_fn(None, np.broadcast_to(alphas, (1, 3, 3)), size=(3,)) -M_at = iscalar("M") -M_at.tag.test_value = 3 +M_pt = iscalar("M") +M_pt.tag.test_value = 3 @pytest.mark.parametrize( "M, size", [ - (at.ones((M_at,)), ()), - (at.ones((M_at,)), (M_at + 1,)), - (at.ones((M_at,)), (2, M_at)), - (at.ones((M_at, M_at + 1)), ()), - (at.ones((M_at, M_at + 1)), (M_at + 2, M_at)), - (at.ones((M_at, M_at + 1)), (2, M_at + 2, M_at + 3, M_at)), + (pt.ones((M_pt,)), ()), + (pt.ones((M_pt,)), (M_pt + 1,)), + (pt.ones((M_pt,)), (2, M_pt)), + (pt.ones((M_pt, M_pt + 1)), ()), + (pt.ones((M_pt, M_pt + 1)), (M_pt + 2, M_pt)), + (pt.ones((M_pt, M_pt + 1)), (2, M_pt + 2, M_pt + 3, M_pt)), ], ) def test_dirichlet_infer_shape(M, size): @@ -715,7 +715,7 @@ def test_dirichlet_infer_shape(M, size): if not isinstance(i, (Constant, SharedVariable)) ] pytensor_fn = function( - fn_inputs, [at.as_tensor(o) for o in rv_shape + [rv]], mode=py_mode + fn_inputs, [pt.as_tensor(o) for o in rv_shape + [rv]], mode=py_mode ) *rv_shape_val, rv_val = pytensor_fn( @@ -732,12 +732,12 @@ def test_dirichlet_infer_shape(M, size): @config.change_flags(compute_test_value="raise") def test_dirichlet_ShapeFeature(): """Make sure `RandomVariable.infer_shape` works with `ShapeFeature`.""" - M_at = iscalar("M") - M_at.tag.test_value = 2 - N_at = iscalar("N") - N_at.tag.test_value = 3 + M_pt = iscalar("M") + M_pt.tag.test_value = 2 + N_pt = iscalar("N") + N_pt.tag.test_value = 3 - d_rv = dirichlet(at.ones((M_at, N_at)), name="Gamma") + d_rv = dirichlet(pt.ones((M_pt, N_pt)), name="Gamma") fg = FunctionGraph( outputs=[d_rv], @@ -747,8 +747,8 @@ def test_dirichlet_ShapeFeature(): s1, s2 = fg.shape_feature.shape_of[d_rv] - assert M_at in graph_inputs([s1]) - assert N_at in graph_inputs([s2]) + assert M_pt in graph_inputs([s1]) + assert N_pt in graph_inputs([s2]) @pytest.mark.parametrize( @@ -1347,12 +1347,12 @@ def test_randint_samples(): compare_sample_values(randint, [0, 1, 2], 5, rng=rng) compare_sample_values(randint, [0, 1, 2], 5, size=[3, 3], rng=rng) compare_sample_values(randint, [0], [5], size=[1], rng=rng) - compare_sample_values(randint, at.as_tensor_variable([-1]), [1], size=[1], rng=rng) + compare_sample_values(randint, pt.as_tensor_variable([-1]), [1], size=[1], rng=rng) compare_sample_values( randint, - at.as_tensor_variable([-1]), + pt.as_tensor_variable([-1]), [1], - size=at.as_tensor_variable([1]), + size=pt.as_tensor_variable([1]), rng=rng, ) @@ -1368,12 +1368,12 @@ def test_integers_samples(): compare_sample_values(integers, [0, 1, 2], 5, rng=rng) compare_sample_values(integers, [0, 1, 2], 5, size=[3, 3], rng=rng) compare_sample_values(integers, [0], [5], size=[1], rng=rng) - compare_sample_values(integers, at.as_tensor_variable([-1]), [1], size=[1], rng=rng) + compare_sample_values(integers, 
pt.as_tensor_variable([-1]), [1], size=[1], rng=rng) compare_sample_values( integers, - at.as_tensor_variable([-1]), + pt.as_tensor_variable([-1]), [1], - size=at.as_tensor_variable([1]), + size=pt.as_tensor_variable([1]), rng=rng, ) @@ -1393,7 +1393,7 @@ def test_choice_samples(): compare_sample_values(choice, [1, 2, 3], 1) compare_sample_values( - choice, [1, 2, 3], 1, p=at.as_tensor([1 / 3.0, 1 / 3.0, 1 / 3.0]) + choice, [1, 2, 3], 1, p=pt.as_tensor([1 / 3.0, 1 / 3.0, 1 / 3.0]) ) # p must be 1-dimensional. @@ -1401,11 +1401,11 @@ def test_choice_samples(): # time in some situations using static shape analysis. with pytest.raises(ValueError): rng = np.random.default_rng() - rng_at = shared(rng, borrow=True) - choice(a=[1, 2], p=at.as_tensor([[0.1, 0.9], [0.3, 0.7]]), rng=rng_at).eval() + rng_pt = shared(rng, borrow=True) + choice(a=[1, 2], p=pt.as_tensor([[0.1, 0.9], [0.3, 0.7]]), rng=rng_pt).eval() compare_sample_values(choice, [1, 2, 3], (10, 2), replace=True) - compare_sample_values(choice, at.as_tensor_variable([1, 2, 3]), 2, replace=True) + compare_sample_values(choice, pt.as_tensor_variable([1, 2, 3]), 2, replace=True) def test_choice_infer_shape(): diff --git a/tests/tensor/random/test_op.py b/tests/tensor/random/test_op.py index 63661bd177..604dede988 100644 --- a/tests/tensor/random/test_op.py +++ b/tests/tensor/random/test_op.py @@ -1,7 +1,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import config, function from pytensor.gradient import NullTypeGradError, grad from pytensor.graph.replace import vectorize_node @@ -80,8 +80,8 @@ def test_RandomVariable_basics(): rv.make_node(rng=1) # `RandomVariable._infer_shape` should handle no parameters - rv_shape = rv._infer_shape(at.constant([]), (), []) - assert rv_shape.equals(at.constant([], dtype="int64")) + rv_shape = rv._infer_shape(pt.constant([]), (), []) + assert rv_shape.equals(pt.constant([], dtype="int64")) # Integer-specified `dtype` dtype_1 = all_dtypes[1] @@ -114,28 +114,28 @@ def test_RandomVariable_bcast(): res = rv(mu, sd, size=(s1, s2, s3)) assert res.broadcastable == (False,) * 3 - size = at.as_tensor((1, 2, 3), dtype=np.int32).astype(np.int64) + size = pt.as_tensor((1, 2, 3), dtype=np.int32).astype(np.int64) res = rv(mu, sd, size=size) assert res.broadcastable == (True, False, False) - res = rv(0, 1, size=at.as_tensor(1, dtype=np.int64)) + res = rv(0, 1, size=pt.as_tensor(1, dtype=np.int64)) assert res.broadcastable == (True,) - res = rv(0, 1, size=(at.as_tensor(1, dtype=np.int32), s3)) + res = rv(0, 1, size=(pt.as_tensor(1, dtype=np.int32), s3)) assert res.broadcastable == (True, False) def test_RandomVariable_bcast_specify_shape(): rv = RandomVariable("normal", 0, [0, 0], config.floatX, inplace=True) - s1 = at.as_tensor(1, dtype=np.int64) + s1 = pt.as_tensor(1, dtype=np.int64) s2 = iscalar() s2.tag.test_value = 2 s3 = iscalar() s3.tag.test_value = 3 s3 = Assert("testing")(s3, eq(s1, 1)) - size = specify_shape(at.as_tensor([s1, s3, s2, s2, s1]), (5,)) + size = specify_shape(pt.as_tensor([s1, s3, s2, s2, s1]), (5,)) mu = tensor(dtype=config.floatX, shape=(None, None, 1)) mu.tag.test_value = np.random.normal(size=(2, 2, 1)).astype(config.floatX) @@ -173,7 +173,7 @@ def test_RandomVariable_floatX(): ], ) def test_random_maker_op(seed, maker_op, numpy_res): - seed = at.as_tensor_variable(seed) + seed = pt.as_tensor_variable(seed) z = function(inputs=[], outputs=[maker_op(seed)])() aes_res = z[0] assert maker_op.random_type.values_eq(aes_res, numpy_res) diff 
--git a/tests/tensor/rewriting/test_basic.py b/tests/tensor/rewriting/test_basic.py index cd5d3cc255..c62398e36f 100644 --- a/tests/tensor/rewriting/test_basic.py +++ b/tests/tensor/rewriting/test_basic.py @@ -4,8 +4,8 @@ import pytest import pytensor -import pytensor.scalar as aes -import pytensor.tensor as at +import pytensor.scalar as ps +import pytensor.tensor as pt from pytensor import shared from pytensor.compile import optdb from pytensor.compile.function import function @@ -51,9 +51,9 @@ mul, neq, ) -from pytensor.tensor.math import pow as at_pow +from pytensor.tensor.math import pow as pt_pow from pytensor.tensor.math import softplus, sqrt, sub -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import true_div from pytensor.tensor.rewriting.basic import ( assert_op, @@ -196,42 +196,42 @@ def test_local_useless_fill(): z_ = (np.random.random((5,)) * 5).astype("int64") # basic case - f = function([x], at.fill(x, x) * 2, mode=rewrite_mode) + f = function([x], pt.fill(x, x) * 2, mode=rewrite_mode) assert [node.op for node in f.maker.fgraph.toposort()] == [mul] res = f(x_) exp_res = np.broadcast_to(x_, x_.shape) * 2 assert np.array_equal(res, exp_res) # basic case - f = function([x, y], at.second(y, x) * 2, mode=rewrite_mode) + f = function([x, y], pt.second(y, x) * 2, mode=rewrite_mode) assert [node.op for node in f.maker.fgraph.toposort()] == [mul] res = f(x_, y_) exp_res = np.broadcast_to(x_, y_.shape) * 2 assert np.array_equal(res, exp_res) # basic case - f = function([x, y], at.fill(x, y) * 2, mode=rewrite_mode) + f = function([x, y], pt.fill(x, y) * 2, mode=rewrite_mode) assert [node.op for node in f.maker.fgraph.toposort()] == [mul] res = f(x_, y_) exp_res = np.broadcast_to(y_, x_.shape) * 2 assert np.array_equal(res, exp_res) # now with different type(cast) - f = function([x, z], at.fill(z, x) * 2, mode=rewrite_mode) + f = function([x, z], pt.fill(z, x) * 2, mode=rewrite_mode) assert [node.op for node in f.maker.fgraph.toposort()] == [mul] res = f(x_, z_) exp_res = np.broadcast_to(x_, z_.shape) * 2 assert np.array_equal(res, exp_res) # now with different type(cast) - f = function([x, z], at.fill(x, z) * 2, mode=rewrite_mode) + f = function([x, z], pt.fill(x, z) * 2, mode=rewrite_mode) assert [node.op for node in f.maker.fgraph.toposort()] == [mul] res = f(x_, z_) exp_res = np.broadcast_to(z_, x_.shape) * 2 assert np.array_equal(res, exp_res) # now cutting out the input ?? - f = function([x, y], at.fill(x, y) * 2, mode=rewrite_mode) + f = function([x, y], pt.fill(x, y) * 2, mode=rewrite_mode) assert [node.op for node in f.maker.fgraph.toposort()] == [mul] res = f(x_, y_) exp_res = np.broadcast_to(y_, x_.shape) * 2 @@ -245,7 +245,7 @@ def test_local_fill_to_alloc(): x_ = np.random.random((5,)) m_ = np.random.random((5, 5)) - y = at.fill(m, x) + y = pt.fill(m, x) mode = rewrite_mode.including("stabilize", "local_fill_to_alloc").excluding( "useless", "local_useless_fill" @@ -258,7 +258,7 @@ def test_local_fill_to_alloc(): exp_res = np.broadcast_to(x_, m_.shape) assert np.array_equal(res, exp_res) - y = at.fill(x, m) + y = pt.fill(x, m) f = function([m, x], y, mode=mode) @@ -276,7 +276,7 @@ def setup_method(self): def test_inconsistent_shared(self, shape_unsafe): # These shapes don't match! 
x = shared(self.rng.standard_normal((3, 7))) - a = at.alloc(x, 6, 7) + a = pt.alloc(x, 6, 7) assert a.owner and isinstance(a.owner.op, Alloc) @@ -310,16 +310,16 @@ def test_inconsistent_shared(self, shape_unsafe): def test_basic_fill(self): x = matrix("x") - y = at.fill(x, x) + y = pt.fill(x, x) - # The rewrite `locall_fill_to_alloc` should call `at.alloc`, + # The rewrite `locall_fill_to_alloc` should call `pt.alloc`, # which should return `x` and not `alloc(x, ...)` f = function([x], [y], mode=rewrite_mode.including("local_fill_to_alloc")) assert not any(isinstance(node.op, Alloc) for node in f.maker.fgraph.toposort()) def test_basic_tile(self): x = matrix("x") - y = at.tile(x, (1,) * 2) + y = pt.tile(x, (1,) * 2) mode = rewrite_mode.including( "local_dimshuffle_lift", @@ -333,10 +333,10 @@ def test_basic_tile(self): @pytest.mark.parametrize( "x, has_alloc", [ - (at.alloc(np.ones((2,)), 1, 3, 2), True), - (at.alloc(np.array(1.0), 1, 1), False), - (at.alloc(np.ones((1, 1)), 1, 1, 2), True), - (at.alloc(np.ones((1, 1)), 1, 2), True), + (pt.alloc(np.ones((2,)), 1, 3, 2), True), + (pt.alloc(np.array(1.0), 1, 1), False), + (pt.alloc(np.ones((1, 1)), 1, 1, 2), True), + (pt.alloc(np.ones((1, 1)), 1, 2), True), ], ) def test_useless_alloc_with_shape_one(self, x, has_alloc): @@ -368,7 +368,7 @@ def test_advanced_inc_subtensor(self): x = vector("x") y = scalar("y") i = matrix("i", dtype="int64") - z = advanced_inc_subtensor(x, at.alloc(y, *i.shape), i) + z = advanced_inc_subtensor(x, pt.alloc(y, *i.shape), i) mode1 = self.mode.excluding(self.rewrite_name) mode2 = self.mode.including(self.rewrite_name) f1 = function([x, i, y], z, mode=mode1) @@ -400,7 +400,7 @@ def test_advanced_inc_subtensor1(self): x = vector("x") y = scalar("y") i = vector("i", dtype="int64") - z = advanced_inc_subtensor1(x, at.alloc(y, *i.shape), i) + z = advanced_inc_subtensor1(x, pt.alloc(y, *i.shape), i) mode1 = self.mode.excluding(self.rewrite_name) mode2 = self.mode.including(self.rewrite_name) f1 = function([x, i, y], z, mode=mode1) @@ -431,7 +431,7 @@ def test_incsubtensor(self): x = vector("x") y = scalar("y") i = scalar("i", dtype="int64") - z = inc_subtensor(x[:i], at.alloc(y, i)) + z = inc_subtensor(x[:i], pt.alloc(y, i)) mode1 = self.mode.excluding(self.rewrite_name) mode2 = self.mode.including(self.rewrite_name) f1 = function([x, i, y], z, mode=mode1) @@ -573,7 +573,7 @@ def test_local_useless_unbroadcast(self): def test_local_unbroadcast_lift(self): x = tensor(dtype="float64", shape=(1, 1)) - y = unbroadcast(at.exp(unbroadcast(x, 0)), 1) + y = unbroadcast(pt.exp(unbroadcast(x, 0)), 1) assert ( sum( @@ -606,7 +606,7 @@ def test_eq(self): topo = f.maker.fgraph.toposort() assert len(topo) == 1 assert isinstance(topo[0].op, Elemwise) - assert isinstance(topo[0].op.scalar_op, aes.EQ) + assert isinstance(topo[0].op.scalar_op, ps.EQ) f2 = function([x], eq(x, x), mode=self.mode) assert np.all(f2(vx) == np.ones((5, 4))) topo2 = f2.maker.fgraph.toposort() @@ -626,7 +626,7 @@ def test_neq(self): topo = f.maker.fgraph.toposort() assert len(topo) == 1 assert isinstance(topo[0].op, Elemwise) - assert isinstance(topo[0].op.scalar_op, aes.NEQ) + assert isinstance(topo[0].op.scalar_op, ps.NEQ) f2 = function([x], neq(x, x), mode=self.mode) assert np.all(f2(vx) == np.zeros((5, 4))) topo2 = f2.maker.fgraph.toposort() @@ -648,7 +648,7 @@ def test_mul(self): topo2 = f2.maker.fgraph.toposort() assert len(topo2) == 1 assert isinstance(topo2[0].op, Elemwise) - assert isinstance(topo2[0].op.scalar_op, aes.Mul) + assert 
isinstance(topo2[0].op.scalar_op, ps.Mul) def test_add(self): x = dmatrix() @@ -665,13 +665,13 @@ def test_add(self): topo2 = f2.maker.fgraph.toposort() assert len(topo2) == 1 assert isinstance(topo2[0].op, Elemwise) - assert isinstance(topo2[0].op.scalar_op, aes.Add) + assert isinstance(topo2[0].op.scalar_op, ps.Add) def test_identity(self): - # aes.identity is used in 2 Elemwise functions: + # ps.identity is used in 2 Elemwise functions: # tensor_copy, and view x = matrix() - f = function([x], at.tensor_copy(x), mode=self.mode) + f = function([x], pt.tensor_copy(x), mode=self.mode) vx = np.random.random((5, 4)).astype(config.floatX) f(vx) topo = f.maker.fgraph.toposort() @@ -686,37 +686,37 @@ def setup_method(self): def test_consecutive(self): x = fmatrix() - o = Elemwise(aes.Cast(aes.ScalarType("float64")))(x.astype("float64")) + o = Elemwise(ps.Cast(ps.ScalarType("float64")))(x.astype("float64")) f = function([x], o, mode=self.mode) dx = np.random.random((5, 4)).astype("float32") f(dx) topo = f.maker.fgraph.toposort() assert len(topo) == 1 - assert isinstance(topo[0].op.scalar_op, aes.basic.Cast) + assert isinstance(topo[0].op.scalar_op, ps.basic.Cast) x = dmatrix() - o = Elemwise(aes.Cast(aes.ScalarType("float32")))(x.astype("float32")) + o = Elemwise(ps.Cast(ps.ScalarType("float32")))(x.astype("float32")) f = function([x], o, mode=self.mode) dx = np.random.random((5, 4)) f(dx) topo = f.maker.fgraph.toposort() assert len(topo) == 1 - assert isinstance(topo[0].op.scalar_op, aes.basic.Cast) + assert isinstance(topo[0].op.scalar_op, ps.basic.Cast) def test_upcast(self): # Upcast followed by any other cast x = fmatrix() - o = Elemwise(aes.Cast(aes.ScalarType("complex128")))(x.astype("complex64")) + o = Elemwise(ps.Cast(ps.ScalarType("complex128")))(x.astype("complex64")) f = function([x], o, mode=self.mode) dx = np.random.random((5, 4)).astype("float32") f(dx) topo = f.maker.fgraph.toposort() assert len(topo) == 1 - assert isinstance(topo[0].op.scalar_op, aes.basic.Cast) + assert isinstance(topo[0].op.scalar_op, ps.basic.Cast) # Upcast followed by a downcast back to the base type x = fmatrix() - o = Elemwise(aes.Cast(aes.ScalarType("float32")))(x.astype("float64")) + o = Elemwise(ps.Cast(ps.ScalarType("float32")))(x.astype("float64")) f = function([x], o, mode=self.mode) dx = np.random.random((5, 4)).astype("float32") f(dx) @@ -727,13 +727,13 @@ def test_upcast(self): # Downcast followed by an upcast back to the base type # The rewrite shouldn't be applied x = dmatrix() - o = Elemwise(aes.Cast(aes.ScalarType("float64")))(x.astype("float32")) + o = Elemwise(ps.Cast(ps.ScalarType("float64")))(x.astype("float32")) f = function([x], o, mode=self.mode) dx = np.random.random((5, 4)) f(dx) topo = f.maker.fgraph.toposort() assert ( - len(topo) == 1 and isinstance(topo[0].op.scalar_op, aes.basic.Composite) + len(topo) == 1 and isinstance(topo[0].op.scalar_op, ps.basic.Composite) ) or (len(topo) > 1) @@ -749,7 +749,7 @@ def test_constant_folding(): # Test that we do not crash when constant folding elemwise scalar # as they should not generate c code. 
- x = at.constant(3) + x = pt.constant(3) assert x.ndim == 0 mode = get_mode("FAST_COMPILE").excluding("fusion") f = function([], [x * 2, x + x], mode=mode) @@ -782,7 +782,7 @@ def test_constant_get_stabilized(): assert f2.maker.fgraph.toposort()[0].op == softplus assert f2(800) == 800 - x = at.as_tensor_variable(800) + x = pt.as_tensor_variable(800) y = log(1 + exp(x)) f = function([], y, mode=mode) # When this error is fixed, the following line should be ok. @@ -855,8 +855,8 @@ def test_local_mul_switch_sink(self): (dscalar("x"), self.xs), ]: y = mul( - at.switch(condition[0] > 0, 1.0 * x[0], 0.0 * x[0]), - at.switch(condition[0] > 0, 1.0 * x[0], log(c) * x[0]), + pt.switch(condition[0] > 0, 1.0 * x[0], 0.0 * x[0]), + pt.switch(condition[0] > 0, 1.0 * x[0], log(c) * x[0]), ) f = self.function_remove_nan( [condition[0], x[0], c], [y], mode=self.mode @@ -876,7 +876,7 @@ def test_local_mul_switch_sink(self): # This case caused a missed rewrite in the past. x = dscalar("x") - y = at.switch(x < 7, x, sqrt(x - 7)) + y = pt.switch(x < 7, x, sqrt(x - 7)) f = self.function_remove_nan([x], pytensor.gradient.grad(y, x), self.mode) assert f(5) == 1, f(5) @@ -895,8 +895,8 @@ def test_local_div_switch_sink(self): (dscalar("x"), self.xs), ]: y = true_div( - at.switch(condition[0] > 0, 1.0 * x[0], 0.0 * x[0]), - at.switch(condition[0] > 0, 1.0 * x[0], log(c) * x[0]), + pt.switch(condition[0] > 0, 1.0 * x[0], 0.0 * x[0]), + pt.switch(condition[0] > 0, 1.0 * x[0], log(c) * x[0]), ) f = self.function_remove_nan( [condition[0], x[0], c], [y], mode=self.mode @@ -934,14 +934,14 @@ def setup_method(self): def test_const(self, dtype1, dtype2, cond): x = matrix("x", dtype=dtype1) y = matrix("y", dtype=dtype2) - z = at.switch(cond, x, y) + z = pt.switch(cond, x, y) f = function([x, y], z, mode=self.mode) assert not any( node.op for node in f.maker.fgraph.toposort() if ( isinstance(node.op, Elemwise) - and isinstance(node.op.scalar_op, aes.basic.Switch) + and isinstance(node.op.scalar_op, ps.basic.Switch) ) ) vx = np.array([[1, 2, 3], [4, 5, 6]], dtype=dtype1) @@ -956,9 +956,9 @@ def test_const(self, dtype1, dtype2, cond): def test_left_is_right(self, dtype1): x = matrix("x", dtype=dtype1) varc = matrix("varc", dtype=dtype1) - z1 = at.switch(1, x, x) - z0 = at.switch(0, x, x) - z2 = at.switch(varc, x, x) + z1 = pt.switch(1, x, x) + z0 = pt.switch(0, x, x) + z2 = pt.switch(varc, x, x) f1 = function([x], z1, mode=self.mode) f0 = function([x], z0, mode=self.mode) f2 = function([x, varc], z2, mode=self.mode) @@ -987,11 +987,11 @@ def test_left_is_right(self, dtype1): ) def test_shape_le_0(self, dtype1): x = matrix("x", dtype=dtype1) - z0 = at.switch(le(x.shape[0], 0), 0, x.shape[0]) + z0 = pt.switch(le(x.shape[0], 0), 0, x.shape[0]) f0 = function([x], z0, mode=self.mode) assert isinstance(f0.maker.fgraph.toposort()[0].op, Shape_i) - z1 = at.switch(le(x.shape[1], 0), 0, x.shape[1]) + z1 = pt.switch(le(x.shape[1], 0), 0, x.shape[1]) f1 = function([x], z1, mode=self.mode) assert isinstance(f1.maker.fgraph.toposort()[0].op, Shape_i) @@ -1004,25 +1004,25 @@ def test_broadcasting_1(self): x = matrix("x", dtype="int32") y = vector("y", dtype="int64") - z = at.switch(1, x, y) + z = pt.switch(1, x, y) f = function([x, y], z, mode=self.mode) start_var = f.maker.fgraph.outputs[0] assert isinstance(start_var.owner.op, Elemwise) - assert isinstance(start_var.owner.op.scalar_op, aes.basic.Cast) - assert not any(node.op == at.switch for node in f.maker.fgraph.toposort()) + assert isinstance(start_var.owner.op.scalar_op, 
ps.basic.Cast) + assert not any(node.op == pt.switch for node in f.maker.fgraph.toposort()) vx = np.array([[1, 2, 3], [4, 5, 6]], dtype="int32") vy = np.array([10, 11, 12], dtype="int64") np_res = np.where(1, vx, vy) assert np.array_equal(f(vx, vy), np_res) - z = at.switch(0, x, y) + z = pt.switch(0, x, y) f = function([x, y], z, mode=self.mode) assert isinstance(f.maker.fgraph.outputs[0].owner.op, Alloc) assert f.maker.fgraph.inputs[1] == f.maker.fgraph.outputs[0].owner.inputs[0] - assert not any(node.op == at.switch for node in f.maker.fgraph.toposort()) + assert not any(node.op == pt.switch for node in f.maker.fgraph.toposort()) vx = np.array([[1, 2, 3], [4, 5, 6]], dtype="int32") vy = np.array([10, 11, 12], dtype="int64") @@ -1035,22 +1035,22 @@ def test_broadcasting_2(self): x = vector("x", dtype="int32") y = matrix("y", dtype="int64") - z = at.switch(1, x, y) + z = pt.switch(1, x, y) f = function([x, y], z, mode=self.mode) assert isinstance(f.maker.fgraph.outputs[0].owner.op, Alloc) - assert not any(node.op == at.switch for node in f.maker.fgraph.toposort()) + assert not any(node.op == pt.switch for node in f.maker.fgraph.toposort()) vx = np.array([4, 5, 6], dtype="int32") vy = np.array([[7, 8, 9], [10, 11, 12]], dtype="int64") np_res = np.where(1, vx, vy) assert np.array_equal(f(vx, vy), np_res) - z = at.switch(0, x, y) + z = pt.switch(0, x, y) f = function([x, y], z, mode=self.mode) assert isinstance(f.maker.fgraph.outputs[0].owner.op, DeepCopyOp) - assert not any(node.op == at.switch for node in f.maker.fgraph.toposort()) + assert not any(node.op == pt.switch for node in f.maker.fgraph.toposort()) vx = np.array([4, 5, 6], dtype="int32") vy = np.array([[7, 8, 9], [10, 11, 12]], dtype="int64") @@ -1062,14 +1062,14 @@ def test_broadcasting_3(self): x = matrix("x", dtype="int32") y = vector("y", dtype="int64") - z = at.switch(x, y, y) + z = pt.switch(x, y, y) f = function([x, y], z, mode=self.mode) vx = np.array([[0, 1], [1, 0]], dtype="int32") vy = np.array([7, 8], dtype="int64") utt.assert_allclose(f(vx, vy), np.where(vx, vy, vy)) assert isinstance(f.maker.fgraph.outputs[0].owner.op, Alloc) - assert not any(node.op == at.switch for node in f.maker.fgraph.toposort()) + assert not any(node.op == pt.switch for node in f.maker.fgraph.toposort()) class TestLocalMergeSwitchSameCond: @@ -1090,15 +1090,15 @@ class TestLocalMergeSwitchSameCond: le, eq, neq, - at_pow, + pt_pow, ], ) def test_elemwise_float_ops(self, op): # float Ops mats = matrices("cabxy") c, a, b, x, y = mats - s1 = at.switch(c, a, b) - s2 = at.switch(c, x, y) + s1 = pt.switch(c, a, b) + s2 = pt.switch(c, x, y) g = rewrite(FunctionGraph(mats, [op(s1, s2)])) assert debugprint(g, file="str").count("Switch") == 1 @@ -1115,8 +1115,8 @@ def test_elemwise_int_ops(self, op): # integer Ops mats = imatrices("cabxy") c, a, b, x, y = mats - s1 = at.switch(c, a, b) - s2 = at.switch(c, x, y) + s1 = pt.switch(c, a, b) + s2 = pt.switch(c, x, y) g = rewrite(FunctionGraph(mats, [op(s1, s2)])) assert debugprint(g, file="str").count("Switch") == 1 @@ -1125,10 +1125,10 @@ def test_elemwise_multi_inputs(self, op): # add/mul with more than two inputs mats = imatrices("cabxy") c, a, b, x, y = mats - s1 = at.switch(c, a, b) - s2 = at.switch(c, x, y) + s1 = pt.switch(c, a, b) + s2 = pt.switch(c, x, y) u, v = matrices("uv") - s3 = at.switch(c, u, v) + s3 = pt.switch(c, u, v) g = rewrite(FunctionGraph(mats + [u, v], [op(s1, s2, s3)])) assert debugprint(g, file="str").count("Switch") == 1 @@ -1142,14 +1142,14 @@ class TestLocalOptAlloc: def 
test_sum_upcast(self): s = lscalar() - a = at.alloc(np.asarray(5, dtype=self.dtype), s, s) + a = pt.alloc(np.asarray(5, dtype=self.dtype), s, s) with config.change_flags(warn_float64="raise"): f = function([s], a.sum()) f(5) def test_prod_upcast(self): s = lscalar() - a = at.alloc(np.asarray(5, dtype=self.dtype), s, s) + a = pt.alloc(np.asarray(5, dtype=self.dtype), s, s) with config.change_flags(warn_float64="raise"): f = function([s], a.prod()) @@ -1158,7 +1158,7 @@ def test_prod_upcast(self): @config.change_flags(on_opt_error="raise") def test_sum_bool_upcast(self): s = lscalar() - a = at.alloc(np.asarray(True, dtype="bool"), s, s) + a = pt.alloc(np.asarray(True, dtype="bool"), s, s) f = function([s], a.sum()) f(5) # test with user specified dtype @@ -1176,7 +1176,7 @@ class TestLocalOptAllocF16(TestLocalOptAlloc): def test_local_join_1(): # test for vector a = vector("a") - s = at.stack([a]) + s = pt.stack([a]) f = function([a], s, mode=rewrite_mode) val = f([1]) assert np.all(val == [1]) @@ -1217,7 +1217,7 @@ def test_local_join_empty(): # test for vector, vector, empty to vector empty_vec = np.asarray([], dtype=config.floatX) a = vector("a") - s = at.join(0, a, a, empty_vec) + s = pt.join(0, a, a, empty_vec) f = function([a], s, mode=rewrite_mode) val = f([1]) assert np.all(val == [1]) @@ -1247,7 +1247,7 @@ def test_local_join_empty(): assert f.maker.fgraph.outputs[0].dtype == config.floatX # test for vector, vector, empty to matrix # We can't rewrite this case. - s = at.stack([a, a, empty_vec]) + s = pt.stack([a, a, empty_vec]) f = function([a], s, mode=rewrite_mode) val = f([]) assert np.all(val == [1]) @@ -1279,7 +1279,7 @@ def test_local_join_make_vector(): a, b, c, d, e = scalars("abcde") v = vector("v") mv = MakeVector(config.floatX) - s = at.join(0, mv(a), v, mv(b, c), mv(d, e)) + s = pt.join(0, mv(a), v, mv(b, c), mv(d, e)) f = function([a, b, c, d, e, v], s, mode=rewrite_mode) val = f(1, 2, 3, 4, 6, [7, 8]) assert np.all(val == [1, 7, 8, 2, 3, 4, 6]) @@ -1353,8 +1353,8 @@ def test_local_sum_make_vector(): def test_local_tensor_scalar_tensor(dtype): t_type = TensorType(dtype=dtype, shape=()) t = t_type() - s = at.scalar_from_tensor(t) - t2 = at.tensor_from_scalar(s) + s = pt.scalar_from_tensor(t) + t2 = pt.tensor_from_scalar(s) f = function([t], t2, mode=rewrite_mode) e = f.maker.fgraph.toposort() @@ -1381,10 +1381,10 @@ def test_local_tensor_scalar_tensor(dtype): ], ) def test_local_scalar_tensor_scalar(dtype): - s_type = aes.ScalarType(dtype=dtype) + s_type = ps.ScalarType(dtype=dtype) s = s_type() - t = at.tensor_from_scalar(s) - s2 = at.scalar_from_tensor(t) + t = pt.tensor_from_scalar(s) + s2 = pt.scalar_from_tensor(t) f = function([s], s2, mode=rewrite_mode) e = f.maker.fgraph.toposort() @@ -1396,8 +1396,8 @@ def test_local_scalar_tensor_scalar(dtype): def test_local_useless_split(): x = matrix("x") splits = ivector("splits") - rewritten = at.split(x, splits, n_splits=1) - not_rewritten = at.split(x, splits, n_splits=3) + rewritten = pt.split(x, splits, n_splits=1) + not_rewritten = pt.split(x, splits, n_splits=3) mode = get_default_mode().including("local_useless_split") f_rewritten = function([x, splits], rewritten, mode=mode) @@ -1432,7 +1432,7 @@ def test_local_useless_split(): @pytest.mark.parametrize("i", list(range(1, 4))) def test_local_flatten_lift(i): x = tensor4() - out = at.flatten(exp(x), i) + out = pt.flatten(exp(x), i) assert out.ndim == i mode = get_default_mode() mode = mode.including("local_reshape_lift") @@ -1444,7 +1444,7 @@ def 
test_local_flatten_lift(i): assert shape_out_np == out_np.shape reshape_nodes = [n for n in topo if isinstance(n.op, Reshape)] - assert len(reshape_nodes) == 1 and at.is_flat(reshape_nodes[0].outputs[0], ndim=i) + assert len(reshape_nodes) == 1 and pt.is_flat(reshape_nodes[0].outputs[0], ndim=i) assert isinstance(topo[-1].op, Elemwise) @@ -1484,7 +1484,7 @@ def test_matrix_col(self): def test_local_upcast_elemwise_constant_inputs(): s = dvector("s") - x = at_sum(log(10**s)) + x = pt_sum(log(10**s)) f = function([s], [pytensor.gradient.grad(x, s)]) f([-42, -2.1, -1, -0.5, 0, 0.2, 1, 2, 12]) @@ -1497,7 +1497,7 @@ def test_local_upcast_elemwise_constant_inputs(): def test_assert_op_gradient(): x = vector("x") assert_op = Assert() - cost = at_sum(assert_op(x, x.size < 2)) + cost = pt_sum(assert_op(x, x.size < 2)) grad = pytensor.gradient.grad(cost, x) func = function([x], grad) @@ -1518,7 +1518,7 @@ def test_local_merge_alloc(): m = fscalar("m") # case 1 # Alloc(Alloc(m, x, 1, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w) - output = at.alloc(at.alloc(m, 1, y, 1, 1), x, y, z, w) + output = pt.alloc(pt.alloc(m, 1, y, 1, 1), x, y, z, w) f = function([m, x, y, z, w], output, mode=rewrite_mode) topo = f.maker.fgraph.toposort() assert len(topo) == 1 @@ -1528,7 +1528,7 @@ def test_local_merge_alloc(): # case 2 # Alloc(Alloc(m, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w) - output = at.alloc(at.alloc(m, y, 1, 1), x, y, z, w) + output = pt.alloc(pt.alloc(m, y, 1, 1), x, y, z, w) f = function([m, x, y, z, w], output, mode=rewrite_mode) topo = f.maker.fgraph.toposort() assert len(topo) == 1 @@ -1539,7 +1539,7 @@ def test_local_merge_alloc(): # case 3 # Alloc(Alloc(m, y1, 1, 1), x, y2, z, w) -> # Alloc(m, x, assert(y1, y1==y2), z, w) - output = at.alloc(at.alloc(m, y, 1, 1), x, y2, z, w) + output = pt.alloc(pt.alloc(m, y, 1, 1), x, y2, z, w) f = function([m, x, y, y2, z, w], output, mode=rewrite_mode) topo = f.maker.fgraph.toposort() assert len(topo) == 3 @@ -1564,7 +1564,7 @@ def test_local_useless_alloc(): # case 1 # Alloc(Alloc(m, x, 1, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w) - output = at.alloc(at.alloc(m, 1, y, 1, 1), x, y, z, w) + output = pt.alloc(pt.alloc(m, 1, y, 1, 1), x, y, z, w) g = FunctionGraph([m, x, y, z, w], [output]) useless_alloc.rewrite(g) @@ -1577,7 +1577,7 @@ def test_local_useless_alloc(): # case 2 # Alloc(Alloc(m, y, 1, 1), x, y, z, w) -> Alloc(m, x, y, z, w) - output = at.alloc(at.alloc(m, y, 1, 1), x, y, z, w) + output = pt.alloc(pt.alloc(m, y, 1, 1), x, y, z, w) g = FunctionGraph([m, x, y, z, w], [output]) useless_alloc.rewrite(g) @@ -1591,7 +1591,7 @@ def test_local_useless_alloc(): # case 3 # Alloc(Alloc(m, y1, 1, 1), x, y2, z, w) -> # Alloc(m, x, assert(y1, y1==y2), z, w) - output = at.alloc(at.alloc(m, y, 1, 1), x, y2, z, w) + output = pt.alloc(pt.alloc(m, y, 1, 1), x, y2, z, w) g = FunctionGraph([m, x, y, y2, z, w], [output]) useless_alloc.rewrite(g) @@ -1606,7 +1606,7 @@ def test_local_useless_alloc(): def test_local_merge_consecutive_specify_shape(): x = matrix() - s = at.as_tensor([iscalar(), iscalar()]) + s = pt.as_tensor([iscalar(), iscalar()]) y = specify_shape(specify_shape(x, s), s) y_fg = FunctionGraph(outputs=[y], copy_inputs=False) @@ -1662,23 +1662,23 @@ def setup_method(self): self.mat = matrix("mat", dtype=self.dtype) self.tens = tensor3("tens", dtype=self.dtype) - self.alloc_wo_dep = at.alloc(self.vec, 2, 2) - self.alloc_wo_dep_broad = at.alloc(self.vec, 1, 2) - self.alloc_w_dep = at.alloc(self.vec, *self.mat.shape) - self.alloc_w_dep_broad = 
at.alloc(self.vec, 1, *self.mat.shape) - self.alloc_w_dep_broad2 = at.alloc( + self.alloc_wo_dep = pt.alloc(self.vec, 2, 2) + self.alloc_wo_dep_broad = pt.alloc(self.vec, 1, 2) + self.alloc_w_dep = pt.alloc(self.vec, *self.mat.shape) + self.alloc_w_dep_broad = pt.alloc(self.vec, 1, *self.mat.shape) + self.alloc_w_dep_broad2 = pt.alloc( self.vec, self.mat.shape[0], self.mat.shape[1], 1 ) - self.alloc_w_dep_tens = at.alloc( + self.alloc_w_dep_tens = pt.alloc( self.vec, self.tens.shape[0], self.tens.shape[1] ) - self.tv_wo_dep = at.alloc(self.vec, 5, 5) - self.tm_wo_dep = at.alloc(self.mat, 5, 5, 5) + self.tv_wo_dep = pt.alloc(self.vec, 5, 5) + self.tm_wo_dep = pt.alloc(self.mat, 5, 5, 5) self.s = iscalar("s") - self.tv_w_dep = at.alloc(self.vec, self.s, self.s) - self.tm_w_dep = at.alloc(self.mat, 5, 5, 5) + self.tv_w_dep = pt.alloc(self.vec, self.s, self.s) + self.tm_w_dep = pt.alloc(self.mat, 5, 5, 5) self.row = row(dtype=self.dtype) - self.o = at.alloc(self.row, 5, 5) + self.o = pt.alloc(self.row, 5, 5) @staticmethod def verify_op_count(f, count, cls): @@ -1694,41 +1694,41 @@ def verify_op_count(f, count, cls): @pytest.mark.parametrize( "expr, x_shape, y_shape, needs_alloc", [ - (lambda x, y: at.mul(at.alloc(1, *y.shape), x), (1, 2), (3, 2), True), - (lambda x, y: at.mul(at.alloc(1, *y.shape), x), (1, 1), (1, 1), False), - (lambda x, y: at.mul(x, at.alloc(y, 2, 3)), (1, 3), (2, 3), False), + (lambda x, y: pt.mul(pt.alloc(1, *y.shape), x), (1, 2), (3, 2), True), + (lambda x, y: pt.mul(pt.alloc(1, *y.shape), x), (1, 1), (1, 1), False), + (lambda x, y: pt.mul(x, pt.alloc(y, 2, 3)), (1, 3), (2, 3), False), ( - lambda x, y: at.mul( - at.alloc(x, 3).dimshuffle("x", 0), y.dimshuffle("x", "x") + lambda x, y: pt.mul( + pt.alloc(x, 3).dimshuffle("x", 0), y.dimshuffle("x", "x") ), (), (), True, ), - (lambda x, y: at.mul(y, at.alloc(1, x)), (), (), True), - (lambda x, y: at.mul(at.alloc(x, 15, 1), y), (15, 1), (15, 1), False), - (lambda x, y: at.mul(at.alloc(x, 15, 2), y), (15, 2), (15, 2), False), + (lambda x, y: pt.mul(y, pt.alloc(1, x)), (), (), True), + (lambda x, y: pt.mul(pt.alloc(x, 15, 1), y), (15, 1), (15, 1), False), + (lambda x, y: pt.mul(pt.alloc(x, 15, 2), y), (15, 2), (15, 2), False), ( - lambda x, y: at.mul(at.alloc(x, 15, 1), at.alloc(y, 15, 1)), + lambda x, y: pt.mul(pt.alloc(x, 15, 1), pt.alloc(y, 15, 1)), (15, 1), (15, 1), False, ), ( - lambda x, y: at.mul(at.alloc(x, 15, 2), at.alloc(y, 15, 2)), + lambda x, y: pt.mul(pt.alloc(x, 15, 2), pt.alloc(y, 15, 2)), (15, 2), (15, 2), False, ), ( - lambda x, y: at.mul(at.alloc(x, 15, 2).dimshuffle(1, 0), y), + lambda x, y: pt.mul(pt.alloc(x, 15, 2).dimshuffle(1, 0), y), (15, 2), (2, 15), False, ), - (lambda x, y: at.mul(at.alloc(x, 1, 15, 2), y), (15, 2), (15, 2), False), + (lambda x, y: pt.mul(pt.alloc(x, 1, 15, 2), y), (15, 2), (15, 2), False), ( - lambda x, y: at.mul(at.alloc(x, 1, 15, 2).dimshuffle(0, 2, 1), y), + lambda x, y: pt.mul(pt.alloc(x, 1, 15, 2).dimshuffle(0, 2, 1), y), (15, 2), (2, 15), False, @@ -1736,10 +1736,10 @@ def verify_op_count(f, count, cls): ], ) def test_basic(self, expr, x_shape, y_shape, needs_alloc): - x = at.tensor( + x = pt.tensor( dtype="int64", shape=(1 if val == 1 else None for val in x_shape), name="x" ) - y = at.tensor( + y = pt.tensor( dtype="int64", shape=(1 if val == 1 else None for val in y_shape), name="y" ) z = expr(x, y) @@ -1777,8 +1777,8 @@ def test_basic(self, expr, x_shape, y_shape, needs_alloc): def test_single_input(self): """Test that rewrite is not triggered when there is only one `Alloc` 
in an `Elemwise`.""" - x = at.matrix("x") - z = at.exp(at.alloc(x, 15, 1)) + x = pt.matrix("x") + z = pt.exp(pt.alloc(x, 15, 1)) z_fg = FunctionGraph(outputs=[z], copy_inputs=False, features=[ShapeFeature()]) @@ -1924,7 +1924,7 @@ def test_misc(self): x = row("x", dtype=self.dtype) y = tensor("y", dtype=self.dtype, shape=(None, None, 1)) - out = at.alloc(x, 5, 5).dimshuffle(0, 1, "x") + y + out = pt.alloc(x, 5, 5).dimshuffle(0, 1, "x") + y func = function([y, x], out, mode=self.fast_run_mode) self.verify_op_count(func, 0, Alloc) diff --git a/tests/tensor/rewriting/test_elemwise.py b/tests/tensor/rewriting/test_elemwise.py index 8e7c754d5e..7b25192a89 100644 --- a/tests/tensor/rewriting/test_elemwise.py +++ b/tests/tensor/rewriting/test_elemwise.py @@ -5,9 +5,9 @@ import pytensor from pytensor import In -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor import shared -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.compile.function import function from pytensor.compile.mode import Mode, get_default_mode from pytensor.configdefaults import config @@ -22,9 +22,9 @@ from pytensor.scalar.basic import Composite, float64 from pytensor.tensor.basic import MakeVector from pytensor.tensor.elemwise import DimShuffle, Elemwise -from pytensor.tensor.math import abs as at_abs +from pytensor.tensor.math import abs as pt_abs from pytensor.tensor.math import add -from pytensor.tensor.math import all as at_all +from pytensor.tensor.math import all as pt_all from pytensor.tensor.math import ( bitwise_and, bitwise_or, @@ -45,11 +45,11 @@ neg, neq, ) -from pytensor.tensor.math import pow as at_pow +from pytensor.tensor.math import pow as pt_pow from pytensor.tensor.math import reciprocal -from pytensor.tensor.math import round as at_round +from pytensor.tensor.math import round as pt_round from pytensor.tensor.math import sin, sinh, sqr, sqrt -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import tan, tanh, true_div, xor from pytensor.tensor.rewriting.elemwise import FusionOptimizer, local_dimshuffle_lift from pytensor.tensor.rewriting.shape import local_useless_dimshuffle_in_reshape @@ -151,7 +151,7 @@ def test_useless_dimshuffle(self): def test_dimshuffle_on_broadcastable(self): x, y, z = inputs([False, True], [True, False, True], [False, False, True]) - u = at.constant(1) + u = pt.constant(1) ds_x = ds(x, (0, "x")) # useless ds_y = ds(y, (2, 1, 0)) # useless ds_z = ds(z, (2, 1, 0)) # useful @@ -268,17 +268,17 @@ def large_fuseable_graph(self, n): sd = dscalar() means = dvector() - cst_05 = at.constant(0.5) - cst_m05 = at.constant(-0.5) - cst_2 = at.constant(2) - cst_m2 = at.constant(-2) - ones = at.constant(np.ones(10)) + cst_05 = pt.constant(0.5) + cst_m05 = pt.constant(-0.5) + cst_2 = pt.constant(2) + cst_m2 = pt.constant(-2) + ones = pt.constant(np.ones(10)) for i in range(n): f = cst_m05 * sd**cst_m2 * (ones - means[i]) ** cst_2 + cst_05 * log( cst_05 * (sd**cst_m2) / np.pi ) - factors.append(at_sum(f)) + factors.append(pt_sum(f)) logp = add(*factors) @@ -747,7 +747,7 @@ def large_fuseable_graph(self, n): "float32", ), ( - fx - fy + at_round(fz), + fx - fy + pt_round(fz), (fx, fy, fz), (fxv, fyv, fzv), 1, @@ -812,7 +812,7 @@ def large_fuseable_graph(self, n): }, ), ( - fx - at.cast(fy, dtype="float64"), + fx - pt.cast(fy, dtype="float64"), (fx, fy), (fxv, fyv), 1, @@ -820,7 +820,7 @@ def large_fuseable_graph(self, n): "float64", ), ( - at_pow(fx * fy + fz, fx * 
fy), + pt_pow(fx * fy + fz, fx * fy), (fx, fy, fz), (fxv, fyv, fzv), 1, @@ -893,9 +893,9 @@ def large_fuseable_graph(self, n): ( ( # sum(logp) - at_sum(-((fx - fy) ** 2) / 2), + pt_sum(-((fx - fy) ** 2) / 2), # grad(logp) - at.grad(at_sum(-((fx - fy) ** 2) / 2), wrt=fx), + pt.grad(pt_sum(-((fx - fy) ** 2) / 2), wrt=fx), ), (fx, fy), (fxv, fyv), @@ -913,8 +913,8 @@ def large_fuseable_graph(self, n): log( ge( assert_op( - at_abs(fx), - at_all(ge(at_abs(fx), 0)), + pt_abs(fx), + pt_all(ge(pt_abs(fx), 0)), ), 0, ) @@ -932,7 +932,7 @@ def large_fuseable_graph(self, n): ( true_div( mul( - at_sum(fx + 5), # breaks fusion + pt_sum(fx + 5), # breaks fusion exp(fx), ), (fx + 5), @@ -1036,7 +1036,7 @@ def test_fusion_35_inputs(self): composite_nodes = [ node for node in f.maker.fgraph.toposort() - if isinstance(getattr(node.op, "scalar_op", None), aes.basic.Composite) + if isinstance(getattr(node.op, "scalar_op", None), ps.basic.Composite) ] assert not any(len(node.inputs) > 31 for node in composite_nodes) @@ -1048,7 +1048,7 @@ def test_big_fusion(self): # Make sure something was fused assert any( - isinstance(getattr(node.op, "scalar_op", None), aes.basic.Composite) + isinstance(getattr(node.op, "scalar_op", None), ps.basic.Composite) for node in dlogp.maker.fgraph.toposort() ) @@ -1069,11 +1069,11 @@ def test_add_mul_fusion_precedence(self): assert isinstance(scalar_op, Composite) assert [node.op for node in scalar_op.fgraph.toposort()] == [ # There should be a single mul - aes.mul, + ps.mul, # There should be a single add - aes.add, - aes.true_div, - aes.log, + ps.add, + ps.true_div, + ps.log, ] def test_add_mul_fusion_inplace(self): @@ -1087,7 +1087,7 @@ def test_add_mul_fusion_inplace(self): new_out = f.maker.fgraph.outputs[0] assert isinstance(new_out.owner.op, Elemwise) - assert isinstance(new_out.owner.op.scalar_op, aes.basic.Add) + assert isinstance(new_out.owner.op.scalar_op, ps.basic.Add) assert len(new_out.owner.inputs) == 4 # TODO: Do we really need to do this? 
@@ -1099,9 +1099,9 @@ def test_fusion_multiout_inplace(self): x = vector("x") # Create Composite where inplacing the first non-constant output would corrupt the second output - xs = aes.float64("xs") + xs = ps.float64("xs") outs = ( - Elemwise(Composite([xs], [xs + 1, aes.cos(xs + 1) + xs])) + Elemwise(Composite([xs], [xs + 1, ps.cos(xs + 1) + xs])) .make_node(x) .outputs ) @@ -1128,11 +1128,11 @@ def test_no_c_code(self): r"""Make sure we avoid fusions for `Op`\s without C code implementations.""" # This custom `Op` has no `c_code` method - class NoCCodeOp(aes.basic.UnaryScalarOp): + class NoCCodeOp(ps.basic.UnaryScalarOp): def impl(self, x): return x * 2 - no_c_code_op = Elemwise(NoCCodeOp(aes.basic.upgrade_to_float)) + no_c_code_op = Elemwise(NoCCodeOp(ps.basic.upgrade_to_float)) mode = Mode(linker="cvm") mode._optimizer = mode._optimizer.including( @@ -1146,7 +1146,7 @@ def impl(self, x): f = function([x], out, mode=mode) assert not any( - isinstance(getattr(n.op, "scalar_op"), aes.basic.Composite) + isinstance(getattr(n.op, "scalar_op"), ps.basic.Composite) for n in f.maker.fgraph.toposort() ) @@ -1182,9 +1182,9 @@ def test_test_values(self, test_value): @pytest.mark.parametrize( "careduce_op, numpy_op", [ - (at_sum, np.sum), + (pt_sum, np.sum), pytest.param( - at_all, + pt_all, np.all, marks=pytest.mark.xfail( reason="Rewrite logic does not support all CAReduce" @@ -1211,7 +1211,7 @@ def test_CAReduce_single_input( if linker != "py": (out_node,) = out_fn.maker.fgraph.toposort() - assert isinstance(getattr(out_node.op, "scalar_op"), aes.basic.Composite) + assert isinstance(getattr(out_node.op, "scalar_op"), ps.basic.Composite) rng = np.random.default_rng(2320) x_val = rng.random((4, 3, 2)).astype(x.type.dtype) @@ -1224,7 +1224,7 @@ def test_CAReduce_single_input( else: out_nodes = out_fn.maker.fgraph.toposort() assert not any( - isinstance(out_node.op.scalar_op, aes.basic.Composite) + isinstance(out_node.op.scalar_op, ps.basic.Composite) for out_node in out_nodes if hasattr(out_node.op, "scalar_op") ) @@ -1237,7 +1237,7 @@ def test_CAReduce_single_input( out_fn = function([x], out, mode=mode) out_nodes = out_fn.maker.fgraph.toposort() assert not any( - isinstance(out_node.op.scalar_op, aes.basic.Composite) + isinstance(out_node.op.scalar_op, ps.basic.Composite) for out_node in out_nodes if hasattr(out_node.op, "scalar_op") ) @@ -1262,7 +1262,7 @@ def test_CAReduce_multiple_inputs(self, linker, axis): out_fn = function([x, y], out, mode=mode) (out_node,) = out_fn.maker.fgraph.toposort() - assert isinstance(getattr(out_node.op, "scalar_op"), aes.basic.Composite) + assert isinstance(getattr(out_node.op, "scalar_op"), ps.basic.Composite) rng = np.random.default_rng(2320) x_val = rng.random((4, 3, 2), dtype=config.floatX) @@ -1292,15 +1292,15 @@ def test_not_fusing_broadcasted_subgraphs(self): # Inner Vector output Composite assert isinstance(apply_nodes[1].op.scalar_op, Composite) assert {node.op for node in apply_nodes[1].op.scalar_op.fgraph.apply_nodes} == { - aes.add, - aes.log, + ps.add, + ps.log, } # Outer Matrix output Composite assert isinstance(apply_nodes[2].op.scalar_op, Composite) assert {node.op for node in apply_nodes[2].op.scalar_op.fgraph.apply_nodes} == { - aes.sub, - aes.exp, - aes.mul, + ps.sub, + ps.exp, + ps.mul, } def test_multiple_outputs_fused_root_elemwise(self): @@ -1308,16 +1308,16 @@ def test_multiple_outputs_fused_root_elemwise(self): there is another fused output""" # By default, we do not introduce Composite for single layers of Elemwise - x = at.vector("x") 
- out1 = at.cos(x) + x = pt.vector("x") + out1 = pt.cos(x) f = pytensor.function([x], out1, mode=self.mode) nodes = tuple(f.maker.fgraph.apply_nodes) assert len(nodes) == 1 - assert isinstance(nodes[0].op.scalar_op, aes.Cos) + assert isinstance(nodes[0].op.scalar_op, ps.Cos) # However, when it can be composed with another output, we should not # compute that root Elemwise twice - out2 = at.log(out1) + out2 = pt.log(out1) f = pytensor.function([x], [out1, out2], mode=self.mode) nodes = tuple(f.maker.fgraph.apply_nodes) assert len(nodes) == 1 @@ -1363,7 +1363,7 @@ def test_no_warning_from_old_client(self): ) -class TimesN(aes.basic.UnaryScalarOp): +class TimesN(ps.basic.UnaryScalarOp): """ Used in test TestCompositeCodegen @@ -1379,7 +1379,7 @@ def __hash__(self): def __init__(self, n, *args, **kwargs): self.n = n - aes.basic.UnaryScalarOp.__init__(self, *args, **kwargs) + ps.basic.UnaryScalarOp.__init__(self, *args, **kwargs) def impl(self, x): return x * self.n @@ -1406,7 +1406,7 @@ class TestCompositeCodegen: """ def setup_method(self): - upgrade_to_float = aes.basic.upgrade_to_float + upgrade_to_float = ps.basic.upgrade_to_float self.scal_times_2 = TimesN(2, upgrade_to_float, name="times_2") self.times_2 = Elemwise(self.scal_times_2, name="times_2") @@ -1427,10 +1427,10 @@ def test_nested_composite(self): def test_local_useless_composite_outputs(): - x = aes.float32() - y = aes.float32() - z = aes.float32() - c = aes.Composite([x, y, z], [x + 1, y - 1]) + x = ps.float32() + y = ps.float32() + z = ps.float32() + c = ps.Composite([x, y, z], [x + 1, y - 1]) X = matrix("X") Y = matrix("Y") Z = matrix("Z") @@ -1462,12 +1462,12 @@ def test_local_useless_composite_outputs(): @pytest.mark.parametrize("const_shape", [(), (1,), (5,), (1, 5), (2, 5)]) -@pytest.mark.parametrize("op, np_op", [(at.pow, np.power), (at.add, np.add)]) +@pytest.mark.parametrize("op, np_op", [(pt.pow, np.power), (pt.add, np.add)]) def test_local_inline_composite_constants(op, np_op, const_shape): const = np.full(shape=const_shape, fill_value=2.5).astype(config.floatX) x = vector("x") y = vector("y") - out = at.exp(op(x, const)) + y + out = pt.exp(op(x, const)) + y fn = pytensor.function( [x, y], out, mode=get_default_mode().including("specialize", "fusion") diff --git a/tests/tensor/rewriting/test_extra_ops.py b/tests/tensor/rewriting/test_extra_ops.py index 15f5870e5b..95ae438c2f 100644 --- a/tests/tensor/rewriting/test_extra_ops.py +++ b/tests/tensor/rewriting/test_extra_ops.py @@ -1,7 +1,7 @@ import numpy as np import pytest -import pytensor.scalar as aes +import pytensor.scalar as ps from pytensor.compile.function import function from pytensor.compile.mode import OPT_NONE, Mode, get_default_mode from pytensor.graph.fg import FunctionGraph @@ -209,7 +209,7 @@ def test_local_Unique_second( assert y_rewritten_start == x assert not any( - isinstance(node.op.scalar_op, aes.Second) + isinstance(node.op.scalar_op, ps.Second) for node in y_rewritten_fg.apply_nodes if isinstance(node.op, Elemwise) ) @@ -222,7 +222,7 @@ def test_local_Unique_second( # Make sure that the original `BroadcastTo` is used to compute the # reference `y` result assert any( - isinstance(node.op.scalar_op, aes.Second) + isinstance(node.op.scalar_op, ps.Second) for node in y_fn.maker.fgraph.apply_nodes if isinstance(node.op, Elemwise) ) diff --git a/tests/tensor/rewriting/test_linalg.py b/tests/tensor/rewriting/test_linalg.py index e4c636f87c..9cdb69ce6b 100644 --- a/tests/tensor/rewriting/test_linalg.py +++ b/tests/tensor/rewriting/test_linalg.py @@ -7,7 
+7,7 @@ import pytensor from pytensor import function -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.compile import get_default_mode from pytensor.configdefaults import config from pytensor.tensor import swapaxes @@ -42,7 +42,7 @@ def test_rop_lop(): sy, _ = pytensor.scan( lambda i, y, x, v: (pytensor.gradient.grad(y[i], x) * v).sum(), - sequences=at.arange(y.shape[0]), + sequences=pt.arange(y.shape[0]), non_sequences=[y, mx, mv], ) scan_f = function([mx, mv], sy) @@ -227,8 +227,8 @@ def test_cholesky_ldotlt(tag, cholesky_form, product, op): def test_local_det_chol(): X = matrix("X") - L = at.linalg.cholesky(X) - det_X = at.linalg.det(X) + L = pt.linalg.cholesky(X) + det_X = pt.linalg.det(X) f = function([X], [L, det_X]) diff --git a/tests/tensor/rewriting/test_math.py b/tests/tensor/rewriting/test_math.py index 4bc7ae3ad3..dc7927db05 100644 --- a/tests/tensor/rewriting/test_math.py +++ b/tests/tensor/rewriting/test_math.py @@ -7,8 +7,8 @@ import pytest import pytensor -import pytensor.scalar as aes -import pytensor.tensor as at +import pytensor.scalar as ps +import pytensor.tensor as pt from pytensor import pprint, shared from pytensor.compile import optdb from pytensor.compile.debugmode import DebugMode @@ -36,10 +36,10 @@ from pytensor.tensor.blas_c import CGemv from pytensor.tensor.elemwise import CAReduce, DimShuffle, Elemwise from pytensor.tensor.math import Dot, MaxAndArgmax, Prod, Sum, _conj -from pytensor.tensor.math import abs as at_abs +from pytensor.tensor.math import abs as pt_abs from pytensor.tensor.math import add -from pytensor.tensor.math import all as at_all -from pytensor.tensor.math import any as at_any +from pytensor.tensor.math import all as pt_all +from pytensor.tensor.math import any as pt_any from pytensor.tensor.math import ( arccosh, arcsinh, @@ -66,9 +66,9 @@ log1p, lt, ) -from pytensor.tensor.math import max as at_max +from pytensor.tensor.math import max as pt_max from pytensor.tensor.math import maximum -from pytensor.tensor.math import min as at_min +from pytensor.tensor.math import min as pt_min from pytensor.tensor.math import minimum, mul, neg, neq, polygamma from pytensor.tensor.math import pow as pt_pow from pytensor.tensor.math import ( @@ -83,7 +83,7 @@ sqrt, sub, ) -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import tanh, true_div, xor from pytensor.tensor.rewriting.elemwise import local_dimshuffle_lift from pytensor.tensor.rewriting.math import ( @@ -183,8 +183,8 @@ def test_add_canonizer_problem0(): f(3) # This was crashing in the past. 
- c0 = at.constant([True]) - c1 = at.constant([True]) + c0 = pt.constant([True]) + c1 = pt.constant([True]) function([], c0 + c1) @@ -215,7 +215,7 @@ def test_kording_bug(self): eps = scalar("eps") s = scalar("s") - # r = mul(at.fill(x, 2.*a), x/a , (y+z) , a) + # r = mul(pt.fill(x, 2.*a), x/a , (y+z) , a) # r = mul((x/a+y) , a, z) r = mul(s - 1, eps + x / s, eps + y / s, s) @@ -252,7 +252,7 @@ class TestAlgebraicCanonizer: # ((x / x) * (y / y), None), ( (-1 * x) / y / (-2 * z), - (at.as_tensor([[0.5]], dtype="floatX") * x) / (y * z), + (pt.as_tensor([[0.5]], dtype="floatX") * x) / (y * z), ), ], ) @@ -600,7 +600,7 @@ def test_mul_div_cases(self): assert out_dtype == out.dtype utt.assert_allclose(out, val_inputs[1]) topo = f.maker.fgraph.toposort() - assert not any(node.op == at.true_div for node in topo) + assert not any(node.op == pt.true_div for node in topo) # test x / y / x -> 1 / y for id, (g, sym_inputs, val_inputs, nb_elemwise, out_dtype) in enumerate( @@ -626,7 +626,7 @@ def test_mul_div_cases(self): assert any( isinstance( el.op.scalar_op, - (aes.basic.Reciprocal, aes.basic.TrueDiv), + (ps.basic.Reciprocal, ps.basic.TrueDiv), ) for el in elem ) @@ -703,7 +703,7 @@ def test_mul_div_cases(self): topo = f.maker.fgraph.toposort() assert len(topo) == 1 assert isinstance(topo[0].op, (Elemwise,)) - assert isinstance(topo[0].op.scalar_op, aes.basic.TrueDiv) + assert isinstance(topo[0].op.scalar_op, ps.basic.TrueDiv) assert len(topo[0].inputs) == 2 assert out_dtype == out.dtype @@ -753,10 +753,10 @@ def test_mul_div_cases(self): topo = f.maker.fgraph.toposort() assert len(topo) == 2 assert isinstance(topo[0].op, (Elemwise,)) - assert isinstance(topo[0].op.scalar_op, aes.basic.Mul) + assert isinstance(topo[0].op.scalar_op, ps.basic.Mul) assert len(topo[0].inputs) == 2 assert isinstance(topo[1].op, (Elemwise,)) - assert isinstance(topo[1].op.scalar_op, aes.basic.TrueDiv) + assert isinstance(topo[1].op.scalar_op, ps.basic.TrueDiv) assert len(topo[1].inputs) == 2 assert out_dtype == out.dtype @@ -866,7 +866,7 @@ def test_abs_mul_div(self): """Test that ``4 * x / abs(2*x)`` gets "simplified" during canonicalization.""" x = dscalar() - # a = at.at_abs(x) + # a = pt.pt_abs(x) if config.mode == "FAST_COMPILE": mode = get_mode("FAST_RUN").excluding("local_elemwise_fusion") @@ -926,7 +926,7 @@ def test_multiple_case_that_fail(self): topo = f.maker.fgraph.toposort() assert len(topo) == 2 assert isinstance(topo[0].op, (Elemwise,)) - assert isinstance(topo[0].op.scalar_op, aes.basic.Reciprocal) + assert isinstance(topo[0].op.scalar_op, ps.basic.Reciprocal) assert len(topo[0].inputs) == 1 assert out_dtype == out.dtype @@ -941,7 +941,7 @@ def test_multiple_case_that_fail(self): topo = f.maker.fgraph.toposort() assert len(topo) == 2 assert isinstance(topo[0].op, (Elemwise,)) - assert isinstance(topo[0].op.scalar_op, aes.basic.Reciprocal) + assert isinstance(topo[0].op.scalar_op, ps.basic.Reciprocal) assert len(topo[0].inputs) == 1 assert out_dtype == out.dtype @@ -966,7 +966,7 @@ def test_canonicalize_nan(self): assert not sio.getvalue() def test_mismatching_types(self): - a = at.as_tensor([[0.0]], dtype=np.float64) + a = pt.as_tensor([[0.0]], dtype=np.float64) b = tensor(dtype="float64", shape=(None,)).dimshuffle("x", 0) z = add(a, b) # Construct a node with the wrong output `Type` @@ -1015,12 +1015,12 @@ def test_local_merge_abs(): f = function([y, z], (abs(y * z * -2)), mode=mode) f(y_val, z_val) - assert isinstance(f.maker.fgraph.toposort()[1].op.scalar_op, aes.Abs) + assert 
isinstance(f.maker.fgraph.toposort()[1].op.scalar_op, ps.Abs) assert len(f.maker.fgraph.toposort()) == 2 f = function([x, y], abs(x / y), mode=mode) f(x_val, y_val) - assert isinstance(f.maker.fgraph.toposort()[1].op.scalar_op, aes.Abs) + assert isinstance(f.maker.fgraph.toposort()[1].op.scalar_op, ps.Abs) assert len(f.maker.fgraph.toposort()) == 2 @@ -1034,7 +1034,7 @@ def test_merge_abs_bugfix(): # normalize on rows step2 = step1 / step1.sum(1) # get l1 norm - l1_norm = at_abs(step2).sum() + l1_norm = pt_abs(step2).sum() function([input], pytensor.gradient.grad(l1_norm, input)) @@ -1081,7 +1081,7 @@ def test_cast_in_mul_canonizer(): x, y = vectors("xy") m = minimum(x, y) o = m.sum() - go = at.fill(o, 1) + go = pt.fill(o, 1) e = eq(go, x) o1 = (1 - e) * go o2 = e * go @@ -1093,12 +1093,12 @@ def test_cast_in_mul_canonizer(): [ n for n in nodes - if isinstance(getattr(n.op, "scalar_op", None), aes.Identity) + if isinstance(getattr(n.op, "scalar_op", None), ps.Identity) ] ) == 0 ) - assert len([n for n in nodes if isinstance(n.op.scalar_op, aes.Cast)]) == 1 + assert len([n for n in nodes if isinstance(n.op.scalar_op, ps.Cast)]) == 1 f([1], [1]) @@ -1127,20 +1127,20 @@ def test_log1p(): # check trickier cases (and use different dtype) y = fmatrix() - f = function([x, y], log(at.fill(y, 1) + (x)), mode=m) + f = function([x, y], log(pt.fill(y, 1) + (x)), mode=m) # the first three ops are Shape_i, Shape_i, and Dimshuffle topo = f.maker.fgraph.toposort() - assert topo[-1].op == at.alloc + assert topo[-1].op == pt.alloc assert log1p in [node.op for node in topo] - f = function([x, y], log(0 + (x) + at.fill(y, 1.0)), mode=m) + f = function([x, y], log(0 + (x) + pt.fill(y, 1.0)), mode=m) topo = f.maker.fgraph.toposort() - assert topo[-1].op == at.alloc + assert topo[-1].op == pt.alloc assert log1p in [node.op for node in topo] - f = function([x, y], log(2 + (x) - at.fill(y, 1.0)), mode=m) + f = function([x, y], log(2 + (x) - pt.fill(y, 1.0)), mode=m) topo = f.maker.fgraph.toposort() - assert topo[-1].op == at.alloc + assert topo[-1].op == pt.alloc assert log1p in [node.op for node in topo] f([1e-7, 10], [[0, 0], [0, 0]]) # debugmode will verify values @@ -1260,7 +1260,7 @@ def test_local_elemwise_sub_zeros(): # Test scalar minus scalar f = function([scal], scal - scal, mode=mode) assert isinstance(f.maker.fgraph.toposort()[0].op, Elemwise) - assert isinstance(f.maker.fgraph.toposort()[0].op.scalar_op, aes.Second) + assert isinstance(f.maker.fgraph.toposort()[0].op.scalar_op, ps.Second) assert isinstance( f.maker.fgraph.toposort()[0].inputs[1], TensorConstant ) or isinstance(f.maker.fgraph.toposort()[0].inputs[1], TensorConstant) @@ -1270,7 +1270,7 @@ def test_local_elemwise_sub_zeros(): # Test vector minus vector f = function([vect], vect - vect, mode=mode) assert isinstance(f.maker.fgraph.toposort()[0].op, Elemwise) - assert isinstance(f.maker.fgraph.toposort()[0].op.scalar_op, aes.Second) + assert isinstance(f.maker.fgraph.toposort()[0].op.scalar_op, ps.Second) assert isinstance( f.maker.fgraph.toposort()[0].inputs[1], TensorConstant ) or isinstance(f.maker.fgraph.toposort()[0].inputs[1], TensorConstant) @@ -1280,7 +1280,7 @@ def test_local_elemwise_sub_zeros(): # Test vector minus vector f = function([mat], mat - mat, mode=mode) assert isinstance(f.maker.fgraph.toposort()[0].op, Elemwise) - assert isinstance(f.maker.fgraph.toposort()[0].op.scalar_op, aes.Second) + assert isinstance(f.maker.fgraph.toposort()[0].op.scalar_op, ps.Second) assert isinstance( f.maker.fgraph.toposort()[0].inputs[1], 
TensorConstant ) or isinstance(f.maker.fgraph.toposort()[0].inputs[1], TensorConstant) @@ -1370,11 +1370,11 @@ def assert_eqs_const(self, f, val, op=deep_copy_op): if op == deep_copy_op: assert len(elem.inputs) == 1, elem.inputs assert isinstance(elem.inputs[0], TensorConstant), elem - assert at.extract_constant(elem.inputs[0]) == val, val + assert pt.extract_constant(elem.inputs[0]) == val, val else: assert len(elem.inputs) == 2, elem.inputs assert isinstance(elem.inputs[0], TensorConstant), elem - assert at.extract_constant(elem.inputs[0]) == val, val + assert pt.extract_constant(elem.inputs[0]) == val, val def assert_identity(self, f): topo = f.maker.fgraph.toposort() @@ -1491,7 +1491,7 @@ def test_equality_shapes(self): assert (f([]) == 1).all() f = function([x], eq(g, -1)) - self.assert_eqs_const(f, 0, op=at.alloc) + self.assert_eqs_const(f, 0, op=pt.alloc) assert (f([3, 3]) == 0).all() def test_and(self): @@ -1590,7 +1590,7 @@ def test_local_mul_specialize(): f = function([v], v * 0, mode=mode) nodes = [node.op for node in f.maker.fgraph.toposort()] - assert nodes == [Shape_i(0), at.alloc] + assert nodes == [Shape_i(0), pt.alloc] f = function([v], v * (-1), mode=mode) nodes = [node.op for node in f.maker.fgraph.toposort()] @@ -1602,7 +1602,7 @@ def test_local_mul_specialize(): f = function([v, m], v * 0 * (-m), mode=mode) nodes = [node.op for node in f.maker.fgraph.toposort()] - assert nodes == [Shape_i(0), at.alloc] + assert nodes == [Shape_i(0), pt.alloc] f = function([v, m], v * (-1) * (-m), mode=mode) nodes = [node.op for node in f.maker.fgraph.toposort()] @@ -1657,7 +1657,7 @@ def test_local_pow_specialize(): f = function([v], v**0, mode=mode) nodes = [node.op for node in f.maker.fgraph.toposort()] - assert nodes == [Shape_i(0), at.alloc] + assert nodes == [Shape_i(0), pt.alloc] utt.assert_allclose(f(val), val**0) f = function([v], v**1, mode=mode) @@ -1679,7 +1679,7 @@ def test_local_pow_specialize(): nodes = [node.op for node in f.maker.fgraph.toposort()] assert len(nodes) == 2 assert nodes[0] == sqr - assert isinstance(nodes[1].scalar_op, aes.basic.Reciprocal) + assert isinstance(nodes[1].scalar_op, ps.basic.Reciprocal) utt.assert_allclose(f(val_no0), val_no0 ** (-2)) f = function([v], v ** (0.5), mode=mode) @@ -1691,7 +1691,7 @@ def test_local_pow_specialize(): nodes = [node.op for node in f.maker.fgraph.toposort()] assert len(nodes) == 2 assert nodes[0] == sqrt - assert isinstance(nodes[1].scalar_op, aes.basic.Reciprocal) + assert isinstance(nodes[1].scalar_op, ps.basic.Reciprocal) utt.assert_allclose(f(val_no0), val_no0 ** (-0.5)) twos = np.full(shape=(10,), fill_value=2.0).astype(config.floatX) @@ -1721,30 +1721,30 @@ def test_local_pow_to_nested_squaring(): nodes = [node.op for node in f.maker.fgraph.toposort()] assert len(nodes) == 1 assert len(f.maker.fgraph.toposort()[0].op.scalar_op.fgraph.apply_nodes) == 6 - assert isinstance(nodes[0].scalar_op, aes.Composite) + assert isinstance(nodes[0].scalar_op, ps.Composite) utt.assert_allclose(f(val), val**15) f = function([v], v ** (-15), mode=mode) nodes = [node.op for node in f.maker.fgraph.toposort()] assert len(nodes) == 2 assert len(f.maker.fgraph.toposort()[0].op.scalar_op.fgraph.apply_nodes) == 6 - assert isinstance(nodes[0].scalar_op, aes.Composite) - assert isinstance(nodes[-1].scalar_op, aes.basic.Reciprocal) + assert isinstance(nodes[0].scalar_op, ps.Composite) + assert isinstance(nodes[-1].scalar_op, ps.basic.Reciprocal) utt.assert_allclose(f(val_no0), val_no0 ** (-15)) f = function([v], v ** (16), mode=mode) 
nodes = [node.op for node in f.maker.fgraph.toposort()] assert len(nodes) == 1 assert len(f.maker.fgraph.toposort()[0].op.scalar_op.fgraph.apply_nodes) == 4 - assert isinstance(nodes[0].scalar_op, aes.Composite) + assert isinstance(nodes[0].scalar_op, ps.Composite) utt.assert_allclose(f(val), val**16) f = function([v], v ** (-16), mode=mode) nodes = [node.op for node in f.maker.fgraph.toposort()] assert len(nodes) == 2 assert len(f.maker.fgraph.toposort()[0].op.scalar_op.fgraph.apply_nodes) == 4 - assert isinstance(nodes[0].scalar_op, aes.Composite) - assert isinstance(nodes[-1].scalar_op, aes.basic.Reciprocal) + assert isinstance(nodes[0].scalar_op, ps.Composite) + assert isinstance(nodes[-1].scalar_op, ps.basic.Reciprocal) utt.assert_allclose(f(val_no0), val_no0 ** (-16)) @@ -1859,7 +1859,7 @@ def test_log_exp(self): node for node in graph if isinstance(node.op, Elemwise) - and isinstance(node.op.scalar_op, (aes.Log, aes.Exp)) + and isinstance(node.op.scalar_op, (ps.Log, ps.Exp)) ] assert len(ops_graph) == 0 np.testing.assert_array_equal(f(data), data) @@ -1871,7 +1871,7 @@ def test_log_exp_integer_upcast(self): node for node in f.maker.fgraph.toposort() if isinstance(node.op, Elemwise) - and isinstance(node.op.scalar_op, (aes.Log, aes.Exp)) + and isinstance(node.op.scalar_op, (ps.Log, ps.Exp)) ] assert len(ops_graph) == 0 @@ -1886,7 +1886,7 @@ def test_log1p_expm1(self, dtype): node for node in graph if isinstance(node.op, Elemwise) - and isinstance(node.op.scalar_op, (aes.Log, aes.Exp, aes.Log1p, aes.Expm1)) + and isinstance(node.op.scalar_op, (ps.Log, ps.Exp, ps.Log1p, ps.Expm1)) ] assert len(ops_graph) == 0 np.testing.assert_array_equal(f(data), data) @@ -1906,7 +1906,7 @@ def test_exp_log(self, exp_op): node for node in graph if isinstance(node.op, Elemwise) - and isinstance(node.op.scalar_op, (aes.Log, aes.Log1p, aes.Exp, aes.Expm1)) + and isinstance(node.op.scalar_op, (ps.Log, ps.Log1p, ps.Exp, ps.Expm1)) ] assert len(ops_graph) == 0 @@ -1932,7 +1932,7 @@ def test_exp_log1p(self, exp_op): node for node in graph if isinstance(node.op, Elemwise) - and isinstance(node.op.scalar_op, (aes.Log, aes.Log1p, aes.Exp, aes.Expm1)) + and isinstance(node.op.scalar_op, (ps.Log, ps.Log1p, ps.Exp, ps.Expm1)) ] assert len(ops_graph) == 0 @@ -1958,9 +1958,7 @@ def test_exp_log1mexp(self, exp_op): node for node in graph if isinstance(node.op, Elemwise) - and isinstance( - node.op.scalar_op, (aes.Log, aes.Log1p, aes.Log1mexp, aes.Expm1) - ) + and isinstance(node.op.scalar_op, (ps.Log, ps.Log1p, ps.Log1mexp, ps.Expm1)) ] assert len(ops_graph) == 0 @@ -1986,7 +1984,7 @@ def test_exp_softplus(self, exp_op): if isinstance(node.op, Elemwise) and isinstance( node.op.scalar_op, - (aes.Log, aes.Log1p, aes.Softplus, aes.Expm1, aes.Switch), + (ps.Log, ps.Log1p, ps.Softplus, ps.Expm1, ps.Switch), ) ] assert len(ops_graph) == 0 @@ -2017,7 +2015,7 @@ def test_exp_log_nested(self, nested_expression, expected_switches): node for node in graph if isinstance(node.op, Elemwise) - and isinstance(node.op.scalar_op, aes.Switch) + and isinstance(node.op.scalar_op, ps.Switch) ] assert len(ops_graph) == expected_switches @@ -2087,8 +2085,8 @@ def test_local_mul_switch_sink(self): (dscalar("x"), self.xs), ]: y = mul( - at.switch(condition[0] > 0, 1.0 * x[0], 0.0 * x[0]), - at.switch(condition[0] > 0, 1.0 * x[0], log(c) * x[0]), + pt.switch(condition[0] > 0, 1.0 * x[0], 0.0 * x[0]), + pt.switch(condition[0] > 0, 1.0 * x[0], log(c) * x[0]), ) f = self.function_remove_nan( [condition[0], x[0], c], [y], mode=self.mode @@ -2108,7 
+2106,7 @@ def test_local_mul_switch_sink(self): # This case prevented a rewrite from being applied in the past x = dscalar("x") - y = at.switch(x < 7, x, sqrt(x - 7)) + y = pt.switch(x < 7, x, sqrt(x - 7)) f = self.function_remove_nan([x], pytensor.gradient.grad(y, x), self.mode) assert f(5) == 1, f(5) @@ -2127,8 +2125,8 @@ def test_local_div_switch_sink(self): (dscalar("x"), self.xs), ]: y = true_div( - at.switch(condition[0] > 0, 1.0 * x[0], 0.0 * x[0]), - at.switch(condition[0] > 0, 1.0 * x[0], log(c) * x[0]), + pt.switch(condition[0] > 0, 1.0 * x[0], 0.0 * x[0]), + pt.switch(condition[0] > 0, 1.0 * x[0], log(c) * x[0]), ) f = self.function_remove_nan( [condition[0], x[0], c], [y], mode=self.mode @@ -2176,7 +2174,7 @@ def test_local_one_plus_erf(self): assert len(topo) == 2 assert topo[0].op == erf assert isinstance(topo[1].op, Elemwise) - assert isinstance(topo[1].op.scalar_op, aes.Add) + assert isinstance(topo[1].op.scalar_op, ps.Add) f(val) def test_local_one_minus_erf(self): @@ -2201,8 +2199,8 @@ def test_local_one_minus_erf(self): assert len(topo) == 2 assert topo[0].op == erf assert isinstance(topo[1].op, Elemwise) - assert isinstance(topo[1].op.scalar_op, aes.Add) or isinstance( - topo[1].op.scalar_op, aes.Sub + assert isinstance(topo[1].op.scalar_op, ps.Add) or isinstance( + topo[1].op.scalar_op, ps.Sub ) def test_local_erf_minus_one(self): @@ -2224,8 +2222,8 @@ def test_local_erf_minus_one(self): assert len(topo) == 2 assert topo[0].op == erf assert isinstance(topo[1].op, Elemwise) - assert isinstance(topo[1].op.scalar_op, aes.Add) or isinstance( - topo[1].op.scalar_op, aes.Sub + assert isinstance(topo[1].op.scalar_op, ps.Add) or isinstance( + topo[1].op.scalar_op, ps.Sub ) @@ -2263,7 +2261,7 @@ def test_local_one_minus_erfc(self): assert len(topo) == 2 assert topo[0].op == erfc assert isinstance(topo[1].op, Elemwise) - assert isinstance(topo[1].op.scalar_op, aes.Sub) + assert isinstance(topo[1].op.scalar_op, ps.Sub) def test_local_erf_neg_minus_one(self): """Test the rewrite ``-1 + erfc(-x) -> erf(x)``.""" @@ -2457,8 +2455,8 @@ def test_elemwise(self): # float Ops mats = matrices("cabxy") c, a, b, x, y = mats - s1 = at.switch(c, a, b) - s2 = at.switch(c, x, y) + s1 = pt.switch(c, a, b) + s2 = pt.switch(c, x, y) for op in ( add, sub, @@ -2481,8 +2479,8 @@ def test_elemwise(self): # integer Ops mats = imatrices("cabxy") c, a, b, x, y = mats - s1 = at.switch(c, a, b) - s2 = at.switch(c, x, y) + s1 = pt.switch(c, a, b) + s2 = pt.switch(c, x, y) for op in ( bitwise_and, bitwise_or, @@ -2492,7 +2490,7 @@ def test_elemwise(self): assert debugprint(g, file="str").count("Switch") == 1 # add/mul with more than two inputs u, v = matrices("uv") - s3 = at.switch(c, u, v) + s3 = pt.switch(c, u, v) for op in (add, mul): g = rewrite(FunctionGraph(mats + [u, v], [op(s1, s2, s3)])) assert debugprint(g, file="str").count("Switch") == 1 @@ -2947,8 +2945,8 @@ def test_local_sum_prod_alloc(self): mode = self.mode.including("specialize").excluding("fusion") for t_like, n_like, nb_nodes in [ - (at.zeros_like, np.zeros_like, (1, 3, 3, 2)), - (at.ones_like, np.ones_like, (5, 5, 5, 6)), + (pt.zeros_like, np.zeros_like, (1, 3, 3, 2)), + (pt.ones_like, np.ones_like, (5, 5, 5, 6)), ]: # test sum f = function([a], t_like(a).sum(None), mode=mode) @@ -2964,14 +2962,14 @@ def test_local_sum_prod_alloc(self): utt.assert_allclose(f(input), n_like(input).sum(d)) assert len(f.maker.fgraph.apply_nodes) == nb_nodes[1] topo = f.maker.fgraph.toposort() - assert topo[-1].op == at.alloc + assert topo[-1].op == pt.alloc 
assert not any(isinstance(node.op, Sum) for node in topo) for i in range(3): f = function([a], t_like(a).sum(i), mode=mode) utt.assert_allclose(f(input), n_like(input).sum(i)) assert len(f.maker.fgraph.apply_nodes) == nb_nodes[2] topo = f.maker.fgraph.toposort() - assert topo[-1].op == at.alloc + assert topo[-1].op == pt.alloc assert not any(isinstance(node.op, Sum) for node in topo) # test prod @@ -2988,14 +2986,14 @@ def test_local_sum_prod_alloc(self): utt.assert_allclose(f(input), n_like(input).prod(d)) # assert len(f.maker.fgraph.apply_nodes) == nb_nodes[1] topo = f.maker.fgraph.toposort() - assert topo[-1].op == at.alloc + assert topo[-1].op == pt.alloc assert not any(isinstance(node.op, Prod) for node in topo) for i in range(3): f = function([a], t_like(a).prod(i), mode=mode) utt.assert_allclose(f(input), n_like(input).prod(i)) # assert len(f.maker.fgraph.apply_nodes) == nb_nodes[2] topo = f.maker.fgraph.toposort() - assert topo[-1].op == at.alloc + assert topo[-1].op == pt.alloc assert not any(isinstance(node.op, Prod) for node in topo) for d, dd in [(0, 0), (1, 0), (2, 0), (0, 1), (1, 1), (2, 1)]: @@ -3003,7 +3001,7 @@ def test_local_sum_prod_alloc(self): utt.assert_allclose(f(input), n_like(input).sum(d).sum(dd)) assert len(f.maker.fgraph.apply_nodes) == nb_nodes[3] topo = f.maker.fgraph.toposort() - assert topo[-1].op == at.alloc + assert topo[-1].op == pt.alloc assert not any(isinstance(node.op, Sum) for node in topo) def test_local_sum_sum_int8(self): @@ -3041,10 +3039,10 @@ def test_local_sum_prod_mul_by_scalar_stack_trace(self): mat = dmatrix() ds = dscalar() - f = function([vect, ds], at_sum(vect * ds), mode=m0) + f = function([vect, ds], pt_sum(vect * ds), mode=m0) assert check_stack_trace(f, ops_to_check="all") - f = function([vect], at_sum(-vect), mode=m0) + f = function([vect], pt_sum(-vect), mode=m0) assert check_stack_trace(f, ops_to_check=[Sum]) f = function([vect, ds], Prod()(vect * ds), mode=m0) @@ -3053,10 +3051,10 @@ def test_local_sum_prod_mul_by_scalar_stack_trace(self): f = function([vect], Prod()(-vect), mode=m0) assert check_stack_trace(f, ops_to_check=[Prod]) - f = function([mat, ds], at_sum(mat * ds), mode=m0) + f = function([mat, ds], pt_sum(mat * ds), mode=m0) assert check_stack_trace(f, ops_to_check="all") - f = function([mat], at_sum(-mat), mode=m0) + f = function([mat], pt_sum(-mat), mode=m0) assert check_stack_trace(f, ops_to_check=[Sum]) def test_local_sum_of_div(self): @@ -3064,7 +3062,7 @@ def test_local_sum_of_div(self): b = vector("b") c = tensor3("c") d = scalar("d") - sum = at_sum + sum = pt_sum sums = [ sum(a / d), sum(a / d.dimshuffle("x", "x")), @@ -3109,7 +3107,7 @@ def test_local_sum_of_div(self): for i, s in enumerate(sums): f = function([a, b, c, d], s, mode=self.mode, on_unused_input="ignore") g = f.maker.fgraph.toposort() - assert isinstance(g[-1].op.scalar_op, aes.basic.TrueDiv) + assert isinstance(g[-1].op.scalar_op, ps.basic.TrueDiv) f(a_val, b_val, c_val, d_val) def test_local_prod_of_div(self): @@ -3197,14 +3195,14 @@ def test_local_prod_of_div(self): ] expected_outer_operator = [ - aes.basic.Mul, - aes.basic.Composite, - aes.basic.Composite, - aes.basic.TrueDiv, - aes.basic.Composite, - aes.basic.Mul, - aes.basic.Composite, - aes.basic.Mul, + ps.basic.Mul, + ps.basic.Composite, + ps.basic.Composite, + ps.basic.TrueDiv, + ps.basic.Composite, + ps.basic.Mul, + ps.basic.Composite, + ps.basic.Mul, ] for i, s in enumerate(prods): @@ -3224,12 +3222,12 @@ def setup_method(self): def test_local_reduce_broadcast_all_0(self): for fct in [ 
- at_sum, - at_all, - at_any, + pt_sum, + pt_all, + pt_any, prod, - at_max, - at_min, + pt_max, + pt_min, ]: x = TensorType("int64", shape=(1, 1, 1))() f = function([x], [fct(x)], mode=self.mode) @@ -3239,12 +3237,12 @@ def test_local_reduce_broadcast_all_0(self): def test_local_reduce_broadcast_all_1(self): for fct in [ - at_sum, - at_all, - at_any, + pt_sum, + pt_all, + pt_any, prod, - at_max, - at_min, + pt_max, + pt_min, ]: x = TensorType("int64", shape=(1, 1))() f = function([x], [fct(x, axis=[0, 1])], mode=self.mode) @@ -3254,12 +3252,12 @@ def test_local_reduce_broadcast_all_1(self): def test_local_reduce_broadcast_some_0(self): for fct in [ - at_sum, - at_all, - at_any, + pt_sum, + pt_all, + pt_any, prod, - at_max, - at_min, + pt_max, + pt_min, ]: x = TensorType("int64", shape=(1, None, 1))() f = function([x], [fct(x, axis=[0, 1])], mode=self.mode) @@ -3279,12 +3277,12 @@ def test_local_reduce_broadcast_some_0(self): def test_local_reduce_broadcast_some_1(self): for fct in [ - at_sum, - at_all, - at_any, + pt_sum, + pt_all, + pt_any, prod, - at_max, - at_min, + pt_max, + pt_min, ]: x = TensorType("int64", shape=(1, 1, 1))() f = function([x], [fct(x, axis=[0, 2])], mode=self.mode) @@ -3301,9 +3299,9 @@ def test_local_reduce_join(self): z = np.asarray([[5, 0], [1, 2]], dtype=config.floatX) # Test different reduction scalar operation for out, res in [ - (at_max((vx, vy), 0), np.max((x, y), 0)), - (at_min((vx, vy), 0), np.min((x, y), 0)), - (at_sum((vx, vy, vz), 0), np.sum((x, y, z), 0)), + (pt_max((vx, vy), 0), np.max((x, y), 0)), + (pt_min((vx, vy), 0), np.min((x, y), 0)), + (pt_sum((vx, vy, vz), 0), np.sum((x, y, z), 0)), (prod((vx, vy, vz), 0), np.prod((x, y, z), 0)), (prod((vx, vy.T, vz), 0), np.prod((x, y.T, z), 0)), ]: @@ -3318,13 +3316,13 @@ def test_local_reduce_join(self): # on 32 bit systems A = shared(np.array([1, 2, 3, 4, 5], dtype="int64")) - f = function([], at_sum(at.stack([A, A]), axis=0), mode=self.mode) + f = function([], pt_sum(pt.stack([A, A]), axis=0), mode=self.mode) utt.assert_allclose(f(), [2, 4, 6, 8, 10]) topo = f.maker.fgraph.toposort() assert isinstance(topo[-1].op, Elemwise) # Test a case that was bugged in a old PyTensor bug - f = function([], at_sum(at.stack([A, A]), axis=1), mode=self.mode) + f = function([], pt_sum(pt.stack([A, A]), axis=1), mode=self.mode) utt.assert_allclose(f(), [15, 15]) topo = f.maker.fgraph.toposort() @@ -3332,13 +3330,13 @@ def test_local_reduce_join(self): # This case could be rewritten A = shared(np.array([1, 2, 3, 4, 5]).reshape(5, 1)) - f = function([], at_sum(at.concatenate((A, A), axis=1), axis=1), mode=self.mode) + f = function([], pt_sum(pt.concatenate((A, A), axis=1), axis=1), mode=self.mode) utt.assert_allclose(f(), [2, 4, 6, 8, 10]) topo = f.maker.fgraph.toposort() assert not isinstance(topo[-1].op, Elemwise) A = shared(np.array([1, 2, 3, 4, 5]).reshape(5, 1)) - f = function([], at_sum(at.concatenate((A, A), axis=1), axis=0), mode=self.mode) + f = function([], pt_sum(pt.concatenate((A, A), axis=1), axis=0), mode=self.mode) utt.assert_allclose(f(), [15, 15]) topo = f.maker.fgraph.toposort() assert not isinstance(topo[-1].op, Elemwise) @@ -3346,7 +3344,7 @@ def test_local_reduce_join(self): # Test that the rewrite does not crash in one case where it # is not applied. 
Reported at # https://groups.google.com/d/topic/theano-users/EDgyCU00fFA/discussion - out = at_sum([vx, vy, vz], axis=None) + out = pt_sum([vx, vy, vz], axis=None) f = function([vx, vy, vz], out) @@ -3355,14 +3353,14 @@ def test_local_useless_adds(): # Test for all zeros a = scalar() - s = add(at.zeros_like(a)) + s = add(pt.zeros_like(a)) mode_with_rewrite = default_mode.including("canonicalization", "local_useless_fill") f = function([a], s, mode=mode_with_rewrite) assert not any(node.op == add for node in f.maker.fgraph.apply_nodes) # test of non-zero dimension a = vector() - s = add(at.zeros_like(a)) + s = add(pt.zeros_like(a)) mode_with_rewrite = default_mode.including( "canonicalization", "local_useless_elemwise" ) @@ -3371,7 +3369,7 @@ def test_local_useless_adds(): # test of 0-d a = scalar() - s = add(at.zeros_like(a)) + s = add(pt.zeros_like(a)) mode_with_rewrite = default_mode.including( "canonicalization", "local_useless_fill", "local_useless_elemwise" ) @@ -3379,8 +3377,8 @@ def test_local_useless_adds(): assert not any(node.op == add for node in f.maker.fgraph.apply_nodes) # Test when the 0 input is forcing upcasting - a = at.constant(0, dtype="int64") - b = at.constant(1, dtype="int32") + a = pt.constant(0, dtype="int64") + b = pt.constant(1, dtype="int32") s = a + b mode_with_rewrite = default_mode.including( "canonicalization", "local_add_canonizer" @@ -3396,7 +3394,7 @@ def test_local_div_to_reciprocal(): num_len_s = lscalar("num_len") denom_s = scalar("denom") - num_v = at.alloc(1, num_len_s) + num_v = pt.alloc(1, num_len_s) denom_m = denom_s.dimshuffle("x", "x") out = num_v / denom_m @@ -3425,7 +3423,7 @@ def test_remove_floor(self): node for node in graph if isinstance(node.op, Elemwise) - and isinstance(node.op.scalar_op, aes.IntDiv) + and isinstance(node.op.scalar_op, ps.IntDiv) ] assert len(divs) == 0 @@ -3439,7 +3437,7 @@ def test2(self): node for node in graph if isinstance(node.op, Elemwise) - and isinstance(node.op.scalar_op, aes.IntDiv) + and isinstance(node.op.scalar_op, ps.IntDiv) ] assert len(divs) == 0 @@ -3453,7 +3451,7 @@ def test3(self): node for node in graph if isinstance(node.op, Elemwise) - and isinstance(node.op.scalar_op, aes.IntDiv) + and isinstance(node.op.scalar_op, ps.IntDiv) ] assert len(divs) == 0 @@ -3537,8 +3535,8 @@ def test_local_mul_exp_to_exp_add(): utt.assert_allclose(f(3, 4, 5, 6), np.exp(3 + 4 + 5 + 6)) graph = f.maker.fgraph.toposort() assert all(isinstance(n.op, Elemwise) for n in graph) - assert any(isinstance(n.op.scalar_op, aes.Add) for n in graph) - assert not any(isinstance(n.op.scalar_op, aes.Mul) for n in graph) + assert any(isinstance(n.op.scalar_op, ps.Add) for n in graph) + assert not any(isinstance(n.op.scalar_op, ps.Mul) for n in graph) # e^x * e^y * e^z / e^w = e^(x+y+z-w) op = expx * expy * expz / expw @@ -3546,10 +3544,10 @@ def test_local_mul_exp_to_exp_add(): utt.assert_allclose(f(3, 4, 5, 6), np.exp(3 + 4 + 5 - 6)) graph = f.maker.fgraph.toposort() assert all(isinstance(n.op, Elemwise) for n in graph) - assert any(isinstance(n.op.scalar_op, aes.Add) for n in graph) - assert any(isinstance(n.op.scalar_op, aes.Sub) for n in graph) - assert not any(isinstance(n.op.scalar_op, aes.Mul) for n in graph) - assert not any(isinstance(n.op.scalar_op, aes.TrueDiv) for n in graph) + assert any(isinstance(n.op.scalar_op, ps.Add) for n in graph) + assert any(isinstance(n.op.scalar_op, ps.Sub) for n in graph) + assert not any(isinstance(n.op.scalar_op, ps.Mul) for n in graph) + assert not any(isinstance(n.op.scalar_op, ps.TrueDiv) 
for n in graph) # e^x * e^y / e^z * e^w = e^(x+y-z+w) op = expx * expy / expz * expw @@ -3557,10 +3555,10 @@ def test_local_mul_exp_to_exp_add(): utt.assert_allclose(f(3, 4, 5, 6), np.exp(3 + 4 - 5 + 6)) graph = f.maker.fgraph.toposort() assert all(isinstance(n.op, Elemwise) for n in graph) - assert any(isinstance(n.op.scalar_op, aes.Add) for n in graph) - assert any(isinstance(n.op.scalar_op, aes.Sub) for n in graph) - assert not any(isinstance(n.op.scalar_op, aes.Mul) for n in graph) - assert not any(isinstance(n.op.scalar_op, aes.TrueDiv) for n in graph) + assert any(isinstance(n.op.scalar_op, ps.Add) for n in graph) + assert any(isinstance(n.op.scalar_op, ps.Sub) for n in graph) + assert not any(isinstance(n.op.scalar_op, ps.Mul) for n in graph) + assert not any(isinstance(n.op.scalar_op, ps.TrueDiv) for n in graph) # e^x / e^y / e^z = (e^x / e^y) / e^z = e^(x-y-z) op = expx / expy / expz @@ -3568,8 +3566,8 @@ def test_local_mul_exp_to_exp_add(): utt.assert_allclose(f(3, 4, 5), np.exp(3 - 4 - 5)) graph = f.maker.fgraph.toposort() assert all(isinstance(n.op, Elemwise) for n in graph) - assert any(isinstance(n.op.scalar_op, aes.Sub) for n in graph) - assert not any(isinstance(n.op.scalar_op, aes.TrueDiv) for n in graph) + assert any(isinstance(n.op.scalar_op, ps.Sub) for n in graph) + assert not any(isinstance(n.op.scalar_op, ps.TrueDiv) for n in graph) # e^x * y * e^z * w = e^(x+z) * y * w op = expx * y * expz * w @@ -3577,8 +3575,8 @@ def test_local_mul_exp_to_exp_add(): utt.assert_allclose(f(3, 4, 5, 6), np.exp(3 + 5) * 4 * 6) graph = f.maker.fgraph.toposort() assert all(isinstance(n.op, Elemwise) for n in graph) - assert any(isinstance(n.op.scalar_op, aes.Add) for n in graph) - assert any(isinstance(n.op.scalar_op, aes.Mul) for n in graph) + assert any(isinstance(n.op.scalar_op, ps.Add) for n in graph) + assert any(isinstance(n.op.scalar_op, ps.Mul) for n in graph) # expect same for matrices as well mx = matrix("mx") @@ -3589,8 +3587,8 @@ def test_local_mul_exp_to_exp_add(): utt.assert_allclose(f(M1, M2), np.exp(M1 + M2)) graph = f.maker.fgraph.toposort() assert all(isinstance(n.op, Elemwise) for n in graph) - assert any(isinstance(n.op.scalar_op, aes.Add) for n in graph) - assert not any(isinstance(n.op.scalar_op, aes.Mul) for n in graph) + assert any(isinstance(n.op.scalar_op, ps.Add) for n in graph) + assert not any(isinstance(n.op.scalar_op, ps.Mul) for n in graph) # checking whether further rewrites can proceed after this one as one would expect # e^x * e^(-x) = e^(x-x) = e^0 = 1 @@ -3635,8 +3633,8 @@ def test_local_mul_pow_to_pow_add(): utt.assert_allclose(f(3, 4, 5, 6), 2 ** (3 + 4 + 5 + 6)) graph = f.maker.fgraph.toposort() assert all(isinstance(n.op, Elemwise) for n in graph) - assert any(isinstance(n.op.scalar_op, aes.Add) for n in graph) - assert not any(isinstance(n.op.scalar_op, aes.Mul) for n in graph) + assert any(isinstance(n.op.scalar_op, ps.Add) for n in graph) + assert not any(isinstance(n.op.scalar_op, ps.Mul) for n in graph) # 2^x * a^y * 2^z * b^w * c^v * a^u * s * b^t = 2^(x+z) * a^(y+u) * b^(w+t) * c^v * s op = 2**x * a**y * 2**z * b**w * c**v * a**u * s * b**t @@ -3647,9 +3645,9 @@ def test_local_mul_pow_to_pow_add(): ) graph = f.maker.fgraph.toposort() assert all(isinstance(n.op, Elemwise) for n in graph) - assert len([True for n in graph if isinstance(n.op.scalar_op, aes.Add)]) == 3 - assert len([True for n in graph if isinstance(n.op.scalar_op, aes.Pow)]) == 4 - assert any(isinstance(n.op.scalar_op, aes.Mul) for n in graph) + assert len([True for n in 
graph if isinstance(n.op.scalar_op, ps.Add)]) == 3 + assert len([True for n in graph if isinstance(n.op.scalar_op, ps.Pow)]) == 4 + assert any(isinstance(n.op.scalar_op, ps.Mul) for n in graph) # (2^x / 2^y) * (a^z / a^w) = 2^(x-y) * a^(z-w) op = 2**x / 2**y * (a**z / a**w) @@ -3657,8 +3655,8 @@ def test_local_mul_pow_to_pow_add(): utt.assert_allclose(f(3, 5, 6, 4, 7), 2 ** (3 - 5) * 7 ** (6 - 4)) graph = f.maker.fgraph.toposort() assert all(isinstance(n.op, Elemwise) for n in graph) - assert len([True for n in graph if isinstance(n.op.scalar_op, aes.Sub)]) == 2 - assert any(isinstance(n.op.scalar_op, aes.Mul) for n in graph) + assert len([True for n in graph if isinstance(n.op.scalar_op, ps.Sub)]) == 2 + assert any(isinstance(n.op.scalar_op, ps.Mul) for n in graph) # a^x * a^y * exp(z) * exp(w) = a^(x+y) * exp(z+w) op = a**x * a**y * exp(z) * exp(w) @@ -3666,8 +3664,8 @@ def test_local_mul_pow_to_pow_add(): utt.assert_allclose(f(3, 4, 5, 6, 2), 2 ** (3 + 4) * np.exp(5 + 6)) graph = f.maker.fgraph.toposort() assert all(isinstance(n.op, Elemwise) for n in graph) - assert len([True for n in graph if isinstance(n.op.scalar_op, aes.Add)]) == 2 - assert any(isinstance(n.op.scalar_op, aes.Mul) for n in graph) + assert len([True for n in graph if isinstance(n.op.scalar_op, ps.Add)]) == 2 + assert any(isinstance(n.op.scalar_op, ps.Mul) for n in graph) def test_local_expm1(): @@ -3690,17 +3688,17 @@ def test_local_expm1(): utt.assert_allclose(f_val, f_test(x_val)) assert any( - isinstance(n.op, Elemwise) and isinstance(n.op.scalar_op, aes.basic.Expm1) + isinstance(n.op, Elemwise) and isinstance(n.op.scalar_op, ps.basic.Expm1) for n in f.maker.fgraph.toposort() ) assert not any( - isinstance(n.op, Elemwise) and isinstance(n.op.scalar_op, aes.basic.Expm1) + isinstance(n.op, Elemwise) and isinstance(n.op.scalar_op, ps.basic.Expm1) for n in g.maker.fgraph.toposort() ) assert not any( - isinstance(n.op, Elemwise) and isinstance(n.op.scalar_op, aes.basic.Expm1) + isinstance(n.op, Elemwise) and isinstance(n.op.scalar_op, ps.basic.Expm1) for n in h.maker.fgraph.toposort() ) @@ -3708,7 +3706,7 @@ def test_local_expm1(): expect_rewrite = config.mode != "FAST_COMPILE" assert ( any( - isinstance(n.op, Elemwise) and isinstance(n.op.scalar_op, aes.basic.Expm1) + isinstance(n.op, Elemwise) and isinstance(n.op.scalar_op, ps.basic.Expm1) for n in r.maker.fgraph.toposort() ) == expect_rewrite @@ -3716,7 +3714,7 @@ def test_local_expm1(): def compile_graph_log_sum_exp(x, axis, dimshuffle_op=None): - sum_exp = at_sum(exp(x), axis=axis) + sum_exp = pt_sum(exp(x), axis=axis) if dimshuffle_op: sum_exp = dimshuffle_op(sum_exp) y = log(sum_exp) @@ -3731,7 +3729,7 @@ def check_max_log_sum_exp(x, axis, dimshuffle_op=None): for node in fgraph: if ( hasattr(node.op, "scalar_op") - and node.op.scalar_op == aes.basic.scalar_maximum + and node.op.scalar_op == ps.basic.scalar_maximum ): return @@ -3806,7 +3804,7 @@ def test_local_log_sum_exp_inf(): def test_local_reciprocal_1_plus_exp(): x = vector("x") - y = at.reciprocal(1 + exp(x)) + y = pt.reciprocal(1 + exp(x)) z = rewrite_graph(y, include=["canonicalization", "stabilize", "specialize"]) assert z.owner.op == sigmoid @@ -3860,23 +3858,23 @@ def test_exp_over_1_plus_exp(self): f(data) # tests inv_1_plus_exp - f = pytensor.function([x], at.fill(x, 1.0) / (1 + exp(-x)), mode=m) + f = pytensor.function([x], pt.fill(x, 1.0) / (1 + exp(-x)), mode=m) # todo: solve issue #4589 first # assert check_stack_trace(f, ops_to_check=sigmoid) assert [node.op for node in 
f.maker.fgraph.toposort()] == [sigmoid] f(data) - f = pytensor.function([x], at.fill(x, 1.0) / (2 + exp(-x)), mode=m) + f = pytensor.function([x], pt.fill(x, 1.0) / (2 + exp(-x)), mode=m) assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid] f(data) - f = pytensor.function([x], at.fill(x, 1.0) / (1 - exp(-x)), mode=m) + f = pytensor.function([x], pt.fill(x, 1.0) / (1 - exp(-x)), mode=m) assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid] f(data) - f = pytensor.function([x], at.fill(x, 1.1) / (1 + exp(-x)), mode=m) + f = pytensor.function([x], pt.fill(x, 1.1) / (1 + exp(-x)), mode=m) assert [node.op for node in f.maker.fgraph.toposort()] != [sigmoid] f(data) # tests inv_1_plus_exp with neg - f = pytensor.function([x], at.fill(x, -1.0) / (1 + exp(-x)), mode=m) + f = pytensor.function([x], pt.fill(x, -1.0) / (1 + exp(-x)), mode=m) # todo: solve issue #4589 first # assert check_stack_trace( # f, ops_to_check=[sigmoid, neg_inplace]) @@ -3885,19 +3883,19 @@ def test_exp_over_1_plus_exp(self): inplace.neg_inplace, ] f(data) - f = pytensor.function([x], at.fill(x, -1.0) / (1 - exp(-x)), mode=m) + f = pytensor.function([x], pt.fill(x, -1.0) / (1 - exp(-x)), mode=m) assert [node.op for node in f.maker.fgraph.toposort()] != [ sigmoid, inplace.neg_inplace, ] f(data) - f = pytensor.function([x], at.fill(x, -1.0) / (2 + exp(-x)), mode=m) + f = pytensor.function([x], pt.fill(x, -1.0) / (2 + exp(-x)), mode=m) assert [node.op for node in f.maker.fgraph.toposort()] != [ sigmoid, inplace.neg_inplace, ] f(data) - f = pytensor.function([x], at.fill(x, -1.1) / (1 + exp(-x)), mode=m) + f = pytensor.function([x], pt.fill(x, -1.1) / (1 + exp(-x)), mode=m) assert [node.op for node in f.maker.fgraph.toposort()] != [ sigmoid, inplace.neg_inplace, @@ -3910,7 +3908,7 @@ def test_exp_over_1_plus_exp(self): # = - (sigm(x) * sigm(x)) f = pytensor.function( [x], - (at.fill(x, -1.0) * exp(x)) / ((1 + exp(x)) * (1 + exp(-x))), + (pt.fill(x, -1.0) * exp(x)) / ((1 + exp(x)) * (1 + exp(-x))), mode=m, ) # todo: solve issue #4589 first @@ -3919,7 +3917,7 @@ def test_exp_over_1_plus_exp(self): f(data) f = pytensor.function( [x], - (at.fill(x, -1.1) * exp(x)) / ((1 + exp(x)) * (1 + exp(-x))), + (pt.fill(x, -1.1) * exp(x)) / ((1 + exp(x)) * (1 + exp(-x))), mode=m, ) assert [node.op for node in f.maker.fgraph.toposort()] != [ @@ -3930,7 +3928,7 @@ def test_exp_over_1_plus_exp(self): f(data) f = pytensor.function( [x], - (at.fill(x, -1.0) * exp(x)) / ((2 + exp(x)) * (1 + exp(-x))), + (pt.fill(x, -1.0) * exp(x)) / ((2 + exp(x)) * (1 + exp(-x))), mode=m, ) assert [node.op for node in f.maker.fgraph.toposort()] != [ @@ -3941,7 +3939,7 @@ def test_exp_over_1_plus_exp(self): f(data) f = pytensor.function( [x], - (at.fill(x, -1.0) * exp(x)) / ((1 + exp(x)) * (2 + exp(-x))), + (pt.fill(x, -1.0) * exp(x)) / ((1 + exp(x)) * (2 + exp(-x))), mode=m, ) assert [node.op for node in f.maker.fgraph.toposort()] != [ @@ -3952,7 +3950,7 @@ def test_exp_over_1_plus_exp(self): f(data) f = pytensor.function( [x], - (at.fill(x, -1.0) * exp(x)) / ((1 + exp(x)) * (1 + exp(x))), + (pt.fill(x, -1.0) * exp(x)) / ((1 + exp(x)) * (1 + exp(x))), mode=m, ) assert [node.op for node in f.maker.fgraph.toposort()] != [ @@ -3963,7 +3961,7 @@ def test_exp_over_1_plus_exp(self): f(data) f = pytensor.function( [x], - (at.fill(x, -1.0) * exp(x)) / ((1 + exp(x)) * (2 + exp(-x))), + (pt.fill(x, -1.0) * exp(x)) / ((1 + exp(x)) * (2 + exp(-x))), mode=m, ) assert [node.op for node in f.maker.fgraph.toposort()] != [ @@ -3985,7 +3983,7 @@ def 
test_local_1msigmoid(self): assert [node.op for node in f.maker.fgraph.toposort()] == [neg, sigmoid] # Test `inv_1_plus_exp` - f = pytensor.function([x], 1 - at.fill(x, 1.0) / (1 + exp(-x)), mode=m) + f = pytensor.function([x], 1 - pt.fill(x, 1.0) / (1 + exp(-x)), mode=m) # assert check_stack_trace(f, ops_to_check=[neg, sigmoid]) assert [node.op for node in f.maker.fgraph.toposort()] == [neg, sigmoid] @@ -4149,13 +4147,13 @@ def test_log1msigm_to_softplus(self): assert isinstance(topo[1].op.scalar_op, pytensor.scalar.Neg) # Same test with a flatten - out = log(1 - at.flatten(sigmoid(x))) + out = log(1 - pt.flatten(sigmoid(x))) f = pytensor.function([x], out, mode=self.m) # assert check_stack_trace(f, ops_to_check='all') topo = f.maker.fgraph.toposort() assert len(topo) == 3 - assert at.is_flat(topo[0].outputs[0]) + assert pt.is_flat(topo[0].outputs[0]) assert isinstance(topo[1].op.scalar_op, pytensor.scalar.Softplus) assert isinstance(topo[2].op.scalar_op, pytensor.scalar.Neg) f(np.random.random((54, 11)).astype(config.floatX)) @@ -4292,7 +4290,7 @@ def test_local_sub_neg_to_add(): for node in f.maker.fgraph.toposort() if not isinstance(node.op, DimShuffle) ] - assert nodes == [at.add] + assert nodes == [pt.add] x_test = np.full((), 1.0, dtype=config.floatX) y_test = np.full(5, 2.0, dtype=config.floatX) @@ -4311,7 +4309,7 @@ def test_local_sub_neg_to_add_const(): for node in f.maker.fgraph.toposort() if not isinstance(node.op, DimShuffle) ] - assert nodes == [at.add] + assert nodes == [pt.add] x_test = np.array([3, 4], dtype=config.floatX) assert np.allclose(f(x_test), x_test - (-const)) @@ -4330,7 +4328,7 @@ def test_local_add_neg_to_sub(first_negative): for node in f.maker.fgraph.toposort() if not isinstance(node.op, DimShuffle) ] - assert nodes == [at.sub] + assert nodes == [pt.sub] x_test = np.full((), 1.0, dtype=config.floatX) y_test = np.full(5, 2.0, dtype=config.floatX) @@ -4349,7 +4347,7 @@ def test_local_add_neg_to_sub_const(): for node in f.maker.fgraph.toposort() if not isinstance(node.op, DimShuffle) ] - assert nodes == [at.sub] + assert nodes == [pt.sub] x_test = np.array([3, 4], dtype=config.floatX) assert np.allclose(f(x_test), x_test + (-const)) @@ -4362,7 +4360,7 @@ def test_log1mexp_stabilization(): f = function([x], log(1 - exp(x)), mode=mode) nodes = [node.op for node in f.maker.fgraph.toposort()] - assert nodes == [at.log1mexp] + assert nodes == [pt.log1mexp] # Check values that would under or overflow without rewriting assert f([-(2.0**-55)]) != -np.inf @@ -4391,7 +4389,7 @@ def test_logdiffexp(): node for node in graph if isinstance(node.op, Elemwise) - and isinstance(node.op.scalar_op, (aes.Exp, aes.Log)) + and isinstance(node.op.scalar_op, (ps.Exp, ps.Log)) ] ) == 0 @@ -4402,7 +4400,7 @@ def test_logdiffexp(): node for node in graph if isinstance(node.op, Elemwise) - and isinstance(node.op.scalar_op, aes.Log1mexp) + and isinstance(node.op.scalar_op, ps.Log1mexp) ] ) == 1 diff --git a/tests/tensor/rewriting/test_shape.py b/tests/tensor/rewriting/test_shape.py index 604d9943b3..c0fd7513b3 100644 --- a/tests/tensor/rewriting/test_shape.py +++ b/tests/tensor/rewriting/test_shape.py @@ -3,7 +3,7 @@ import numpy as np import pytest -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import shared from pytensor.compile.function import function from pytensor.compile.mode import get_default_mode, get_mode @@ -104,7 +104,7 @@ def last_pool(im_shp, p_shp, p_strd): last_pool_c = last_pool(img_shp, pool_shp, pool_stride) * pool_stride required_c = 
last_pool_c + pool_shp - wide_infinity = at.alloc( + wide_infinity = pt.alloc( -np.inf, c01b.shape[0], required_r, required_c, c01b.shape[3] ) @@ -143,9 +143,9 @@ def test_constant_merge(self): # This test the error in gh-1122 that is a caused by the # combination of merge rewriter and ShapeFeature. - x = at.constant([0, 0]) + x = pt.constant([0, 0]) y = x[1:] - x1 = x - at.join(0, y, y) + x1 = x - pt.join(0, y, y) x1.eval() def test_local_track_shape_i(self): @@ -264,7 +264,7 @@ def setup_method(self): def test_0(self): mode = get_default_mode().including("local_useless_reshape") i = iscalar("i") - m = at.mgrid[0:i,] + m = pt.mgrid[0:i,] f = function([i], m, mode=mode) topo = f.maker.fgraph.toposort() assert not any(isinstance(n.op, Reshape) for n in topo) @@ -399,7 +399,7 @@ def test_infer_shape(self): class TestSameShape: def test_scalar(self): x = scalar() - cst = at.constant(1) + cst = pt.constant(1) o = x + cst fgraph = FunctionGraph([x], [o], clone=False) shape_feature = ShapeFeature() @@ -408,7 +408,7 @@ def test_scalar(self): def test_vector(self): x = vector() - cst = at.constant(1) + cst = pt.constant(1) o = x + cst fgraph = FunctionGraph([x], [o], clone=False) shape_feature = ShapeFeature() @@ -435,8 +435,8 @@ def test_no_static_shapes(self): [2, None], ) def test_vector_dim(self, y_dim_0): - x = at.tensor(dtype="floatX", shape=(2, None)) - y = at.tensor(dtype="floatX", shape=(y_dim_0, None)) + x = pt.tensor(dtype="floatX", shape=(2, None)) + y = pt.tensor(dtype="floatX", shape=(y_dim_0, None)) o = x + y fgraph = FunctionGraph([x, y], [o], clone=False) shape_feature = ShapeFeature() diff --git a/tests/tensor/rewriting/test_subtensor.py b/tests/tensor/rewriting/test_subtensor.py index b77cdbe315..f855a06194 100644 --- a/tests/tensor/rewriting/test_subtensor.py +++ b/tests/tensor/rewriting/test_subtensor.py @@ -2,8 +2,8 @@ import pytest import pytensor -import pytensor.scalar as aes -import pytensor.tensor as at +import pytensor.scalar as ps +import pytensor.tensor as pt from pytensor import shared from pytensor.compile.function import function from pytensor.compile.mode import Mode, get_default_mode, get_mode @@ -99,22 +99,22 @@ def test_local_replace_AdvancedSubtensor(indices, is_none): Y = X[indices] - res_at = local_replace_AdvancedSubtensor.transform(None, Y.owner) + res_pt = local_replace_AdvancedSubtensor.transform(None, Y.owner) if is_none: - assert res_at is None + assert res_pt is None else: - (res_at,) = res_at + (res_pt,) = res_pt assert not any( isinstance(v.owner.op, AdvancedSubtensor) - for v in ancestors([res_at]) + for v in ancestors([res_pt]) if v.owner ) inputs = [X] + [i for i in indices if isinstance(i, Variable)] - res_fn = function(inputs, res_at, mode=Mode("py", None, None)) + res_fn = function(inputs, res_pt, mode=Mode("py", None, None)) exp_res_fn = function(inputs, Y, mode=Mode("py", None, None)) # Make sure that the expected result graph has an `AdvancedSubtensor` @@ -161,7 +161,7 @@ def test_local_useless_inc_subtensor_increment_zeros(): r"""Make sure we remove `IncSubtensor`\s that are increments on entire zero arrays.""" y = matrix("y") - s = at.zeros((2, 2))[:, :] + s = pt.zeros((2, 2))[:, :] o_shape = inc_subtensor(s, specify_shape(y, s.shape)) mode = get_default_mode().including("local_useless_inc_subtensor") @@ -198,7 +198,7 @@ def test_local_useless_inc_subtensor_no_opt(): assert any(isinstance(n.op, IncSubtensor) for n in topo) # This is an increment with a non-zero target array - s = at.ones((2, 2))[:, :] + s = pt.ones((2, 2))[:, :] o_shape = 
inc_subtensor(s, specify_shape(y, s.shape)) f_shape = function([y], o_shape, mode=mode) @@ -209,7 +209,7 @@ def test_local_useless_inc_subtensor_no_opt(): class TestLocalUselessSubtensor: x = matrix("x") - s = aes.int32("s") + s = ps.int32("s") mode = mode_opt.including( "local_useless_subtensor", "local_useless_AdvancedSubtensor1" ) @@ -308,7 +308,7 @@ def test_local_useless_subtensor_2(self, idx, res): lambda x: ( slice( 0, - at.scalar_from_tensor(x.shape[0]) + pt.scalar_from_tensor(x.shape[0]) if isinstance(x, Variable) else x.shape[0], ), @@ -395,11 +395,11 @@ def test_local_useless_subtensor_5(self, idx_fn, res): ([1, 0], False), ([0, 0], False), ([0, 0, 1], False), - (at.arange(2), True), - (at.arange(0, 2), True), - (at.arange(0, 2, 2), False), - (at.arange(0, 2, -1), False), - (at.arange(1, 2), False), + (pt.arange(2), True), + (pt.arange(0, 2), True), + (pt.arange(0, 2, 2), False), + (pt.arange(0, 2, -1), False), + (pt.arange(1, 2), False), ], ) def test_local_useless_subtensor_6(self, idx, res): @@ -684,7 +684,7 @@ def test_scalar_idx(self): def test_idx_symbolic(self): x, y, z = iscalars("xyz") v = MakeVector("int32")(x, y, z) - idx = at.as_tensor([0], dtype=np.int64) + idx = pt.as_tensor([0], dtype=np.int64) f = function([x, y, z], v[idx], mode=self.mode) opt_fgraph = f.maker.fgraph @@ -821,7 +821,7 @@ def test_basic_2(self): assert isinstance(prog[0].op, Subtensor) assert isinstance(prog[1].op, DimShuffle) assert isinstance(prog[2].op, Subtensor) - assert isinstance(prog[3].op.scalar_op, aes.Composite) # Composite{add,add} + assert isinstance(prog[3].op.scalar_op, ps.Composite) # Composite{add,add} assert len(prog) == 4 # Check stacktrace was copied over correctly after opt was applied @@ -841,7 +841,7 @@ def test_basic_3(self): assert isinstance(prog[0].op, Subtensor) assert isinstance(prog[1].op, DimShuffle) assert isinstance(prog[2].op, Subtensor) - assert isinstance(prog[3].op.scalar_op, aes.Composite) # Composite{add,add} + assert isinstance(prog[3].op.scalar_op, ps.Composite) # Composite{add,add} assert len(prog) == 4 # Check stacktrace was copied over correctly after opt was applied @@ -898,7 +898,7 @@ def test_basic_6(self): prog = f.maker.fgraph.toposort() assert isinstance(prog[0].op, DimShuffle) - assert isinstance(prog[1].op.scalar_op, aes.Composite) # Composite{add,exp} + assert isinstance(prog[1].op.scalar_op, ps.Composite) # Composite{add,exp} # first subtensor assert isinstance(prog[2].op, Subtensor) assert len(prog) == 3 @@ -919,7 +919,7 @@ def test_basic_7(self): prog = f.maker.fgraph.toposort() assert isinstance(prog[0].op, Subtensor) # Composite{add,exp} - assert isinstance(prog[1].op.scalar_op, aes.Composite) + assert isinstance(prog[1].op.scalar_op, ps.Composite) assert len(prog) == 2 f([1, 2, 3], 4) # let debugmode test something @@ -1572,7 +1572,7 @@ def test_stack_trace(self): for y, out in zip(ys, outs): f = function([x, y, idx], out, self.mode) - assert check_stack_trace(f, ops_to_check=(Assert, aes.Cast)) + assert check_stack_trace(f, ops_to_check=(Assert, ps.Cast)) class TestSubtensorAllocRewrites: @@ -1587,8 +1587,8 @@ def setup_method(self): def test_setsubtensor_allocs0(self): x = matrix() y = matrix() - x0 = at.zeros_like(x) - y0 = at.zeros_like(y) + x0 = pt.zeros_like(x) + y0 = pt.zeros_like(y) z = set_subtensor(x0[:4], y0) f = function([x, y], z, mode=self.mode) assert all( @@ -1597,8 +1597,8 @@ def test_setsubtensor_allocs0(self): def test_setsubtensor_allocs1(self): y = matrix() - x0 = at.constant(np.asarray(np.zeros((4, 4)), 
dtype=config.floatX)) - y0 = at.zeros_like(y) + x0 = pt.constant(np.asarray(np.zeros((4, 4)), dtype=config.floatX)) + y0 = pt.zeros_like(y) z = set_subtensor(x0[:4], y0) f = function([y], z, mode=self.mode) assert all( @@ -1607,8 +1607,8 @@ def test_setsubtensor_allocs1(self): def test_setsubtensor_allocs1t(self): y = matrix() - x0 = at.constant(np.asarray(np.zeros((4, 4)), dtype=config.floatX)) - y0 = at.zeros_like(y) + x0 = pt.constant(np.asarray(np.zeros((4, 4)), dtype=config.floatX)) + y0 = pt.zeros_like(y) z = set_subtensor(x0[:4], y0.T) f = function([y], z, mode=mode_opt) assert all( @@ -1617,8 +1617,8 @@ def test_setsubtensor_allocs1t(self): def test_setsubtensor_allocs2(self): x = matrix() - y0 = at.constant(np.asarray(np.zeros_like((4, 4)), dtype=config.floatX)) - x0 = at.zeros_like(x) + y0 = pt.constant(np.asarray(np.zeros_like((4, 4)), dtype=config.floatX)) + x0 = pt.zeros_like(x) z = set_subtensor(x0[:4], y0) f = function([x], z, mode=self.mode) assert all( @@ -1628,7 +1628,7 @@ def test_setsubtensor_allocs2(self): def test_incsubtensor_allocs0(self): x = matrix() y = matrix() - y0 = at.zeros_like(y) + y0 = pt.zeros_like(y) z = inc_subtensor(x[:4], y0) f = function([x, y], z, mode=self.mode) assert all( @@ -1638,7 +1638,7 @@ def test_incsubtensor_allocs0(self): def test_incsubtensor_allocs0t(self): x = matrix() y = matrix() - y0 = at.zeros_like(y) + y0 = pt.zeros_like(y) z = inc_subtensor(x[:4], y0.T) f = function([x, y], z, mode=mode_opt) assert all( @@ -1647,7 +1647,7 @@ def test_incsubtensor_allocs0t(self): def test_incsubtensor_allocs1(self): x = matrix() - y0 = at.constant(np.asarray(np.zeros_like((4, 4)), dtype=config.floatX)) + y0 = pt.constant(np.asarray(np.zeros_like((4, 4)), dtype=config.floatX)) z = inc_subtensor(x[:4], y0) f = function([x], z, mode=self.mode) assert all( @@ -1655,7 +1655,7 @@ def test_incsubtensor_allocs1(self): ) def test_incsubtensor_x_zeros(self): - x = at.constant(np.asarray(np.zeros((4, 4)), dtype=config.floatX)) + x = pt.constant(np.asarray(np.zeros((4, 4)), dtype=config.floatX)) y = matrix() z = inc_subtensor(x[:4], y) f = function([y], z) @@ -1672,7 +1672,7 @@ def test_incsubtensor_x_zeros(self): # also check the flag doesn't get set if first input is not zeros: not_all_zeros = np.zeros((4, 4)) not_all_zeros[1, 0] = 0.001 - x = at.constant(np.asarray(not_all_zeros, dtype=config.floatX)) + x = pt.constant(np.asarray(not_all_zeros, dtype=config.floatX)) y = matrix() z = inc_subtensor(x[:4], y) f = function([y], z) @@ -1687,7 +1687,7 @@ def test_incsubtensor_x_zeros(self): def test_advancedincsubtensor1_allocs0(self): x = matrix() y = matrix() - y0 = at.zeros_like(y) + y0 = pt.zeros_like(y) z = inc_subtensor(x[[0, 1, 2, 3]], y0) f = function([x, y], z, mode=self.mode) assert all( @@ -1698,7 +1698,7 @@ def test_advancedincsubtensor1_allocs0(self): def test_advancedincsubtensor1_allocs0t(self): x = matrix() y = matrix() - y0 = at.zeros_like(y) + y0 = pt.zeros_like(y) z = inc_subtensor(x[[0, 1, 2, 3]], y0.T) f = function([x, y], z, mode=mode_opt) assert all( @@ -1708,7 +1708,7 @@ def test_advancedincsubtensor1_allocs0t(self): def test_advancedincsubtensor1_allocs1(self): x = matrix() - y0 = at.constant(np.asarray(np.zeros_like((4, 4)), dtype=config.floatX)) + y0 = pt.constant(np.asarray(np.zeros_like((4, 4)), dtype=config.floatX)) z = inc_subtensor(x[[0, 1, 2, 3]], y0) f = function([x], z, mode=self.mode) assert all( @@ -1719,7 +1719,7 @@ def test_advancedincsubtensor1_allocs1(self): def test_advancedincsubtensor_allocs0(self): x = matrix() y = 
matrix() - y0 = at.zeros_like(y) + y0 = pt.zeros_like(y) z = inc_subtensor(x[[[0, 0], [1, 1]], [[0, 1], [0, 1]]], y0) f = function([x, y], z, mode=self.mode) assert all( @@ -1730,7 +1730,7 @@ def test_advancedincsubtensor_allocs0(self): def test_advancedincsubtensor_allocs0t(self): x = matrix() y = matrix() - y0 = at.zeros_like(y) + y0 = pt.zeros_like(y) z = inc_subtensor(x[[[0, 0], [1, 1]], [[0, 1], [0, 1]]], y0.T) f = function([x, y], z, mode=mode_opt) assert all( @@ -1740,7 +1740,7 @@ def test_advancedincsubtensor_allocs0t(self): def test_advancedincsubtensor_allocs1(self): x = matrix() - y0 = at.constant(np.asarray(np.zeros_like((2, 2)), dtype=config.floatX)) + y0 = pt.constant(np.asarray(np.zeros_like((2, 2)), dtype=config.floatX)) z = inc_subtensor(x[[[0, 0], [1, 1]], [[0, 1], [0, 1]]], y0) f = function([x], z, mode=self.mode) assert all( @@ -1761,11 +1761,11 @@ def test_dot_allocs_0(self): for _e2 in [(v2, vv2, vv3), (m2, vm2, vm3)]: for p in [0, 1]: if p == 0: - e1 = at.zeros_like(_e1[0]) + e1 = pt.zeros_like(_e1[0]) e2 = _e2[0] else: e1 = _e1[0] - e2 = at.zeros_like(_e2[0]) + e2 = pt.zeros_like(_e2[0]) o = dot(e1, e2) f = function([_e1[0], _e2[0]], o, mode=self.mode) f(_e1[1], _e2[1]) @@ -1799,7 +1799,7 @@ def test_local_IncSubtensor_serialize(): adds = [ n for n in topo - if isinstance(n.op, Elemwise) and isinstance(n.op.scalar_op, aes.Add) + if isinstance(n.op, Elemwise) and isinstance(n.op.scalar_op, ps.Add) ] for a in adds: assert not any( @@ -1881,9 +1881,9 @@ def test_local_subtensor_of_alloc(): xval = np.zeros(s, dtype=config.floatX) yval = np.arange(s[1], dtype=config.floatX) - for y in [shared(yval), at.constant([1.0])]: + for y in [shared(yval), pt.constant([1.0])]: # The rows of yx are copies of y - yx = at.alloc(y, x.shape[0], x.shape[1]) + yx = pt.alloc(y, x.shape[0], x.shape[1]) # Slice of each row z_mat = yx[:, 3:] @@ -2153,10 +2153,10 @@ def test_local_subtensor_SpecifyShape_lift_fail(x, s, idx): ], ) def test_local_join_subtensors(axis, slices_fn, expected_nodes): - x = at.dmatrix("x") - slice_scalar = at.iscalar("slice_scalar") + x = pt.dmatrix("x") + slice_scalar = pt.iscalar("slice_scalar") slices = slices_fn(slice_scalar) - y = at.concatenate([x[slice] for slice in slices], axis=axis) + y = pt.concatenate([x[slice] for slice in slices], axis=axis) f = pytensor.function( [x, slice_scalar], y, @@ -2179,8 +2179,8 @@ def test_local_uint_constant_indices(): rng = np.random.default_rng(20900) # Subtensor, don't convert - x = at.vector("x") - idx = at.as_tensor_variable(np.array(-1, np.int64)) + x = pt.vector("x") + idx = pt.as_tensor_variable(np.array(-1, np.int64)) z = x[idx] z_fn = pytensor.function([x], z, mode=mode) @@ -2193,8 +2193,8 @@ def test_local_uint_constant_indices(): assert new_index.type.dtype == "int64" # `Subtensor`, one index, convert - x = at.vector("x") - idx = at.as_tensor_variable(np.array(1, np.int64)) + x = pt.vector("x") + idx = pt.as_tensor_variable(np.array(1, np.int64)) z = x[idx] z_fn = pytensor.function([x], z, mode=mode) @@ -2207,8 +2207,8 @@ def test_local_uint_constant_indices(): assert new_index.type.dtype == "uint8" # `Subtensor`, two indices, one slice, convert - x = at.matrix("x") - indices = (at.as_tensor_variable(np.array(1, np.int64)), slice(None, 10)) + x = pt.matrix("x") + indices = (pt.as_tensor_variable(np.array(1, np.int64)), slice(None, 10)) z = x[indices] z_fn = pytensor.function([x], z, mode=mode) @@ -2221,9 +2221,9 @@ def test_local_uint_constant_indices(): assert new_index.type.dtype == "uint8" # `AdvancedSubtensor`, 
two indices, one symbolic slice, convert - x = at.matrix("x") + x = pt.matrix("x") indices = ( - at.as_tensor_variable(np.array(1, np.int64)), + pt.as_tensor_variable(np.array(1, np.int64)), make_slice(slice(None, 10)), ) z = x[indices] @@ -2237,8 +2237,8 @@ def test_local_uint_constant_indices(): assert new_index.type.dtype == "uint8" # `AdvancedSubtensor1`, convert - x = at.vector("x") - idx = at.as_tensor_variable(rng.integers(0, 10, size=10).astype(np.int64)) + x = pt.vector("x") + idx = pt.as_tensor_variable(rng.integers(0, 10, size=10).astype(np.int64)) z = x[idx] z_fn = pytensor.function([x], z, mode=mode) @@ -2250,8 +2250,8 @@ def test_local_uint_constant_indices(): assert new_index.type.dtype == "uint8" # AdvancedSubtensor, empty, convert - x = at.matrix("x") - idx = at.as_tensor_variable(1, dtype=np.int64) + x = pt.matrix("x") + idx = pt.as_tensor_variable(1, dtype=np.int64) z = x[idx, []] z_fn = pytensor.function([x], z, mode=mode) @@ -2263,8 +2263,8 @@ def test_local_uint_constant_indices(): assert new_index.type.dtype == "uint8" # AdvancedSubtensor, bool, don't convert - x = at.matrix("x") - idx = at.as_tensor_variable(np.array([True]), dtype=bool) + x = pt.matrix("x") + idx = pt.as_tensor_variable(np.array([True]), dtype=bool) z = x[idx, []] z_fn = pytensor.function([x], z, mode=mode) @@ -2276,9 +2276,9 @@ def test_local_uint_constant_indices(): assert new_index.type.dtype == "bool" # `IncSubtensor`, convert - x = at.vector("x") - y = at.scalar("y") - idx = at.as_tensor_variable(1, dtype=np.int64) + x = pt.vector("x") + y = pt.scalar("y") + idx = pt.as_tensor_variable(1, dtype=np.int64) z = inc_subtensor(x[idx], y) z_fn = pytensor.function([x, y], z, mode=mode) @@ -2290,9 +2290,9 @@ def test_local_uint_constant_indices(): assert new_index.type.dtype == "uint8" # `AdvancedIncSubtensor1`, convert - x = at.vector("x") - y = at.vector("y") - idx = at.as_tensor_variable(rng.integers(0, 10, size=10).astype(np.int64)) + x = pt.vector("x") + y = pt.vector("y") + idx = pt.as_tensor_variable(rng.integers(0, 10, size=10).astype(np.int64)) z = advanced_inc_subtensor1(x, y, idx) z_fn = pytensor.function([x, y], z, mode=mode) @@ -2304,8 +2304,8 @@ def test_local_uint_constant_indices(): assert new_index.type.dtype == "uint8" # `AdvancedIncSubtensor1`, convert - x = at.vector("x") - idx = at.as_tensor_variable(rng.integers(0, 10, size=10).astype(np.int64)) + x = pt.vector("x") + idx = pt.as_tensor_variable(rng.integers(0, 10, size=10).astype(np.int64)) z = x[idx, None] z_fn = pytensor.function([x], z, mode=mode) diff --git a/tests/tensor/rewriting/test_uncanonicalize.py b/tests/tensor/rewriting/test_uncanonicalize.py index 865da83137..a188613da6 100644 --- a/tests/tensor/rewriting/test_uncanonicalize.py +++ b/tests/tensor/rewriting/test_uncanonicalize.py @@ -1,18 +1,18 @@ import numpy as np import pytensor -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor import function -from pytensor import scalar as aes +from pytensor import scalar as ps from pytensor.configdefaults import config from pytensor.graph.fg import FunctionGraph from pytensor.graph.rewriting.basic import out2in from pytensor.link.basic import PerformLinker from pytensor.tensor.elemwise import CAReduce, DimShuffle, Elemwise from pytensor.tensor.math import MaxAndArgmax -from pytensor.tensor.math import max as at_max +from pytensor.tensor.math import max as pt_max from pytensor.tensor.math import max_and_argmax -from pytensor.tensor.math import min as at_min +from pytensor.tensor.math import min as pt_min 
from pytensor.tensor.rewriting.uncanonicalize import ( local_alloc_dimshuffle, local_dimshuffle_alloc, @@ -57,29 +57,29 @@ def test_optimization_max(self): n = matrix() for axis in [0, 1, -1]: - f = function([n], at_max(n, axis), mode=self.mode) + f = function([n], pt_max(n, axis), mode=self.mode) topo = f.maker.fgraph.toposort() assert len(topo) == 1 assert isinstance(topo[0].op, CAReduce) f(data) - f = function([n], at_max(-n, axis), mode=self.mode) + f = function([n], pt_max(-n, axis), mode=self.mode) topo = f.maker.fgraph.toposort() assert len(topo) == 2 assert isinstance(topo[0].op, Elemwise) - assert isinstance(topo[0].op.scalar_op, aes.Neg) + assert isinstance(topo[0].op.scalar_op, ps.Neg) assert isinstance(topo[1].op, CAReduce) f(data) - f = function([n], -at_max(n, axis), mode=self.mode) + f = function([n], -pt_max(n, axis), mode=self.mode) topo = f.maker.fgraph.toposort() assert len(topo) == 2 assert isinstance(topo[0].op, CAReduce) assert isinstance(topo[1].op, Elemwise) - assert isinstance(topo[1].op.scalar_op, aes.Neg) + assert isinstance(topo[1].op.scalar_op, ps.Neg) f(data) - f = function([n], -at_max(-n, axis), mode=self.mode) + f = function([n], -pt_max(-n, axis), mode=self.mode) topo = f.maker.fgraph.toposort() assert len(topo) == 1 assert isinstance(topo[0].op, CAReduce) # min @@ -90,30 +90,30 @@ def test_optimization_min(self): n = matrix() for axis in [0, 1, -1]: - f = function([n], at_min(n, axis), mode=self.mode) + f = function([n], pt_min(n, axis), mode=self.mode) topo = f.maker.fgraph.toposort() assert len(topo) == 1 assert isinstance(topo[0].op, CAReduce) f(data) # test variant with neg to make sure we optimize correctly - f = function([n], at_min(-n, axis), mode=self.mode) + f = function([n], pt_min(-n, axis), mode=self.mode) topo = f.maker.fgraph.toposort() assert len(topo) == 2 assert isinstance(topo[0].op, CAReduce) # max assert isinstance(topo[1].op, Elemwise) - assert isinstance(topo[1].op.scalar_op, aes.Neg) + assert isinstance(topo[1].op.scalar_op, ps.Neg) f(data) - f = function([n], -at_min(n, axis), mode=self.mode) + f = function([n], -pt_min(n, axis), mode=self.mode) topo = f.maker.fgraph.toposort() assert len(topo) == 2 assert isinstance(topo[0].op, Elemwise) - assert isinstance(topo[0].op.scalar_op, aes.Neg) + assert isinstance(topo[0].op.scalar_op, ps.Neg) assert isinstance(topo[1].op, CAReduce) # max f(data) - f = function([n], -at_min(-n, axis), mode=self.mode) + f = function([n], -pt_min(-n, axis), mode=self.mode) topo = f.maker.fgraph.toposort() assert len(topo) == 1 assert isinstance(topo[0].op, CAReduce) # max @@ -127,7 +127,7 @@ def test_local_alloc_dimshuffle(): m = iscalar("m") y = x.dimshuffle("x", 0) - out = at.alloc(y, m, 1, x.shape[0]) + out = pt.alloc(y, m, 1, x.shape[0]) g = FunctionGraph([x, m], [out]) alloc_dimshuffle(g) @@ -156,7 +156,7 @@ def test_local_dimshuffle_alloc(): x = vector("x") - out = at.alloc(x, 3, 2).dimshuffle("x", "x", 0, 1) + out = pt.alloc(x, 3, 2).dimshuffle("x", "x", 0, 1) g = FunctionGraph([x], [out]) reshape_dimshuffle(g) diff --git a/tests/tensor/test_basic.py b/tests/tensor/test_basic.py index 81dc14ef66..ba5e1cf648 100644 --- a/tests/tensor/test_basic.py +++ b/tests/tensor/test_basic.py @@ -6,9 +6,9 @@ import pytest import pytensor -import pytensor.scalar as aes -import pytensor.tensor.basic as at -import pytensor.tensor.math as tm +import pytensor.scalar as ps +import pytensor.tensor.basic as ptb +import pytensor.tensor.math as ptm from pytensor import compile, config, function, shared from 
pytensor.compile.io import In, Out from pytensor.compile.mode import Mode, get_default_mode @@ -92,7 +92,7 @@ from pytensor.tensor.elemwise import DimShuffle from pytensor.tensor.exceptions import NotScalarConstantError from pytensor.tensor.math import dense_dot -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.shape import Reshape, Shape_i, shape_padright, specify_shape from pytensor.tensor.type import ( TensorType, @@ -507,7 +507,7 @@ def __init__(self, id, n_outs=1): self.n_outs = n_outs def make_node(self, x): - x = at.as_tensor_variable(x) + x = ptb.as_tensor_variable(x) return Apply(self, [x], [x.type() for _ in range(self.n_outs)]) def perform(self, *args, **kwargs): @@ -556,7 +556,7 @@ def setup_method(self): self.x = scalar("x") def test_tensor_from_scalar(self): - y = as_tensor_variable(aes.int8()) + y = as_tensor_variable(ps.int8()) assert isinstance(y.owner.op, TensorFromScalar) def test_default_output(self): @@ -664,8 +664,8 @@ def test_empty_dtype(self, dtype): ("x", "y"), [ ([1, 2], [1, 2]), - ([at.as_tensor(1), at.as_tensor(2)], [1, 2]), - ([aes.constant(1), aes.constant(2)], [1, 2]), + ([ptb.as_tensor(1), ptb.as_tensor(2)], [1, 2]), + ([ps.constant(1), ps.constant(2)], [1, 2]), ], ) def test_constant_consistency(self, x, y): @@ -689,14 +689,14 @@ def test_constant_identity(self): def test_make_vector(self): a = iscalar() - x = at.tile(a, (1, 1, 1)) + x = ptb.tile(a, (1, 1, 1)) y = (constant(1, dtype="int64"), x.shape[2]) - res = at.as_tensor(y, ndim=1) + res = ptb.as_tensor(y, ndim=1) assert isinstance(res.owner.op, MakeVector) assert tuple(res.owner.inputs) == y y = (1, x.shape[2]) - res = at.as_tensor(y) + res = ptb.as_tensor(y) assert isinstance(res.owner.op, MakeVector) def test_multi_out(self): @@ -705,14 +705,14 @@ def make_node(self, a, b): return Apply(self, [a, b], [a, b]) with pytest.raises(TypeError): - at.as_tensor(TestOp(matrix(), matrix())) + ptb.as_tensor(TestOp(matrix(), matrix())) def test_masked_array_not_implemented( self, ): x = np.ma.masked_greater(np.array([1, 2, 3, 4]), 3) with pytest.raises(NotImplementedError, match="MaskedArrays are not supported"): - at.as_tensor(x) + ptb.as_tensor(x) class TestAlloc: @@ -776,7 +776,7 @@ def test_alloc_constant_folding(self): (some_matrix[idx, idx], 1), ], ): - derp = at_sum(dense_dot(subtensor, variables)) + derp = pt_sum(dense_dot(subtensor, variables)) fobj = pytensor.function([some_vector], derp, mode=self.mode) grad_derp = pytensor.grad(derp, some_vector) @@ -806,54 +806,54 @@ def test_alloc_output(self): def test_ones(self): for shp in [[], 1, [1], [1, 2], [1, 2, 3], np.r_[1, 2, 3]]: - ones = pytensor.function([], [at.ones(shp)], mode=self.mode) + ones = pytensor.function([], [ptb.ones(shp)], mode=self.mode) assert np.allclose(ones(), np.ones(shp)) # When shape is a TensorConstant ones_const = pytensor.function( - [], [at.ones(at.constant(shp))], mode=self.mode + [], [ptb.ones(ptb.constant(shp))], mode=self.mode ) assert np.allclose(ones_const(), np.ones(shp)) # scalar doesn't have to be provided as input x = scalar() shp = [] - ones_scalar = pytensor.function([], [at.ones(x.shape)], mode=self.mode) + ones_scalar = pytensor.function([], [ptb.ones(x.shape)], mode=self.mode) assert np.allclose(ones_scalar(), np.ones(shp)) for typ, shp in [(vector, [3]), (matrix, [3, 4])]: x = typ() - ones_tensor = pytensor.function([x], [at.ones(x.shape)], mode=self.mode) + ones_tensor = pytensor.function([x], [ptb.ones(x.shape)], mode=self.mode) inp = 
np.zeros(shp, dtype=config.floatX) assert np.allclose(ones_tensor(inp), np.ones(shp)) def test_zeros(self): for shp in [[], 1, [1], [1, 2], [1, 2, 3], np.r_[1, 2, 3]]: - zeros = pytensor.function([], [at.zeros(shp)], mode=self.mode) + zeros = pytensor.function([], [ptb.zeros(shp)], mode=self.mode) assert np.allclose(zeros(), np.zeros(shp)) # When shape is a TensorConstant zeros_const = pytensor.function( - [], [at.zeros(at.constant(shp))], mode=self.mode + [], [ptb.zeros(ptb.constant(shp))], mode=self.mode ) assert np.allclose(zeros_const(), np.zeros(shp)) # scalar doesn't have to be provided as input x = scalar() shp = [] - zeros_scalar = pytensor.function([], [at.zeros(x.shape)], mode=self.mode) + zeros_scalar = pytensor.function([], [ptb.zeros(x.shape)], mode=self.mode) assert np.allclose(zeros_scalar(), np.zeros(shp)) for typ, shp in [(vector, [3]), (matrix, [3, 4])]: x = typ() - zeros_tensor = pytensor.function([x], [at.zeros(x.shape)], mode=self.mode) + zeros_tensor = pytensor.function([x], [ptb.zeros(x.shape)], mode=self.mode) inp = np.zeros(shp, dtype=config.floatX) assert np.allclose(zeros_tensor(inp), np.zeros(shp)) def test_full(self): - full_at = at.full((2, 3), 3, dtype="int64") - res = pytensor.function([], full_at, mode=self.mode)() + full_pt = ptb.full((2, 3), 3, dtype="int64") + res = pytensor.function([], full_pt, mode=self.mode)() assert np.array_equal(res, np.full((2, 3), 3, dtype="int64")) - @pytest.mark.parametrize("func", (at.zeros, at.empty)) + @pytest.mark.parametrize("func", (ptb.zeros, ptb.empty)) def test_rebuild(self, func): x = vector(shape=(50,)) x_test = np.zeros((50,), dtype=config.floatX) @@ -873,17 +873,17 @@ def test_static_shape(self): x = tensor(shape=(None, 1, 5)) d0 = scalar("d0", dtype=int) d1 = scalar("d1", dtype=int) - assert at.alloc(x, 3, 1, 5).type.shape == (3, 1, 5) - assert at.alloc(x, 3, 4, 5).type.shape == (3, 4, 5) - assert at.alloc(x, d0, d1, 5).type.shape == (None, None, 5) - assert at.alloc(x, d0, 1, d1).type.shape == (None, 1, 5) + assert ptb.alloc(x, 3, 1, 5).type.shape == (3, 1, 5) + assert ptb.alloc(x, 3, 4, 5).type.shape == (3, 4, 5) + assert ptb.alloc(x, d0, d1, 5).type.shape == (None, None, 5) + assert ptb.alloc(x, d0, 1, d1).type.shape == (None, 1, 5) msg = "Alloc static input type and target shape are incompatible" with pytest.raises(ValueError, match=msg): - at.alloc(x, 3, 1, 1) + ptb.alloc(x, 3, 1, 1) with pytest.raises(ValueError, match=msg): - at.alloc(x, 3, 1, 6) + ptb.alloc(x, 3, 1, 6) def test_alloc_of_view_linker(self): """Check we can allocate a new array properly in the C linker when input is a view.""" @@ -1096,11 +1096,11 @@ def check(m): m_symb = tensor(dtype=m.dtype, shape=(None,) * m.ndim) m_symb.tag.test_value = m - res_tuple_at = nonzero(m_symb, return_matrix=False) - res_matrix_at = nonzero(m_symb, return_matrix=True) + res_tuple_pt = nonzero(m_symb, return_matrix=False) + res_matrix_pt = nonzero(m_symb, return_matrix=True) - res_tuple = tuple(r.tag.test_value for r in res_tuple_at) - res_matrix = res_matrix_at.tag.test_value + res_tuple = tuple(r.tag.test_value for r in res_tuple_pt) + res_matrix = res_matrix_pt.tag.test_value assert np.allclose(res_matrix, np.vstack(np.nonzero(m))) @@ -1125,9 +1125,9 @@ def check(m): m_symb = tensor(dtype=m.dtype, shape=(None,) * m.ndim) m_symb.tag.test_value = m - res_at = flatnonzero(m_symb) + res_pt = flatnonzero(m_symb) - result = res_at.tag.test_value + result = res_pt.tag.test_value assert np.allclose(result, np.flatnonzero(m)) rand0d = np.empty(()) @@ -1154,9 +1154,9 
@@ def check(m): m_symb = tensor(dtype=m.dtype, shape=(None,) * m.ndim) m_symb.tag.test_value = m - res_at = nonzero_values(m_symb) + res_pt = nonzero_values(m_symb) - result = res_at.tag.test_value + result = res_pt.tag.test_value assert np.allclose(result, m[np.nonzero(m)], equal_nan=True) rand0d = np.empty(()) @@ -1244,7 +1244,7 @@ def test_cast_from_complex_to_real_raises_error(self, real_dtype, complex_dtype) def test_basic_allclose(): # This was raised by a user in https://github.com/Theano/Theano/issues/2975 - assert tm._allclose(-0.311023883434, -0.311022856884) + assert ptm._allclose(-0.311023883434, -0.311022856884) def test_get_vector_length(): @@ -1501,7 +1501,7 @@ def test_join_concatenate_one_element(self): # Fast test of concatenate as this is an alias for join. # also test that we remove the Join op if there is only 1 input m = fmatrix() - c = at.concatenate([m]) + c = ptb.concatenate([m]) f = pytensor.function( inputs=[m], outputs=[c], mode=self.mode.including("local_join_1") ) @@ -2020,7 +2020,7 @@ def test_rebroadcast(self): x = TensorType(self.floatX, shape=(None, None, 1))() u = TensorType(self.floatX, shape=(None, None, 1))() # This line used to crash. - at.concatenate([x, -u], axis=2) + ptb.concatenate([x, -u], axis=2) def test_concatenate_same(self): # Test that we can concatenate the same tensor multiple time. @@ -2028,7 +2028,7 @@ def test_concatenate_same(self): # In the past it was broken on the GPU. rng = np.random.default_rng(seed=utt.fetch_seed()) T_shared = self.shared(rng.random((3, 4)).astype(self.floatX)) - Tout = at.concatenate([T_shared, T_shared]) + Tout = ptb.concatenate([T_shared, T_shared]) f = function([], Tout, mode=self.mode) out = f() if config.mode != "FAST_COMPILE": @@ -2049,24 +2049,24 @@ def test_mixed_ndim_error(self): self.join_op(0, v, m) def test_static_shape_inference(self): - a = at.tensor(dtype="int8", shape=(2, 3)) - b = at.tensor(dtype="int8", shape=(2, 5)) + a = ptb.tensor(dtype="int8", shape=(2, 3)) + b = ptb.tensor(dtype="int8", shape=(2, 5)) - res = at.join(1, a, b).type.shape + res = ptb.join(1, a, b).type.shape assert res == (2, 8) assert all(isinstance(s, int) for s in res) - res = at.join(-1, a, b).type.shape + res = ptb.join(-1, a, b).type.shape assert res == (2, 8) assert all(isinstance(s, int) for s in res) # Check early informative errors from static shape info with pytest.raises(ValueError, match="must match exactly"): - at.join(0, at.ones((2, 3)), at.ones((2, 5))) + ptb.join(0, ptb.ones((2, 3)), ptb.ones((2, 5))) # Check partial inference - d = at.tensor(dtype="int8", shape=(2, None)) - res = at.join(1, a, b, d).type.shape + d = ptb.tensor(dtype="int8", shape=(2, None)) + res = ptb.join(1, a, b, d).type.shape assert res == (2, None) assert isinstance(res[0], int) @@ -2111,7 +2111,7 @@ def test_join_inplace(self): # element. 
s = lscalar() x = vector("x") - z = at.zeros((s,)) + z = ptb.zeros((s,)) join = Join(view=0) c = join(0, x, z, z) @@ -2134,8 +2134,8 @@ def test_join_oneInput(self): x_0 = fmatrix() x_1 = fmatrix() x_2 = fvector() - join_0 = at.concatenate([x_0], axis=1) - join_1 = at.concatenate([x_0, x_1, shape_padright(x_2)], axis=1) + join_0 = ptb.concatenate([x_0], axis=1) + join_1 = ptb.concatenate([x_0, x_1, shape_padright(x_2)], axis=1) assert join_0 is x_0 assert join_1 is not x_0 @@ -2164,7 +2164,7 @@ def test_split_view(self, linker): def test_TensorFromScalar(): - s = aes.constant(56) + s = ps.constant(56) t = tensor_from_scalar(s) assert t.owner.op is tensor_from_scalar assert t.type.shape == () @@ -2193,7 +2193,7 @@ def test_TensorFromScalar(): ) def test_ScalarFromTensor(cast_policy): with config.change_flags(cast_policy=cast_policy): - tc = constant(56) # aes.constant(56) + tc = constant(56) # ps.constant(56) ss = scalar_from_tensor(tc) assert ss.owner.op is scalar_from_tensor assert ss.type.dtype == tc.type.dtype @@ -2208,10 +2208,10 @@ def test_ScalarFromTensor(cast_policy): elif cast_policy == "numpy+floatX": assert isinstance(v, np.int64) - aes = lscalar() - ss = scalar_from_tensor(aes) - ss.owner.op.grad([aes], [ss]) - fff = function([aes], ss) + pts = lscalar() + ss = scalar_from_tensor(pts) + ss.owner.op.grad([pts], [ss]) + fff = function([pts], ss) v = fff(np.asarray(5)) assert v == 5 assert isinstance(v, np.int64) @@ -2348,26 +2348,26 @@ def test_is_flat(): # given `ndim` # Constant variable - assert at.is_flat(at.as_tensor_variable(np.zeros(10))) - assert at.is_flat(at.as_tensor_variable(np.zeros((10, 10, 10))), ndim=3) - assert not at.is_flat(at.as_tensor_variable(np.zeros((10, 10, 10)))) + assert ptb.is_flat(ptb.as_tensor_variable(np.zeros(10))) + assert ptb.is_flat(ptb.as_tensor_variable(np.zeros((10, 10, 10))), ndim=3) + assert not ptb.is_flat(ptb.as_tensor_variable(np.zeros((10, 10, 10)))) # Symbolic variable - assert at.is_flat(vector()) - assert at.is_flat(tensor3(), ndim=3) - assert not at.is_flat(tensor3()) + assert ptb.is_flat(vector()) + assert ptb.is_flat(tensor3(), ndim=3) + assert not ptb.is_flat(tensor3()) # Reshape with constant shape X = tensor4() - assert at.is_flat(X.reshape((-1,))) - assert at.is_flat(X.reshape((10, 10, -1)), ndim=3) - assert not at.is_flat(X.reshape((10, 10, -1))) + assert ptb.is_flat(X.reshape((-1,))) + assert ptb.is_flat(X.reshape((10, 10, -1)), ndim=3) + assert not ptb.is_flat(X.reshape((10, 10, -1))) # Reshape with symbolic shape X = tensor4() - assert at.is_flat(X.reshape((iscalar(),))) - assert at.is_flat(X.reshape((iscalar(),) * 3), ndim=3) - assert not at.is_flat(X.reshape((iscalar(),) * 3)) + assert ptb.is_flat(X.reshape((iscalar(),))) + assert ptb.is_flat(X.reshape((iscalar(),) * 3), ndim=3) + assert not ptb.is_flat(X.reshape((iscalar(),) * 3)) def test_tile(): @@ -3002,7 +3002,7 @@ def test_dim1(self): # Test passing a list p = [2, 4, 3, 0, 1] - inv = at.inverse_permutation(p) + inv = ptb.inverse_permutation(p) f = pytensor.function([], inv) assert np.array_equal(f(), np.array([3, 4, 0, 2, 1])) @@ -3411,9 +3411,9 @@ def test_dimshuffle_duplicate(): class TestGetUnderlyingScalarConstantValue: def test_basic(self): with pytest.raises(NotScalarConstantError): - get_underlying_scalar_constant_value(aes.int64()) + get_underlying_scalar_constant_value(ps.int64()) - res = get_underlying_scalar_constant_value(at.as_tensor(10)) + res = get_underlying_scalar_constant_value(ptb.as_tensor(10)) assert res == 10 assert isinstance(res, np.ndarray) 
@@ -3421,13 +3421,13 @@ def test_basic(self): assert res == 10 assert isinstance(res, np.ndarray) - a = at.stack([1, 2, 3]) + a = ptb.stack([1, 2, 3]) assert get_underlying_scalar_constant_value(a[0]) == 1 assert get_underlying_scalar_constant_value(a[1]) == 2 assert get_underlying_scalar_constant_value(a[2]) == 3 b = iscalar() - a = at.stack([b, 2, 3]) + a = ptb.stack([b, 2, 3]) with pytest.raises(NotScalarConstantError): get_underlying_scalar_constant_value(a[0]) assert get_underlying_scalar_constant_value(a[1]) == 2 @@ -3436,7 +3436,7 @@ def test_basic(self): # For now get_underlying_scalar_constant_value goes through only MakeVector and Join of # scalars. v = ivector() - a = at.stack([v, [2], [3]]) + a = ptb.stack([v, [2], [3]]) with pytest.raises(NotScalarConstantError): get_underlying_scalar_constant_value(a[0]) with pytest.raises(NotScalarConstantError): @@ -3449,12 +3449,12 @@ def test_basic(self): v = row() assert get_underlying_scalar_constant_value(v.shape[0]) == 1 - res = at.get_underlying_scalar_constant_value(at.as_tensor([10, 20]).shape[0]) + res = ptb.get_underlying_scalar_constant_value(ptb.as_tensor([10, 20]).shape[0]) assert isinstance(res, np.ndarray) assert 2 == res - res = at.get_underlying_scalar_constant_value( - 9 + at.as_tensor([1.0]).shape[0], + res = ptb.get_underlying_scalar_constant_value( + 9 + ptb.as_tensor([1.0]).shape[0], elemwise=True, only_process_constants=False, max_recur=9, @@ -3494,7 +3494,7 @@ def test_make_vector(self): assert get_underlying_scalar_constant_value(mv[np.int32(0)]) == 1 assert get_underlying_scalar_constant_value(mv[np.int64(1)]) == 2 assert get_underlying_scalar_constant_value(mv[np.uint(2)]) == 3 - t = aes.ScalarType("int64") + t = ps.ScalarType("int64") with pytest.raises(NotScalarConstantError): get_underlying_scalar_constant_value(mv[t()]) @@ -3520,7 +3520,7 @@ def test_elemwise(self): assert np.allclose(get_underlying_scalar_constant_value(s), c.data * 1.2) s = c < 0.5 assert np.allclose(get_underlying_scalar_constant_value(s), int(c.data < 0.5)) - s = at.second(c, 0.4) + s = ptb.second(c, 0.4) assert np.allclose(get_underlying_scalar_constant_value(s), 0.4) def test_assert(self): @@ -3548,7 +3548,7 @@ def test_second(self): # Second should apply when the value is constant but not the shape c = constant(np.random.random()) shp = vector() - s = at.second(shp, c) + s = ptb.second(shp, c) assert get_underlying_scalar_constant_value(s) == c.data def test_copy(self): @@ -3576,7 +3576,7 @@ def test_None_and_NoneConst(self, only_process_constants): @pytest.mark.parametrize( ["valid_inp", "invalid_inp"], - ((np.array(4), np.zeros(5)), (at.constant(4), at.constant(3, ndim=1))), + ((np.array(4), np.zeros(5)), (ptb.constant(4), ptb.constant(3, ndim=1))), ) def test_get_scalar_constant_value(valid_inp, invalid_inp): with pytest.raises(NotScalarConstantError): @@ -3587,7 +3587,7 @@ def test_get_scalar_constant_value(valid_inp, invalid_inp): def test_complex_mod_failure(): # Make sure % fails on complex numbers. 
x = vector(dtype="complex64") - with pytest.raises(aes.ComplexError): + with pytest.raises(ps.ComplexError): x % 5 @@ -3752,7 +3752,7 @@ def test_alloc_diag_values(self): # Test perform if np.maximum(axis1, axis2) > len(test_val.shape): continue - diag_x = at.alloc_diag(x, offset=offset, axis1=axis1, axis2=axis2) + diag_x = ptb.alloc_diag(x, offset=offset, axis1=axis1, axis2=axis2) f = pytensor.function([x], diag_x) # alloc_diag and extract the diagonal again to check for correctness diag_arr = f(test_val) @@ -3769,7 +3769,7 @@ def test_alloc_diag_values(self): assert np.all(rediag_shape == test_val.shape) # Test grad - sum_diag_x = at_sum(diag_x) + sum_diag_x = pt_sum(diag_x) grad_x = pytensor.grad(sum_diag_x, x) grad_diag_x = pytensor.grad(sum_diag_x, diag_x) f_grad_x = pytensor.function([x], grad_x) @@ -3785,7 +3785,7 @@ def test_alloc_diag_values(self): def test_diagonal_negative_axis(): x = np.arange(2 * 3 * 3).reshape((2, 3, 3)) np.testing.assert_allclose( - at.diagonal(x, axis1=-1, axis2=-2).eval(), + ptb.diagonal(x, axis1=-1, axis2=-2).eval(), np.diagonal(x, axis1=-1, axis2=-2), ) @@ -3802,16 +3802,16 @@ def test_transpose(): f = pytensor.function( [x1, x2, x3], [ - at.transpose(x1), - at.transpose(x2), - at.transpose(x3), + ptb.transpose(x1), + ptb.transpose(x2), + ptb.transpose(x3), x1.transpose(), x2.transpose(), x3.transpose(), x2.transpose(0, 1), x3.transpose((0, 2, 1)), - at.transpose(x2, [0, 1]), - at.transpose(x3, [0, 2, 1]), + ptb.transpose(x2, [0, 1]), + ptb.transpose(x3, [0, 2, 1]), ], ) @@ -3835,10 +3835,10 @@ def test_transpose(): assert np.all(t3d == np.transpose(x3v, [0, 2, 1])) # Check that we create a name. - assert at.transpose(x1).name == "x1.T" - assert at.transpose(x2).name == "x2.T" - assert at.transpose(x3).name == "x3.T" - assert at.transpose(dmatrix()).name is None + assert ptb.transpose(x1).name == "x1.T" + assert ptb.transpose(x2).name == "x2.T" + assert ptb.transpose(x3).name == "x3.T" + assert ptb.transpose(dmatrix()).name is None def test_stacklists(): @@ -4063,7 +4063,7 @@ def test_ScalarFromTensor(self): ) def test_TensorFromScalar(self): - aiscal = aes.float64() + aiscal = ps.float64() self._compile_and_check( [aiscal], [TensorFromScalar()(aiscal)], [4.0], TensorFromScalar @@ -4168,7 +4168,7 @@ def test_numpy_compare(self): def test_moveaxis(): - x = at.zeros((3, 4, 5)) + x = ptb.zeros((3, 4, 5)) tuple(moveaxis(x, 0, -1).shape.eval()) == (4, 5, 3) tuple(moveaxis(x, -1, 0).shape.eval()) == (5, 3, 4) tuple(moveaxis(x, [0, 1], [-1, -2]).shape.eval()) == (5, 4, 3) @@ -4176,7 +4176,7 @@ def test_moveaxis(): def test_moveaxis_error(): - x = at.zeros((3, 4, 5)) + x = ptb.zeros((3, 4, 5)) with pytest.raises( ValueError, match="`source` and `destination` arguments must have the same number of elements", @@ -4348,21 +4348,21 @@ def test_empty(): assert out.shape == (2, 3) assert out.dtype == "float32" - empty_at = at.empty(3) - res = pytensor.function([], empty_at)() + empty_pt = ptb.empty(3) + res = pytensor.function([], empty_pt)() assert res.shape == (3,) - empty_at = at.empty((2, 3), dtype=None) - res = pytensor.function([], empty_at)() + empty_pt = ptb.empty((2, 3), dtype=None) + res = pytensor.function([], empty_pt)() assert res.shape == (2, 3) - empty_at = at.empty((2, 3), dtype="int64") - res = pytensor.function([], empty_at)() + empty_pt = ptb.empty((2, 3), dtype="int64") + res = pytensor.function([], empty_pt)() assert res.shape == (2, 3) assert res.dtype == "int64" - empty_at = at.empty_like(empty_at) - res = pytensor.function([], empty_at)() + 
empty_pt = ptb.empty_like(empty_pt) + res = pytensor.function([], empty_pt)() assert res.shape == (2, 3) assert res.dtype == "int64" @@ -4377,7 +4377,7 @@ def test_identity_like_dtype(): # Test passing list m = [[0, 1], [1, 3]] - out = at.identity_like(m) + out = ptb.identity_like(m) f = pytensor.function([], out) assert np.array_equal(f(), np.eye(2)) @@ -4409,25 +4409,25 @@ def test_atleast_Nd(): def test_expand_dims(): - x_at = dscalar() - res_at = expand_dims(x_at, 0) + x_pt = dscalar() + res_pt = expand_dims(x_pt, 0) x_val = np.array(1.0, dtype=np.float64) exp_res = np.expand_dims(x_val, 0) - res_val = pytensor.function([x_at], res_at)(x_val) + res_val = pytensor.function([x_pt], res_pt)(x_val) assert np.array_equal(exp_res, res_val) - x_at = dscalar() - res_at = expand_dims(x_at, (0, 1)) + x_pt = dscalar() + res_pt = expand_dims(x_pt, (0, 1)) x_val = np.array(1.0, dtype=np.float64) exp_res = np.expand_dims(x_val, (0, 1)) - res_val = pytensor.function([x_at], res_at)(x_val) + res_val = pytensor.function([x_pt], res_pt)(x_val) assert np.array_equal(exp_res, res_val) - x_at = dmatrix() - res_at = expand_dims(x_at, (2, 1)) + x_pt = dmatrix() + res_pt = expand_dims(x_pt, (2, 1)) x_val = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float64) exp_res = np.expand_dims(x_val, (2, 1)) - res_val = pytensor.function([x_at], res_at)(x_val) + res_val = pytensor.function([x_pt], res_pt)(x_val) assert np.array_equal(exp_res, res_val) @@ -4450,14 +4450,14 @@ def test_take_along_axis(self, shape, axis, samples): indices_size[axis or 0] = samples indices = rng.integers(low=0, high=shape[axis or 0], size=indices_size) - arr_in = at.tensor( + arr_in = ptb.tensor( dtype=config.floatX, shape=tuple(1 if s == 1 else None for s in arr.shape) ) - indices_in = at.tensor( + indices_in = ptb.tensor( dtype=np.int64, shape=tuple(1 if s == 1 else None for s in indices.shape) ) - out = at.take_along_axis(arr_in, indices_in, axis) + out = ptb.take_along_axis(arr_in, indices_in, axis) func = pytensor.function([arr_in, indices_in], out) @@ -4466,14 +4466,14 @@ def test_take_along_axis(self, shape, axis, samples): ) def test_ndim_dtype_failures(self): - arr = at.tensor(dtype=config.floatX, shape=(None,) * 2) - indices = at.tensor(dtype=np.int64, shape=(None,) * 3) + arr = ptb.tensor(dtype=config.floatX, shape=(None,) * 2) + indices = ptb.tensor(dtype=np.int64, shape=(None,) * 3) with pytest.raises(ValueError): - at.take_along_axis(arr, indices) + ptb.take_along_axis(arr, indices) - indices = at.tensor(dtype=np.float64, shape=(None,) * 2) + indices = ptb.tensor(dtype=np.float64, shape=(None,) * 2) with pytest.raises(IndexError): - at.take_along_axis(arr, indices) + ptb.take_along_axis(arr, indices) @pytest.mark.parametrize( @@ -4498,7 +4498,7 @@ def test_oriented_stack_functions(func): with pytest.raises(ValueError): func() - a = at.tensor(dtype=np.float64, shape=(None, None, None)) + a = ptb.tensor(dtype=np.float64, shape=(None, None, None)) with pytest.raises(ValueError): func(a, a) @@ -4506,7 +4506,7 @@ def test_oriented_stack_functions(func): def test_trace(): x_val = np.ones((5, 4, 2)) - x = at.as_tensor(x_val) + x = ptb.as_tensor(x_val) np.testing.assert_allclose( trace(x).eval(), @@ -4528,7 +4528,7 @@ def test_vectorize_extract_diag(): signature = "(a1,b,a2)->(b,a)" def core_pt(x): - return at.diagonal(x, offset=1, axis1=0, axis2=2) + return ptb.diagonal(x, offset=1, axis1=0, axis2=2) def core_np(x): return np.diagonal(x, offset=1, axis1=0, axis2=2) diff --git a/tests/tensor/test_blas.py b/tests/tensor/test_blas.py 
index 035f9e036b..34c757dc25 100644 --- a/tests/tensor/test_blas.py +++ b/tests/tensor/test_blas.py @@ -7,8 +7,8 @@ from numpy.testing import assert_array_almost_equal import pytensor -import pytensor.scalar as aes -import pytensor.tensor as at +import pytensor.scalar as ps +import pytensor.tensor as pt import pytensor.tensor.blas_scipy from pytensor.compile.function import function from pytensor.compile.io import In @@ -216,9 +216,9 @@ def test_factorised_scalar(self): b = matrix() s = shared(np.zeros((5, 5)).astype(config.floatX)) - lr1 = at.constant(0.01).astype(config.floatX) - lr2 = at.constant(2).astype(config.floatX) - l2_reg = at.constant(0.0001).astype(config.floatX) + lr1 = pt.constant(0.01).astype(config.floatX) + lr2 = pt.constant(2).astype(config.floatX) + l2_reg = pt.constant(0.0001).astype(config.floatX) # test constant merge with gemm f = function( @@ -292,7 +292,7 @@ def test_destroy_map4(self): rng = np.random.default_rng(seed=utt.fetch_seed()) Z = shared(rng.random((2, 2)), name="Z") A = shared(rng.random((2, 2)), name="A") - one = at.constant(1.0).astype(Z.dtype) + one = pt.constant(1.0).astype(Z.dtype) f = inplace_func([], gemm_inplace(Z, one, A, A, one)) # TODO FIXME: This is a bad test f() @@ -396,7 +396,7 @@ def t(z, x, y, a=1.0, b=0.0, l="c|py", dt="float64"): g_i = function( [], tz_i, - updates=[(tz, at.set_subtensor(tz[:, :, i], tz_i))], + updates=[(tz, pt.set_subtensor(tz[:, :, i], tz_i))], mode=Mode(optimizer=None, linker=l), ) for j in range(3): @@ -581,8 +581,8 @@ def test_res_is_a(): class TestAsScalar: def test_basic(self): # Test that it works on scalar constants - a = at.constant(2.5) - b = at.constant(np.asarray([[[0.5]]])) + a = pt.constant(2.5) + b = pt.constant(np.asarray([[[0.5]]])) b2 = b.dimshuffle() assert b2.ndim == 0 d_a = DimShuffle([], [])(a) @@ -597,7 +597,7 @@ def test_basic(self): def test_basic_1(self): # Test that it fails on nonscalar constants - a = at.constant(np.ones(5)) + a = pt.constant(np.ones(5)) assert _as_scalar(a) is None assert _as_scalar(DimShuffle([False], [0, "x"])(a)) is None @@ -733,7 +733,7 @@ def test_gemm_opt_double_gemm(): o = [ ( a * dot(X, Y) - + gemm_inplace(Z, b, S.T, R.T, at.constant(1.0).astype(config.floatX)) + + gemm_inplace(Z, b, S.T, R.T, pt.constant(1.0).astype(config.floatX)) ) ] f = inplace_func( @@ -908,7 +908,7 @@ def test_gemm_nested(): def test_gemm_opt_wishlist(): X, Y, Z, a, b = matrix(), matrix(), matrix(), scalar(), scalar() - # with >2 additions of the same ``at.dot(X, Y)`` term + # with >2 additions of the same ``pt.dot(X, Y)`` term just_gemm([X, Y, Z, a, b], [(b * b) * Z * a + (a * a) * dot(X, Y) + b * dot(X, Y)]) just_gemm([X, Y, Z, a, b], [Z + dot(X, Y) + dot(X, Y)]) @@ -1121,8 +1121,8 @@ def test_dot22scalar(): for dtype3 in ["complex64", "complex128"]: c = matrix("c", dtype=dtype3) for dtype4 in ["complex64", "complex128"]: - cst = at.constant(0.2, dtype=dtype4) - cst2 = at.constant(0.1, dtype=dtype4) + cst = pt.constant(0.2, dtype=dtype4) + cst2 = pt.constant(0.1, dtype=dtype4) def check_dot22scalar(func, len_topo_scalar=-1): topo = func.maker.fgraph.toposort() @@ -1878,7 +1878,7 @@ def function(self, inputs, outputs, updates=None): return function(inputs, outputs, self.mode, updates=updates) def b(self, bval): - return at.as_tensor_variable(np.asarray(bval, dtype=self.dtype)) + return pt.as_tensor_variable(np.asarray(bval, dtype=self.dtype)) def test_b_0_triggers_ger(self): # test local_gemm_to_ger opt @@ -2062,7 +2062,7 @@ def test_inplace(self): [self.x, self.y], [], updates=[ - (A, A + 
at.constant(0.1, dtype=self.dtype) * outer(self.x, self.y)) + (A, A + pt.constant(0.1, dtype=self.dtype) * outer(self.x, self.y)) ], ) self.assertFunctionContains(f, self.ger_destructive) @@ -2276,7 +2276,7 @@ def cmp_gemm(self, a_shp, b_shp, c_shp, rng): a_n = l * av[::a_step1, ::a_step2] + np.dot( bv[::b_step1, ::b_step2], cv[::c_step1, ::c_step2] ) - at_n = ( + pt_n = ( l * av[::a_step1, ::a_step2].T + np.dot(bv[::b_step1, ::b_step2], cv[::c_step1, ::c_step2]).T ) @@ -2302,25 +2302,25 @@ def cmp_gemm(self, a_shp, b_shp, c_shp, rng): np.transpose(a_dev.copy())[::a_step2, ::a_step1], borrow=True ) f_tnn() - assert np.allclose(a_t.get_value(), at_n) + assert np.allclose(a_t.get_value(), pt_n) a_t.set_value( np.transpose(a_dev.copy())[::a_step2, ::a_step1], borrow=True ) f_tnt() - assert np.allclose(a_t.get_value(), at_n) + assert np.allclose(a_t.get_value(), pt_n) a_t.set_value( np.transpose(a_dev.copy())[::a_step2, ::a_step1], borrow=True ) f_ttn() - assert np.allclose(a_t.get_value(), at_n) + assert np.allclose(a_t.get_value(), pt_n) a_t.set_value( np.transpose(a_dev.copy())[::a_step2, ::a_step1], borrow=True ) f_ttt() - assert np.allclose(a_t.get_value(), at_n) + assert np.allclose(a_t.get_value(), pt_n) def test_gemm(self): rng = np.random.default_rng(unittest_tools.fetch_seed()) @@ -2592,7 +2592,7 @@ def test_ger(self): x * y if x.ndim == 0 or y.ndim == 0 else np.dot(x, y) for x, y in zip(xs, ys) ], - dtype=aes.upcast(xs.dtype, ys.dtype), + dtype=ps.upcast(xs.dtype, ys.dtype), ) ), checks={}, diff --git a/tests/tensor/test_blas_c.py b/tests/tensor/test_blas_c.py index b377d80b3d..58b425d53a 100644 --- a/tests/tensor/test_blas_c.py +++ b/tests/tensor/test_blas_c.py @@ -4,7 +4,7 @@ import pytest import pytensor -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.misc.safe_asarray import _asarray from pytensor.tensor.basic import AllocEmpty from pytensor.tensor.blas import Ger @@ -62,7 +62,7 @@ def run_f(self, f): f(self.Aval[::-1, ::-1], self.xval, self.yval) def b(self, bval): - return at.as_tensor_variable(np.asarray(bval, dtype=self.dtype)) + return pt.as_tensor_variable(np.asarray(bval, dtype=self.dtype)) def test_eq(self): assert CGer(True) == CGer(True) @@ -85,33 +85,33 @@ def test_hash(self): def test_optimization_pipeline(self): skip_if_blas_ldflags_empty() - f = self.function([self.x, self.y], at.outer(self.x, self.y)) + f = self.function([self.x, self.y], pt.outer(self.x, self.y)) self.assertFunctionContains(f, CGer(destructive=True)) f(self.xval, self.yval) # DebugMode tests correctness def test_optimization_pipeline_float(self): skip_if_blas_ldflags_empty() self.manual_setup_method("float32") - f = self.function([self.x, self.y], at.outer(self.x, self.y)) + f = self.function([self.x, self.y], pt.outer(self.x, self.y)) self.assertFunctionContains(f, CGer(destructive=True)) f(self.xval, self.yval) # DebugMode tests correctness def test_int_fails(self): self.manual_setup_method("int32") - f = self.function([self.x, self.y], at.outer(self.x, self.y)) + f = self.function([self.x, self.y], pt.outer(self.x, self.y)) self.assertFunctionContains0(f, CGer(destructive=True)) self.assertFunctionContains0(f, CGer(destructive=False)) def test_A_plus_outer(self): skip_if_blas_ldflags_empty() - f = self.function([self.A, self.x, self.y], self.A + at.outer(self.x, self.y)) + f = self.function([self.A, self.x, self.y], self.A + pt.outer(self.x, self.y)) self.assertFunctionContains(f, CGer(destructive=False)) self.run_f(f) # DebugMode tests correctness def 
test_A_plus_scaled_outer(self): skip_if_blas_ldflags_empty() f = self.function( - [self.A, self.x, self.y], self.A + 0.1 * at.outer(self.x, self.y) + [self.A, self.x, self.y], self.A + 0.1 * pt.outer(self.x, self.y) ) self.assertFunctionContains(f, CGer(destructive=False)) self.run_f(f) # DebugMode tests correctness @@ -148,7 +148,7 @@ def test_nan_beta_0(self): mode.check_isfinite = False f = pytensor.function( [self.A, self.x, self.y, self.a], - self.a * self.y + at.dot(self.A, self.x), + self.a * self.y + pt.dot(self.A, self.x), mode=mode, ) Aval = np.ones((3, 1), dtype=self.dtype) @@ -160,10 +160,10 @@ def test_nan_beta_0(self): def test_optimizations_vm(self): skip_if_blas_ldflags_empty() """ Test vector dot matrix """ - f = pytensor.function([self.x, self.A], at.dot(self.x, self.A), mode=self.mode) + f = pytensor.function([self.x, self.A], pt.dot(self.x, self.A), mode=self.mode) # Assert that the dot was optimized somehow - self.assertFunctionContains0(f, at.dot) + self.assertFunctionContains0(f, pt.dot) self.assertFunctionContains1(f, CGemv(inplace=True)) # Assert they produce the same output @@ -178,10 +178,10 @@ def test_optimizations_vm(self): def test_optimizations_mv(self): skip_if_blas_ldflags_empty() """ Test matrix dot vector """ - f = pytensor.function([self.A, self.y], at.dot(self.A, self.y), mode=self.mode) + f = pytensor.function([self.A, self.y], pt.dot(self.A, self.y), mode=self.mode) # Assert that the dot was optimized somehow - self.assertFunctionContains0(f, at.dot) + self.assertFunctionContains0(f, pt.dot) self.assertFunctionContains1(f, CGemv(inplace=True)) # Assert they produce the same output @@ -208,7 +208,7 @@ def t_gemv1(self, m_shp): v2 = pytensor.shared(v2_orig) m = pytensor.shared(np.array(rng.uniform(size=m_shp), dtype="float32")) - f = pytensor.function([], v2 + at.dot(m, v1), mode=self.mode) + f = pytensor.function([], v2 + pt.dot(m, v1), mode=self.mode) # Assert they produce the same output assert np.allclose(f(), np.dot(m.get_value(), v1.get_value()) + v2_orig) @@ -217,7 +217,7 @@ def t_gemv1(self, m_shp): # test the inplace version g = pytensor.function( - [], [], updates=[(v2, v2 + at.dot(m, v1))], mode=self.mode + [], [], updates=[(v2, v2 + pt.dot(m, v1))], mode=self.mode ) # Assert they produce the same output @@ -252,7 +252,7 @@ def test_gemv_dimensions(self, dtype="float32"): alpha = pytensor.shared(_asarray(1.0, dtype=dtype), name="alpha") beta = pytensor.shared(_asarray(1.0, dtype=dtype), name="beta") - z = beta * self.y + alpha * at.dot(self.A, self.x) + z = beta * self.y + alpha * pt.dot(self.A, self.x) f = pytensor.function([self.A, self.x, self.y], z, mode=self.mode) # Matrix value @@ -278,7 +278,7 @@ def test_multiple_inplace(self): y = dvector("y") z = dvector("z") f = pytensor.function( - [x, y, z], [at.dot(y, x), at.dot(z, x)], mode=mode_blas_opt + [x, y, z], [pt.dot(y, x), pt.dot(z, x)], mode=mode_blas_opt ) vx = np.random.random((3, 3)) vy = np.random.random(3) diff --git a/tests/tensor/test_blas_scipy.py b/tests/tensor/test_blas_scipy.py index 120eb9f0e6..e65e7d90c2 100644 --- a/tests/tensor/test_blas_scipy.py +++ b/tests/tensor/test_blas_scipy.py @@ -2,7 +2,7 @@ import pytest import pytensor -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.tensor.blas_scipy import ScipyGer from pytensor.tensor.math import outer from pytensor.tensor.type import tensor @@ -33,7 +33,7 @@ def run_f(self, f): f(self.Aval[::-1, ::-1], self.xval[::-1], self.yval[::-1]) def b(self, bval): - return 
at.as_tensor_variable(np.asarray(bval, dtype=self.dtype)) + return pt.as_tensor_variable(np.asarray(bval, dtype=self.dtype)) def test_outer(self): f = self.function([self.x, self.y], outer(self.x, self.y)) diff --git a/tests/tensor/test_complex.py b/tests/tensor/test_complex.py index 1f29f198ca..f0f7333f9c 100644 --- a/tests/tensor/test_complex.py +++ b/tests/tensor/test_complex.py @@ -4,7 +4,7 @@ import pytensor from pytensor.gradient import GradientError from pytensor.tensor.basic import cast -from pytensor.tensor.math import complex as at_complex +from pytensor.tensor.math import complex as pt_complex from pytensor.tensor.math import complex_from_polar, imag, real from pytensor.tensor.type import cvector, dvector, fmatrix, fvector, imatrix, zvector from tests import unittest_tools as utt @@ -40,7 +40,7 @@ def test_cast(self): def test_complex(self): rng = np.random.default_rng(2333) m = fmatrix() - c = at_complex(m[0], m[1]) + c = pt_complex(m[0], m[1]) assert c.type == cvector r, i = [real(c), imag(c)] assert r.type == fvector @@ -55,7 +55,7 @@ def test_complex(self): @pytest.mark.skip(reason="Complex grads not enabled, see #178") def test_complex_grads(self): def f(m): - c = at_complex(m[0], m[1]) + c = pt_complex(m[0], m[1]) return 0.5 * real(c) + 0.9 * imag(c) rng = np.random.default_rng(9333) @@ -65,7 +65,7 @@ def f(m): @pytest.mark.skip(reason="Complex grads not enabled, see #178") def test_mul_mixed0(self): def f(a): - ac = at_complex(a[0], a[1]) + ac = pt_complex(a[0], a[1]) return abs((ac) ** 2).sum() rng = np.random.default_rng(9333) @@ -80,7 +80,7 @@ def f(a): @pytest.mark.skip(reason="Complex grads not enabled, see #178") def test_mul_mixed1(self): def f(a): - ac = at_complex(a[0], a[1]) + ac = pt_complex(a[0], a[1]) return abs(ac).sum() rng = np.random.default_rng(9333) @@ -95,7 +95,7 @@ def f(a): @pytest.mark.skip(reason="Complex grads not enabled, see #178") def test_mul_mixed(self): def f(a, b): - ac = at_complex(a[0], a[1]) + ac = pt_complex(a[0], a[1]) return abs((ac * b) ** 2).sum() rng = np.random.default_rng(9333) @@ -121,7 +121,7 @@ def f(m): @pytest.mark.skip(reason="Complex grads not enabled, see #178") def test_abs_grad(self): def f(m): - c = at_complex(m[0], m[1]) + c = pt_complex(m[0], m[1]) return 0.5 * abs(c) rng = np.random.default_rng(9333) diff --git a/tests/tensor/test_elemwise.py b/tests/tensor/test_elemwise.py index b4a8cc9aea..9c6c140901 100644 --- a/tests/tensor/test_elemwise.py +++ b/tests/tensor/test_elemwise.py @@ -7,7 +7,7 @@ import pytest import pytensor -import pytensor.scalar as aes +import pytensor.scalar as ps import tests.unittest_tools as utt from pytensor.compile.mode import Mode from pytensor.configdefaults import config @@ -132,9 +132,9 @@ def test_too_big_rank(self): y.eval({x: 0}) def test_c_views(self): - x_at = vector() + x_pt = vector() thunk, inputs, outputs = ( - CLinker().accept(FunctionGraph([x_at], [x_at[None]])).make_thunk() + CLinker().accept(FunctionGraph([x_pt], [x_pt[None]])).make_thunk() ) # This is a little hackish, but we're hoping that--by running this more than @@ -245,7 +245,7 @@ def with_linker(self, linker, op, type, rand_val): x = x_type("x") y = y_type("y") - e = op(aes.add)(x, y) + e = op(ps.add)(x, y) f = make_function(copy(linker).accept(FunctionGraph([x, y], [e]))) xv = rand_val(xsh) yv = rand_val(ysh) @@ -258,7 +258,7 @@ def with_linker(self, linker, op, type, rand_val): if isinstance(linker, PerformLinker): x = x_type("x") y = y_type("y") - e = op(aes.add)(x, y) + e = op(ps.add)(x, y) f = 
make_function(copy(linker).accept(FunctionGraph([x, y], [e.shape]))) assert tuple(f(xv, yv)) == tuple(zv.shape) @@ -284,7 +284,7 @@ def with_linker_inplace(self, linker, op, type, rand_val): x = x_type("x") y = y_type("y") - e = op(aes.Add(aes.transfer_type(0)), {0: 0})(x, y) + e = op(ps.Add(ps.transfer_type(0)), {0: 0})(x, y) f = make_function(copy(linker).accept(FunctionGraph([x, y], [e]))) xv = rand_val(xsh) yv = rand_val(ysh) @@ -298,7 +298,7 @@ def with_linker_inplace(self, linker, op, type, rand_val): if isinstance(linker, PerformLinker): x = x_type("x") y = y_type("y") - e = op(aes.Add(aes.transfer_type(0)), {0: 0})(x, y) + e = op(ps.Add(ps.transfer_type(0)), {0: 0})(x, y) f = make_function(copy(linker).accept(FunctionGraph([x, y], [e.shape]))) xv = rand_val(xsh) yv = rand_val(ysh) @@ -339,7 +339,7 @@ def test_fill(self): ): x = t(pytensor.config.floatX, shape=(None, None))("x") y = t(pytensor.config.floatX, shape=(1, 1))("y") - e = op(aes.Second(aes.transfer_type(0)), {0: 0})(x, y) + e = op(ps.Second(ps.transfer_type(0)), {0: 0})(x, y) f = make_function(linker().accept(FunctionGraph([x, y], [e]))) xv = rval((5, 5)) yv = rval((1, 1)) @@ -370,7 +370,7 @@ def test_weird_strides(self): ): x = t(pytensor.config.floatX, shape=(None,) * 5)("x") y = t(pytensor.config.floatX, shape=(None,) * 5)("y") - e = op(aes.add)(x, y) + e = op(ps.add)(x, y) f = make_function(linker().accept(FunctionGraph([x, y], [e]))) xv = rval((2, 2, 2, 2, 2)) yv = rval((2, 2, 2, 2, 2)).transpose(4, 0, 3, 1, 2) @@ -389,7 +389,7 @@ def test_same_inputs(self): [self.rand_val, self.rand_cval], ): x = t(pytensor.config.floatX, shape=(None,) * 2)("x") - e = op(aes.add)(x, x) + e = op(ps.add)(x, x) f = make_function(linker().accept(FunctionGraph([x], [e]))) xv = rval((2, 2)) zv = xv + xv @@ -420,7 +420,7 @@ class TestCAReduce(unittest_tools.InferShapeTester): def with_mode( self, mode, - scalar_op=aes.add, + scalar_op=ps.add, dtype="floatX", pre_scalar_op=None, test_nan=False, @@ -483,36 +483,36 @@ def with_mode( zv = np.any(zv, axis) if len(tosum) == 0: zv = zv != 0 - elif scalar_op == aes.add: + elif scalar_op == ps.add: for axis in sorted(tosum, reverse=True): zv = np.add.reduce(zv, axis) if dtype == "bool": # np.add of a bool upcast, while CAReduce don't zv = zv.astype(dtype) - elif scalar_op == aes.mul: + elif scalar_op == ps.mul: for axis in sorted(tosum, reverse=True): zv = np.multiply.reduce(zv, axis) - elif scalar_op == aes.scalar_maximum: + elif scalar_op == ps.scalar_maximum: # There is no identity value for the maximum function # So we can't support shape of dimensions 0. if np.prod(zv.shape) == 0: continue for axis in sorted(tosum, reverse=True): zv = np.maximum.reduce(zv, axis) - elif scalar_op == aes.scalar_minimum: + elif scalar_op == ps.scalar_minimum: # There is no identity value for the minimum function # So we can't support shape of dimensions 0. if np.prod(zv.shape) == 0: continue for axis in sorted(tosum, reverse=True): zv = np.minimum.reduce(zv, axis) - elif scalar_op == aes.or_: + elif scalar_op == ps.or_: for axis in sorted(tosum, reverse=True): zv = np.bitwise_or.reduce(zv, axis) - elif scalar_op == aes.and_: + elif scalar_op == ps.and_: for axis in sorted(tosum, reverse=True): zv = reduce_bitwise_and(zv, axis, dtype=dtype) - elif scalar_op == aes.xor: + elif scalar_op == ps.xor: # There is no identity value for the xor function # So we can't support shape of dimensions 0. 
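# A minimal, hedged sketch of the `ps` scalar-op alias driving a reduction the
# same way the CAReduce tests above do; assumes CAReduce and dmatrix remain
# importable from pytensor.tensor.elemwise and pytensor.tensor.type, as in the
# surrounding tests.
import numpy as np
import pytensor.scalar as ps
from pytensor.tensor.elemwise import CAReduce
from pytensor.tensor.type import dmatrix

x = dmatrix("x")
row_sums = CAReduce(ps.add, axis=(1,))(x)  # sum over axis 1 of a 2-D input
assert np.allclose(row_sums.eval({x: np.ones((2, 3))}), [3.0, 3.0])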
if np.prod(zv.shape) == 0: @@ -542,47 +542,47 @@ def with_mode( tosum = list(range(len(xsh))) f = pytensor.function([x], e.shape, mode=mode, on_unused_input="ignore") if not ( - scalar_op in [aes.scalar_maximum, aes.scalar_minimum] + scalar_op in [ps.scalar_maximum, ps.scalar_minimum] and (xsh == () or np.prod(xsh) == 0) ): assert all(f(xv) == zv.shape) def test_perform_noopt(self): - self.with_mode(Mode(linker="py", optimizer=None), aes.add, dtype="floatX") + self.with_mode(Mode(linker="py", optimizer=None), ps.add, dtype="floatX") def test_perform(self): for dtype in ["bool", "floatX", "complex64", "complex128", "int8", "uint8"]: - self.with_mode(Mode(linker="py"), aes.add, dtype=dtype) - self.with_mode(Mode(linker="py"), aes.mul, dtype=dtype) - self.with_mode(Mode(linker="py"), aes.scalar_maximum, dtype=dtype) - self.with_mode(Mode(linker="py"), aes.scalar_minimum, dtype=dtype) - self.with_mode(Mode(linker="py"), aes.and_, dtype=dtype, tensor_op=pt_all) - self.with_mode(Mode(linker="py"), aes.or_, dtype=dtype, tensor_op=pt_any) + self.with_mode(Mode(linker="py"), ps.add, dtype=dtype) + self.with_mode(Mode(linker="py"), ps.mul, dtype=dtype) + self.with_mode(Mode(linker="py"), ps.scalar_maximum, dtype=dtype) + self.with_mode(Mode(linker="py"), ps.scalar_minimum, dtype=dtype) + self.with_mode(Mode(linker="py"), ps.and_, dtype=dtype, tensor_op=pt_all) + self.with_mode(Mode(linker="py"), ps.or_, dtype=dtype, tensor_op=pt_any) for dtype in ["int8", "uint8"]: - self.with_mode(Mode(linker="py"), aes.or_, dtype=dtype) - self.with_mode(Mode(linker="py"), aes.and_, dtype=dtype) - self.with_mode(Mode(linker="py"), aes.xor, dtype=dtype) + self.with_mode(Mode(linker="py"), ps.or_, dtype=dtype) + self.with_mode(Mode(linker="py"), ps.and_, dtype=dtype) + self.with_mode(Mode(linker="py"), ps.xor, dtype=dtype) def test_perform_nan(self): for dtype in ["floatX", "complex64", "complex128"]: - self.with_mode(Mode(linker="py"), aes.add, dtype=dtype, test_nan=True) - self.with_mode(Mode(linker="py"), aes.mul, dtype=dtype, test_nan=True) + self.with_mode(Mode(linker="py"), ps.add, dtype=dtype, test_nan=True) + self.with_mode(Mode(linker="py"), ps.mul, dtype=dtype, test_nan=True) self.with_mode( - Mode(linker="py"), aes.scalar_maximum, dtype=dtype, test_nan=True + Mode(linker="py"), ps.scalar_maximum, dtype=dtype, test_nan=True ) self.with_mode( - Mode(linker="py"), aes.scalar_minimum, dtype=dtype, test_nan=True + Mode(linker="py"), ps.scalar_minimum, dtype=dtype, test_nan=True ) self.with_mode( Mode(linker="py"), - aes.or_, + ps.or_, dtype=dtype, test_nan=True, tensor_op=pt_any, ) self.with_mode( Mode(linker="py"), - aes.and_, + ps.and_, dtype=dtype, test_nan=True, tensor_op=pt_all, @@ -595,7 +595,7 @@ def test_perform_nan(self): def test_c_noopt(self): # We need to make sure that we cover the corner cases that # optimizations normally cover - self.with_mode(Mode(linker="c", optimizer=None), aes.add, dtype="floatX") + self.with_mode(Mode(linker="c", optimizer=None), ps.add, dtype="floatX") @pytest.mark.slow @pytest.mark.skipif( @@ -604,17 +604,17 @@ def test_c_noopt(self): ) def test_c(self): for dtype in ["bool", "floatX", "complex64", "complex128", "int8", "uint8"]: - self.with_mode(Mode(linker="c"), aes.add, dtype=dtype) - self.with_mode(Mode(linker="c"), aes.mul, dtype=dtype) + self.with_mode(Mode(linker="c"), ps.add, dtype=dtype) + self.with_mode(Mode(linker="c"), ps.mul, dtype=dtype) for dtype in ["bool", "floatX", "int8", "uint8"]: - self.with_mode(Mode(linker="c"), aes.scalar_minimum, dtype=dtype) - 
self.with_mode(Mode(linker="c"), aes.scalar_maximum, dtype=dtype) - self.with_mode(Mode(linker="c"), aes.and_, dtype=dtype, tensor_op=pt_all) - self.with_mode(Mode(linker="c"), aes.or_, dtype=dtype, tensor_op=pt_any) + self.with_mode(Mode(linker="c"), ps.scalar_minimum, dtype=dtype) + self.with_mode(Mode(linker="c"), ps.scalar_maximum, dtype=dtype) + self.with_mode(Mode(linker="c"), ps.and_, dtype=dtype, tensor_op=pt_all) + self.with_mode(Mode(linker="c"), ps.or_, dtype=dtype, tensor_op=pt_any) for dtype in ["bool", "int8", "uint8"]: - self.with_mode(Mode(linker="c"), aes.or_, dtype=dtype) - self.with_mode(Mode(linker="c"), aes.and_, dtype=dtype) - self.with_mode(Mode(linker="c"), aes.xor, dtype=dtype) + self.with_mode(Mode(linker="c"), ps.or_, dtype=dtype) + self.with_mode(Mode(linker="c"), ps.and_, dtype=dtype) + self.with_mode(Mode(linker="c"), ps.xor, dtype=dtype) @pytest.mark.slow @pytest.mark.skipif( @@ -623,14 +623,14 @@ def test_c(self): ) def test_c_nan(self): for dtype in ["floatX", "complex64", "complex128"]: - self.with_mode(Mode(linker="c"), aes.add, dtype=dtype, test_nan=True) - self.with_mode(Mode(linker="c"), aes.mul, dtype=dtype, test_nan=True) + self.with_mode(Mode(linker="c"), ps.add, dtype=dtype, test_nan=True) + self.with_mode(Mode(linker="c"), ps.mul, dtype=dtype, test_nan=True) for dtype in ["floatX"]: self.with_mode( - Mode(linker="c"), aes.scalar_minimum, dtype=dtype, test_nan=True + Mode(linker="c"), ps.scalar_minimum, dtype=dtype, test_nan=True ) self.with_mode( - Mode(linker="c"), aes.scalar_maximum, dtype=dtype, test_nan=True + Mode(linker="c"), ps.scalar_maximum, dtype=dtype, test_nan=True ) def test_infer_shape(self, dtype=None, pre_scalar_op=None): @@ -651,7 +651,7 @@ def test_infer_shape(self, dtype=None, pre_scalar_op=None): d = {pre_scalar_op: pre_scalar_op} self._compile_and_check( [x], - [self.op(aes.add, axis=tosum, *d)(x)], + [self.op(ps.add, axis=tosum, *d)(x)], [xv], self.op, ["local_cut_useless_reduce"], @@ -659,26 +659,26 @@ def test_infer_shape(self, dtype=None, pre_scalar_op=None): ) def test_str(self): - op = CAReduce(aes.add, axis=None) + op = CAReduce(ps.add, axis=None) assert str(op) == "CAReduce{add, axes=None}" - op = CAReduce(aes.add, axis=(1,)) + op = CAReduce(ps.add, axis=(1,)) assert str(op) == "CAReduce{add, axis=1}" def test_repeated_axis(self): x = vector("x") with pytest.raises(ValueError, match="repeated axis"): - self.op(aes.add, axis=(0, 0))(x) + self.op(ps.add, axis=(0, 0))(x) def test_scalar_input(self): x = scalar("x") - assert self.op(aes.add, axis=(-1,))(x).eval({x: 5}) == 5 + assert self.op(ps.add, axis=(-1,))(x).eval({x: 5}) == 5 with pytest.raises( np.AxisError, match=re.escape("axis (-2,) is out of bounds for array of dimension 0"), ): - self.op(aes.add, axis=(-2,))(x) + self.op(ps.add, axis=(-2,))(x) class TestBitOpReduceGrad: @@ -739,7 +739,7 @@ def test_infer_shape(self): t_right_val = np.zeros(s_right, dtype=dtype) self._compile_and_check( [t_left, t_right], - [Elemwise(aes.add)(t_left, t_right)], + [Elemwise(ps.add)(t_left, t_right)], [t_left_val, t_right_val], Elemwise, ) @@ -791,18 +791,18 @@ def test_runtime_broadcast_c(self): self.check_runtime_broadcast(Mode(linker="c")) def test_str(self): - op = Elemwise(aes.add, inplace_pattern={0: 0}, name=None) + op = Elemwise(ps.add, inplace_pattern={0: 0}, name=None) assert str(op) == "Add" - op = Elemwise(aes.add, inplace_pattern=None, name="my_op") + op = Elemwise(ps.add, inplace_pattern=None, name="my_op") assert str(op) == "my_op" def 
test_partial_static_shape_info(self): """Make sure that `Elemwise.infer_shape` can handle changes in the static shape information during rewriting.""" x = TensorType("floatX", shape=(None, None))() - z = Elemwise(aes.add)(x, x) + z = Elemwise(ps.add)(x, x) - x_inferred_shape = (aes.constant(1), aes.constant(1)) + x_inferred_shape = (ps.constant(1), ps.constant(1)) res_shape = z.owner.op.infer_shape( None, z.owner, [x_inferred_shape, x_inferred_shape] @@ -827,13 +827,13 @@ def make_node(self, *args): ], ) - custom_elemwise = CustomElemwise(aes.add) + custom_elemwise = CustomElemwise(ps.add) z_1, z_2 = custom_elemwise( as_tensor_variable(np.eye(1)), as_tensor_variable(np.eye(1)), ) - in_1_shape = (aes.constant(1), aes.constant(1)) + in_1_shape = (ps.constant(1), ps.constant(1)) outs = z_1.owner.op.infer_shape(None, z_1.owner, [in_1_shape, in_1_shape]) for out in outs: assert out[0].eval() == 1 @@ -842,7 +842,7 @@ def make_node(self, *args): z_1, z_2 = custom_elemwise( as_tensor_variable(np.eye(1)), as_tensor_variable(np.eye(3)) ) - in_2_shape = (aes.constant(3), aes.constant(3)) + in_2_shape = (ps.constant(3), ps.constant(3)) outs = z_1.owner.op.infer_shape(None, z_1.owner, [in_1_shape, in_2_shape]) for out in outs: assert out[0].eval() == 3 @@ -898,9 +898,9 @@ def test_invalid_static_shape(self): def test_not_implemented_elemwise_grad(): # Regression test for unimplemented gradient in an Elemwise Op. - class TestOp(aes.ScalarOp): + class TestOp(ps.ScalarOp): def __init__(self): - self.output_types_preference = aes.upgrade_to_float + self.output_types_preference = ps.upgrade_to_float def impl(self, n, x): return x * n diff --git a/tests/tensor/test_extra_ops.py b/tests/tensor/test_extra_ops.py index e103567564..cda745d023 100644 --- a/tests/tensor/test_extra_ops.py +++ b/tests/tensor/test_extra_ops.py @@ -5,7 +5,7 @@ import pytensor from pytensor import function -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.compile.mode import Mode from pytensor.configdefaults import config from pytensor.graph.basic import Constant, applys_between, equal_computations @@ -336,10 +336,10 @@ def test_perform(self, axis, n): @pytest.mark.parametrize( "x_type", ( - at.TensorType("float64", shape=(None, None)), - at.TensorType("float64", shape=(None, 30)), - at.TensorType("float64", shape=(10, None)), - at.TensorType("float64", shape=(10, 30)), + pt.TensorType("float64", shape=(None, None)), + pt.TensorType("float64", shape=(None, 30)), + pt.TensorType("float64", shape=(10, None)), + pt.TensorType("float64", shape=(10, 30)), ), ) @pytest.mark.parametrize("axis", (-2, -1, 0, 1)) @@ -472,7 +472,7 @@ def test_invalid_axis(self): squeeze(variable, axis=1) def test_scalar_input(self): - x = at.scalar("x") + x = pt.scalar("x") assert squeeze(x, axis=(0,)).eval({x: 5}) == 5 @@ -886,7 +886,7 @@ def test_basic_vector(self, x, inp, axis): np.unique(inp, True, True, True, axis=axis), ] for params, outs_expected in zip(self.op_params, list_outs_expected): - out = at.unique(x, *params, axis=axis) + out = pt.unique(x, *params, axis=axis) f = pytensor.function(inputs=[x], outputs=out) outs = f(inp) for out, out_exp in zip(outs, outs_expected): @@ -908,9 +908,9 @@ def test_infer_shape(self, x, inp, axis): if not params[1]: continue if params[0]: - f = at.unique(x, *params, axis=axis)[2] + f = pt.unique(x, *params, axis=axis)[2] else: - f = at.unique(x, *params, axis=axis)[1] + f = pt.unique(x, *params, axis=axis)[1] self._compile_and_check( [x], [f], @@ -1066,8 +1066,8 @@ def shape_tuple(x, 
use_bcast=True): x = np.array([[1], [2], [3]]) y = np.array([4, 5, 6]) b = np.broadcast(x, y) - x_at = at.as_tensor_variable(x) - y_at = at.as_tensor_variable(y) + x_at = pt.as_tensor_variable(x) + y_at = pt.as_tensor_variable(y) b_at = broadcast_shape(x_at, y_at) assert np.array_equal([z.eval() for z in b_at], b.shape) # Now, we try again using shapes as the inputs @@ -1100,8 +1100,8 @@ def shape_tuple(x, use_bcast=True): x = np.array([1, 2, 3]) y = np.array([4, 5, 6]) b = np.broadcast(x, y) - x_at = at.as_tensor_variable(x) - y_at = at.as_tensor_variable(y) + x_at = pt.as_tensor_variable(x) + y_at = pt.as_tensor_variable(y) b_at = broadcast_shape(x_at, y_at) assert np.array_equal([z.eval() for z in b_at], b.shape) b_at = broadcast_shape(shape_tuple(x_at), shape_tuple(y_at), arrays_are_shapes=True) @@ -1110,8 +1110,8 @@ def shape_tuple(x, use_bcast=True): x = np.empty((1, 2, 3)) y = np.array(1) b = np.broadcast(x, y) - x_at = at.as_tensor_variable(x) - y_at = at.as_tensor_variable(y) + x_at = pt.as_tensor_variable(x) + y_at = pt.as_tensor_variable(y) b_at = broadcast_shape(x_at, y_at) assert b_at[0].value == 1 assert np.array_equal([z.eval() for z in b_at], b.shape) @@ -1121,8 +1121,8 @@ def shape_tuple(x, use_bcast=True): x = np.empty((2, 1, 3)) y = np.empty((2, 1, 1)) b = np.broadcast(x, y) - x_at = at.as_tensor_variable(x) - y_at = at.as_tensor_variable(y) + x_at = pt.as_tensor_variable(x) + y_at = pt.as_tensor_variable(y) b_at = broadcast_shape(x_at, y_at) assert b_at[1].value == 1 assert np.array_equal([z.eval() for z in b_at], b.shape) @@ -1133,11 +1133,11 @@ def shape_tuple(x, use_bcast=True): x2_shp_at = iscalar("x2") y1_shp_at = iscalar("y1") x_shapes = (1, x1_shp_at, x2_shp_at) - x_at = at.ones(x_shapes) + x_at = pt.ones(x_shapes) y_shapes = (y1_shp_at, 1, x2_shp_at) - y_at = at.ones(y_shapes) + y_at = pt.ones(y_shapes) b_at = broadcast_shape(x_at, y_at) - res = at.as_tensor(b_at).eval( + res = pt.as_tensor(b_at).eval( { x1_shp_at: 10, x2_shp_at: 4, @@ -1147,7 +1147,7 @@ def shape_tuple(x, use_bcast=True): assert np.array_equal(res, (2, 10, 4)) y_shapes = (y1_shp_at, 1, y1_shp_at) - y_at = at.ones(y_shapes) + y_at = pt.ones(y_shapes) b_at = broadcast_shape(x_at, y_at) assert isinstance(b_at[-1].owner.op, Assert) @@ -1196,19 +1196,19 @@ def test_broadcast_shape_constants(): ], ) def test_broadcast_shape_symbolic(s1_vals, s2_vals, exp_res): - s1s = at.lscalars(len(s1_vals)) + s1s = pt.lscalars(len(s1_vals)) eval_point = {} for s, s_val in zip(s1s, s1_vals): eval_point[s] = s_val s.tag.test_value = s_val - s2s = at.lscalars(len(s2_vals)) + s2s = pt.lscalars(len(s2_vals)) for s, s_val in zip(s2s, s2_vals): eval_point[s] = s_val s.tag.test_value = s_val res = broadcast_shape(s1s, s2s, arrays_are_shapes=True) - res = at.as_tensor(res) + res = pt.as_tensor(res) if exp_res is AssertionError: with pytest.raises(AssertionError): @@ -1219,11 +1219,11 @@ def test_broadcast_shape_symbolic(s1_vals, s2_vals, exp_res): def test_broadcast_shape_symbolic_one_symbolic(): """Test case for a constant non-broadcast shape and a symbolic shape.""" - one_at = at.as_tensor(1, dtype=np.int64) - three_at = at.as_tensor(3, dtype=np.int64) + one_at = pt.as_tensor(1, dtype=np.int64) + three_at = pt.as_tensor(3, dtype=np.int64) int_div = one_at / one_at - assert int_div.owner.op == at.true_div + assert int_div.owner.op == pt.true_div index_shapes = [ (one_at, one_at, three_at), @@ -1254,7 +1254,7 @@ def test_broadcast_to(): def test_broadcast_arrays(): - x, y = at.tensor(shape=(1,), dtype="float64"), 
at.dmatrix() + x, y = pt.tensor(shape=(1,), dtype="float64"), pt.dmatrix() x_bcast, y_bcast = broadcast_arrays(x, y) py_mode = Mode("py", None) diff --git a/tests/tensor/test_keepdims.py b/tests/tensor/test_keepdims.py index b3c7d1bb75..309c7d01e9 100644 --- a/tests/tensor/test_keepdims.py +++ b/tests/tensor/test_keepdims.py @@ -5,14 +5,14 @@ from pytensor import function from pytensor.compile.mode import Mode from pytensor.tensor.elemwise import DimShuffle -from pytensor.tensor.math import all as at_all -from pytensor.tensor.math import any as at_any +from pytensor.tensor.math import all as pt_all +from pytensor.tensor.math import any as pt_any from pytensor.tensor.math import argmax, argmin -from pytensor.tensor.math import max as at_max +from pytensor.tensor.math import max as pt_max from pytensor.tensor.math import max_and_argmax, mean -from pytensor.tensor.math import min as at_min +from pytensor.tensor.math import min as pt_min from pytensor.tensor.math import prod, std -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import var from pytensor.tensor.type import dtensor3 @@ -168,15 +168,15 @@ def test_single_or_any_axis(self, axis, op): @pytest.mark.parametrize( "op", [ - at_sum, + pt_sum, prod, mean, var, std, - at_all, - at_any, - at_max, - at_min, + pt_all, + pt_any, + pt_max, + pt_min, ], ) def test_free_axis(self, axis, op): diff --git a/tests/tensor/test_math.py b/tests/tensor/test_math.py index af653c2f51..d543019f8d 100644 --- a/tests/tensor/test_math.py +++ b/tests/tensor/test_math.py @@ -11,7 +11,7 @@ from numpy.testing import assert_array_equal from scipy.special import logsumexp as scipy_logsumexp -import pytensor.scalar as aes +import pytensor.scalar as ps from pytensor.compile.debugmode import DebugMode from pytensor.compile.function import function from pytensor.compile.mode import get_default_mode @@ -111,7 +111,7 @@ sqrt, sub, ) -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.math import tan, tanh, tensordot, true_div, trunc, var from pytensor.tensor.type import ( TensorType, @@ -358,7 +358,7 @@ def test_maximum_minimum_grad(): TestModBroadcast = makeBroadcastTester( op=mod, - expected=lambda x, y: np.asarray(x % y, dtype=aes.upcast(x.dtype, y.dtype)), + expected=lambda x, y: np.asarray(x % y, dtype=ps.upcast(x.dtype, y.dtype)), good=copymod(_good_broadcast_div_mod_normal_float, ["complex1", "complex2"]), grad=_grad_broadcast_div_mod_normal, grad_eps=1e-5, @@ -430,7 +430,7 @@ def test_maximum_minimum_grad(): # This happen in float32 mode. 
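# A minimal sketch of the `pt_`-prefixed import aliases adopted above
# (pt_sum, pt_max, ...); assumes the same pytensor.tensor.math and
# pytensor.tensor.type import paths used by these tests.
import numpy as np
from pytensor.tensor.math import max as pt_max
from pytensor.tensor.math import sum as pt_sum
from pytensor.tensor.type import dvector

x = dvector("x")
vals = np.array([1.0, 2.0, 3.0])
assert np.isclose(pt_sum(x).eval({x: vals}), 6.0)  # reduce over all axes
assert np.isclose(pt_max(x).eval({x: vals}), 3.0)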
TestRoundHalfAwayFromZeroBroadcast = makeBroadcastTester( op=round_half_away_from_zero, - expected=lambda a: aes.round_half_away_from_zero_vec(a), + expected=lambda a: ps.round_half_away_from_zero_vec(a), good=_good_broadcast_unary_normal_float_no_empty_no_complex, grad=_grad_broadcast_unary_normal_no_complex_no_corner_case, ) @@ -1006,9 +1006,9 @@ def test_zero_shape(self): def test_numpy_input(self): ar = np.array([1, 2, 3]) - max_at, argmax_at = max_and_argmax(ar, axis=None) - assert max_at.eval() == 3 - assert argmax_at.eval() == 2 + max_pt, argmax_pt = max_and_argmax(ar, axis=None) + assert max_pt.eval() == 3 + assert argmax_pt.eval() == 2 class TestArgminArgmax: @@ -2240,12 +2240,12 @@ class TestSum: def test_sum_overflow(self): # Ensure that overflow errors are a little bit harder to get a = TensorType(dtype="int8", shape=(None,))() - f = function([a], at_sum(a)) + f = function([a], pt_sum(a)) assert f([1] * 300) == 300 def test_list(self): ll = [shared(0.0), shared(2.0)] - at_sum(ll).eval() == 2 + pt_sum(ll).eval() == 2 class TestArithmeticCast: @@ -2302,7 +2302,7 @@ def numpy_array(dtype): return np.array([1], dtype=dtype) def pytensor_i_scalar(dtype): - return aes.ScalarType(str(dtype))() + return ps.ScalarType(str(dtype))() def numpy_i_scalar(dtype): return numpy_scalar(dtype) @@ -2329,7 +2329,7 @@ def numpy_i_scalar(dtype): op(numpy_arg_1, numpy_arg_2).dtype, op(numpy_arg_2, numpy_arg_1).dtype, ] - numpy_dtype = aes.upcast(*list(map(str, numpy_dtypes))) + numpy_dtype = ps.upcast(*list(map(str, numpy_dtypes))) if numpy_dtype == pytensor_dtype: # Same data type found, all is good! @@ -2354,7 +2354,7 @@ def numpy_i_scalar(dtype): (a_type, b_type)[list(combo).index(arg)] for arg in ("array", "scalar") ) - up_type = aes.upcast(array_type, scalar_type) + up_type = ps.upcast(array_type, scalar_type) if ( # The two data types are different. scalar_type != array_type @@ -2764,7 +2764,7 @@ def test_prod_without_zeros(self): def test_prod_without_zeros_grad(self): x = dmatrix() pwz_a1 = ProdWithoutZeros(axis=0)(x) - pwz_grad = grad(at_sum(pwz_a1), x) + pwz_grad = grad(pt_sum(pwz_a1), x) # FIXME: This is not a real test. function([x], pwz_grad, mode=self.mode) @@ -2834,9 +2834,9 @@ def setup_method(self): self.mode = copy(self.mode) self.mode.check_isfinite = False - def run_isfunc(self, at_func, np_func): + def run_isfunc(self, pt_func, np_func): for args in (self.scalar, self.vector): - PyTensor_isfunc = function([args], at_func(args), mode=self.mode) + PyTensor_isfunc = function([args], pt_func(args), mode=self.mode) for x in self.test_vals: if (x.ndim == 0 and args is not self.scalar) or ( x.ndim == 1 and args is not self.vector @@ -2859,7 +2859,7 @@ class TestSumProdReduceDtype: op = CAReduce axes = [None, 0, 1, [], [0], [1], [0, 1]] methods = ["sum", "prod"] - dtypes = list(map(str, aes.all_types)) + dtypes = list(map(str, ps.all_types)) # Test the default dtype of a method(). def test_reduce_default_dtype(self): @@ -2983,7 +2983,7 @@ def test_reduce_custom_acc_dtype(self): axis = self.axes[idx % len(self.axes)] # If output_dtype would force a downcast, we expect a TypeError # We always allow int/uint inputs with float/complex outputs. - upcasted_dtype = aes.upcast(input_dtype, acc_dtype) + upcasted_dtype = ps.upcast(input_dtype, acc_dtype) if acc_dtype == upcasted_dtype or ( input_dtype in discrete_dtypes and acc_dtype in continuous_dtypes @@ -3022,7 +3022,7 @@ def test_mean_default_dtype(self): # We try multiple axis combinations even though axis should not matter. 
axes = [None, 0, 1, [], [0], [1], [0, 1]] - for idx, dtype in enumerate(map(str, aes.all_types)): + for idx, dtype in enumerate(map(str, ps.all_types)): axis = axes[idx % len(axes)] x = matrix(dtype=dtype) m = x.mean(axis=axis) @@ -3043,9 +3043,9 @@ def test_mean_custom_dtype(self): # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [], [0], [1], [0, 1]] idx = 0 - for input_dtype in map(str, aes.all_types): + for input_dtype in map(str, ps.all_types): x = matrix(dtype=input_dtype) - for sum_dtype in map(str, aes.all_types): + for sum_dtype in map(str, ps.all_types): axis = axes[idx % len(axes)] # If the inner sum cannot be created, it will raise a # TypeError. @@ -3098,7 +3098,7 @@ def test_prod_without_zeros_default_dtype(self): # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [], [0], [1], [0, 1]] - for idx, dtype in enumerate(map(str, aes.all_types)): + for idx, dtype in enumerate(map(str, ps.all_types)): axis = axes[idx % len(axes)] x = ProdWithoutZeros(axis=axis)(matrix(dtype=dtype)) assert x.dtype == dict( @@ -3116,7 +3116,7 @@ def test_prod_without_zeros_default_acc_dtype(self): # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [], [0], [1], [0, 1]] - for idx, dtype in enumerate(map(str, aes.all_types)): + for idx, dtype in enumerate(map(str, ps.all_types)): axis = axes[idx % len(axes)] x = matrix(dtype=dtype) p = ProdWithoutZeros(axis=axis)(x) @@ -3148,9 +3148,9 @@ def test_prod_without_zeros_custom_dtype(self): # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [], [0], [1], [0, 1]] idx = 0 - for input_dtype in map(str, aes.all_types): + for input_dtype in map(str, ps.all_types): x = matrix(dtype=input_dtype) - for output_dtype in map(str, aes.all_types): + for output_dtype in map(str, ps.all_types): axis = axes[idx % len(axes)] prod_woz_var = ProdWithoutZeros(axis=axis, dtype=output_dtype)(x) assert prod_woz_var.dtype == output_dtype @@ -3170,13 +3170,13 @@ def test_prod_without_zeros_custom_acc_dtype(self): # We try multiple axis combinations even though axis should not matter. axes = [None, 0, 1, [], [0], [1], [0, 1]] idx = 0 - for input_dtype in map(str, aes.all_types): + for input_dtype in map(str, ps.all_types): x = matrix(dtype=input_dtype) - for acc_dtype in map(str, aes.all_types): + for acc_dtype in map(str, ps.all_types): axis = axes[idx % len(axes)] # If acc_dtype would force a downcast, we expect a TypeError # We always allow int/uint inputs with float/complex outputs. 
- upcasted_dtype = aes.upcast(input_dtype, acc_dtype) + upcasted_dtype = ps.upcast(input_dtype, acc_dtype) if acc_dtype == upcasted_dtype or ( input_dtype in discrete_dtypes and acc_dtype in continuous_dtypes ): @@ -3401,7 +3401,7 @@ def test_logsumexp(shape, axis, keepdims): def test_pprint(): x = vector("x") - y = at_sum(x, axis=0) + y = pt_sum(x, axis=0) assert pprint(y) == "sum(x, axis=(0,))" diff --git a/tests/tensor/test_math_scipy.py b/tests/tensor/test_math_scipy.py index 2d4c52282a..d98daccf1d 100644 --- a/tests/tensor/test_math_scipy.py +++ b/tests/tensor/test_math_scipy.py @@ -16,7 +16,7 @@ import scipy.stats from pytensor import function, grad -from pytensor import tensor as at +from pytensor import tensor as pt from pytensor.compile.mode import get_default_mode from pytensor.configdefaults import config from pytensor.tensor import gammaincc, inplace, vector @@ -81,7 +81,7 @@ def scipy_special_gammal(k, x): expected_hyp2f1 = scipy.special.hyp2f1 TestErfBroadcast = makeBroadcastTester( - op=at.erf, + op=pt.erf, expected=expected_erf, good=_good_broadcast_unary_normal, grad=_grad_broadcast_unary_normal, @@ -98,7 +98,7 @@ def scipy_special_gammal(k, x): ) TestErfcBroadcast = makeBroadcastTester( - op=at.erfc, + op=pt.erfc, expected=expected_erfc, good=_good_broadcast_unary_normal_float_no_complex, grad=_grad_broadcast_unary_normal, @@ -115,7 +115,7 @@ def scipy_special_gammal(k, x): ) TestErfcxBroadcast = makeBroadcastTester( - op=at.erfcx, + op=pt.erfcx, expected=expected_erfcx, good=_good_broadcast_unary_normal_float_no_complex_small_neg_range, grad=_grad_broadcast_unary_normal_small_neg_range, @@ -132,7 +132,7 @@ def scipy_special_gammal(k, x): ) TestErfinvBroadcast = makeBroadcastTester( - op=at.erfinv, + op=pt.erfinv, expected=expected_erfinv, good={ "normal": [random_ranged(-0.9, 0.9, (2, 3))], @@ -144,7 +144,7 @@ def scipy_special_gammal(k, x): ) TestErfcinvBroadcast = makeBroadcastTester( - op=at.erfcinv, + op=pt.erfcinv, expected=expected_erfcinv, good={ "normal": [random_ranged(0.001, 1.9, (2, 3))], @@ -188,7 +188,7 @@ def scipy_special_gammal(k, x): ) TestOwensTBroadcast = makeBroadcastTester( - op=at.owens_t, + op=pt.owens_t, expected=expected_owenst, good=_good_broadcast_binary_owenst, grad=_grad_broadcast_binary_owenst, @@ -219,7 +219,7 @@ def scipy_special_gammal(k, x): ) TestGammaBroadcast = makeBroadcastTester( - op=at.gamma, + op=pt.gamma, expected=expected_gamma, good=_good_broadcast_unary_gammaln, grad=_grad_broadcast_unary_gammaln, @@ -236,7 +236,7 @@ def scipy_special_gammal(k, x): ) TestGammalnBroadcast = makeBroadcastTester( - op=at.gammaln, + op=pt.gammaln, expected=expected_gammaln, good=_good_broadcast_unary_gammaln, grad=_grad_broadcast_unary_gammaln, @@ -262,7 +262,7 @@ def scipy_special_gammal(k, x): ) TestPsiBroadcast = makeBroadcastTester( - op=at.psi, + op=pt.psi, expected=expected_psi, good=_good_broadcast_unary_psi, eps=2e-10, @@ -280,7 +280,7 @@ def scipy_special_gammal(k, x): _good_broadcast_unary_tri_gamma = _good_broadcast_unary_psi TestTriGammaBroadcast = makeBroadcastTester( - op=at.tri_gamma, + op=pt.tri_gamma, expected=expected_tri_gamma, good=_good_broadcast_unary_psi, eps=2e-8, @@ -296,7 +296,7 @@ def scipy_special_gammal(k, x): ) TestChi2SFBroadcast = makeBroadcastTester( - op=at.chi2sf, + op=pt.chi2sf, expected=expected_chi2sf, good=_good_broadcast_unary_chi2sf, eps=2e-10, @@ -348,7 +348,7 @@ def scipy_special_gammal(k, x): ) TestGammaIncBroadcast = makeBroadcastTester( - op=at.gammainc, + op=pt.gammainc, expected=expected_gammainc, 
good=_good_broadcast_binary_gamma, grad=_good_broadcast_binary_gamma_grad, @@ -366,7 +366,7 @@ def scipy_special_gammal(k, x): ) TestGammaInccBroadcast = makeBroadcastTester( - op=at.gammaincc, + op=pt.gammaincc, expected=expected_gammaincc, good=_good_broadcast_binary_gamma, grad=_good_broadcast_binary_gamma_grad, @@ -387,9 +387,9 @@ def scipy_special_gammal(k, x): def test_gammainc_ddk_tabulated_values(): # This test replicates part of the old STAN test: # https://github.com/stan-dev/math/blob/21333bb70b669a1bd54d444ecbe1258078d33153/test/unit/math/prim/scal/fun/grad_reg_lower_inc_gamma_test.cpp - k, x = at.scalars("k", "x") - gammainc_out = at.gammainc(k, x) - gammaincc_ddk = at.grad(gammainc_out, k) + k, x = pt.scalars("k", "x") + gammainc_out = pt.gammainc(k, x) + gammaincc_ddk = pt.grad(gammainc_out, k) f_grad = function([k, x], gammaincc_ddk) rtol = 1e-5 if config.floatX == "float64" else 1e-2 @@ -451,7 +451,7 @@ def test_gammaincc_ddk_performance(benchmark): TestGammaUBroadcast = makeBroadcastTester( - op=at.gammau, + op=pt.gammau, expected=expected_gammau, good=_good_broadcast_binary_gamma, eps=2e-8, @@ -468,7 +468,7 @@ def test_gammaincc_ddk_performance(benchmark): ) TestGammaLBroadcast = makeBroadcastTester( - op=at.gammal, + op=pt.gammal, expected=expected_gammal, good=_good_broadcast_binary_gamma, eps=2e-8, @@ -522,7 +522,7 @@ def test_gammaincc_ddk_performance(benchmark): ) TestJ0Broadcast = makeBroadcastTester( - op=at.j0, + op=pt.j0, expected=expected_j0, good=_good_broadcast_unary_bessel, grad=_grad_broadcast_unary_bessel, @@ -540,7 +540,7 @@ def test_gammaincc_ddk_performance(benchmark): ) TestJ1Broadcast = makeBroadcastTester( - op=at.j1, + op=pt.j1, expected=expected_j1, good=_good_broadcast_unary_bessel, grad=_grad_broadcast_unary_bessel, @@ -558,7 +558,7 @@ def test_gammaincc_ddk_performance(benchmark): ) TestJvBroadcast = makeBroadcastTester( - op=at.jv, + op=pt.jv, expected=expected_jv, good=_good_broadcast_binary_bessel, eps=2e-10, @@ -582,13 +582,13 @@ def test_verify_jv_grad(): v_val, x_val = _grad_broadcast_binary_bessel["normal"] def fixed_first_input_jv(x): - return at.jv(v_val, x) + return pt.jv(v_val, x) utt.verify_grad(fixed_first_input_jv, [x_val]) TestI0Broadcast = makeBroadcastTester( - op=at.i0, + op=pt.i0, expected=expected_i0, good=_good_broadcast_unary_bessel, grad=_grad_broadcast_unary_bessel, @@ -606,7 +606,7 @@ def fixed_first_input_jv(x): ) TestI1Broadcast = makeBroadcastTester( - op=at.i1, + op=pt.i1, expected=expected_i1, good=_good_broadcast_unary_bessel, grad=_grad_broadcast_unary_bessel, @@ -624,7 +624,7 @@ def fixed_first_input_jv(x): ) TestIvBroadcast = makeBroadcastTester( - op=at.iv, + op=pt.iv, expected=expected_iv, good=_good_broadcast_binary_bessel, eps=2e-10, @@ -641,7 +641,7 @@ def fixed_first_input_jv(x): ) TestIveBroadcast = makeBroadcastTester( - op=at.ive, + op=pt.ive, expected=expected_ive, good=_good_broadcast_binary_bessel, eps=2e-10, @@ -665,7 +665,7 @@ def test_verify_iv_grad(): v_val, x_val = _grad_broadcast_binary_bessel["normal"] def fixed_first_input_iv(x): - return at.iv(v_val, x) + return pt.iv(v_val, x) utt.verify_grad(fixed_first_input_iv, [x_val]) @@ -677,13 +677,13 @@ def test_verify_ive_grad(): v_val, x_val = _grad_broadcast_binary_bessel["normal"] def fixed_first_input_ive(x): - return at.ive(v_val, x) + return pt.ive(v_val, x) utt.verify_grad(fixed_first_input_ive, [x_val]) TestSigmoidBroadcast = makeBroadcastTester( - op=at.sigmoid, + op=pt.sigmoid, expected=expected_sigmoid, 
good=_good_broadcast_unary_normal_no_complex, eps=1e-8, @@ -701,7 +701,7 @@ def fixed_first_input_ive(x): class TestSigmoid: def test_elemwise(self): - utt.verify_grad(at.sigmoid, [np.random.random((3, 4))]) + utt.verify_grad(pt.sigmoid, [np.random.random((3, 4))]) _good_broadcast_unary_softplus = dict( @@ -720,7 +720,7 @@ def test_elemwise(self): ) TestSoftplusBroadcast = makeBroadcastTester( - op=at.softplus, + op=pt.softplus, expected=expected_sofplus, good=_good_broadcast_unary_softplus, eps=1e-8, @@ -738,12 +738,12 @@ def test_elemwise(self): class TestSoftplus: def test_elemwise(self): - utt.verify_grad(at.softplus, [np.random.random((3, 4))]) + utt.verify_grad(pt.softplus, [np.random.random((3, 4))]) def test_accuracy(self): # Test all approximations are working (cutoff points are -37, 18, 33.3) x_test = np.array([-40.0, -17.5, 17.5, 18.5, 40.0]) - y_th = at.softplus(x_test).eval() + y_th = pt.softplus(x_test).eval() y_np = np.log1p(np.exp(x_test)) np.testing.assert_allclose(y_th, y_np, rtol=10e-10) @@ -766,7 +766,7 @@ def expected_log1mexp(x): TestLog1mexpBroadcast = makeBroadcastTester( - op=at.log1mexp, + op=pt.log1mexp, expected=expected_log1mexp, good=_good_broadcast_unary_log1mexp, grad=_grad_broadcast_unary_log1mexp, @@ -790,7 +790,7 @@ def expected_log1mexp(x): ) TestBetaincBroadcast = makeBroadcastTester( - op=at.betainc, + op=pt.betainc, expected=scipy.special.betainc, good=_good_broadcast_ternary_betainc, grad=_good_broadcast_ternary_betainc, @@ -811,9 +811,9 @@ def test_stan_grad_partial(self): # https://github.com/stan-dev/math/blob/master/test/unit/math/prim/fun/inc_beta_dda_test.cpp # https://github.com/stan-dev/math/blob/master/test/unit/math/prim/fun/inc_beta_ddb_test.cpp # https://github.com/stan-dev/math/blob/master/test/unit/math/prim/fun/inc_beta_ddz_test.cpp - a, b, z = at.scalars("a", "b", "z") - betainc_out = at.betainc(a, b, z) - betainc_grad = at.grad(betainc_out, [a, b, z]) + a, b, z = pt.scalars("a", "b", "z") + betainc_out = pt.betainc(a, b, z) + betainc_grad = pt.grad(betainc_out, [a, b, z]) f_grad = function([a, b, z], betainc_grad) decimal_precision = 7 if config.floatX == "float64" else 3 @@ -846,9 +846,9 @@ def test_boik_robison_cox(self): # This test compares against the tabulated values in: # Boik, R. J., & Robison-Cox, J. F. (1998). Derivatives of the incomplete beta function. # Journal of Statistical Software, 3(1), 1-20. 
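# A minimal sketch of the pt.* helpers exercised by the surrounding tests;
# assumes pt.grad and pt.sigmoid are exposed at the pytensor.tensor top level,
# as these tests already rely on.
import numpy as np
import pytensor.tensor as pt

x = pt.dscalar("x")
g = pt.grad(pt.sigmoid(x), x)  # d/dx sigmoid(x) = sigmoid(x) * (1 - sigmoid(x))
assert np.isclose(g.eval({x: 0.0}), 0.25)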
- a, b, z = at.scalars("a", "b", "z") - betainc_out = at.betainc(a, b, z) - betainc_grad = at.grad(betainc_out, [a, b]) + a, b, z = pt.scalars("a", "b", "z") + betainc_out = pt.betainc(a, b, z) + betainc_grad = pt.grad(betainc_out, [a, b]) f_grad = function([a, b, z], betainc_grad) decimal = 7 if config.floatX == "float64" else 5 for test_a, test_b, test_z, expected_dda, expected_ddb in ( @@ -866,9 +866,9 @@ def test_boik_robison_cox(self): def test_beta_inc_stan_grad_combined(self): # This test replicates the following STAN test: # https://github.com/stan-dev/math/blob/master/test/unit/math/prim/fun/grad_reg_inc_beta_test.cpp - a, b, z = at.scalars("a", "b", "z") - betainc_out = at.betainc(a, b, z) - betainc_grad = at.grad(betainc_out, [a, b]) + a, b, z = pt.scalars("a", "b", "z") + betainc_out = pt.betainc(a, b, z) + betainc_grad = pt.grad(betainc_out, [a, b]) f_grad = function([a, b, z], betainc_grad) for test_a, test_b, test_z, expected_dda, expected_ddb in ( @@ -890,7 +890,7 @@ def test_beta_inc_stan_grad_combined(self): ) TestHyp2F1Broadcast = makeBroadcastTester( - op=at.hyp2f1, + op=pt.hyp2f1, expected=expected_hyp2f1, good=_good_broadcast_quaternary_hyp2f1, grad=_good_broadcast_quaternary_hyp2f1, @@ -934,9 +934,9 @@ def test_hyp2f1_grad_stan_cases(self): Note: The expected_ddz was computed from the perform method, as it is not part of all Stan tests """ - a1, a2, b1, z = at.scalars("a1", "a2", "b1", "z") - hyp2f1_out = at.hyp2f1(a1, a2, b1, z) - hyp2f1_grad = at.grad(hyp2f1_out, [a1, a2, b1, z]) + a1, a2, b1, z = pt.scalars("a1", "a2", "b1", "z") + hyp2f1_out = pt.hyp2f1(a1, a2, b1, z) + hyp2f1_grad = pt.grad(hyp2f1_out, [a1, a2, b1, z]) f_grad = function([a1, a2, b1, z], hyp2f1_grad) rtol = 1e-9 if config.floatX == "float64" else 2e-3 @@ -1068,9 +1068,9 @@ def test_hyp2f1_grad_stan_cases(self): @pytest.mark.parametrize("case", (few_iters_case, many_iters_case)) @pytest.mark.parametrize("wrt", ("a", "all")) def test_benchmark(self, case, wrt, benchmark): - a1, a2, b1, z = at.scalars("a1", "a2", "b1", "z") - hyp2f1_out = at.hyp2f1(a1, a2, b1, z) - hyp2f1_grad = at.grad(hyp2f1_out, wrt=a1 if wrt == "a" else [a1, a2, b1, z]) + a1, a2, b1, z = pt.scalars("a1", "a2", "b1", "z") + hyp2f1_out = pt.hyp2f1(a1, a2, b1, z) + hyp2f1_grad = pt.grad(hyp2f1_out, wrt=a1 if wrt == "a" else [a1, a2, b1, z]) f_grad = function([a1, a2, b1, z], hyp2f1_grad) (test_a1, test_a2, test_b1, test_z, *expected_dds) = case @@ -1097,10 +1097,10 @@ def test_unused_grad_loop_opt(self, wrt): expected_ddz, ) = self.few_iters_case - a1, a2, b1, z = at.scalars("a1", "a2", "b1", "z") - hyp2f1_out = at.hyp2f1(a1, a2, b1, z) + a1, a2, b1, z = pt.scalars("a1", "a2", "b1", "z") + hyp2f1_out = pt.hyp2f1(a1, a2, b1, z) wrt_vars = [v for i, v in enumerate((a1, a2, b1, z)) if i in wrt] - hyp2f1_grad = at.grad(hyp2f1_out, wrt=wrt_vars) + hyp2f1_grad = pt.grad(hyp2f1_out, wrt=wrt_vars) mode = get_default_mode().including("local_useless_2f1grad_loop") f_grad = function([a1, a2, b1, z], hyp2f1_grad, mode=mode) diff --git a/tests/tensor/test_merge.py b/tests/tensor/test_merge.py index 164d17975c..d72577a25e 100644 --- a/tests/tensor/test_merge.py +++ b/tests/tensor/test_merge.py @@ -1,6 +1,6 @@ import numpy as np -import pytensor.tensor.basic as at +import pytensor.tensor.basic as ptb from pytensor.graph.basic import Apply, Variable from pytensor.graph.fg import FunctionGraph from pytensor.graph.op import Op @@ -69,8 +69,8 @@ def test_merge_with_weird_eq(): # numpy arrays don't compare equal like other python objects # SCALAR CASE 
- x = at.constant(np.asarray(1), name="x") - y = at.constant(np.asarray(1), name="y") + x = ptb.constant(np.asarray(1), name="x") + y = ptb.constant(np.asarray(1), name="y") g = FunctionGraph([x, y], [x + y]) MergeOptimizer().rewrite(g) @@ -81,8 +81,8 @@ def test_merge_with_weird_eq(): # NONSCALAR CASE # This was created to test TensorConstantSignature - x = at.constant(np.ones(5), name="x") - y = at.constant(np.ones(5), name="y") + x = ptb.constant(np.ones(5), name="x") + y = ptb.constant(np.ones(5), name="y") g = FunctionGraph([x, y], [x + y]) MergeOptimizer().rewrite(g) diff --git a/tests/tensor/test_sharedvar.py b/tests/tensor/test_sharedvar.py index 547a73dea2..dc18f7b35a 100644 --- a/tests/tensor/test_sharedvar.py +++ b/tests/tensor/test_sharedvar.py @@ -5,7 +5,7 @@ import pytensor import pytensor.sparse -import pytensor.tensor as at +import pytensor.tensor as pt from pytensor.misc.may_share_memory import may_share_memory from pytensor.tensor import get_vector_length from pytensor.tensor.basic import MakeVector @@ -453,7 +453,7 @@ def test_specify_shape_partial(self): x1_shared = self.shared_constructor(x1_1) x1_specify_shape = specify_shape( x1_shared, - (at.as_tensor_variable(x1_1.shape[0]), x1_shared.shape[1]), + (pt.as_tensor_variable(x1_1.shape[0]), x1_shared.shape[1]), ) x1_shared.set_value(x1_2) assert np.allclose( diff --git a/tests/tensor/test_subtensor.py b/tests/tensor/test_subtensor.py index 9ee39a4a98..63acbabb29 100644 --- a/tests/tensor/test_subtensor.py +++ b/tests/tensor/test_subtensor.py @@ -8,7 +8,7 @@ import pytensor import pytensor.scalar as scal -import pytensor.tensor.basic as at +import pytensor.tensor.basic as ptb from pytensor import function from pytensor.compile import DeepCopyOp, shared from pytensor.compile.io import In @@ -21,7 +21,7 @@ from pytensor.tensor.blockwise import Blockwise from pytensor.tensor.elemwise import DimShuffle from pytensor.tensor.math import exp, isinf -from pytensor.tensor.math import sum as at_sum +from pytensor.tensor.math import sum as pt_sum from pytensor.tensor.subtensor import ( AdvancedIncSubtensor, AdvancedIncSubtensor1, @@ -82,18 +82,18 @@ def test_as_index_literal(): - res = as_index_literal(slice(None, at.as_tensor(1))) + res = as_index_literal(slice(None, ptb.as_tensor(1))) assert res == slice(None, 1) - res = as_index_literal(slice(at.as_tensor(1), None)) + res = as_index_literal(slice(ptb.as_tensor(1), None)) assert res == slice(1, None) - res = as_index_literal(slice(None, None, at.as_tensor(2))) + res = as_index_literal(slice(None, None, ptb.as_tensor(2))) assert res == slice(None, None, 2) res = as_index_literal(SliceConstant(slicetype, slice(None))) assert res == slice(None) - res = as_index_literal(make_slice(None, at.as_tensor(1))) + res = as_index_literal(make_slice(None, ptb.as_tensor(1))) assert res == slice(None, 1) - res = as_index_literal(at.as_tensor(2)) + res = as_index_literal(ptb.as_tensor(2)) assert res == 2 res = as_index_literal(np.newaxis) @@ -109,7 +109,7 @@ def test_scalar_constant(self): a = as_scalar(0) length = lscalar() res = get_canonical_form_slice(a, length) - assert res[0].owner.op == at.switch + assert res[0].owner.op == ptb.switch assert res[1] == 1 def test_all_symbolic(self): @@ -121,10 +121,10 @@ def test_all_symbolic(self): f = pytensor.function( [start, stop, step, length], [ - at.as_tensor_variable(cnf[0].start), - at.as_tensor_variable(cnf[0].stop), - at.as_tensor_variable(cnf[0].step), - at.as_tensor_variable(cnf[1]), + ptb.as_tensor_variable(cnf[0].start), + 
ptb.as_tensor_variable(cnf[0].stop), + ptb.as_tensor_variable(cnf[0].step), + ptb.as_tensor_variable(cnf[1]), ], ) @@ -147,10 +147,10 @@ def test_start_None(self): f = pytensor.function( [stop, step, length], [ - at.as_tensor_variable(cnf[0].start), - at.as_tensor_variable(cnf[0].stop), - at.as_tensor_variable(cnf[0].step), - at.as_tensor_variable(cnf[1]), + ptb.as_tensor_variable(cnf[0].start), + ptb.as_tensor_variable(cnf[0].stop), + ptb.as_tensor_variable(cnf[0].step), + ptb.as_tensor_variable(cnf[1]), ], ) @@ -172,10 +172,10 @@ def test_stop_None(self): f = pytensor.function( [start, step, length], [ - at.as_tensor_variable(cnf[0].start), - at.as_tensor_variable(cnf[0].stop), - at.as_tensor_variable(cnf[0].step), - at.as_tensor_variable(cnf[1]), + ptb.as_tensor_variable(cnf[0].start), + ptb.as_tensor_variable(cnf[0].stop), + ptb.as_tensor_variable(cnf[0].step), + ptb.as_tensor_variable(cnf[1]), ], ) @@ -197,10 +197,10 @@ def test_step_None(self): f = pytensor.function( [start, stop, length], [ - at.as_tensor_variable(cnf[0].start), - at.as_tensor_variable(cnf[0].stop), - at.as_tensor_variable(cnf[0].step), - at.as_tensor_variable(cnf[1]), + ptb.as_tensor_variable(cnf[0].start), + ptb.as_tensor_variable(cnf[0].stop), + ptb.as_tensor_variable(cnf[0].step), + ptb.as_tensor_variable(cnf[1]), ], ) @@ -221,10 +221,10 @@ def test_start_stop_None(self): f = pytensor.function( [step, length], [ - at.as_tensor_variable(cnf[0].start), - at.as_tensor_variable(cnf[0].stop), - at.as_tensor_variable(cnf[0].step), - at.as_tensor_variable(cnf[1]), + ptb.as_tensor_variable(cnf[0].start), + ptb.as_tensor_variable(cnf[0].stop), + ptb.as_tensor_variable(cnf[0].step), + ptb.as_tensor_variable(cnf[1]), ], ) @@ -244,10 +244,10 @@ def test_stop_step_None(self): f = pytensor.function( [start, length], [ - at.as_tensor_variable(cnf[0].start), - at.as_tensor_variable(cnf[0].stop), - at.as_tensor_variable(cnf[0].step), - at.as_tensor_variable(cnf[1]), + ptb.as_tensor_variable(cnf[0].start), + ptb.as_tensor_variable(cnf[0].stop), + ptb.as_tensor_variable(cnf[0].step), + ptb.as_tensor_variable(cnf[1]), ], ) @@ -267,10 +267,10 @@ def test_start_step_None(self): f = pytensor.function( [stop, length], [ - at.as_tensor_variable(cnf[0].start), - at.as_tensor_variable(cnf[0].stop), - at.as_tensor_variable(cnf[0].step), - at.as_tensor_variable(cnf[1]), + ptb.as_tensor_variable(cnf[0].start), + ptb.as_tensor_variable(cnf[0].stop), + ptb.as_tensor_variable(cnf[0].step), + ptb.as_tensor_variable(cnf[1]), ], ) @@ -739,7 +739,7 @@ def test_grad_1d(self): n = self.shared(data) z = scal.constant(subi).astype("int32") t = n[z:, z] - gn = pytensor.grad(at_sum(exp(t)), n) + gn = pytensor.grad(pt_sum(exp(t)), n) f = inplace_func([], gn, mode=self.mode) topo = f.maker.fgraph.toposort() @@ -770,7 +770,7 @@ def test_grad_2d_inc_set_subtensor(self): mv = np.asarray(random(*m_shape), dtype=self.dtype) t = op(n[:z, :z], m) - gn, gm = pytensor.grad(at_sum(t), [n, m]) + gn, gm = pytensor.grad(pt_sum(t), [n, m]) utt.verify_grad(lambda m: op(n[:z, :z], m), [mv], mode=self.mode) utt.verify_grad(lambda nn: op(nn[:z, :z], mv), [data], mode=self.mode) @@ -778,7 +778,7 @@ def test_grad_0d(self): data = np.asarray(random(2, 3), dtype=self.dtype) n = self.shared(data) t = n[1, 0] - gn = pytensor.grad(at_sum(exp(t)), n) + gn = pytensor.grad(pt_sum(exp(t)), n) f = self.function([], gn) topo = f.maker.fgraph.toposort() topo_ = [node for node in topo if not isinstance(node.op, DeepCopyOp)] @@ -804,7 +804,7 @@ def test_ok_list(self): # optimized for that 
case. (random(4, 4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0, -1, -2, -3, -4]), # Test with TensorConstant index. - (random(4, 2, 3), at.constant([3, 3, 1, 1, 2, 2, 0, 0])), + (random(4, 2, 3), ptb.constant([3, 3, 1, 1, 2, 2, 0, 0])), ]: data = np.asarray(data, dtype=self.dtype) n = self.shared(data) @@ -1035,7 +1035,7 @@ def grad_list_(self, idxs, data): # Should stay on the cpu. idx_ = shared(np.asarray(idx)) t = n[idx_] - gn = pytensor.grad(at_sum(exp(t)), n) + gn = pytensor.grad(pt_sum(exp(t)), n) f = self.function([], [gn, gn.shape], op=AdvancedIncSubtensor1) topo = f.maker.fgraph.toposort() if not self.fast_compile: @@ -1057,13 +1057,13 @@ def grad_list_(self, idxs, data): assert np.allclose(gshape, data.shape) def fct(t): - return at_sum(t[idx_]) + return pt_sum(t[idx_]) utt.verify_grad(fct, [data], mode=self.mode) # Test the grad of the grad (e.i. AdvancedIncSubtensor1.grad) def fct2(t): - return pytensor.grad(at_sum(t[idx_]), t) + return pytensor.grad(pt_sum(t[idx_]), t) utt.verify_grad(fct2, [data], mode=self.mode) @@ -1272,7 +1272,7 @@ def test_adv_constant_arg(self): # Test case provided (and bug detected, gh-607) by John Salvatier m = matrix("m") gv = np.array([0, 1, 3]) - g = at.constant(gv) + g = ptb.constant(gv) i = lvector("i") # s1 used to fail @@ -1412,8 +1412,8 @@ def test_take_cases(a_shape, index, axis, mode): a_val = np.random.random(size=a_shape).astype(config.floatX) py_res = a_val.take(index, axis=axis, mode=mode) - a = at.as_tensor_variable(a_val) - index = at.as_tensor_variable(index) + a = ptb.as_tensor_variable(a_val) + index = ptb.as_tensor_variable(index) f = pytensor.function([], a.take(index, axis=axis, mode=mode)) f_res = f() @@ -1681,7 +1681,7 @@ def test_matrix_idx(self): utt.assert_allclose(a2val[3], mval[3] * 2) def test_inc_bcastableidx(self): - idx = at.constant([0]) + idx = ptb.constant([0]) c_inc = col() m_inc = matrix() out1 = inc_subtensor(self.m[:, idx], c_inc) @@ -1727,7 +1727,7 @@ def test_advinc_subtensor(self, inplace): def check(idx, y_val, x_val, true): x = self.shared(x_val, name="x") y = tensor(dtype="float32", shape=(None,) * len(y_val.shape), name="y") - sym_idx = [at.as_tensor_variable(ix) for ix in idx] + sym_idx = [ptb.as_tensor_variable(ix) for ix in idx] expr = AdvancedIncSubtensor(inplace=inplace)(x, y, *sym_idx) f = pytensor.function( [y], expr, mode=self.mode.excluding("inplace"), accept_inplace=inplace @@ -1809,7 +1809,7 @@ def test_index_w_int_and_vec(self): # optimized for that case. (random(4, 4, 2, 3), [3, 3, 1, 1, 2, 2, 0, 0, -1, -2, -3, -4]), # Test with TensorConstant index. 
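# A minimal sketch of the pytensor.grad / pt_sum pattern used by the subtensor
# gradient tests above; assumes the same pytensor.tensor.math and
# pytensor.tensor.type import paths.
import numpy as np
import pytensor
from pytensor.tensor.math import exp
from pytensor.tensor.math import sum as pt_sum
from pytensor.tensor.type import dvector

v = dvector("v")
# d/dv sum(exp(v[1:])) is exp(v) on the sliced entries and 0 elsewhere.
g = pytensor.grad(pt_sum(exp(v[1:])), v)
assert np.allclose(g.eval({v: np.zeros(3)}), [0.0, 1.0, 1.0])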
-            (random(2, 4, 3), at.constant([3, 3, 1, 1, 2, 2, 0, 0])),
+            (random(2, 4, 3), ptb.constant([3, 3, 1, 1, 2, 2, 0, 0])),
         ]:
             data = np.asarray(data, dtype=self.dtype)
             n = self.shared(data)
@@ -2076,7 +2076,7 @@ def test_adv_grouped(self):
         var = self.shared(var_v)
         idx1_v = rng.integers(0, 61, size=(5, 4)).astype("int32")
         idx1 = self.shared(idx1_v)
-        idx2 = at.arange(4)
+        idx2 = ptb.arange(4)
         out = var[:, idx1, idx2]
         f = pytensor.function([], out, mode=self.mode)
         out_v = f()
@@ -2126,7 +2126,7 @@ def fun(x, y):
         # Test boolean gradients
         def fun(x, y):
             return advanced_inc_subtensor(
-                x, y, at.as_tensor(np.array([[True, False], [False, True]]))
+                x, y, ptb.as_tensor(np.array([[True, False], [False, True]]))
             )
 
         utt.verify_grad(
@@ -2140,7 +2140,7 @@ def fun(x, y):
 
         def fun(x, y):
             return advanced_set_subtensor(
-                x, y, at.as_tensor(np.array([[True, False], [False, True]]))
+                x, y, ptb.as_tensor(np.array([[True, False], [False, True]]))
             )
 
         utt.verify_grad(
@@ -2541,7 +2541,7 @@ def idx_as_tensor(x):
     if isinstance(x, (slice, type(None))):
         return x
     else:
-        return at.as_tensor(x)
+        return ptb.as_tensor(x)
 
 
 def bcast_shape_tuple(x):
@@ -2603,14 +2603,14 @@ def bcast_shape_tuple(x):
 @config.change_flags(compute_test_value="raise")
 def test_indexed_result_shape(test_array, test_idx):
     res = indexed_result_shape(
-        at.as_tensor(test_array).shape, [idx_as_tensor(i) for i in test_idx]
+        ptb.as_tensor(test_array).shape, [idx_as_tensor(i) for i in test_idx]
     )
     exp_res = test_array[test_idx].shape
     assert np.array_equal(tuple(get_test_value(r) for r in res), exp_res)
 
     # Test shape-only version
     res = indexed_result_shape(
-        at.as_tensor(test_array).shape,
+        ptb.as_tensor(test_array).shape,
         [bcast_shape_tuple(idx_as_tensor(i)) for i in test_idx],
         indices_are_shapes=True,
     )
@@ -2626,7 +2626,7 @@ def test_symbolic_slice():
 
 
 def test_get_vector_length():
-    x = at.as_tensor_variable(np.arange(4))
+    x = ptb.as_tensor_variable(np.arange(4))
     assert get_vector_length(x[2:4]) == 2
     assert get_vector_length(x[2:]) == 2
     assert get_vector_length(x[1:4]) == 3
@@ -2681,7 +2681,7 @@ def test_pprint_IncSubtensor(indices, set_instead_of_inc, exp_res):
 
 
 def test_index_vars_to_types():
-    x = at.as_tensor_variable(np.array([True, False]))
+    x = ptb.as_tensor_variable(np.array([True, False]))
     with pytest.raises(AdvancedIndexingError):
         index_vars_to_types(x)
 
@@ -2708,7 +2708,7 @@ def test_index_vars_to_types():
     ],
 )
 def test_static_shapes(x_shape, indices, expected):
-    x = at.tensor(dtype="float64", shape=x_shape)
+    x = ptb.tensor(dtype="float64", shape=x_shape)
     y = x[indices]
     assert y.type.shape == expected
 
diff --git a/tests/tensor/test_type.py b/tests/tensor/test_type.py
index 5fa2327410..656361fe20 100644
--- a/tests/tensor/test_type.py
+++ b/tests/tensor/test_type.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pytest
 
-import pytensor.tensor as at
+import pytensor.tensor as pt
 from pytensor.configdefaults import config
 from pytensor.tensor.shape import SpecifyShape
 from pytensor.tensor.type import (
@@ -83,7 +83,7 @@ def test_convert_variable():
     res = test_type2.convert_variable(test_var3)
     assert res is None
 
-    const_var = at.as_tensor([[1, 2], [3, 4]], dtype=config.floatX)
+    const_var = pt.as_tensor([[1, 2], [3, 4]], dtype=config.floatX)
     res = test_type.convert_variable(const_var)
     assert res is const_var
 
diff --git a/tests/tensor/test_utils.py b/tests/tensor/test_utils.py
index b7fdabe3a9..e2fd3d2958 100644
--- a/tests/tensor/test_utils.py
+++ b/tests/tensor/test_utils.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest
 
-import pytensor.tensor as at
+import pytensor.tensor as pt
 from pytensor.graph.fg import FunctionGraph
 from pytensor.tensor.type import matrix
 from pytensor.tensor.utils import hash_from_ndarray, shape_of_variables
@@ -58,7 +58,7 @@ def test_simple(self):
         assert shapes == {x: (5, 5), y: (5, 5)}
 
         x = matrix("x")
-        y = at.dot(x, x.T)
+        y = pt.dot(x, x.T)
         fgraph = FunctionGraph([x], [y], clone=False)
         shapes = shape_of_variables(fgraph, {x: (5, 1)})
         assert shapes[x] == (5, 1)
diff --git a/tests/test_gradient.py b/tests/test_gradient.py
index 739289c3c4..7a25c63900 100644
--- a/tests/test_gradient.py
+++ b/tests/test_gradient.py
@@ -4,7 +4,7 @@
 import pytest
 
 import pytensor
-import pytensor.tensor.basic as at
+import pytensor.tensor.basic as ptb
 from pytensor.configdefaults import config
 from pytensor.gradient import (
     DisconnectedInputError,
@@ -31,7 +31,7 @@
 from pytensor.graph.null_type import NullType
 from pytensor.graph.op import Op
 from pytensor.tensor.math import add, dot, exp, sigmoid, sqr
-from pytensor.tensor.math import sum as at_sum
+from pytensor.tensor.math import sum as pt_sum
 from pytensor.tensor.math import tanh
 from pytensor.tensor.random import RandomStream
 from pytensor.tensor.type import (
@@ -50,7 +50,7 @@
 from tests import unittest_tools as utt
 
 
-one = at.as_tensor_variable(1.0)
+one = ptb.as_tensor_variable(1.0)
 
 
 def grad_sources_inputs(sources, inputs):
@@ -274,7 +274,7 @@ def test_1None_rval(self):
         o = TestGrad.Obj1()
         a1 = o.make_node()
         g = grad(a1.outputs[0], a1.outputs[1], disconnected_inputs="ignore")
-        assert g.owner.op == at.fill
+        assert g.owner.op == ptb.fill
        assert g.owner.inputs[1].data == 0
 
     def test_NNone_rval(self):
@@ -286,7 +286,7 @@ def test_NNone_rval(self):
         )
         assert o.gval0 is g0
         assert o.gval1 is g1
-        assert g2.owner.op == at.fill
+        assert g2.owner.op == ptb.fill
         assert g2.owner.inputs[1].data == 0
 
     def test_zero_gradient_shape(self):
@@ -498,7 +498,7 @@ def test_grad_disconnected(self):
         total.name = "total"
         num_elements = x.shape[0]
         num_elements.name = "num_elements"
-        silly_vector = at.alloc(total / num_elements, num_elements)
+        silly_vector = ptb.alloc(total / num_elements, num_elements)
         silly_vector.name = "silly_vector"
         cost = silly_vector.sum()
         cost.name = "cost"
@@ -609,7 +609,7 @@ def test_known_grads():
     # matches what happens if you put its own known_grads
     # in for each variable
 
-    full_range = at.arange(10)
+    full_range = ptb.arange(10)
     x = scalar("x")
     t = iscalar("t")
     ft = full_range[t]
@@ -786,7 +786,7 @@ def test_grad(self):
        expressions_gradients = [
             (x * zero_grad(x), x),
             (x * zero_grad(exp(x)), exp(x)),
-            (zero_grad(x), at.constant(0.0)),
+            (zero_grad(x), ptb.constant(0.0)),
             (x**2 * zero_grad(x), 2 * x**2),
         ]
 
@@ -921,10 +921,10 @@ def test_undefined_grad_opt():
     pvals = zero_grad(pvals)
 
     samples = random.multinomial(p=pvals, n=1)
-    samples = at.cast(samples, pvals.dtype)
+    samples = ptb.cast(samples, pvals.dtype)
     samples = zero_grad(samples)
 
-    cost = at_sum(samples + pvals)
+    cost = pt_sum(samples + pvals)
     grad_res = grad(cost, samples)
     f = pytensor.function([], grad_res)
 
@@ -1059,7 +1059,7 @@ def test_jacobian_scalar():
 
 def test_hessian():
     x = vector()
-    y = at_sum(x**2)
+    y = pt_sum(x**2)
     Hx = hessian(y, x)
     f = pytensor.function([x], Hx)
     vx = np.arange(10).astype(pytensor.config.floatX)
diff --git a/tests/test_ifelse.py b/tests/test_ifelse.py
index c1201e676c..90769f2c74 100644
--- a/tests/test_ifelse.py
+++ b/tests/test_ifelse.py
@@ -7,7 +7,7 @@
 import pytensor
 import pytensor.ifelse
 import pytensor.sparse
-import pytensor.tensor.basic as at
+import pytensor.tensor.basic as ptb
 from pytensor import function
 from pytensor.compile.mode import Mode, get_mode
 from pytensor.graph.basic import Apply
@@ -31,7 +31,7 @@ class TestIfelse(utt.OptimizationTestMixin):
     mode = None
     dtype = pytensor.config.floatX
 
-    cast_output = staticmethod(at.as_tensor_variable)
+    cast_output = staticmethod(ptb.as_tensor_variable)
     shared = staticmethod(pytensor.shared)
 
     def get_ifelse(self, n):
@@ -269,7 +269,7 @@ def test_multiple_out_crash(self):
 
         fsub = [fsub0, fsub1, fsub2, fsub3]
 
-        acc = at.constant(1, "int8") >= 0
+        acc = ptb.constant(1, "int8") >= 0
 
         new_positions = ifelse(acc, fsub, p)
 
@@ -291,7 +291,7 @@ def test_dtype_mismatch(self):
         rng = np.random.default_rng(utt.fetch_seed())
         data = rng.random(5).astype(self.dtype)
         x = self.shared(data)
-        y = at.cast(x * 10, "int8")
+        y = ptb.cast(x * 10, "int8")
         cond = iscalar("cond")
 
         with pytest.raises(TypeError):
@@ -542,8 +542,8 @@ def test_str(self):
         ],
     )
     def test_static_branch_shapes(self, x_shape, y_shape, x_val, y_val, exp_shape):
-        x = at.tensor(dtype=self.dtype, shape=x_shape, name="x")
-        y = at.tensor(dtype=self.dtype, shape=y_shape, name="y")
+        x = ptb.tensor(dtype=self.dtype, shape=x_shape, name="x")
+        y = ptb.tensor(dtype=self.dtype, shape=y_shape, name="y")
         c = iscalar("c")
         z = IfElse(1)(c, x, y)
         assert z.type.shape == exp_shape
diff --git a/tests/test_raise_op.py b/tests/test_raise_op.py
index de6d150333..2cd1cc830f 100644
--- a/tests/test_raise_op.py
+++ b/tests/test_raise_op.py
@@ -3,7 +3,7 @@
 import scipy.sparse
 
 import pytensor
-import pytensor.tensor as at
+import pytensor.tensor as pt
 from pytensor.compile.mode import OPT_FAST_RUN, Mode
 from pytensor.graph.basic import Constant, equal_computations
 from pytensor.raise_op import Assert, CheckAndRaise, assert_op
@@ -31,7 +31,7 @@ def test_CheckAndRaise_pickle():
     exc_msg = "this is the exception"
     check_and_raise = CheckAndRaise(CustomException, exc_msg)
 
-    y = check_and_raise(at.as_tensor(1), at.as_tensor(0))
+    y = check_and_raise(pt.as_tensor(1), pt.as_tensor(0))
     y_str = pickle.dumps(y)
     new_y = pickle.loads(y_str)
 
@@ -41,7 +41,7 @@ def test_CheckAndRaise_pickle():
 
 
 def test_CheckAndRaise_equal():
-    x, y = at.vectors("xy")
+    x, y = pt.vectors("xy")
     g1 = assert_op(x, (x > y).all())
     g2 = assert_op(x, (x > y).all())
 
@@ -73,21 +73,21 @@ def test_CheckAndRaise_basic_c(linker):
     exc_msg = "this is the exception"
     check_and_raise = CheckAndRaise(CustomException, exc_msg)
 
-    conds = at.scalar()
-    y = check_and_raise(at.as_tensor(1), conds)
+    conds = pt.scalar()
+    y = check_and_raise(pt.as_tensor(1), conds)
     y_fn = pytensor.function([conds], y, mode=Mode(linker))
 
     with pytest.raises(CustomException, match=exc_msg):
         y_fn(0)
 
-    x = at.vector()
+    x = pt.vector()
     y = check_and_raise(x, conds)
     y_fn = pytensor.function([conds, x], y.shape, mode=Mode(linker, OPT_FAST_RUN))
 
     x_val = np.array([1.0], dtype=pytensor.config.floatX)
     assert np.array_equal(y_fn(0, x_val), x_val)
 
-    y = check_and_raise(x, at.as_tensor(0))
+    y = check_and_raise(x, pt.as_tensor(0))
     y_grad = pytensor.grad(y.sum(), [x])
     y_fn = pytensor.function([x], y_grad, mode=Mode(linker, OPT_FAST_RUN))
 
@@ -143,8 +143,8 @@ def setup_method(self):
         super().setup_method()
 
     def test_infer_shape(self):
-        adscal = at.dscalar()
-        bdscal = at.dscalar()
+        adscal = pt.dscalar()
+        bdscal = pt.dscalar()
         adscal_val = np.random.random()
         bdscal_val = np.random.random() + 1
         out = assert_op(adscal, bdscal)
@@ -152,7 +152,7 @@ def test_infer_shape(self):
             [adscal, bdscal], [out], [adscal_val, bdscal_val], Assert
         )
 
-        admat = at.dmatrix()
+        admat = pt.dmatrix()
         admat_val = np.random.random((3, 4))
         adscal_val += 1
         out = assert_op(admat, adscal, bdscal)
diff --git a/tests/test_rop.py b/tests/test_rop.py
index f05625b416..58463833d5 100644
--- a/tests/test_rop.py
+++ b/tests/test_rop.py
@@ -16,13 +16,13 @@
 import pytest
 
 import pytensor
-import pytensor.tensor as at
+import pytensor.tensor as pt
 from pytensor import function
 from pytensor.gradient import Lop, Rop, grad, grad_undefined
 from pytensor.graph.basic import Apply
 from pytensor.graph.op import Op
 from pytensor.tensor.math import argmax, dot
-from pytensor.tensor.math import max as at_max
+from pytensor.tensor.math import max as pt_max
 from pytensor.tensor.shape import unbroadcast
 from pytensor.tensor.type import matrix, vector
 from tests import unittest_tools as utt
@@ -111,7 +111,7 @@ def check_mat_rop_lop(self, y, out_shape):
         rop_f = function([self.mx, self.mv], yv, on_unused_input="ignore")
         sy, _ = pytensor.scan(
             lambda i, y, x, v: (grad(y[i], x) * v).sum(),
-            sequences=at.arange(y.shape[0]),
+            sequences=pt.arange(y.shape[0]),
             non_sequences=[y, self.mx, self.mv],
         )
         scan_f = function([self.mx, self.mv], sy, on_unused_input="ignore")
@@ -149,7 +149,7 @@ def check_rop_lop(self, y, out_shape):
         rop_f = function([self.x, self.v], yv, on_unused_input="ignore")
         J, _ = pytensor.scan(
             lambda i, y, x: grad(y[i], x),
-            sequences=at.arange(y.shape[0]),
+            sequences=pt.arange(y.shape[0]),
             non_sequences=[y, self.x],
         )
         sy = dot(J, self.v)
@@ -179,7 +179,7 @@ def check_rop_lop(self, y, out_shape):
         lop_f = function([self.x, self.v], yv, on_unused_input="ignore")
         J, _ = pytensor.scan(
             lambda i, y, x: grad(y[i], x),
-            sequences=at.arange(y.shape[0]),
+            sequences=pt.arange(y.shape[0]),
             non_sequences=[y, self.x],
         )
         sy = dot(self.v, J)
@@ -196,8 +196,8 @@ def test_max(self):
         # If we call max directly, we will return an CAReduce object
         # which doesn't have R_op implemented!
         # self.check_mat_rop_lop(at_max(self.mx, axis=[0,1])[0], ())
-        self.check_mat_rop_lop(at_max(self.mx, axis=0), (self.mat_in_shape[1],))
-        self.check_mat_rop_lop(at_max(self.mx, axis=1), (self.mat_in_shape[0],))
+        self.check_mat_rop_lop(pt_max(self.mx, axis=0), (self.mat_in_shape[1],))
+        self.check_mat_rop_lop(pt_max(self.mx, axis=1), (self.mat_in_shape[0],))
 
     def test_argmax(self):
         self.check_nondiff_rop(argmax(self.mx, axis=1))
@@ -248,7 +248,7 @@ def test_unbroadcast(self):
     def test_join(self):
         tv = np.asarray(self.rng.uniform(size=(10,)), pytensor.config.floatX)
         t = pytensor.shared(tv)
-        out = at.join(0, self.x, t)
+        out = pt.join(0, self.x, t)
         self.check_rop_lop(out, (self.in_shape[0] + 10,))
 
     def test_dot(self):
@@ -261,7 +261,7 @@ def test_elemwise0(self):
         self.check_rop_lop((self.x + 1) ** 2, self.in_shape)
 
     def test_elemwise1(self):
-        self.check_rop_lop(self.x + at.cast(self.x, "int32"), self.in_shape)
+        self.check_rop_lop(self.x + pt.cast(self.x, "int32"), self.in_shape)
 
     def test_flatten(self):
         self.check_mat_rop_lop(
@@ -278,11 +278,11 @@ def test_softmax(self):
 
     def test_alloc(self):
         # Alloc of the sum of x into a vector
-        out1d = at.alloc(self.x.sum(), self.in_shape[0])
+        out1d = pt.alloc(self.x.sum(), self.in_shape[0])
         self.check_rop_lop(out1d, self.in_shape[0])
 
         # Alloc of x into a 3-D tensor, flattened
-        out3d = at.alloc(
+        out3d = pt.alloc(
             self.x, self.mat_in_shape[0], self.mat_in_shape[1], self.in_shape[0]
         )
         self.check_rop_lop(
@@ -330,7 +330,7 @@ def test_Rop_dot_bug_18Oct2013_Jeremiah(self):
         # 2013. The bug consists when through a dot operation there is only
         # one differentiable path (i.e. there is no gradient wrt to one of
         # the inputs).
-        x = at.arange(20.0).reshape([1, 20])
+        x = pt.arange(20.0).reshape([1, 20])
         v = pytensor.shared(np.ones([20]))
         d = dot(x, v).sum()
         Rop(grad(d, v), v, v)
diff --git a/tests/typed_list/test_rewriting.py b/tests/typed_list/test_rewriting.py
index 4948918a9c..dbd16d871a 100644
--- a/tests/typed_list/test_rewriting.py
+++ b/tests/typed_list/test_rewriting.py
@@ -1,7 +1,7 @@
 import numpy as np
 
 import pytensor
-import pytensor.tensor as at
+import pytensor.tensor as pt
 import pytensor.typed_list
 from pytensor.compile.io import In
 from pytensor.tensor.type import TensorType, matrix, scalar
@@ -147,7 +147,7 @@ def test_remove_inplace(self):
 
 
 def test_constant_folding():
-    m = at.ones((1,), dtype="int8")
+    m = pt.ones((1,), dtype="int8")
     l = pytensor.typed_list.make_list([m, m])
     f = pytensor.function([], l)
     topo = f.maker.fgraph.toposort()
diff --git a/tests/unittest_tools.py b/tests/unittest_tools.py
index d8c1bd0876..823f5653dc 100644
--- a/tests/unittest_tools.py
+++ b/tests/unittest_tools.py
@@ -12,7 +12,7 @@
 from pytensor.gradient import verify_grad as orig_verify_grad
 from pytensor.tensor.basic import as_tensor_variable
 from pytensor.tensor.math import _allclose
-from pytensor.tensor.math import add as at_add
+from pytensor.tensor.math import add as pt_add
 
 
 _logger = logging.getLogger("tests.unittest_tools")
@@ -133,7 +133,7 @@ def assertFunctionContainsClassN(self, f, op, N):
 class OpContractTestMixin:
     # self.ops should be a list of instantiations of an Op class to test.
     # self.other_op should be an op which is different from every op
-    other_op = at_add
+    other_op = pt_add
 
     def copy(self, x):
         return copy(x)