diff --git a/beginner_source/introyt/introyt1_tutorial.py b/beginner_source/introyt/introyt1_tutorial.py
index c01befb40c..0962334307 100644
--- a/beginner_source/introyt/introyt1_tutorial.py
+++ b/beginner_source/introyt/introyt1_tutorial.py
@@ -18,21 +18,27 @@
      <iframe width="560" height="315" src="https://www.youtube.com/embed/IC0_FRiX-sw" frameborder="0" allow="accelerometer; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
    </div>
 
+.. note::
+   The video above was recorded with an earlier version of PyTorch.
+   The code samples in this tutorial have been updated to use modern
+   PyTorch APIs. Where the video differs from the code
+   below, follow the written tutorial.
+
 PyTorch Tensors
 ---------------
 
 Follow along with the video beginning at `03:50 <https://www.youtube.com/watch?v=IC0_FRiX-sw&t=230s>`__.
 
-First, we’ll import pytorch.
+First, we'll import pytorch.
 
 """
 
 import torch
 
 ######################################################################
-# Let’s see a few basic tensor manipulations. First, just a few of the
+# Let's see a few basic tensor manipulations. First, just a few of the
 # ways to create tensors:
-# 
+#
 
 z = torch.zeros(5, 3)
 print(z)
@@ -43,10 +49,10 @@
 # Above, we create a 5x3 matrix filled with zeros, and query its datatype
 # to find out that the zeros are 32-bit floating point numbers, which is
 # the default PyTorch.
-# 
+#
 # What if you wanted integers instead? You can always override the
 # default:
-# 
+#
 
 i = torch.ones((5, 3), dtype=torch.int16)
 print(i)
@@ -55,10 +61,10 @@
 ######################################################################
 # You can see that when we do change the default, the tensor helpfully
 # reports this when printed.
-# 
-# It’s common to initialize learning weights randomly, often with a
+#
+# It's common to initialize learning weights randomly, often with a
 # specific seed for the PRNG for reproducibility of results:
-# 
+#
 
 torch.manual_seed(1729)
 r1 = torch.rand(2, 2)
@@ -79,7 +85,7 @@
 # PyTorch tensors perform arithmetic operations intuitively. Tensors of
 # similar shapes may be added, multiplied, etc. Operations with scalars
 # are distributed over the tensor:
-# 
+#
 
 ones = torch.ones(2, 3)
 print(ones)
@@ -98,8 +104,8 @@
 
 
 ######################################################################
-# Here’s a small sample of the mathematical operations available:
-# 
+# Here's a small sample of the mathematical operations available:
+#
 
 r = (torch.rand(2, 2) - 0.5) * 2 # values between -1 and 1
 print('A random matrix, r:')
@@ -115,9 +121,9 @@
 
 # ...and linear algebra operations like determinant and singular value decomposition
 print('\nDeterminant of r:')
-print(torch.det(r))
+print(torch.linalg.det(r))
 print('\nSingular value decomposition of r:')
-print(torch.svd(r))
+print(torch.linalg.svd(r))
 
 # ...and statistical and aggregate operations:
 print('\nAverage and standard deviation of r:')
@@ -127,16 +133,22 @@
 
 
 ##########################################################################
-# There’s a good deal more to know about the power of PyTorch tensors,
-# including how to set them up for parallel computations on GPU - we’ll be
+# There's a good deal more to know about the power of PyTorch tensors,
+# including how to set them up for parallel computations on GPU - we'll be
 # going into more depth in another video.
-# 
+#
+# .. note::
+#    Linear algebra operations in PyTorch live in the ``torch.linalg``
+#    module. Functions like ``torch.linalg.det()``, ``torch.linalg.svd()``,
+#    and ``torch.linalg.eigh()`` follow NumPy conventions and are the
+#    recommended API for new code.
+#
 # PyTorch Models
 # --------------
 #
 # Follow along with the video beginning at `10:00 <https://www.youtube.com/watch?v=IC0_FRiX-sw&t=600s>`__.
 #
-# Let’s talk about how we can express models in PyTorch
+# Let's talk about how we can express models in PyTorch
 #
 
 import torch                     # for all things PyTorch
@@ -149,33 +161,33 @@
 #    :alt: le-net-5 diagram
 #
 # *Figure: LeNet-5*
-# 
+#
 # Above is a diagram of LeNet-5, one of the earliest convolutional neural
 # nets, and one of the drivers of the explosion in Deep Learning. It was
 # built to read small images of handwritten numbers (the MNIST dataset),
 # and correctly classify which digit was represented in the image.
-# 
-# Here’s the abridged version of how it works:
-# 
+#
+# Here's the abridged version of how it works:
+#
 # -  Layer C1 is a convolutional layer, meaning that it scans the input
 #    image for features it learned during training. It outputs a map of
 #    where it saw each of its learned features in the image. This
-#    “activation map” is downsampled in layer S2.
-# -  Layer C3 is another convolutional layer, this time scanning C1’s
+#    "activation map" is downsampled in layer S2.
+# -  Layer C3 is another convolutional layer, this time scanning C1's
 #    activation map for *combinations* of features. It also puts out an
 #    activation map describing the spatial locations of these feature
 #    combinations, which is downsampled in layer S4.
 # -  Finally, the fully-connected layers at the end, F5, F6, and OUTPUT,
 #    are a *classifier* that takes the final activation map, and
 #    classifies it into one of ten bins representing the 10 digits.
-# 
+#
 # How do we express this simple neural network in code?
-# 
+#
 
 class LeNet(nn.Module):
 
     def __init__(self):
-        super(LeNet, self).__init__()
+        super().__init__()
         # 1 input image channel (black & white), 6 output channels, 5x5 square convolution
         # kernel
         self.conv1 = nn.Conv2d(1, 6, 5)
@@ -207,8 +219,8 @@ def num_flat_features(self, x):
 ############################################################################
 # Looking over this code, you should be able to spot some structural
 # similarities with the diagram above.
-# 
-# This demonstrates the structure of a typical PyTorch model: 
+#
+# This demonstrates the structure of a typical PyTorch model:
 #
 # -  It inherits from ``torch.nn.Module`` - modules may be nested - in fact,
 #    even the ``Conv2d`` and ``Linear`` layer classes inherit from
@@ -221,10 +233,10 @@ def num_flat_features(self, x):
 #    and various functions to generate an output.
 # -  Other than that, you can build out your model class like any other
 #    Python class, adding whatever properties and methods you need to
-#    support your model’s computation.
-# 
-# Let’s instantiate this object and run a sample input through it.
-# 
+#    support your model's computation.
+#
+# Let's instantiate this object and run a sample input through it.
+#
 
 net = LeNet()
 print(net)                         # what does the object tell us about itself?
@@ -241,37 +253,37 @@ def num_flat_features(self, x):
 
 ##########################################################################
 # There are a few important things happening above:
-# 
+#
 # First, we instantiate the ``LeNet`` class, and we print the ``net``
 # object. A subclass of ``torch.nn.Module`` will report the layers it has
 # created and their shapes and parameters. This can provide a handy
 # overview of a model if you want to get the gist of its processing.
-# 
+#
 # Below that, we create a dummy input representing a 32x32 image with 1
 # color channel. Normally, you would load an image tile and convert it to
 # a tensor of this shape.
-# 
+#
 # You may have noticed an extra dimension to our tensor - the *batch
 # dimension.* PyTorch models assume they are working on *batches* of data
 # - for example, a batch of 16 of our image tiles would have the shape
-# ``(16, 1, 32, 32)``. Since we’re only using one image, we create a batch
+# ``(16, 1, 32, 32)``. Since we're only using one image, we create a batch
 # of 1 with shape ``(1, 1, 32, 32)``.
-# 
+#
 # We ask the model for an inference by calling it like a function:
-# ``net(input)``. The output of this call represents the model’s
+# ``net(input)``. The output of this call represents the model's
 # confidence that the input represents a particular digit. (Since this
-# instance of the model hasn’t learned anything yet, we shouldn’t expect
+# instance of the model hasn't learned anything yet, we shouldn't expect
 # to see any signal in the output.) Looking at the shape of ``output``, we
 # can see that it also has a batch dimension, the size of which should
 # always match the input batch dimension. If we had passed in an input
 # batch of 16 instances, ``output`` would have a shape of ``(16, 10)``.
-# 
+#
 # Datasets and Dataloaders
 # ------------------------
 #
 # Follow along with the video beginning at `14:00 <https://www.youtube.com/watch?v=IC0_FRiX-sw&t=840s>`__.
 #
-# Below, we’re going to demonstrate using one of the ready-to-download,
+# Below, we're going to demonstrate using one of the ready-to-download,
 # open-access datasets from TorchVision, how to transform the images for
 # consumption by your model, and how to use the DataLoader to feed batches
 # of data to your model.
@@ -284,19 +296,23 @@ def num_flat_features(self, x):
 
 import torch
 import torchvision
-import torchvision.transforms as transforms
+from torchvision.transforms import v2
 
-transform = transforms.Compose(
-    [transforms.ToTensor(),
-     transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])
+transform = v2.Compose(
+    [v2.ToImage(),
+     v2.ToDtype(torch.float32, scale=True),
+     v2.Normalize((0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616))])
 
 
 ##########################################################################
-# Here, we specify two transformations for our input:
+# Here, we specify three transformations for our input:
 #
-# -  ``transforms.ToTensor()`` converts images loaded by Pillow into 
-#    PyTorch tensors.
-# -  ``transforms.Normalize()`` adjusts the values of the tensor so
+# -  ``v2.ToImage()`` converts images loaded by Pillow into the
+#    ``TVTensor`` image type used by torchvision v2.
+# -  ``v2.ToDtype(torch.float32, scale=True)`` converts pixel values to
+#    float32 and scales them from [0, 255] to [0.0, 1.0]. (This replaces
+#    the older ``transforms.ToTensor()``.)
+# -  ``v2.Normalize()`` adjusts the values of the tensor so
 #    that their average is zero and their standard deviation is 1.0. Most
 #    activation functions have their strongest gradients around x = 0, so
 #    centering our data there can speed learning.
@@ -306,27 +322,29 @@ def num_flat_features(self, x):
 #    few lines of code::
 #
 #        from torch.utils.data import ConcatDataset
-#        transform = transforms.Compose([transforms.ToTensor()])
+#        transform = v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)])
 #        trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
 #                                        download=True, transform=transform)
 #
-#        # stack all train images together into a tensor of shape 
+#        # stack all train images together into a tensor of shape
 #        # (50000, 3, 32, 32)
 #        x = torch.stack([sample[0] for sample in ConcatDataset([trainset])])
-#           
-#        # get the mean of each channel            
+#
+#        # get the mean of each channel
 #        mean = torch.mean(x, dim=(0,2,3)) # tensor([0.4914, 0.4822, 0.4465])
-#        std = torch.std(x, dim=(0,2,3)) # tensor([0.2470, 0.2435, 0.2616])  
-#    
-# 
+#        std = torch.std(x, dim=(0,2,3)) # tensor([0.2470, 0.2435, 0.2616])
+#
+#
 # There are many more transforms available, including cropping, centering,
-# rotation, and reflection.
-# 
-# Next, we’ll create an instance of the CIFAR10 dataset. This is a set of
+# rotation, and reflection. See
+# `torchvision.transforms.v2 <https://docs.pytorch.org/vision/stable/transforms.html>`_
+# for the full list.
+#
+# Next, we'll create an instance of the CIFAR10 dataset. This is a set of
 # 32x32 color image tiles representing 10 classes of objects: 6 of animals
 # (bird, cat, deer, dog, frog, horse) and 4 of vehicles (airplane,
 # automobile, ship, truck):
-# 
+#
 
 trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                         download=True, transform=transform)
@@ -334,9 +352,9 @@ def num_flat_features(self, x):
 
 ##########################################################################
 # .. note::
-#      When you run the cell above, it may take a little time for the 
+#      When you run the cell above, it may take a little time for the
 #      dataset to download.
-# 
+#
 # This is an example of creating a dataset object in PyTorch. Downloadable
 # datasets (like CIFAR-10 above) are subclasses of
 # ``torch.utils.data.Dataset``. ``Dataset`` classes in PyTorch include the
@@ -344,17 +362,17 @@ def num_flat_features(self, x):
 # as utility dataset classes such as ``torchvision.datasets.ImageFolder``,
 # which will read a folder of labeled images. You can also create your own
 # subclasses of ``Dataset``.
-# 
+#
 # When we instantiate our dataset, we need to tell it a few things:
 #
-# -  The filesystem path to where we want the data to go. 
+# -  The filesystem path to where we want the data to go.
 # -  Whether or not we are using this set for training; most datasets
 #    will be split into training and test subsets.
-# -  Whether we would like to download the dataset if we haven’t already.
+# -  Whether we would like to download the dataset if we haven't already.
 # -  The transformations we want to apply to the data.
-# 
+#
 # Once your dataset is ready, you can give it to the ``DataLoader``:
-# 
+#
 
 trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                           shuffle=True, num_workers=2)
@@ -362,16 +380,16 @@ def num_flat_features(self, x):
 
 ##########################################################################
 # A ``Dataset`` subclass wraps access to the data, and is specialized to
-# the type of data it’s serving. The ``DataLoader`` knows *nothing* about
+# the type of data it's serving. The ``DataLoader`` knows *nothing* about
 # the data, but organizes the input tensors served by the ``Dataset`` into
 # batches with the parameters you specify.
-# 
-# In the example above, we’ve asked a ``DataLoader`` to give us batches of
+#
+# In the example above, we've asked a ``DataLoader`` to give us batches of
 # 4 images from ``trainset``, randomizing their order (``shuffle=True``),
 # and we told it to spin up two workers to load data from disk.
-# 
-# It’s good practice to visualize the batches your ``DataLoader`` serves:
-# 
+#
+# It's good practice to visualize the batches your ``DataLoader`` serves:
+#
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -392,19 +410,19 @@ def imshow(img):
 # show images
 imshow(torchvision.utils.make_grid(images))
 # print labels
-print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
+print(' '.join(f'{classes[labels[j]]:>5s}' for j in range(4)))
 
 
 ########################################################################
 # Running the above cell should show you a strip of four images, and the
 # correct label for each.
-# 
+#
 # Training Your PyTorch Model
 # ---------------------------
 #
 # Follow along with the video beginning at `17:10 <https://www.youtube.com/watch?v=IC0_FRiX-sw&t=1030s>`__.
 #
-# Let’s put all the pieces together, and train a model:
+# Let's put all the pieces together, and train a model:
 #
 
 #%matplotlib inline
@@ -415,7 +433,7 @@ def imshow(img):
 import torch.optim as optim
 
 import torchvision
-import torchvision.transforms as transforms
+from torchvision.transforms import v2
 
 import matplotlib
 import matplotlib.pyplot as plt
@@ -423,14 +441,15 @@ def imshow(img):
 
 
 #########################################################################
-# First, we’ll need training and test datasets. If you haven’t already,
+# First, we'll need training and test datasets. If you haven't already,
 # run the cell below to make sure the dataset is downloaded. (It may take
 # a minute.)
-# 
+#
 
-transform = transforms.Compose(
-    [transforms.ToTensor(),
-     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+transform = v2.Compose(
+    [v2.ToImage(),
+     v2.ToDtype(torch.float32, scale=True),
+     v2.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
 
 trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                         download=True, transform=transform)
@@ -447,8 +466,8 @@ def imshow(img):
 
 
 ######################################################################
-# We’ll run our check on the output from ``DataLoader``:
-# 
+# We'll run our check on the output from ``DataLoader``:
+#
 
 import matplotlib.pyplot as plt
 import numpy as np
@@ -469,18 +488,18 @@ def imshow(img):
 # show images
 imshow(torchvision.utils.make_grid(images))
 # print labels
-print(' '.join('%5s' % classes[labels[j]] for j in range(4)))
+print(' '.join(f'{classes[labels[j]]:>5s}' for j in range(4)))
 
 
 ##########################################################################
-# This is the model we’ll train. If it looks familiar, that’s because it’s
+# This is the model we'll train. If it looks familiar, that's because it's
 # a variant of LeNet - discussed earlier in this video - adapted for
 # 3-color images.
-# 
+#
 
 class Net(nn.Module):
     def __init__(self):
-        super(Net, self).__init__()
+        super().__init__()
         self.conv1 = nn.Conv2d(3, 6, 5)
         self.pool = nn.MaxPool2d(2, 2)
         self.conv2 = nn.Conv2d(6, 16, 5)
@@ -503,7 +522,7 @@ def forward(self, x):
 
 ######################################################################
 # The last ingredients we need are a loss function and an optimizer:
-# 
+#
 
 criterion = nn.CrossEntropyLoss()
 optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
@@ -511,19 +530,19 @@ def forward(self, x):
 
 ##########################################################################
 # The loss function, as discussed earlier in this video, is a measure of
-# how far from our ideal output the model’s prediction was. Cross-entropy
+# how far from our ideal output the model's prediction was. Cross-entropy
 # loss is a typical loss function for classification models like ours.
-# 
+#
 # The **optimizer** is what drives the learning. Here we have created an
 # optimizer that implements *stochastic gradient descent,* one of the more
 # straightforward optimization algorithms. Besides parameters of the
 # algorithm, like the learning rate (``lr``) and momentum, we also pass in
 # ``net.parameters()``, which is a collection of all the learning weights
 # in the model - which is what the optimizer adjusts.
-# 
+#
 # Finally, all of this is assembled into the training loop. Go ahead and
 # run this cell, as it will likely take a few minutes to execute:
-# 
+#
 
 for epoch in range(2):  # loop over the dataset multiple times
 
@@ -544,8 +563,7 @@ def forward(self, x):
         # print statistics
         running_loss += loss.item()
         if i % 2000 == 1999:    # print every 2000 mini-batches
-            print('[%d, %5d] loss: %.3f' %
-                  (epoch + 1, i + 1, running_loss / 2000))
+            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
             running_loss = 0.0
 
 print('Finished Training')
@@ -556,31 +574,31 @@ def forward(self, x):
 # passes over the training dataset. Each pass has an inner loop that
 # **iterates over the training data** (line 4), serving batches of
 # transformed input images and their correct labels.
-# 
+#
 # **Zeroing the gradients** (line 9) is an important step. Gradients are
 # accumulated over a batch; if we do not reset them for every batch, they
 # will keep accumulating, which will provide incorrect gradient values,
 # making learning impossible.
-# 
+#
 # In line 12, we **ask the model for its predictions** on this batch. In
 # the following line (13), we compute the loss - the difference between
 # ``outputs`` (the model prediction) and ``labels`` (the correct output).
-# 
+#
 # In line 14, we do the ``backward()`` pass, and calculate the gradients
 # that will direct the learning.
-# 
+#
 # In line 15, the optimizer performs one learning step - it uses the
 # gradients from the ``backward()`` call to nudge the learning weights in
 # the direction it thinks will reduce the loss.
-# 
+#
 # The remainder of the loop does some light reporting on the epoch number,
 # how many training instances have been completed, and what the collected
 # loss is over the training loop.
-# 
+#
 # **When you run the cell above,** you should see something like this:
-# 
+#
 # .. code-block:: sh
-# 
+#
 #    [1,  2000] loss: 2.235
 #    [1,  4000] loss: 1.940
 #    [1,  6000] loss: 1.713
@@ -594,20 +612,20 @@ def forward(self, x):
 #    [2, 10000] loss: 1.284
 #    [2, 12000] loss: 1.267
 #    Finished Training
-# 
+#
 # Note that the loss is monotonically descending, indicating that our
 # model is continuing to improve its performance on the training dataset.
-# 
+#
 # As a final step, we should check that the model is actually doing
-# *general* learning, and not simply “memorizing” the dataset. This is
+# *general* learning, and not simply "memorizing" the dataset. This is
 # called **overfitting,** and usually indicates that the dataset is too
 # small (not enough examples for general learning), or that the model has
 # more learning parameters than it needs to correctly model the dataset.
-# 
+#
 # This is the reason datasets are split into training and test subsets -
 # to test the generality of the model, we ask it to make predictions on
-# data it hasn’t trained on:
-# 
+# data it hasn't trained on:
+#
 
 correct = 0
 total = 0
@@ -619,13 +637,11 @@ def forward(self, x):
         total += labels.size(0)
         correct += (predicted == labels).sum().item()
 
-print('Accuracy of the network on the 10000 test images: %d %%' % (
-    100 * correct / total))
+print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.0f} %')
 
 
 #########################################################################
 # If you followed along, you should see that the model is roughly 50%
-# accurate at this point. That’s not exactly state-of-the-art, but it’s
-# far better than the 10% accuracy we’d expect from a random output. This
+# accurate at this point. That's not exactly state-of-the-art, but it's
+# far better than the 10% accuracy we'd expect from a random output. This
 # demonstrates that some general learning did happen in the model.
-#