Skip to content

Commit

Permalink
Cleanup DMA resources on device removal
Browse files Browse the repository at this point in the history
When a PCIe device is removed while file descriptors are still open,
ensure all DMA resources are cleaned up before the IOMMU mappings become
invalid.  Part of the intent is to avoid a NULL pointer dereference at
fd close when the IOMMU is enabled.

See #34 for details.
  • Loading branch information
joelsmithTT committed Jan 24, 2025
1 parent a4373b1 commit c5469f5
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 5 deletions.
6 changes: 3 additions & 3 deletions chardev.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ int tenstorrent_register_device(struct tenstorrent_device *tt_dev)
tt_dev->dev.id = tt_dev->ordinal;
dev_set_name(&tt_dev->dev, TENSTORRENT "/%d", tt_dev->ordinal);

INIT_LIST_HEAD(&tt_dev->open_fds);
INIT_LIST_HEAD(&tt_dev->open_fds_list);

cdev_init(&tt_dev->chardev, &chardev_fops);
return cdev_device_add(&tt_dev->chardev, &tt_dev->dev);
Expand Down Expand Up @@ -379,7 +379,7 @@ static int tt_cdev_open(struct inode *inode, struct file *file)
file->private_data = private_data;

mutex_lock(&tt_dev->chardev_mutex);
list_add(&private_data->node, &tt_dev->open_fds);
list_add(&private_data->open_fd, &tt_dev->open_fds_list);
mutex_unlock(&tt_dev->chardev_mutex);

increment_cdev_open_count(tt_dev);
Expand Down Expand Up @@ -407,7 +407,7 @@ static int tt_cdev_release(struct inode *inode, struct file *file)
tenstorrent_device_put(tt_dev);

mutex_lock(&tt_dev->chardev_mutex);
list_del(&priv->node);
list_del(&priv->open_fd);
mutex_unlock(&tt_dev->chardev_mutex);

kfree(file->private_data);
Expand Down
2 changes: 1 addition & 1 deletion chardev_private.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ struct chardev_private {

DECLARE_BITMAP(resource_lock, TENSTORRENT_RESOURCE_LOCK_COUNT);

struct list_head node; // for struct tenstorrent_device::open_fds
struct list_head open_fd; // node in struct tenstorrent_device.open_fds_list
};

struct chardev_private *get_tenstorrent_priv(struct file *f);
Expand Down
2 changes: 1 addition & 1 deletion device.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ struct tenstorrent_device {
struct tt_hwmon_context hwmon_context;
struct tt_attribute_data *attributes;

struct list_head open_fds;
struct list_head open_fds_list; // List of struct chardev_private, linked through open_fd field
};

struct tenstorrent_device_class {
Expand Down
8 changes: 8 additions & 0 deletions enumerate.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,15 @@
#include <linux/mutex.h>
#include <linux/version.h>
#include <linux/pm.h>
#include <linux/list.h>

#include "enumerate.h"
#include "interrupt.h"
#include "chardev.h"
#include "grayskull.h"
#include "module.h"
#include "memory.h"
#include "chardev_private.h"

#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 0, 0)
#define pci_enable_pcie_error_reporting(dev) do { } while (0)
Expand Down Expand Up @@ -116,6 +119,11 @@ static int tenstorrent_pci_probe(struct pci_dev *dev, const struct pci_device_id
static void tenstorrent_pci_remove(struct pci_dev *dev)
{
struct tenstorrent_device *tt_dev = pci_get_drvdata(dev);
struct chardev_private *priv, *tmp;

list_for_each_entry_safe(priv, tmp, &tt_dev->open_fds_list, open_fd) {
tenstorrent_memory_cleanup(priv);
}

if (tt_dev->attributes) {
struct tt_attribute_data *data = tt_dev->attributes;
Expand Down
4 changes: 4 additions & 0 deletions memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,8 @@ void tenstorrent_memory_cleanup(struct chardev_private *priv)
unsigned int i;
struct peer_resource_mapping *peer_mapping, *tmp_peer_mapping;

mutex_lock(&priv->mutex);

hash_for_each_safe(priv->dmabufs, i, tmp_dmabuf, dmabuf, hash_chain) {
dma_free_coherent(&tt_dev->pdev->dev, dmabuf->size, dmabuf->ptr, dmabuf->phys);

Expand All @@ -780,4 +782,6 @@ void tenstorrent_memory_cleanup(struct chardev_private *priv)
list_del(&peer_mapping->list);
kfree(peer_mapping);
}

mutex_unlock(&priv->mutex);
}

0 comments on commit c5469f5

Please sign in to comment.