xfs
[Top] [All Lists]

[PATCH 2/2] kill xfs_lock_dir_and_entry

To: xfs@xxxxxxxxxxx
Subject: [PATCH 2/2] kill xfs_lock_dir_and_entry
From: Christoph Hellwig <hch@xxxxxx>
Date: Fri, 2 May 2008 12:58:03 +0200
Sender: xfs-bounce@xxxxxxxxxxx
User-agent: Mutt/1.3.28i
When multiple inodes are locked in XFS it happens in order of the inode
number, with the everything but the first inode trylocked if any of
the previous inodes is in the AIL.

Except for the sorting of the inodes this logic is implemented in
xfs_lock_inodes, but also partially duplicated in xfs_lock_dir_and_entry
in a particularly stupid way adds a lock roundtrip if the inode ordering
is not optimal.

This patch adds a new helper xfs_lock_two_inodes that takes two inodes
and locks them in the most optimal way according to the above locking
protocol and uses it for all places that want to lock two inodes.

The only caller of xfs_lock_inodes is xfs_rename which might lock up to
four inodes.


Signed-off-by: Christoph Hellwig <hch@xxxxxx>

Index: linux-2.6-xfs/fs/xfs/xfs_vnodeops.c
===================================================================
--- linux-2.6-xfs.orig/fs/xfs/xfs_vnodeops.c    2008-05-02 08:30:24.000000000 
+0200
+++ linux-2.6-xfs/fs/xfs/xfs_vnodeops.c 2008-05-02 08:30:30.000000000 +0200
@@ -1897,111 +1897,6 @@ std_return:
 }
 
 #ifdef DEBUG
-/*
- * Some counters to see if (and how often) we are hitting some deadlock
- * prevention code paths.
- */
-
-int xfs_rm_locks;
-int xfs_rm_lock_delays;
-int xfs_rm_attempts;
-#endif
-
-/*
- * The following routine will lock the inodes associated with the
- * directory and the named entry in the directory. The locks are
- * acquired in increasing inode number.
- *
- * If the entry is "..", then only the directory is locked. The
- * vnode ref count will still include that from the .. entry in
- * this case.
- *
- * There is a deadlock we need to worry about. If the locked directory is
- * in the AIL, it might be blocking up the log. The next inode we lock
- * could be already locked by another thread waiting for log space (e.g
- * a permanent log reservation with a long running transaction (see
- * xfs_itruncate_finish)). To solve this, we must check if the directory
- * is in the ail and use lock_nowait. If we can't lock, we need to
- * drop the inode lock on the directory and try again. xfs_iunlock will
- * potentially push the tail if we were holding up the log.
- */
-STATIC int
-xfs_lock_dir_and_entry(
-       xfs_inode_t     *dp,
-       xfs_inode_t     *ip)    /* inode of entry 'name' */
-{
-       int             attempts;
-       xfs_ino_t       e_inum;
-       xfs_inode_t     *ips[2];
-       xfs_log_item_t  *lp;
-
-#ifdef DEBUG
-       xfs_rm_locks++;
-#endif
-       attempts = 0;
-
-again:
-       xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
-
-       e_inum = ip->i_ino;
-
-       xfs_itrace_ref(ip);
-
-       /*
-        * We want to lock in increasing inum. Since we've already
-        * acquired the lock on the directory, we may need to release
-        * if if the inum of the entry turns out to be less.
-        */
-       if (e_inum > dp->i_ino) {
-               /*
-                * We are already in the right order, so just
-                * lock on the inode of the entry.
-                * We need to use nowait if dp is in the AIL.
-                */
-
-               lp = (xfs_log_item_t *)dp->i_itemp;
-               if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
-                       if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
-                               attempts++;
-#ifdef DEBUG
-                               xfs_rm_attempts++;
-#endif
-
-                               /*
-                                * Unlock dp and try again.
-                                * xfs_iunlock will try to push the tail
-                                * if the inode is in the AIL.
-                                */
-
-                               xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
-                               if ((attempts % 5) == 0) {
-                                       delay(1); /* Don't just spin the CPU */
-#ifdef DEBUG
-                                       xfs_rm_lock_delays++;
-#endif
-                               }
-                               goto again;
-                       }
-               } else {
-                       xfs_ilock(ip, XFS_ILOCK_EXCL);
-               }
-       } else if (e_inum < dp->i_ino) {
-               xfs_iunlock(dp, XFS_ILOCK_EXCL);
-
-               ips[0] = ip;
-               ips[1] = dp;
-               xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
-       }
-       /* else  e_inum == dp->i_ino */
-       /*     This can happen if we're asked to lock /x/..
-        *     the entry is "..", which is also the parent directory.
-        */
-
-       return 0;
-}
-
-#ifdef DEBUG
 int xfs_locked_n;
 int xfs_small_retries;
 int xfs_middle_retries;
@@ -2135,6 +2030,45 @@ again:
 #endif
 }
 
+void
+xfs_lock_two_inodes(
+       xfs_inode_t             *ip0,
+       xfs_inode_t             *ip1,
+       uint                    lock_mode)
+{
+       xfs_inode_t             *temp;
+       int                     attempts = 0;
+       xfs_log_item_t          *lp;
+
+       ASSERT(ip0->i_ino != ip1->i_ino);
+
+       if (ip0->i_ino > ip1->i_ino) {
+               temp = ip0;
+               ip0 = ip1;
+               ip1 = temp;
+       }
+
+ again:
+       xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0));
+
+       /*
+        * If the first lock we have locked is in the AIL, we must TRY to get
+        * the second lock. If we can't get it, we must release the first one
+        * and try again.
+        */
+       lp = (xfs_log_item_t *)ip0->i_itemp;
+       if (lp && (lp->li_flags & XFS_LI_IN_AIL)) {
+               if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) {
+                       xfs_iunlock(ip0, lock_mode);
+                       if ((++attempts % 5) == 0)
+                               delay(1); /* Don't just spin the CPU */
+                       goto again;
+               }
+       } else {
+               xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1));
+       }
+}
+
 int
 xfs_remove(
        xfs_inode_t             *dp,
@@ -2210,9 +2144,7 @@ xfs_remove(
                goto out_trans_cancel;
        }
 
-       error = xfs_lock_dir_and_entry(dp, ip);
-       if (error)
-               goto out_trans_cancel;
+       xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
 
        /*
         * At this point, we've gotten both the directory and the entry
@@ -2239,9 +2171,6 @@ xfs_remove(
                }
        }
 
-       /*
-        * Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
-        */
        XFS_BMAP_INIT(&free_list, &first_block);
        error = xfs_dir_removename(tp, dp, name, ip->i_ino,
                                        &first_block, &free_list, resblks);
@@ -2347,7 +2276,6 @@ xfs_link(
 {
        xfs_mount_t             *mp = tdp->i_mount;
        xfs_trans_t             *tp;
-       xfs_inode_t             *ips[2];
        int                     error;
        xfs_bmap_free_t         free_list;
        xfs_fsblock_t           first_block;
@@ -2395,15 +2323,7 @@ xfs_link(
                goto error_return;
        }
 
-       if (sip->i_ino < tdp->i_ino) {
-               ips[0] = sip;
-               ips[1] = tdp;
-       } else {
-               ips[0] = tdp;
-               ips[1] = sip;
-       }
-
-       xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
+       xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
 
        /*
         * Increment vnode ref counts since xfs_trans_commit &
Index: linux-2.6-xfs/fs/xfs/xfs_dfrag.c
===================================================================
--- linux-2.6-xfs.orig/fs/xfs/xfs_dfrag.c       2008-04-26 17:43:14.000000000 
+0200
+++ linux-2.6-xfs/fs/xfs/xfs_dfrag.c    2008-05-02 08:30:30.000000000 +0200
@@ -128,7 +128,6 @@ xfs_swap_extents(
        xfs_swapext_t   *sxp)
 {
        xfs_mount_t     *mp;
-       xfs_inode_t     *ips[2];
        xfs_trans_t     *tp;
        xfs_bstat_t     *sbp = &sxp->sx_stat;
        bhv_vnode_t     *vp, *tvp;
@@ -153,16 +152,7 @@ xfs_swap_extents(
        vp = XFS_ITOV(ip);
        tvp = XFS_ITOV(tip);
 
-       /* Lock in i_ino order */
-       if (ip->i_ino < tip->i_ino) {
-               ips[0] = ip;
-               ips[1] = tip;
-       } else {
-               ips[0] = tip;
-               ips[1] = ip;
-       }
-
-       xfs_lock_inodes(ips, 2, lock_flags);
+       xfs_lock_two_inodes(ip, tip, lock_flags);
        locked = 1;
 
        /* Verify that both files have the same format */
@@ -265,7 +255,7 @@ xfs_swap_extents(
                locked = 0;
                goto error0;
        }
-       xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
+       xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
 
        /*
         * Count the number of extended attribute blocks
Index: linux-2.6-xfs/fs/xfs/xfs_inode.h
===================================================================
--- linux-2.6-xfs.orig/fs/xfs/xfs_inode.h       2008-05-01 22:56:57.000000000 
+0200
+++ linux-2.6-xfs/fs/xfs/xfs_inode.h    2008-05-02 08:30:30.000000000 +0200
@@ -522,6 +522,7 @@ void                xfs_iflush_all(struct xfs_mount *)
 void           xfs_ichgtime(xfs_inode_t *, int);
 xfs_fsize_t    xfs_file_last_byte(xfs_inode_t *);
 void           xfs_lock_inodes(xfs_inode_t **, int, uint);
+void           xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
 
 void           xfs_synchronize_atime(xfs_inode_t *);
 void           xfs_mark_inode_dirty_sync(xfs_inode_t *);


<Prev in Thread] Current Thread [Next in Thread>