From dc8fdc57cd3ba0658cf4ab05031695c2d2965f93 Mon Sep 17 00:00:00 2001 From: Karel Zak Date: Fri, 30 Mar 2007 13:10:59 +0200 Subject: [PATCH] mount: fix mtab_lock * the lock function uses F_SETLK / F_SETLKW as a conditional wait. It's more reliable and better for performance to close the MOUNTED_LOCK file in unlock_mtab(), otherwise concurrent process will be wait by while () { link() } loop instead on fcntl(F_SETLKW). Thanks to Jeff Moyer who found the problem two year ago. * when open(MOUNTED_LOCK) failed, we need to try everything again, but the original code didn't zeroize "we_created_lockfile" and the old version in particular case left lock_mtab() without locked /etc/mtab. This is nasty bug. * the original locking code had bad performance due too long sleep (1s), between attempts. Now we're more aggressive and we use 5000ms. The result is that more processes is able to lock mtab in short time slice. Thanks to Peter Rockai who found the problem and suggest a first version of the code with usleep. * now we don't count number of attempts anymore, but we count sum of time which we spend in the mtab_lock(). The number of attempts is not important (and it also depends on CPU performance, load, scheduler, ...), the important thing is how long we spend with locking. Now time limit is 30s. Signed-off-by: Karel Zak --- mount/fstab.c | 128 ++++++++++++++++++++----------- tests/expected/ts-mount-mtablock | 2 +- tests/ts-mount-mtablock | 4 +- 3 files changed, 86 insertions(+), 48 deletions(-) diff --git a/mount/fstab.c b/mount/fstab.c index 2c00dea5f..bdcc7a674 100644 --- a/mount/fstab.c +++ b/mount/fstab.c @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include "mount_mntent.h" #include "fstab.h" #include "sundries.h" @@ -392,6 +394,7 @@ getfsvolspec (const char *label) { /* Flag for already existing lock file. */ static int we_created_lockfile = 0; +static int lockfile_fd = -1; /* Flag to indicate that signals have been set up. */ static int signals_have_been_setup = 0; @@ -413,6 +416,8 @@ setlkw_timeout (int sig) { void unlock_mtab (void) { if (we_created_lockfile) { + close(lockfile_fd); + lockfile_fd = -1; unlink (MOUNTED_LOCK); we_created_lockfile = 0; } @@ -438,9 +443,32 @@ unlock_mtab (void) { #define MOUNTLOCK_LINKTARGET MOUNTED_LOCK "%d" #define MOUNTLOCK_LINKTARGET_LTH (sizeof(MOUNTED_LOCK)+20) +/* + * The original mount locking code has used sleep(1) between attempts and + * maximal number of attemps has been 5. + * + * There was very small number of attempts and extremely long waiting (1s) + * that is useless on machines with large number of concurret mount processes. + * + * Now we wait few thousand microseconds between attempts and we have global + * time limit (30s) rather than limit for number of attempts. The advantage + * is that this method also counts time which we spend in fcntl(F_SETLKW) and + * number of attempts is not so much restricted. + * + * -- kzak@redhat.com [2007-Mar-2007] + */ + +/* maximum seconds between first and last attempt */ +#define MOUNTLOCK_MAXTIME 30 + +/* sleep time (in microseconds, max=999999) between attempts */ +#define MOUNTLOCK_WAITTIME 5000 + void lock_mtab (void) { - int tries = 3; + int i; + struct timespec waittime; + struct timeval maxtime; char linktargetfile[MOUNTLOCK_LINKTARGET_LTH]; at_die = unlock_mtab; @@ -452,7 +480,7 @@ lock_mtab (void) { sa.sa_handler = handler; sa.sa_flags = 0; sigfillset (&sa.sa_mask); - + while (sigismember (&sa.sa_mask, ++sig) != -1 && sig != SIGCHLD) { if (sig == SIGALRM) @@ -466,45 +494,55 @@ lock_mtab (void) { sprintf(linktargetfile, MOUNTLOCK_LINKTARGET, getpid ()); + i = open (linktargetfile, O_WRONLY|O_CREAT, 0); + if (i < 0) { + int errsv = errno; + /* linktargetfile does not exist (as a file) + and we cannot create it. Read-only filesystem? + Too many files open in the system? + Filesystem full? */ + die (EX_FILEIO, _("can't create lock file %s: %s " + "(use -n flag to override)"), + linktargetfile, strerror (errsv)); + } + close(i); + + gettimeofday(&maxtime, NULL); + maxtime.tv_sec += MOUNTLOCK_MAXTIME; + + waittime.tv_sec = 0; + waittime.tv_nsec = (1000 * MOUNTLOCK_WAITTIME); + /* Repeat until it was us who made the link */ while (!we_created_lockfile) { + struct timeval now; struct flock flock; - int errsv, fd, i, j; - - i = open (linktargetfile, O_WRONLY|O_CREAT, 0); - if (i < 0) { - int errsv = errno; - /* linktargetfile does not exist (as a file) - and we cannot create it. Read-only filesystem? - Too many files open in the system? - Filesystem full? */ - die (EX_FILEIO, _("can't create lock file %s: %s " - "(use -n flag to override)"), - linktargetfile, strerror (errsv)); - } - close(i); + int errsv, j; j = link(linktargetfile, MOUNTED_LOCK); errsv = errno; - (void) unlink(linktargetfile); - if (j == 0) we_created_lockfile = 1; if (j < 0 && errsv != EEXIST) { + (void) unlink(linktargetfile); die (EX_FILEIO, _("can't link lock file %s: %s " "(use -n flag to override)"), MOUNTED_LOCK, strerror (errsv)); } - fd = open (MOUNTED_LOCK, O_WRONLY); + lockfile_fd = open (MOUNTED_LOCK, O_WRONLY); - if (fd < 0) { - int errsv = errno; + if (lockfile_fd < 0) { /* Strange... Maybe the file was just deleted? */ - if (errno == ENOENT && tries-- > 0) + int errsv = errno; + gettimeofday(&now, NULL); + if (errno == ENOENT && now.tv_sec < maxtime.tv_sec) { + we_created_lockfile = 0; continue; + } + (void) unlink(linktargetfile); die (EX_FILEIO, _("can't open lock file %s: %s " "(use -n flag to override)"), MOUNTED_LOCK, strerror (errsv)); @@ -517,38 +555,38 @@ lock_mtab (void) { if (j == 0) { /* We made the link. Now claim the lock. */ - if (fcntl (fd, F_SETLK, &flock) == -1) { + if (fcntl (lockfile_fd, F_SETLK, &flock) == -1) { if (verbose) { int errsv = errno; printf(_("Can't lock lock file %s: %s\n"), MOUNTED_LOCK, strerror (errsv)); } - /* proceed anyway */ + /* proceed, since it was us who created the lockfile anyway */ } + (void) unlink(linktargetfile); } else { - static int tries = 0; - /* Someone else made the link. Wait. */ - alarm(LOCK_TIMEOUT); - if (fcntl (fd, F_SETLKW, &flock) == -1) { - int errsv = errno; - die (EX_FILEIO, _("can't lock lock file %s: %s"), - MOUNTED_LOCK, (errno == EINTR) ? - _("timed out") : strerror (errsv)); - } - alarm(0); - /* Limit the number of iterations - maybe there - still is some old /etc/mtab~ */ - if (tries++ > 3) { - if (tries > 5) - die (EX_FILEIO, _("Cannot create link %s\n" - "Perhaps there is a stale lock file?\n"), - MOUNTED_LOCK); - sleep(1); - } - } + gettimeofday(&now, NULL); + if (now.tv_sec < maxtime.tv_sec) { + alarm(maxtime.tv_sec - now.tv_sec); + if (fcntl (lockfile_fd, F_SETLKW, &flock) == -1) { + int errsv = errno; + (void) unlink(linktargetfile); + die (EX_FILEIO, _("can't lock lock file %s: %s"), + MOUNTED_LOCK, (errno == EINTR) ? + _("timed out") : strerror (errsv)); + } + alarm(0); - close(fd); + nanosleep(&waittime, NULL); + } else { + (void) unlink(linktargetfile); + die (EX_FILEIO, _("Cannot create link %s\n" + "Perhaps there is a stale lock file?\n"), + MOUNTED_LOCK); + } + close(lockfile_fd); + } } } diff --git a/tests/expected/ts-mount-mtablock b/tests/expected/ts-mount-mtablock index c5b431b6c..ccfc37a15 100644 --- a/tests/expected/ts-mount-mtablock +++ b/tests/expected/ts-mount-mtablock @@ -1 +1 @@ -50 \ No newline at end of file +50000 diff --git a/tests/ts-mount-mtablock b/tests/ts-mount-mtablock index 44834dc30..a3d075ee2 100755 --- a/tests/ts-mount-mtablock +++ b/tests/ts-mount-mtablock @@ -16,8 +16,8 @@ TS_DESC="mtablock" # 2GHz machine)). It has terrible performance due a bad timeouts implemntation # in lock_mtab(). # -NLOOPS=10 -NPROCESSES=5 +NLOOPS=1000 +NPROCESSES=50 ts_init