util-linux/misc-utils/hardlink.c

1179 lines
34 KiB
C
Raw Normal View History

/* hardlink.c - Link multiple identical files together
*
* Copyright (C) 2008 - 2014 Julian Andres Klode <jak@jak-linux.org>
* Copyright (C) 2021 Karel Zak <kzak@redhat.com>
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#define _POSIX_C_SOURCE 200112L /* POSIX functions */
#define _XOPEN_SOURCE 600 /* nftw() */
#include <sys/types.h> /* stat */
#include <sys/stat.h> /* stat */
#include <sys/time.h> /* getrlimit, getrusage */
#include <sys/resource.h> /* getrlimit, getrusage */
#include <fcntl.h> /* posix_fadvise */
#include <ftw.h> /* ftw */
#include <search.h> /* tsearch() and friends */
#include <signal.h> /* SIG*, sigaction */
#include <unistd.h> /* write(), STDOUT_FILENO */
#include <getopt.h> /* getopt_long() */
#include <ctype.h> /* tolower() */
#include "nls.h"
#include "c.h"
/* Use libpcreposix if it's available, it's cooler */
#if defined(HAVE_libpcre2_posix)
#warning hie
#include <pcre2posix.h>
#undef REG_NOSUB
#define REG_NOSUB 0 /* we do want backreferences in PCRE mode */
#elif defined(HAVE_libpcreposix)
#include <pcreposix.h>
#undef REG_NOSUB
#define REG_NOSUB 0 /* we do want backreferences in PCRE mode */
#else
#include <regex.h> /* regcomp(), regsearch() */
#endif
#ifdef HAVE_XATTR
#include <sys/xattr.h> /* listxattr, getxattr */
#endif
/**
 * struct file - Information about a file
 * @st: The stat buffer associated with the file
 * @next: Next file with the same size
 * @links: Singly linked list of the path names recorded for this file
 *
 * Each element of @links is a struct link:
 * @links->next: The next path name in the list, or NULL
 * @links->basename: The offset of the basename within @links->path
 * @links->path: The path of the file, stored inline after the header
 *               (flexible array member; older compilers get a [0]/[1] array)
 *
 * This contains all information we need about a file.
 */
struct file {
struct stat st;
struct file *next;
struct link {
struct link *next;
int basename;
#if __STDC_VERSION__ >= 199901L
char path[];
#elif __GNUC__
char path[0];
#else
char path[1];
#endif
} *links;
};
/**
 * enum log_level - Logging levels
 * @JLOG_SYSFAT: Fatal error message with errno, will be printed to stderr
 * @JLOG_FATAL: Fatal error message (no errno text), will be printed to stderr
 * @JLOG_SYSERR: Error message with errno, will be printed to stderr
 * @JLOG_ERROR: Error message, will be printed to stderr
 * @JLOG_SUMMARY: Default log level
 * @JLOG_INFO: Verbose logging (verbose == 1)
 * @JLOG_DEBUG1: Verbosity 2
 * @JLOG_DEBUG2: Verbosity 3
 *
 * Negative levels go to stderr, all others to stdout. jlog() prefixes
 * levels up to and including JLOG_FATAL with "ERROR: " and the remaining
 * negative levels with "WARNING: ".
 */
enum log_level {
JLOG_SYSFAT = -4,
JLOG_FATAL = -3,
JLOG_SYSERR = -2,
JLOG_ERROR = -1,
JLOG_SUMMARY,
JLOG_INFO,
JLOG_DEBUG1,
JLOG_DEBUG2
};
/**
 * struct statistics - Statistics about the current run
 * @started: Whether we are post command-line processing
 * @files: The number of files worked on
 * @linked: The number of files replaced by a hardlink to a master
 * @xattr_comparisons: The number of extended attribute comparisons
 * @comparisons: The number of file content comparisons
 * @saved: The (exaggerated) amount of space saved, in bytes
 * @start_time: The time we started at, in seconds since some unspecified point
 *
 * The single instance, stats, is reported by print_stats().
 */
static struct statistics {
int started;
size_t files;
size_t linked;
size_t xattr_comparisons;
size_t comparisons;
double saved;
double start_time;
} stats;
/**
 * struct options - Processed command-line options
 * @include: A linked list of regular expressions for the --include option
 * @exclude: A linked list of regular expressions for the --exclude option
 * @verbosity: The verbosity. Should be one of #enum log_level
 * @respect_mode: Whether to respect file modes (default = TRUE)
 * @respect_owner: Whether to respect file owners (uid, gid; default = TRUE)
 * @respect_name: Whether to respect file names (default = FALSE)
 * @respect_time: Whether to respect file modification times (default = TRUE)
 * @respect_xattrs: Whether to respect extended attributes (default = FALSE)
 * @maximise: Choose the file with the highest link count as master
 * @minimise: Choose the file with the lowest link count as master
 * @keep_oldest: Choose the file with oldest timestamp as master (default = FALSE)
 * @dry_run: Specifies whether hardlink should not link files (default = FALSE)
 * @min_size: Minimum size of files to consider, in bytes (default = 1 byte)
 *
 * Each element of @include / @exclude is a struct regex_link holding one
 * compiled expression (preg) and a pointer to the next element.
 */
static struct options {
struct regex_link {
regex_t preg;
struct regex_link *next;
} *include, *exclude;
signed int verbosity;
unsigned int respect_mode:1;
unsigned int respect_owner:1;
unsigned int respect_name:1;
unsigned int respect_time:1;
unsigned int respect_xattrs:1;
unsigned int maximise:1;
unsigned int minimise:1;
unsigned int keep_oldest:1;
unsigned int dry_run:1;
unsigned long long min_size;
} opts;
/*
 * files
 *
 * A binary tree of files, managed using tsearch(). To see which nodes
 * are considered equal, see compare_nodes()
 *
 * files_by_ino
 *
 * A second tsearch() tree over the same #struct file objects, ordered by
 * compare_nodes_ino(). inserter() consults it first so that a further path
 * to an already recorded file is appended to that file's list of links
 * instead of creating a new entry in files.
 */
static void *files;
static void *files_by_ino;
/*
 * last_signal
 *
 * The last signal we received. We store the signal here in order to be able
 * to break out of loops gracefully and to return from our nftw() handler.
 *
 * The variable is written from the signal handler and polled from normal
 * code, so it has to be a volatile sig_atomic_t: that is the only object
 * type the C standard guarantees can be safely stored to from a handler,
 * and volatile keeps the compiler from caching the value in polling loops.
 */
static volatile sig_atomic_t last_signal;
__attribute__ ((format(printf, 2, 3)))
/**
 * jlog - Logging for hardlink
 * @level: The log level of the message
 * @format: A format string for printf(), followed by its arguments
 *
 * Messages above the configured verbosity are dropped. Negative levels are
 * written to stderr, everything else to stdout. Levels up to JLOG_FATAL get
 * an "ERROR: " prefix, the other negative levels a "WARNING: " prefix. For
 * JLOG_SYSERR and JLOG_SYSFAT the text for the errno value seen on entry is
 * appended. A newline always terminates the message.
 */
static void jlog(enum log_level level, const char *format, ...)
{
	/* Grab errno first; the stdio calls below may overwrite it. */
	const int saved_errno = errno;
	FILE *out;
	va_list ap;

	if (level > opts.verbosity)
		return;

	out = (level < 0) ? stderr : stdout;

	if (level <= JLOG_FATAL)
		fputs("ERROR: ", out);
	else if (level < 0)
		fputs("WARNING: ", out);

	va_start(ap, format);
	vfprintf(out, format, ap);
	va_end(ap);

	switch (level) {
	case JLOG_SYSERR:
	case JLOG_SYSFAT:
		fprintf(out, ": %s\n", strerror(saved_errno));
		break;
	default:
		fputc('\n', out);
		break;
	}
}
/**
 * CMP - Three-way comparison of two numerical values
 * @a: First value
 * @b: Second value
 *
 * Evaluates to 1 if @a is greater than @b, -1 if it is smaller and 0
 * otherwise. Only relational operators are used, so no subtraction of the
 * operands can overflow. Both arguments are evaluated twice.
 */
#define CMP(a, b) (((a) > (b)) - ((a) < (b)))
/**
 * format - Print a human-readable name for the given size
 * @bytes: A number specifying an amount of bytes
 *
 * Picks the largest of GiB, MiB and KiB that @bytes reaches and prints the
 * scaled value with two decimals; smaller values are printed as whole bytes.
 *
 * Returns a pointer to a static buffer that the next call overwrites.
 * Uses a double. The result with infinity and NaN is most likely
 * not pleasant.
 */
static const char *format(double bytes)
{
	static char buf[256];
	const double kib = 1024.0;
	const double mib = kib * 1024.0;
	const double gib = mib * 1024.0;

	if (bytes >= gib) {
		snprintf(buf, sizeof(buf), "%.2f GiB", bytes / gib);
		return buf;
	}
	if (bytes >= mib) {
		snprintf(buf, sizeof(buf), "%.2f MiB", bytes / mib);
		return buf;
	}
	if (bytes >= kib) {
		snprintf(buf, sizeof(buf), "%.2f KiB", bytes / kib);
		return buf;
	}
	snprintf(buf, sizeof(buf), "%.0f bytes", bytes);
	return buf;
}
/**
 * gettime() - Get the current time from the system
 *
 * Returns the gettimeofday() result as seconds with a fractional part.
 * If the call fails a warning is logged and the zero-initialised value
 * (0.0) is returned.
 */
static double gettime(void)
{
	struct timeval now = { 0, 0 };
	double seconds;

	if (gettimeofday(&now, NULL) != 0)
		jlog(JLOG_SYSERR, "Cannot read current time");

	seconds = (double) now.tv_sec;
	seconds += (double) now.tv_usec / 1000000;
	return seconds;
}
/**
 * regexec_any - Match against multiple regular expressions
 * @pregs: A linked list of regular expressions (may be NULL)
 * @what: The string to match against
 *
 * Returns %TRUE as soon as one expression of the list matches @what and
 * %FALSE if none does, which includes the empty list.
 */
static int regexec_any(struct regex_link *pregs, const char *what)
{
	struct regex_link *cur = pregs;

	while (cur != NULL) {
		if (regexec(&cur->preg, what, 0, NULL, 0) == 0)
			return TRUE;
		cur = cur->next;
	}
	return FALSE;
}
/**
 * compare_nodes - Node comparison function for the by-size tree
 * @_a: The first node (a #struct file)
 * @_b: The second node (a #struct file)
 *
 * Orders nodes by device first and by file size second, so two files on
 * the same device with the same size compare equal.
 */
static int compare_nodes(const void *_a, const void *_b)
{
	const struct file *lhs = _a;
	const struct file *rhs = _b;
	const int by_dev = CMP(lhs->st.st_dev, rhs->st.st_dev);

	if (by_dev != 0)
		return by_dev;
	return CMP(lhs->st.st_size, rhs->st.st_size);
}
/**
 * compare_nodes_ino - Node comparison function for the by-inode tree
 * @_a: The first node (a #struct file)
 * @_b: The second node (a #struct file)
 *
 * Orders nodes by device, then by inode number. When opts.respect_name is
 * set, the basename of the first link is used as a final key, which
 * restricts a struct file to links sharing one basename and keeps the rest
 * of the program simple.
 */
static int compare_nodes_ino(const void *_a, const void *_b)
{
	const struct file *lhs = _a;
	const struct file *rhs = _b;
	int order;

	order = CMP(lhs->st.st_dev, rhs->st.st_dev);
	if (order != 0)
		return order;

	order = CMP(lhs->st.st_ino, rhs->st.st_ino);
	if (order != 0 || !opts.respect_name)
		return order;

	return strcmp(lhs->links->path + lhs->links->basename,
		      rhs->links->path + rhs->links->basename);
}
/**
 * print_stats - Print statistics to stdout
 *
 * Emits one JLOG_SUMMARY line per counter kept in stats, followed by the
 * time elapsed since stats.start_time. The xattr counter is only reported
 * when built with HAVE_XATTR.
 */
static void print_stats(void)
{
	const char *mode = opts.dry_run ? "dry-run" : "real";
	const char *saved;
	double elapsed;

	jlog(JLOG_SUMMARY, "Mode: %s", mode);
	jlog(JLOG_SUMMARY, "Files: %zu", stats.files);
	jlog(JLOG_SUMMARY, "Linked: %zu files", stats.linked);
#ifdef HAVE_XATTR
	jlog(JLOG_SUMMARY, "Compared: %zu xattrs", stats.xattr_comparisons);
#endif
	jlog(JLOG_SUMMARY, "Compared: %zu files", stats.comparisons);

	saved = format(stats.saved);
	jlog(JLOG_SUMMARY, "Saved: %s", saved);

	elapsed = gettime() - stats.start_time;
	jlog(JLOG_SUMMARY, "Duration: %.2f seconds", elapsed);
}
/**
 * handle_interrupt - Handle a signal
 *
 * Looks at the signal recorded in last_signal. SIGINT and SIGTERM are left
 * pending, so every later call reports them again. SIGUSR1 prints the
 * statistics followed by an empty line. Anything but SIGINT/SIGTERM is
 * cleared afterwards.
 *
 * Returns: %TRUE on SIGINT, SIGTERM; %FALSE on all other signals.
 */
static int handle_interrupt(void)
{
	/* Read the flag once so a signal arriving meanwhile cannot split the decision. */
	const int sig = last_signal;

	if (sig == SIGINT || sig == SIGTERM)
		return TRUE;

	if (sig == SIGUSR1) {
		print_stats();
		putchar('\n');
	}

	last_signal = 0;
	return FALSE;
}
#ifdef HAVE_XATTR
/**
 * malloc_or_die -- Wrapper for malloc()
 * @size: The number of bytes to allocate
 *
 * This does the same thing as malloc() except that it aborts if memory
 * can't be allocated.
 *
 * malloc(0) is allowed to return NULL even when nothing went wrong, and a
 * zero-length request does happen here (an extended attribute with an empty
 * value). To avoid reporting that as out-of-memory, at least one byte is
 * always requested, so the result is never NULL and can always be passed
 * to free().
 */
static void *malloc_or_die(size_t size)
{
	void *mem = malloc(size != 0 ? size : 1);

	if (!mem) {
		jlog(JLOG_SYSFAT, "Cannot allocate memory");
		exit(1);
	}
	return mem;
}
/**
 * llistxattr_or_die - Wrapper for llistxattr()
 * @path: The file whose attribute names are listed
 * @list: Buffer receiving the names, or NULL to query the needed size
 * @size: Size of @list in bytes
 *
 * This does the same thing as llistxattr() except that it aborts if any error
 * other than "not supported" is detected. For ENOTSUP the negative result
 * of llistxattr() is handed back to the caller.
 */
static ssize_t llistxattr_or_die(const char *path, char *list, size_t size)
{
	const ssize_t len = llistxattr(path, list, size);

	if (len >= 0 || errno == ENOTSUP)
		return len;

	jlog(JLOG_SYSFAT, "Cannot get xattr names for %s", path);
	exit(1);
}
/**
 * lgetxattr_or_die - Wrapper for lgetxattr()
 * @path: The file to read the attribute from
 * @name: The name of the extended attribute
 * @value: Buffer receiving the value, or NULL to query the needed size
 * @size: Size of @value in bytes
 *
 * This does the same thing as lgetxattr() except that it aborts upon error,
 * so the returned length is never negative.
 */
static ssize_t lgetxattr_or_die(const char *path, const char *name, void *value,
				size_t size)
{
	const ssize_t len = lgetxattr(path, name, value, size);

	if (len >= 0)
		return len;

	jlog(JLOG_SYSFAT, "Cannot get xattr value of %s for %s", name, path);
	exit(1);
}
/**
 * get_xattr_name_count - Count the number of xattr names
 * @names: a non-empty table of concatenated, null-terminated xattr names
 * @len: the total length of the table
 *
 * Steps from one terminator to the next until the end of the table is
 * reached.
 *
 * @Returns the number of xattr names
 */
static int get_xattr_name_count(const char *const names, ssize_t len)
{
	const char *const end = names + len;
	const char *cursor = names;
	int total = 0;

	while (cursor < end) {
		cursor += strlen(cursor) + 1;
		total++;
	}
	return total;
}
/**
 * cmp_xattr_name_ptrs - qsort() callback ordering xattr names
 * @ptr1: Pointer to the first name pointer
 * @ptr2: Pointer to the second name pointer
 *
 * Compares two pointers to xattr names by comparing the names they point
 * to with strcmp().
 */
static int cmp_xattr_name_ptrs(const void *ptr1, const void *ptr2)
{
	char *const *lhs = ptr1;
	char *const *rhs = ptr2;

	return strcmp(*lhs, *rhs);
}
/**
 * get_sorted_xattr_name_table - Create a sorted table of xattr names.
 * @names - table of concatenated, null-terminated xattr names
 * @n - the number of names
 *
 * The table entries point into @names, so @names has to stay alive for as
 * long as the table is used. The caller frees the table itself.
 *
 * @Returns allocated table of pointers to the names, sorted alphabetically
 */
static const char **get_sorted_xattr_name_table(const char *names, int n)
{
	const char **table = malloc_or_die(n * sizeof(char *));
	const char *cursor = names;
	int slot = 0;

	while (slot < n) {
		table[slot++] = cursor;
		cursor += strlen(cursor) + 1;
	}

	qsort(table, n, sizeof(char *), cmp_xattr_name_ptrs);
	return table;
}
/**
 * file_xattrs_equal - Compare the extended attributes of two files
 * @a: The first file
 * @b: The second file
 *
 * The attribute name lists of both files are fetched, sorted and compared
 * slot by slot; for every name the values are fetched and compared too.
 * Only the first link of each file is inspected.
 *
 * Names and values are read in two steps (size probe, then data), so a file
 * may change between the steps. Such a mismatch is reported as "not equal"
 * rather than being asserted on: the files are then simply not linked, and
 * the behaviour does not depend on NDEBUG.
 *
 * @Returns: %TRUE if and only if extended attributes are equal. %FALSE is
 * also returned when the user interrupts the comparison.
 */
static int file_xattrs_equal(const struct file *a, const struct file *b)
{
	ssize_t len_a;
	ssize_t len_b;
	char *names_a = NULL;
	char *names_b = NULL;
	int n_a;
	int n_b;
	const char **name_ptrs_a = NULL;
	const char **name_ptrs_b = NULL;
	void *value_a = NULL;
	void *value_b = NULL;
	int ret = FALSE;
	int i;

	assert(a->links != NULL);
	assert(b->links != NULL);

	jlog(JLOG_DEBUG1, "Comparing xattrs of %s to %s", a->links->path,
	     b->links->path);

	stats.xattr_comparisons++;

	/* First pass: ask only for the size of the name lists. */
	len_a = llistxattr_or_die(a->links->path, NULL, 0);
	len_b = llistxattr_or_die(b->links->path, NULL, 0);

	if (len_a <= 0 && len_b <= 0)
		return TRUE;	/* xattrs not supported or neither file has any */

	if (len_a != len_b)
		return FALSE;	/* total lengths of xattr names differ */

	names_a = malloc_or_die(len_a);
	names_b = malloc_or_die(len_b);

	len_a = llistxattr_or_die(a->links->path, names_a, len_a);
	len_b = llistxattr_or_die(b->links->path, names_b, len_b);
	if (len_a <= 0 || len_a != len_b)
		goto out;	/* the lists changed since the size probe */

	n_a = get_xattr_name_count(names_a, len_a);
	n_b = get_xattr_name_count(names_b, len_b);

	if (n_a != n_b)
		goto out;	/* numbers of xattrs differ */

	name_ptrs_a = get_sorted_xattr_name_table(names_a, n_a);
	name_ptrs_b = get_sorted_xattr_name_table(names_b, n_b);

	/* We now have two sorted tables of xattr names. */
	for (i = 0; i < n_a; i++) {
		if (handle_interrupt())
			goto out;	/* user wants to quit */

		if (strcmp(name_ptrs_a[i], name_ptrs_b[i]) != 0)
			goto out;	/* names at same slot differ */

		len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i], NULL, 0);
		len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i], NULL, 0);

		if (len_a != len_b)
			goto out;	/* same name, different value lengths */

		value_a = malloc_or_die(len_a);
		value_b = malloc_or_die(len_b);

		len_a = lgetxattr_or_die(a->links->path, name_ptrs_a[i],
					 value_a, len_a);
		len_b = lgetxattr_or_die(b->links->path, name_ptrs_b[i],
					 value_b, len_b);
		if (len_a != len_b)
			goto out;	/* a value changed since the size probe */

		if (memcmp(value_a, value_b, len_a) != 0)
			goto out;	/* same name, different values */

		free(value_a);
		free(value_b);
		value_a = NULL;
		value_b = NULL;
	}

	ret = TRUE;

out:
	free(names_a);
	free(names_b);
	free(name_ptrs_a);
	free(name_ptrs_b);
	free(value_a);
	free(value_b);
	return ret;
}
#else
/*
 * Built without HAVE_XATTR: there is nothing to compare, so every pair of
 * files is reported as having equal extended attributes.
 */
static int file_xattrs_equal(const struct file *a, const struct file *b)
{
	(void) a;
	(void) b;

	return TRUE;
}
#endif
/**
 * file_contents_equal - Compare contents of two files for equality
 * @a: The first file
 * @b: The second file
 *
 * Reads the first link of both files in 8 KiB chunks and compares them.
 * An open or read error is logged and makes the files count as different.
 *
 * Returns non-zero only if both files were read completely without a
 * difference and no interrupt is pending.
 */
static int file_contents_equal(const struct file *a, const struct file *b)
{
	FILE *fa = NULL;
	FILE *fb = NULL;
	char buf_a[8192];
	char buf_b[8192];
	int cmp = 0;		/* zero => equal */

	assert(a->links != NULL);
	assert(b->links != NULL);

	jlog(JLOG_DEBUG1, "Comparing %s to %s", a->links->path, b->links->path);

	stats.comparisons++;

	if ((fa = fopen(a->links->path, "rb")) == NULL)
		goto err;
	if ((fb = fopen(b->links->path, "rb")) == NULL)
		goto err;

#if defined(POSIX_FADV_SEQUENTIAL) && defined(HAVE_POSIX_FADVISE)
	posix_fadvise(fileno(fa), 0, 0, POSIX_FADV_SEQUENTIAL);
	posix_fadvise(fileno(fb), 0, 0, POSIX_FADV_SEQUENTIAL);
#endif

	while (!handle_interrupt() && cmp == 0) {
		size_t ca;
		size_t cb;

		ca = fread(buf_a, 1, sizeof(buf_a), fa);
		if (ca < sizeof(buf_a) && ferror(fa))
			goto err;

		cb = fread(buf_b, 1, sizeof(buf_b), fb);
		if (cb < sizeof(buf_b) && ferror(fb))
			goto err;

		/* Different chunk sizes mean different files; 0 means both hit EOF. */
		if (ca != cb || ca == 0) {
			cmp = CMP(ca, cb);
			break;
		}
		cmp = memcmp(buf_a, buf_b, ca);
	}
out:
	if (fa != NULL)
		fclose(fa);
	if (fb != NULL)
		fclose(fb);
	return !handle_interrupt() && cmp == 0;
err:
	/* If either stream is missing the failure was an fopen(), else a read. */
	if (fa == NULL || fb == NULL)
		jlog(JLOG_SYSERR, "Cannot open %s",
		     fa ? b->links->path : a->links->path);
	else
		jlog(JLOG_SYSERR, "Cannot read %s",
		     ferror(fa) ? a->links->path : b->links->path);
	cmp = 1;
	goto out;
}
/**
* file_may_link_to - Check whether a file may replace another one
* @a: The first file
* @b: The second file
*
* Check whether the two fies are considered equal and can be linked
* together. If the two files are identical, the result will be FALSE,
* as replacing a link with an identical one is stupid.
*/
static int file_may_link_to(const struct file *a, const struct file *b)
{
return (a->st.st_size != 0 &&
a->st.st_size == b->st.st_size &&
a->links != NULL && b->links != NULL &&
a->st.st_dev == b->st.st_dev &&
a->st.st_ino != b->st.st_ino &&
(!opts.respect_mode || a->st.st_mode == b->st.st_mode) &&
(!opts.respect_owner || a->st.st_uid == b->st.st_uid) &&
(!opts.respect_owner || a->st.st_gid == b->st.st_gid) &&
(!opts.respect_time || a->st.st_mtime == b->st.st_mtime) &&
(!opts.respect_name
|| strcmp(a->links->path + a->links->basename,
b->links->path + b->links->basename) == 0) &&
(!opts.respect_xattrs || file_xattrs_equal(a, b)) &&
file_contents_equal(a, b));
}
/**
 * file_compare - Compare two files to decide which should be master
 * @a: The first file
 * @b: The second file
 *
 * Check which of the files should be considered greater and thus serve
 * as the master when linking (the master is the file that all equal files
 * will be replaced with).
 *
 * The keys are, in order: link count (only with maximise/minimise),
 * modification time (direction depends on keep_oldest) and finally the
 * inode number. Two entries for the same inode always compare equal.
 */
static int file_compare(const struct file *a, const struct file *b)
{
	int order;

	if (a->st.st_dev == b->st.st_dev && a->st.st_ino == b->st.st_ino)
		return 0;

	if (opts.maximise) {
		order = CMP(a->st.st_nlink, b->st.st_nlink);
		if (order != 0)
			return order;
	}
	if (opts.minimise) {
		order = CMP(b->st.st_nlink, a->st.st_nlink);
		if (order != 0)
			return order;
	}

	if (opts.keep_oldest)
		order = CMP(b->st.st_mtime, a->st.st_mtime);
	else
		order = CMP(a->st.st_mtime, b->st.st_mtime);
	if (order != 0)
		return order;

	return CMP(b->st.st_ino, a->st.st_ino);
}
/**
 * file_link - Replace b with a link to a
 * @a: The first file (the master)
 * @b: The second file, whose links are replaced
 *
 * Every path recorded for @b is replaced by a hard link to the first path
 * of @a. The link is first created under a temporary name and then renamed
 * to the name of @b, making the replace atomic (@b will always exist).
 * Each replaced path is moved from @b's list of links to @a's. In dry-run
 * mode only the bookkeeping is done.
 *
 * Returns %TRUE once all links of @b have been moved. On the first link()
 * or rename() failure %FALSE is returned; errno then still holds the error
 * of that call, because the caller inspects it (EMLINK) and the logging and
 * cleanup done here would otherwise overwrite it.
 */
static int file_link(struct file *a, struct file *b)
{
	static const char suffix[] = ".hardlink-temporary";

	do {
		struct link *moved;

		assert(a->links != NULL);
		assert(b->links != NULL);

		jlog(JLOG_INFO, "%sLinking %s to %s (-%s)",
		     opts.dry_run ? "[DryRun] " : "", a->links->path, b->links->path,
		     format(a->st.st_size));

		if (!opts.dry_run) {
			/* sizeof(suffix) already accounts for the terminating NUL */
			size_t len = strlen(b->links->path) + sizeof(suffix);
			char *new_path = malloc(len);
			int saved_errno;

			if (new_path == NULL) {
				jlog(JLOG_SYSFAT, "Cannot allocate memory");
				exit(1);
			}

			snprintf(new_path, len, "%s%s", b->links->path, suffix);

			if (link(a->links->path, new_path) != 0) {
				saved_errno = errno;
				jlog(JLOG_SYSERR, "Cannot link %s to %s",
				     a->links->path, new_path);
				free(new_path);
				errno = saved_errno;
				return FALSE;
			}
			if (rename(new_path, b->links->path) != 0) {
				saved_errno = errno;
				/* Name the two paths that were actually passed to rename(). */
				jlog(JLOG_SYSERR, "Cannot rename %s to %s",
				     new_path, b->links->path);
				unlink(new_path);	/* cleanup failed rename */
				free(new_path);
				errno = saved_errno;
				return FALSE;
			}
			free(new_path);
		}

		/* Update statistics */
		stats.linked++;

		/* Increase the link count of this file, and set stat() of other file */
		a->st.st_nlink++;
		b->st.st_nlink--;

		/* Space is only counted as saved once the last link to b is gone. */
		if (b->st.st_nlink == 0)
			stats.saved += a->st.st_size;

		/* Move the link from file b to a, right behind a's first link */
		moved = b->links;
		b->links = moved->next;
		moved->next = a->links->next;
		a->links->next = moved;
	} while (b->links != NULL);

	return TRUE;
}
/**
 * inserter - Callback function for nftw()
 * @fpath: The path of the file being visited
 * @sb: The stat information of the file
 * @typeflag: The type flag
 * @ftwbuf: Contains current level of nesting and offset of basename
 *
 * Called by nftw() for the files. See the manual page for nftw() for
 * further information.
 *
 * Regular files that pass the include/exclude and minimum-size filters are
 * recorded: a further path to an already known file is appended to that
 * file's list of links, a new file is added to both trees.
 *
 * Returns 0 to continue the walk, 1 to stop it (pending interrupt or an
 * allocation/tsearch() failure).
 */
static int inserter(const char *fpath, const struct stat *sb, int typeflag,
struct FTW *ftwbuf)
{
struct file *fil;
struct file **node;
size_t pathlen;
int included;
int excluded;
if (handle_interrupt())
return 1;
if (typeflag == FTW_DNR || typeflag == FTW_NS)
jlog(JLOG_SYSERR, "Cannot read %s", fpath);
/* Only regular files are candidates for linking */
if (typeflag != FTW_F || !S_ISREG(sb->st_mode))
return 0;
included = regexec_any(opts.include, fpath);
excluded = regexec_any(opts.exclude, fpath);
/*
 * With an exclude list, a file is skipped when it is excluded and not
 * explicitly included. With only an include list, a file is skipped
 * unless it is included.
 */
if ((opts.exclude && excluded && !included) ||
(!opts.exclude && opts.include && !included))
return 0;
/* Counted before the size check, so skipped small files are included */
stats.files++;
if (sb->st_size < opts.min_size) {
jlog(JLOG_DEBUG1, "Skipped %s (smaller than configured size)", fpath);
return 0;
}
jlog(JLOG_DEBUG2, "Visiting %s (file %zu)", fpath, stats.files);
/* pathlen includes the terminating NUL */
pathlen = strlen(fpath) + 1;
fil = calloc(1, sizeof(*fil));
if (fil == NULL)
return jlog(JLOG_SYSFAT, "Cannot continue"), 1;
/* The path is stored inline behind the struct link header */
fil->links = calloc(1, sizeof(struct link) + pathlen);
if (fil->links == NULL)
return jlog(JLOG_SYSFAT, "Cannot continue"), 1;
fil->st = *sb;
fil->links->basename = ftwbuf->base;
fil->links->next = NULL;
memcpy(fil->links->path, fpath, pathlen);
/* tsearch() returns the existing node if an equal one is already stored */
node = tsearch(fil, &files_by_ino, compare_nodes_ino);
if (node == NULL)
return jlog(JLOG_SYSFAT, "Cannot continue"), 1;
if (*node != fil) {
/* Already known inode, add link to inode information */
assert((*node)->st.st_dev == sb->st_dev);
assert((*node)->st.st_ino == sb->st_ino);
/* The new link becomes the head of the known file's list */
fil->links->next = (*node)->links;
(*node)->links = fil->links;
/* Only the link is kept; the temporary struct file is dropped */
free(fil);
} else {
/* New inode, insert into by-size table */
node = tsearch(fil, &files, compare_nodes);
if (node == NULL)
return jlog(JLOG_SYSFAT, "Cannot continue"), 1;
if (*node != fil) {
/*
 * Another file with the same device and size is already stored.
 * Chain the new one into its ->next list, keeping files that
 * compare greater per file_compare() closer to the head.
 */
struct file *l;
if (file_compare(fil, *node) >= 0) {
/* New head: store it in the tree node itself */
fil->next = *node;
*node = fil;
} else {
for (l = *node; l != NULL; l = l->next) {
/* Keep walking while the next entry still compares greater */
if (l->next != NULL && file_compare(fil, l->next) < 0)
continue;
fil->next = l->next;
l->next = fil;
break;
}
}
}
}
return 0;
}
/**
 * visitor - Callback for twalk()
 * @nodep: Pointer to a pointer to a #struct file
 * @which: At which point this visit is (preorder, postorder, endorder)
 * @depth: The depth of the node in the tree (unused)
 *
 * Visit the nodes in the binary tree. Each node holds a list of files of
 * equal size; every file that still has links acts as master in turn and
 * all later files in the list that may be linked to it are replaced by
 * links. Nodes are handled once, on the leaf or endorder visit.
 *
 * If linking fails with EMLINK, the file that could not be linked takes
 * over as master for the rest of the list. A pending interrupt ends the
 * program with exit status 1.
 */
static void visitor(const void *nodep, const VISIT which, const int depth)
{
	struct file *master;
	struct file *other;

	(void) depth;

	if (which != leaf && which != endorder)
		return;

	master = *(struct file **) nodep;
	while (master != NULL) {
		if (handle_interrupt())
			exit(1);

		if (master->links != NULL) {
			other = master->next;
			while (other != NULL) {
				if (handle_interrupt())
					exit(1);

				assert(other != other->next);
				assert(other->st.st_size == master->st.st_size);

				if (other->links != NULL &&
				    file_may_link_to(master, other)) {
					if (!file_link(master, other) && errno == EMLINK)
						master = other;
				}
				other = other->next;
			}
		}
		master = master->next;
	}
}
/**
 * version - Print the program version and exit
 *
 * Writes the version and the build date/time to stdout and terminates the
 * program with exit status 0; the function never returns.
 */
static int version(void)
{
	fputs("hardlink 0.3 RC2\n", stdout);
	fprintf(stdout, "Compiled %s at %s\n", __DATE__, __TIME__);
	exit(0);
}
/**
 * help - Print the program help and exit
 * @name: The name of the program executable (argv[0])
 *
 * Writes the usage text to stdout and terminates the program with exit
 * status 0; the function never returns. The -X line is only shown when
 * built with HAVE_XATTR, and a note about short options is added when
 * HAVE_GETOPT_LONG is not defined.
 */
static int help(const char *name)
{
	printf("Usage: %s [options] directory|file ...\n", name);
	puts("Options:");
	puts(" -V, --version show program's version number and exit");
	puts(" -h, --help show this help message and exit");
	puts(" -v, --verbose Increase verbosity (repeat for more verbosity)");
	puts(" -n, --dry-run Modify nothing, just print what would happen");
	puts(" -f, --respect-name Filenames have to be identical");
	puts(" -p, --ignore-mode Ignore changes of file mode");
	puts(" -o, --ignore-owner Ignore owner changes");
	puts(" -t, --ignore-time Ignore timestamps (when testing for equality)");
#ifdef HAVE_XATTR
	puts(" -X, --respect-xattrs Respect extended attributes");
#endif
	puts(" -m, --maximize Maximize the hardlink count, remove the file with");
	puts(" lowest hardlink count");
	puts(" -M, --minimize Reverse the meaning of -m");
	puts(" -O, --keep-oldest Keep the oldest file of multiple equal files");
	puts(" (lower precedence than minimize/maximize)");
	puts(" -x REGEXP, --exclude=REGEXP");
	puts(" Regular expression to exclude files");
	puts(" -i REGEXP, --include=REGEXP");
	puts(" Regular expression to include files/dirs");
	puts(" -s <num>[K,M,G], --minimum-size=<num>[K,M,G]");
	puts(" Minimum size for files. Optional suffix");
	puts(" allows for using KiB, MiB, or GiB");
	puts("");
	puts("Compatibility options to Jakub Jelinek's hardlink:");
	puts(" -c Compare only file contents, same as -pot");
#ifndef HAVE_GETOPT_LONG
	puts("");
	puts("Your system only supports the short option names given above.");
#endif
	exit(0);
}
/**
 * register_regex - Compile and insert a regular expression into list
 * @pregs: Pointer to a linked list of regular expressions
 * @regex: String containing the regular expression to be compiled
 *
 * On success the compiled expression becomes the new head of @pregs and 0
 * is returned. If @regex does not compile, the regerror() text is logged
 * and 1 is returned with the list unchanged. Running out of memory
 * terminates the program.
 */
static int register_regex(struct regex_link **pregs, const char *regex)
{
	struct regex_link *entry = malloc(sizeof(*entry));
	size_t msg_size;
	char *msg;
	int rc;

	if (entry == NULL) {
		jlog(JLOG_SYSFAT, "Cannot allocate memory");
		exit(1);
	}

	rc = regcomp(&entry->preg, regex, REG_NOSUB | REG_EXTENDED);
	if (rc == 0) {
		entry->next = *pregs;
		*pregs = entry;
		return 0;
	}

	/* A first regerror() call with no buffer yields the size needed. */
	msg_size = regerror(rc, &entry->preg, NULL, 0);
	msg = malloc(msg_size + 1);
	if (msg == NULL) {
		jlog(JLOG_SYSFAT, "Cannot allocate memory");
		exit(1);
	}
	regerror(rc, &entry->preg, msg, msg_size);

	jlog(JLOG_FATAL, "Could not compile regular expression %s: %s",
	     regex, msg);

	free(msg);
	free(entry);
	return 1;
}
/**
 * parse_options - Parse the command line options
 * @argc: Number of options
 * @argv: Array of options
 *
 * Sets the defaults in opts (respect mode, owner and time; minimum size of
 * one byte) and then applies the options given on the command line. -h and
 * -V print their text and terminate the program.
 *
 * Returns 0 on success and 1 if an option or its argument is invalid.
 */
static int parse_options(int argc, char *argv[])
{
	static const char optstr[] = "VhvnfpotXcmMOx:i:s:";
	static const struct option long_options[] = {
		{"version", no_argument, NULL, 'V'},
		{"help", no_argument, NULL, 'h'},
		{"verbose", no_argument, NULL, 'v'},
		{"dry-run", no_argument, NULL, 'n'},
		{"respect-name", no_argument, NULL, 'f'},
		{"ignore-mode", no_argument, NULL, 'p'},
		{"ignore-owner", no_argument, NULL, 'o'},
		{"ignore-time", no_argument, NULL, 't'},
		{"respect-xattrs", no_argument, NULL, 'X'},
		{"maximize", no_argument, NULL, 'm'},
		{"minimize", no_argument, NULL, 'M'},
		{"keep-oldest", no_argument, NULL, 'O'},
		{"exclude", required_argument, NULL, 'x'},
		{"include", required_argument, NULL, 'i'},
		{"minimum-size", required_argument, NULL, 's'},
		{NULL, 0, NULL, 0}
	};
	int opt;
	char unit = '\0';

	opts.respect_mode = TRUE;
	opts.respect_owner = TRUE;
	opts.respect_time = TRUE;
	opts.respect_xattrs = FALSE;
	opts.keep_oldest = FALSE;
	opts.min_size = 1;

	while ((opt = getopt_long(argc, argv, optstr, long_options, NULL)) != -1) {
		switch (opt) {
		case 'p':
			opts.respect_mode = FALSE;
			break;
		case 'o':
			opts.respect_owner = FALSE;
			break;
		case 't':
			opts.respect_time = FALSE;
			break;
		case 'X':
			opts.respect_xattrs = TRUE;
			break;
		case 'm':
			opts.maximise = TRUE;
			break;
		case 'M':
			opts.minimise = TRUE;
			break;
		case 'O':
			opts.keep_oldest = TRUE;
			break;
		case 'f':
			opts.respect_name = TRUE;
			break;
		case 'v':
			opts.verbosity++;
			break;
		case 'c':
			opts.respect_mode = FALSE;
			opts.respect_name = FALSE;
			opts.respect_owner = FALSE;
			opts.respect_time = FALSE;
			opts.respect_xattrs = FALSE;
			break;
		case 'n':
			opts.dry_run = 1;
			break;
		case 'h':
			return help(argv[0]);
		case 'V':
			return version();
		case 'x':
			if (register_regex(&opts.exclude, optarg) != 0)
				return 1;
			break;
		case 'i':
			if (register_regex(&opts.include, optarg) != 0)
				return 1;
			break;
		case 's':
			/*
			 * sscanf() leaves unit untouched when no suffix follows
			 * the number, so forget the suffix of an earlier -s.
			 */
			unit = '\0';
			if (sscanf(optarg, "%llu%c", &opts.min_size, &unit) < 1) {
				jlog(JLOG_ERROR, "Invalid option given to -s: %s", optarg);
				return 1;
			}
			/* <ctype.h> functions need an unsigned char value */
			switch (tolower((unsigned char) unit)) {
			case '\0':
				break;
			/* Each larger unit adds one more factor of 1024. */
			case 't':
				opts.min_size *= 1024;
				/* fallthrough */
			case 'g':
				opts.min_size *= 1024;
				/* fallthrough */
			case 'm':
				opts.min_size *= 1024;
				/* fallthrough */
			case 'k':
				opts.min_size *= 1024;
				break;
			default:
				jlog(JLOG_ERROR, "Unknown unit indicator %c.", unit);
				return 1;
			}
			jlog(JLOG_DEBUG1, "Using minimum size of %llu bytes.",
			     opts.min_size);
			break;
		case '?':
			return 1;
		default:
			/* jlog() terminates the line itself */
			jlog(JLOG_ERROR, "Unexpected invalid option: -%c", opt);
			return 1;
		}
	}
	return 0;
}
/**
 * to_be_called_atexit - Cleanup handler, also prints statistics.
 *
 * Nothing is printed while stats.started is still unset, i.e. when the
 * program ends during command-line processing.
 */
static void to_be_called_atexit(void)
{
	if (!stats.started)
		return;

	print_stats();
}
/**
 * sighandler - Signal handler, sets the global last_signal variable
 * @i: The signal number
 *
 * Records the signal for handle_interrupt(). A SIGINT that is already
 * recorded is never overwritten by a later signal. On SIGINT a newline is
 * written to standard output.
 *
 * Only async-signal-safe operations are allowed in a handler, so the
 * newline is emitted with write() instead of stdio, and errno is restored
 * because the interrupted code may be about to inspect it.
 */
static void sighandler(int i)
{
	if (last_signal != SIGINT)
		last_signal = i;

	if (i == SIGINT) {
		const int saved_errno = errno;
		ssize_t rc = write(STDOUT_FILENO, "\n", 1);

		(void) rc;	/* nothing sensible can be done on failure */
		errno = saved_errno;
	}
}
/**
 * main - Program entry point
 * @argc: Number of command-line arguments
 * @argv: The command-line arguments
 *
 * Installs the signal handler, parses the options, records every file
 * below the given paths with nftw() and finally links equal files while
 * walking the by-size tree. Statistics are printed from the atexit handler.
 *
 * Returns 0 on success and 1 on invalid usage or when the file walk was
 * aborted by the callback (fatal error or interrupt).
 */
int main(int argc, char *argv[])
{
	struct sigaction sa;
	int rc;

	/* Zero the whole structure: it may have members beyond those set here. */
	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = sighandler;
	sa.sa_flags = SA_RESTART;
	sigfillset(&sa.sa_mask);

	/* If we receive a SIGINT, end the processing */
	sigaction(SIGINT, &sa, NULL);
	sigaction(SIGUSR1, &sa, NULL);

	/* Pretty print numeric output */
	setlocale(LC_NUMERIC, "");
	stats.start_time = gettime();

	if (atexit(to_be_called_atexit) != 0) {
		jlog(JLOG_SYSFAT, "Cannot register exit handler");
		return 1;
	}

	if (parse_options(argc, argv) != 0)
		return 1;

	if (optind == argc) {
		jlog(JLOG_FATAL, "Expected file or directory names");
		return 1;
	}

	stats.started = TRUE;

	for (; optind < argc; optind++) {
		rc = nftw(argv[optind], inserter, 20, FTW_PHYS);
		if (rc == -1)
			jlog(JLOG_SYSERR, "Cannot process %s", argv[optind]);
		else if (rc != 0)
			/*
			 * nftw() hands back the non-zero value of the callback:
			 * inserter() stopped the walk because of a fatal error
			 * or an interrupt, so do not go on and link files.
			 */
			return 1;
	}

	twalk(files, visitor);
	return 0;
}