summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorChristophe Grenier <grenier@cgsecurity.org>2011-09-23 20:38:31 +0200
committerChristophe Grenier <grenier@cgsecurity.org>2011-09-23 20:38:31 +0200
commit351670b5729a0f421338c4fee62faed6e8e20c63 (patch)
tree290b455201f05930ab8a03d695c12827a69a5794 /src
parentfa3e5583346ff55b9253c9d59b9c69798dff2226 (diff)
Speed up file sorting in NTFS undelete and local file listing by using merge sort instead of insertion sort
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am4
-rw-r--r--src/askloc.c4
-rw-r--r--src/list_sort.c161
-rw-r--r--src/list_sort.h22
-rw-r--r--src/ntfs_udl.c4
5 files changed, 191 insertions, 4 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 70e1368..e62edd0 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -13,8 +13,8 @@ endif
bin_PROGRAMS = testdisk photorec fidentify $(QPHOTOREC)
EXTRA_PROGRAMS = photorecf
-base_C = autoset.c common.c crc.c ewf.c fnctdsk.c hdaccess.c hdcache.c hdwin32.c hidden.c hpa_dco.c intrf.c iso.c list.c log.c log_part.c misc.c msdos.c parti386.c partgpt.c parthumax.c partmac.c partsun.c partnone.c partxbox.c io_redir.c ntfs_io.c ntfs_utl.c partauto.c sudo.c unicode.c win32.c
-base_H = alignio.h autoset.h common.h crc.h ewf.h fnctdsk.h hdaccess.h hdwin32.h hidden.h guid_cmp.h guid_cpy.h hdcache.h hpa_dco.h intrf.h iso.h iso9660.h lang.h list.h log.h log_part.h misc.h types.h io_redir.h msdos.h ntfs_utl.h parti386.h partgpt.h parthumax.h partmac.h partsun.h partxbox.h partauto.h sudo.h unicode.h win32.h
+base_C = autoset.c common.c crc.c ewf.c fnctdsk.c hdaccess.c hdcache.c hdwin32.c hidden.c hpa_dco.c intrf.c iso.c list.c list_sort.c log.c log_part.c misc.c msdos.c parti386.c partgpt.c parthumax.c partmac.c partsun.c partnone.c partxbox.c io_redir.c ntfs_io.c ntfs_utl.c partauto.c sudo.c unicode.c win32.c
+base_H = alignio.h autoset.h common.h crc.h ewf.h fnctdsk.h hdaccess.h hdwin32.h hidden.h guid_cmp.h guid_cpy.h hdcache.h hpa_dco.h intrf.h iso.h iso9660.h lang.h list.h list_sort.h log.h log_part.h misc.h types.h io_redir.h msdos.h ntfs_utl.h parti386.h partgpt.h parthumax.h partmac.h partsun.h partxbox.h partauto.h sudo.h unicode.h win32.h
fs_C = analyse.c bfs.c bsd.c btrfs.c cramfs.c exfat.c fat.c fatx.c ext2.c jfs.c gfs2.c hfs.c hfsp.c hpfs.c luks.c lvm.c md.c netware.c ntfs.c rfs.c savehdr.c sun.c swap.c sysv.c ufs.c vmfs.c xfs.c zfs.c
fs_H = analyse.h bfs.h bsd.h btrfs.h cramfs.h exfat.h fat.h fatx.h ext2.h jfs_superblock.h jfs.h gfs2.h hfs.h hfsp.h hpfs.h luks.h lvm.h md.h netware.h ntfs.h rfs.h savehdr.h sun.h swap.h sysv.h ufs.h vmfs.h xfs.h zfs.h
diff --git a/src/askloc.c b/src/askloc.c
index fd90106..9380cb5 100644
--- a/src/askloc.c
+++ b/src/askloc.c
@@ -54,6 +54,7 @@
#include "intrf.h"
#include "intrfn.h"
#include "list.h"
+#include "list_sort.h"
#include "dir.h"
#include "askloc.h"
#include "log.h"
@@ -248,13 +249,14 @@ char *ask_location(const char*msg, const char *src_dir, const char *dst_org)
}
#endif
file_info->name=strdup(dir_entrie->d_name);
- td_list_add_sorted(&file_info->list, &dir_list.list, filesort);
+ td_list_add_tail(&file_info->list, &dir_list.list);
file_info=(file_info_t*)MALLOC(sizeof(*file_info));
}
}
}
free(file_info);
closedir(dir);
+ td_list_sort(&dir_list.list, filesort);
}
if(dir_list.list.next!=&dir_list.list)
{
diff --git a/src/list_sort.c b/src/list_sort.c
new file mode 100644
index 0000000..92318a5
--- /dev/null
+++ b/src/list_sort.c
@@ -0,0 +1,161 @@
+/*
+ File: list_sort.c
+
+ Copyright (C) 2011 Christophe GRENIER <grenier@cgsecurity.org>
+
+ This software is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc., 51
+ Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif
+#include "types.h"
+#include "list.h"
+#include "list_sort.h"
+
+#define MAX_LIST_LENGTH_BITS 20
+
+/*
+ * Merge two sorted chains into a single sorted chain.
+ *
+ * Both inputs and the result use the intermediate representation that is
+ * passed between merge steps: nodes are linked through "next" only, the
+ * chain ends with NULL and there is no sentinel head node. The "prev"
+ * pointers are not written here and must not be relied upon.
+ *
+ * cmp: comparison callback; a result <= 0 means 'a' goes first
+ * a, b: heads of the two chains (either may be NULL)
+ *
+ * Returns the head of the merged chain (NULL when both inputs are NULL).
+ */
+static struct td_list_head *merge(
+		int (*cmp)(const struct td_list_head *a, const struct td_list_head *b),
+		struct td_list_head *a, struct td_list_head *b)
+{
+	struct td_list_head *merged = NULL;
+	struct td_list_head **link = &merged;	/* slot receiving the next node */
+
+	while (a != NULL && b != NULL) {
+		if ((*cmp)(a, b) > 0) {
+			*link = b;
+			link = &b->next;
+			b = b->next;
+		} else {
+			/* ties go to 'a' -- this is what keeps the sort stable */
+			*link = a;
+			link = &a->next;
+			a = a->next;
+		}
+	}
+	/* one chain is exhausted: append whatever remains of the other */
+	*link = (a != NULL) ? a : b;
+	return merged;
+}
+
+/*
+ * Final merge step: merge the two sorted NULL-terminated chains 'a' and
+ * 'b' behind 'head' and, in the same pass, rebuild the standard circular
+ * doubly-linked list structure ("prev" links, last node pointing back to
+ * 'head'). Doing both at once duplicates code from merge(), but avoids a
+ * separate pass to restore the "prev" links.
+ *
+ * cmp:  comparison callback; a result <= 0 means 'a' goes first
+ * head: list head that will anchor the resulting ring
+ * a, b: heads of the two chains to merge (either may be NULL)
+ *
+ * The comparator is only invoked to compare two distinct candidates.
+ * Once one chain is exhausted the remainder is already sorted, so it is
+ * linked in without any further callback.
+ */
+static void merge_and_restore_back_links(
+		int (*cmp)(const struct td_list_head *a, const struct td_list_head *b),
+		struct td_list_head *head,
+		struct td_list_head *a, struct td_list_head *b)
+{
+	struct td_list_head *tail = head;
+
+	while (a && b) {
+		/* if equal, take 'a' -- important for sort stability */
+		if ((*cmp)(a, b) <= 0) {
+			tail->next = a;
+			a->prev = tail;
+			a = a->next;
+		} else {
+			tail->next = b;
+			b->prev = tail;
+			b = b->next;
+		}
+		tail = tail->next;
+	}
+	tail->next = a ? a : b;
+
+	/*
+	 * Walk the leftover chain only to fix its "prev" links. A plain
+	 * while loop also copes with both chains being empty, in which
+	 * case 'head' simply ends up as an empty ring.
+	 */
+	while (tail->next) {
+		tail->next->prev = tail;
+		tail = tail->next;
+	}
+
+	/* close the ring */
+	tail->next = head;
+	head->prev = tail;
+}
+
+/**
+ * td_list_sort - sort a list
+ * @head: the list to sort
+ * @cmp: the elements comparison function
+ *
+ * This function implements a bottom-up "merge sort", which has
+ * O(n*log(n)) complexity. The list is sorted in place by relinking its
+ * nodes; nothing is done when td_list_empty(@head) is true.
+ *
+ * The circular list is first cut into a NULL-terminated chain. Its
+ * elements are then taken one at a time and merged into part[], which is
+ * used like a binary counter: the new element is merged with part[0],
+ * part[1], ... until an empty slot is found, and the result is stored in
+ * that slot. Finally the partial lists are merged together and the
+ * "prev" links and the circular structure are rebuilt.
+ *
+ * The comparison function @cmp must return a negative value if @a
+ * should sort before @b, and a positive value if @a should sort after
+ * @b. If @a and @b are equivalent, and their original relative
+ * ordering is to be preserved, @cmp must return 0.
+ */
+void td_list_sort(struct td_list_head *head,
+ int (*cmp)(const struct td_list_head *a, const struct td_list_head *b))
+{
+ struct td_list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
+ -- last slot is a sentinel */
+ int lev; /* index into part[] */
+ int max_lev = 0; /* highest slot of part[] used so far */
+ struct td_list_head *list;
+
+ if (td_list_empty(head))
+ return;
+
+ memset(part, 0, sizeof(part)); /* every slot starts out empty (NULL) */
+
+ head->prev->next = NULL; /* cut the ring: the chain now ends with NULL */
+ list = head->next; /* first element still to be inserted */
+
+ while (list) {
+ struct td_list_head *cur = list;
+ list = list->next;
+ cur->next = NULL; /* detach cur as a one-element chain */
+
+ for (lev = 0; part[lev]; lev++) { /* carry upwards until a free slot is found */
+ cur = merge(cmp, part[lev], cur); /* older elements are passed as 'a' */
+ part[lev] = NULL;
+ }
+ if (lev > max_lev) {
+ if (lev >= MAX_LIST_LENGTH_BITS)
+ {
+ // list passed to td_list_sort() too long for efficiency: keep accumulating in the top usable slot so the sentinel slot stays NULL
+ lev--;
+ }
+ max_lev = lev;
+ }
+ part[lev] = cur;
+ }
+
+ for (lev = 0; lev < max_lev; lev++) /* fold the lower slots together, lowest first */
+ if (part[lev])
+ list = merge(cmp, part[lev], list);
+
+ merge_and_restore_back_links(cmp, head, part[max_lev], list); /* last merge also rebuilds prev links and closes the ring */
+}
diff --git a/src/list_sort.h b/src/list_sort.h
new file mode 100644
index 0000000..24913dd
--- /dev/null
+++ b/src/list_sort.h
@@ -0,0 +1,22 @@
+/*
+ File: list_sort.h
+
+ Copyright (C) 2011 Christophe GRENIER <grenier@cgsecurity.org>
+
+ This software is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc., 51
+ Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+void td_list_sort(struct td_list_head *head,
+ int (*cmp)(const struct td_list_head *a, const struct td_list_head *b));
diff --git a/src/ntfs_udl.c b/src/ntfs_udl.c
index 7ef444e..635fda8 100644
--- a/src/ntfs_udl.c
+++ b/src/ntfs_udl.c
@@ -67,6 +67,7 @@
#endif
#include "list.h"
+#include "list_sort.h"
#include "log.h"
#include "log_part.h"
#include "ntfs_udl.h"
@@ -1181,7 +1182,7 @@ static void scan_disk(ntfs_volume *vol, file_info_t *dir_list)
new_file=ufile_to_file_data(file);
if(new_file!=NULL)
{
- td_list_add_sorted(&new_file->list, &dir_list->list, filesort);
+ td_list_add_tail(&new_file->list, &dir_list->list);
results++;
}
}
@@ -1194,6 +1195,7 @@ done:
free(buffer);
if (attr)
ntfs_attr_close(attr);
+ td_list_sort(&dir_list->list, filesort);
}
#ifdef HAVE_NCURSES