summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Horus3 2014-05-04 23:05:03 +0200
committer Horus3 2014-05-04 23:05:03 +0200
commit f060617d9a33f5a75259edda6c22574a8d8d9df9 (patch)
tree 9fd0aa805a18e4132dd869eeaeff822100f03f1e
parent 09c97f22ecbcbdf152308c60c79e0f9fea5e63e3 (diff)
download dotfiles-f060617d9a33f5a75259edda6c22574a8d8d9df9.tar.gz
Find duplicate files and move them to a sub directory.
-rwxr-xr-x bin/data_double.sh 70
1 files changed, 70 insertions, 0 deletions
diff --git a/bin/data_double.sh b/bin/data_double.sh
new file mode 100755
index 0000000..655aff2
--- /dev/null
+++ b/bin/data_double.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Example usage, find all doubles in all sub directory
+# for i in *; do if [ -d $i ]; then double.sh $i; fi; done
+#
+# To find duplicates in specific directory
+# double.sh /path/to/dir /path/to/other/dir ./relativ/path
+#
+# With zero paramater, the script uses the working diroctory.
+
+DATABASE="/tmp/double.db"
+DIR="./found_doubles"
+COUNT=0
+
+if [ $# -eq 0 ]; then
+ LOOP=false
+else
+ LOOP=true
+fi
+
+echo ""
+
+while true; do
+
+ if [ $LOOP = true ]; then
+ if [ ! -d "$1" ]; then
+ echo "Can't chdir to $1."
+ exit 1
+ else
+ echo "Changing directory to '$1'."
+ cd "$1"
+ shift
+ fi
+ else
+ echo "Working in directory '$(pwd)'."
+ fi
+
+ sqlite3 $DATABASE "CREATE TABLE files (hash TEXT UNIQUE)"
+ mkdir -p $DIR
+ TMP=0
+ for i in *; do
+ if [ -f "$i" ]; then
+ HASH=$(md5sum "$i" | awk '{ print $1 }')
+ sqlite3 $DATABASE "INSERT INTO files VALUES ('$HASH')" 2>/dev/null
+ if [ ! $? -eq 0 ]; then
+ mv -v "$i" "$DIR"
+ ((COUNT ++))
+ ((TMP ++))
+ fi
+ fi
+ done
+
+ if [ $TMP -eq 0 ]; then
+ rmdir $DIR
+ fi
+
+ if [ $LOOP = true ]; then
+ sqlite3 $DATABASE "DROP TABLE files"
+ echo ""
+ else
+ break
+ fi
+
+ if [ $# -eq 0 ]; then
+ break;
+ fi
+done
+
+echo "Found and moved $COUNT files to $DIR."
+rm -f $DATABASE