From f060617d9a33f5a75259edda6c22574a8d8d9df9 Mon Sep 17 00:00:00 2001 From: Horus3 Date: Sun, 4 May 2014 23:05:03 +0200 Subject: Find duplicate files and move them to a sub directory. --- bin/data_double.sh | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100755 bin/data_double.sh diff --git a/bin/data_double.sh b/bin/data_double.sh new file mode 100755 index 0000000..655aff2 --- /dev/null +++ b/bin/data_double.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +# Example usage, find all doubles in all sub directory +# for i in *; do if [ -d $i ]; then double.sh $i; fi; done +# +# To find duplicates in specific directory +# double.sh /path/to/dir /path/to/other/dir ./relativ/path +# +# With zero paramater, the script uses the working diroctory. + +DATABASE="/tmp/double.db" +DIR="./found_doubles" +COUNT=0 + +if [ $# -eq 0 ]; then + LOOP=false +else + LOOP=true +fi + +echo "" + +while true; do + + if [ $LOOP = true ]; then + if [ ! -d "$1" ]; then + echo "Can't chdir to $1." + exit 1 + else + echo "Changing directory to '$1'." + cd "$1" + shift + fi + else + echo "Working in directory '$(pwd)'." + fi + + sqlite3 $DATABASE "CREATE TABLE files (hash TEXT UNIQUE)" + mkdir -p $DIR + TMP=0 + for i in *; do + if [ -f "$i" ]; then + HASH=$(md5sum "$i" | awk '{ print $1 }') + sqlite3 $DATABASE "INSERT INTO files VALUES ('$HASH')" 2>/dev/null + if [ ! $? -eq 0 ]; then + mv -v "$i" "$DIR" + ((COUNT ++)) + ((TMP ++)) + fi + fi + done + + if [ $TMP -eq 0 ]; then + rmdir $DIR + fi + + if [ $LOOP = true ]; then + sqlite3 $DATABASE "DROP TABLE files" + echo "" + else + break + fi + + if [ $# -eq 0 ]; then + break; + fi +done + +echo "Found and moved $COUNT files to $DIR." +rm -f $DATABASE -- cgit v1.2.3 From 5cc26b20de7b64faf010f389f297777a365f557b Mon Sep 17 00:00:00 2001 From: Horus3 Date: Sun, 4 May 2014 23:36:14 +0200 Subject: Used to find the filesize of all duplicates found with duplicate.sh. --- bin/data_double.sh | 70 --------------------------------------------------- bin/duplicate.sh | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++ bin/duplicate_size.sh | 13 ++++++++++ 3 files changed, 83 insertions(+), 70 deletions(-) delete mode 100755 bin/data_double.sh create mode 100755 bin/duplicate.sh create mode 100755 bin/duplicate_size.sh diff --git a/bin/data_double.sh b/bin/data_double.sh deleted file mode 100755 index 655aff2..0000000 --- a/bin/data_double.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash - -# Example usage, find all doubles in all sub directory -# for i in *; do if [ -d $i ]; then double.sh $i; fi; done -# -# To find duplicates in specific directory -# double.sh /path/to/dir /path/to/other/dir ./relativ/path -# -# With zero paramater, the script uses the working diroctory. - -DATABASE="/tmp/double.db" -DIR="./found_doubles" -COUNT=0 - -if [ $# -eq 0 ]; then - LOOP=false -else - LOOP=true -fi - -echo "" - -while true; do - - if [ $LOOP = true ]; then - if [ ! -d "$1" ]; then - echo "Can't chdir to $1." - exit 1 - else - echo "Changing directory to '$1'." - cd "$1" - shift - fi - else - echo "Working in directory '$(pwd)'." - fi - - sqlite3 $DATABASE "CREATE TABLE files (hash TEXT UNIQUE)" - mkdir -p $DIR - TMP=0 - for i in *; do - if [ -f "$i" ]; then - HASH=$(md5sum "$i" | awk '{ print $1 }') - sqlite3 $DATABASE "INSERT INTO files VALUES ('$HASH')" 2>/dev/null - if [ ! $? -eq 0 ]; then - mv -v "$i" "$DIR" - ((COUNT ++)) - ((TMP ++)) - fi - fi - done - - if [ $TMP -eq 0 ]; then - rmdir $DIR - fi - - if [ $LOOP = true ]; then - sqlite3 $DATABASE "DROP TABLE files" - echo "" - else - break - fi - - if [ $# -eq 0 ]; then - break; - fi -done - -echo "Found and moved $COUNT files to $DIR." -rm -f $DATABASE diff --git a/bin/duplicate.sh b/bin/duplicate.sh new file mode 100755 index 0000000..655aff2 --- /dev/null +++ b/bin/duplicate.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +# Example usage, find all doubles in all sub directory +# for i in *; do if [ -d $i ]; then double.sh $i; fi; done +# +# To find duplicates in specific directory +# double.sh /path/to/dir /path/to/other/dir ./relativ/path +# +# With zero paramater, the script uses the working diroctory. + +DATABASE="/tmp/double.db" +DIR="./found_doubles" +COUNT=0 + +if [ $# -eq 0 ]; then + LOOP=false +else + LOOP=true +fi + +echo "" + +while true; do + + if [ $LOOP = true ]; then + if [ ! -d "$1" ]; then + echo "Can't chdir to $1." + exit 1 + else + echo "Changing directory to '$1'." + cd "$1" + shift + fi + else + echo "Working in directory '$(pwd)'." + fi + + sqlite3 $DATABASE "CREATE TABLE files (hash TEXT UNIQUE)" + mkdir -p $DIR + TMP=0 + for i in *; do + if [ -f "$i" ]; then + HASH=$(md5sum "$i" | awk '{ print $1 }') + sqlite3 $DATABASE "INSERT INTO files VALUES ('$HASH')" 2>/dev/null + if [ ! $? -eq 0 ]; then + mv -v "$i" "$DIR" + ((COUNT ++)) + ((TMP ++)) + fi + fi + done + + if [ $TMP -eq 0 ]; then + rmdir $DIR + fi + + if [ $LOOP = true ]; then + sqlite3 $DATABASE "DROP TABLE files" + echo "" + else + break + fi + + if [ $# -eq 0 ]; then + break; + fi +done + +echo "Found and moved $COUNT files to $DIR." +rm -f $DATABASE diff --git a/bin/duplicate_size.sh b/bin/duplicate_size.sh new file mode 100755 index 0000000..d211558 --- /dev/null +++ b/bin/duplicate_size.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +function usage() { + echo "Used to find the filesize of all duplicates found with duplicate.sh." + echo "Usage: eval \$(double_size.sh)" + exit 1 +} + +if [ $# -gt 0 ]; then + usage +fi + +echo "FILE=\$(for i in **/found_doubles/*; do du -sb \"\$i\"; done); echo \$FILE | awk '{ sum+=\$1} END { print sum }'" -- cgit v1.2.3 From 15a5425855aecf57a596372f006f4a8412701e54 Mon Sep 17 00:00:00 2001 From: Horus3 Date: Sun, 4 May 2014 23:44:00 +0200 Subject: Fixed typo in usage() func. --- bin/duplicate_size.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/duplicate_size.sh b/bin/duplicate_size.sh index d211558..7ca2bf1 100755 --- a/bin/duplicate_size.sh +++ b/bin/duplicate_size.sh @@ -2,7 +2,7 @@ function usage() { echo "Used to find the filesize of all duplicates found with duplicate.sh." - echo "Usage: eval \$(double_size.sh)" + echo "Usage: eval \$(duplicate_size.sh)" exit 1 } -- cgit v1.2.3