# Source: zdv2.bktei.com Git - BK-2020-03.git/blob - unitproc/bkfind
 
 
 
 
 
 
 
  # Date: 2020-01-20T17:08Z
  # Author: Steven Baltakatei Sandoval (baltakatei.com)
  # License: This bash script, `bkfind`, is licensed under GPLv3 or
  # later by Steven Baltakatei Sandoval:
  #
  #    `bkfind`, a duplicate file finder
  #    Copyright (C) 2020  Steven Baltakatei Sandoval (baltakatei.com)
  #
  #    This program is free software: you can redistribute it and/or modify
  #    it under the terms of the GNU General Public License as published by
  #    the Free Software Foundation, either version 3 of the License, or
  #    (at your option) any later version.
  #
  #    This program is distributed in the hope that it will be useful,
  #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  #    GNU General Public License for more details.
  #
  #    A copy of the GNU General Public License may be found at
  #    <https://www.gnu.org/licenses/>.
  # Description: This is a script that searches a specified directory
  # for files with a file name containing a specified string. It works
  # as follows:
  #   - Search specified directory tree for files that have filenames
  #     that contain the specified file's filename. List groups of files
  #     sharing the same hash first then list files with unique hashes.
  # Dependencies: find, rhash, uniq, cut, cat, bash. See end of file
  # for the versions this script was written against.
  #   - GNU/Linux Debian 10
  # Use input arguments to define internal script variables.
  # Each positional parameter is quoted as a whole so that paths containing
  # spaces reach the variables unchanged and without added padding.
  DIR1="$1"   # Specified directory
  FILE1="$2"  # Specified file
  # Hash algorithm, passed to rhash as a long option (`--sha512`).
  RHASH_HASH_TYPE="sha512"
  # The number of characters returned by the chosen hash function
  # (ex: `rhash --sha512 {}` produces 512/4=128 hexadecimal chars).
  HASH_CHAR_LENGTH=$(( 512 / 4 ))
  # Strip path information from provided file name.
  # `--` ends option parsing so a file name beginning with `-` is treated as
  # an operand rather than as an option to basename.
  FILEBASE1="$(basename -- "$FILE1")"
  # Generate list of sha512 hashes and filepaths, save to $HASHLIST1
  # The start directory is quoted so that a path containing spaces or glob
  # characters is handed to find as a single argument. An empty $DIR1 falls
  # back to ".", which is what GNU find used when the unquoted empty value
  # expanded to no argument at all.
  # Each matching file is hashed by a separate rhash invocation; find's
  # -iname match is case-insensitive.
  HASHLIST1="$(find "${DIR1:-.}" -type f -iname "*${FILEBASE1}*" -exec rhash --"$RHASH_HASH_TYPE" {} \;)"
  # Specify character position before which characters are dropped from each line with `cut`.
  # NOTE(review): HASH_DISP_LENGTH is not assigned anywhere in the visible
  # part of this file; presumably it is the number of trailing hash
  # characters to keep on display and is set elsewhere. Confirm. Inside
  # $(( )) an unset name evaluates as 0.
  CUT_POSITION=$(( 1 + HASH_CHAR_LENGTH - HASH_DISP_LENGTH ))
  #====Files with duplicate hashes====
  # bkfind_duplicates HASHLIST CHECK_CHARS
  #   Print the lines of HASHLIST whose first CHECK_CHARS characters also
  #   begin at least one other line. Lines are sorted first so that equal
  #   prefixes are adjacent, as uniq requires.
  bkfind_duplicates() {
    # printf '%s\n' emits the list verbatim; `echo -e` would rewrite
    # backslash sequences (\n, \t, ...) that occur inside file paths.
    printf '%s\n' "$1" | sort | uniq -D --check-chars="$2"
  }

  # bkfind_group HASHLIST CHECK_CHARS CUT_POSITION
  #   Separate runs of lines sharing their first CHECK_CHARS characters with
  #   an empty line, then drop every character before CUT_POSITION from each
  #   line. HASHLIST must already be sorted.
  bkfind_group() {
    printf '%s\n' "$1" | uniq --check-chars="$2" --group | cut --characters="$3-"
  }

  # Generate sublist of duplicate entries, save to $DUPLICATES1
  # The compared width follows HASH_CHAR_LENGTH, falling back to the 128
  # characters of a sha512 hex digest when that variable is not set.
  DUPLICATES1="$(bkfind_duplicates "$HASHLIST1" "${HASH_CHAR_LENGTH:-128}")"

  # Format $DUPLICATES1 for readability by grouping, truncating sha512 hash; save to $DUPLICATES2
  DUPLICATES2="$(bkfind_group "$DUPLICATES1" "${HASH_CHAR_LENGTH:-128}" "$CUT_POSITION")"
  #====Files with unique hashes====
  # bkfind_uniques HASHLIST CHECK_CHARS
  #   Print the lines of HASHLIST whose first CHECK_CHARS characters begin
  #   no other line. Lines are sorted first so that equal prefixes are
  #   adjacent, as uniq requires.
  bkfind_uniques() {
    # printf '%s\n' emits the list verbatim; `echo -e` would rewrite
    # backslash sequences (\n, \t, ...) that occur inside file paths.
    printf '%s\n' "$1" | sort | uniq --unique --check-chars="$2"
  }

  # bkfind_truncate HASHLIST CUT_POSITION
  #   Drop every character before CUT_POSITION from each line of HASHLIST.
  bkfind_truncate() {
    printf '%s\n' "$1" | cut --characters="$2-"
  }

  # Generate sublist of unique entries, save to $UNIQUES1
  # The compared width follows HASH_CHAR_LENGTH, falling back to the 128
  # characters of a sha512 hex digest when that variable is not set.
  UNIQUES1="$(bkfind_uniques "$HASHLIST1" "${HASH_CHAR_LENGTH:-128}")"

  # Format $UNIQUES1 for readability by truncating sha512 hash; save to $UNIQUES2
  UNIQUES2="$(bkfind_truncate "$UNIQUES1" "$CUT_POSITION")"
  # Print the grouped duplicate list. printf '%s\n' writes the text verbatim,
  # whereas `echo -e` would rewrite backslash sequences found in file paths.
  printf '%s\n' "$DUPLICATES2"
  #     - find (GNU findutils) 4.6.0.225-235f
  #       Copyright (C) 2019 Free Software Foundation, Inc.
  #       License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
  #       Written by Eric B. Decker, James Youngman, and Kevin Dalley.
  #
  #     - rhash
  #       License: RHash License <http://rhash.sourceforge.net/license.php>
  #     - uniq (GNU coreutils) 8.30
  #       Copyright (C) 2018 Free Software Foundation, Inc.
  #       License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
  #       This is free software: you are free to change and redistribute it.
  #       There is NO WARRANTY, to the extent permitted by law.
  #       Written by Richard M. Stallman and David MacKenzie.
  #     - cut (GNU coreutils) 8.30
  #       Copyright (C) 2018 Free Software Foundation, Inc.
  #       License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
  #       This is free software: you are free to change and redistribute it.
  #       There is NO WARRANTY, to the extent permitted by law.
  #       Written by David M. Ihnat, David MacKenzie, and Jim Meyering.
  #     - cat (GNU coreutils) 8.30
  #       Copyright (C) 2018 Free Software Foundation, Inc.
  #       License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.
  #       This is free software: you are free to change and redistribute it.
  #       There is NO WARRANTY, to the extent permitted by law.
  #       Written by Torbjorn Granlund and Richard M. Stallman.
  #     - GNU bash, version 5.0.3(1)-release (x86_64-pc-linux-gnu)
  #       Copyright (C) 2019 Free Software Foundation, Inc.
  #       License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
  #       This is free software; you are free to change and redistribute it.
  #       There is NO WARRANTY, to the extent permitted by law.