| 1 | #!/bin/bash |
| 2 | # Desc: Runs OpenAI Whisper on working directory media files |
| 3 | # Usage: ./transcribe_whisper.sh [dir] 3 |
| 4 | # Input: arg1 input dir |
| 5 | # arg2 CUDA graphics card number (zero-indexed) |
| 6 | # Version: 0.4.0 |
| 7 | # Depends: whisper ( https://github.com/openai/whisper ) |
| 8 | |
| 9 | # Find settings |
| 10 | firegex=".+\(aac\|aif\|aiff\|flac\|m4a\|m4b\|mkv\|mp3\|mp4\|ogg\|opus\|wav\)$"; # update according to `find . -type f | grep -Eo "\.([[:alnum:]])+$" | sort -u` |
| 11 | fsize="10k"; # default: minimum "10k" |
| 12 | fdepth="10"; # find depth |
| 13 | |
| 14 | yell() { echo "$0: $*" >&2; } # print script path and all args to stderr |
| 15 | die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status |
| 16 | must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails |
| 17 | checkInt() { |
| 18 | # Desc: Checks if arg is integer |
| 19 | # Usage: checkInt arg |
| 20 | # Input: arg: integer |
| 21 | # Output: - return code 0 (if arg is integer) |
| 22 | # - return code 1 (if arg is not integer) |
| 23 | # Example: if ! checkInt $arg; then echo "not int"; fi; |
| 24 | # Version: 0.0.2 |
| 25 | local returnState |
| 26 | |
| 27 | #===Process Arg=== |
| 28 | if [[ $# -ne 1 ]]; then |
| 29 | die "ERROR:Invalid number of arguments:$#"; |
| 30 | fi; |
| 31 | |
| 32 | RETEST1='^[0-9]+$'; # Regular Expression to test |
| 33 | if [[ ! "$1" =~ $RETEST1 ]] ; then |
| 34 | returnState="false"; |
| 35 | else |
| 36 | returnState="true"; |
| 37 | fi; |
| 38 | |
| 39 | #===Determine function return code=== |
| 40 | if [ "$returnState" = "true" ]; then |
| 41 | return 0; |
| 42 | else |
| 43 | return 1; |
| 44 | fi; |
| 45 | } # Checks if arg is integer |
| 46 | find_flist() { |
| 47 | # Desc: print file list to stdout via `find` using script parameters |
| 48 | # Input: arg1: path to dir |
| 49 | # var: fdepth find depth |
| 50 | # var: firegex pattern find iregex |
| 51 | # var: fsize find size |
| 52 | if [[ ! -d "$1" ]]; then return 1; fi; |
| 53 | must find "$1" -maxdepth "$fdepth" -type f -iregex "$firegex" -size +"$fsize"; |
| 54 | }; # print file list to stdout from dir with script parameters |
| 55 | main() { |
| 56 | # Input: arg1: dir_in input dir |
| 57 | # arg2: cuda_num cuda GPU index |
| 58 | # var: fdepth (find_flist) find depth |
| 59 | # var: firegex (find_flist) pattern find iregex |
| 60 | # var: fsize (find_flist) find size |
| 61 | dir_in="$1"; |
| 62 | cuda_num="$2"; |
| 63 | if ! checkInt "$cuda_num"; then die "FATAL:No graphics card selected."; fi; |
| 64 | while read -r line; do |
| 65 | echo "STATUS:Processing:$line" 1>&2; |
| 66 | SECONDS=0; |
| 67 | dir_out="$(dirname "$line"; )"; |
| 68 | ftmp="$line".tmp; |
| 69 | #declare -p line dir_out ftmp; # debug |
| 70 | if [[ ! -f "$ftmp" ]] && \ |
| 71 | [[ ! -f "${line%.*}".srt ]] && \ |
| 72 | [[ ! -f "${line%.*}".vtt ]] && \ |
| 73 | [[ ! -f "${line%.*}".txt ]] && \ |
| 74 | [[ ! -f "${line%.*}".tsv ]] && \ |
| 75 | [[ ! -f "${line%.*}".json ]]; then |
| 76 | touch "$ftmp"; |
| 77 | yell "STATUS:No conflicts detected."; |
| 78 | else |
| 79 | yell "STATUS:Skipping:$line"; |
| 80 | continue; |
| 81 | fi; |
| 82 | whisper "$line" \ |
| 83 | --model large-v3 \ |
| 84 | --output_format all \ |
| 85 | --output_dir "$dir_out" \ |
| 86 | --language en \ |
| 87 | --device cuda:"$cuda_num" && \ |
| 88 | ( |
| 89 | echo "STATUS:$SECONDS:Finished:$line" 1>&2; |
| 90 | rm "$ftmp"; # remove .tmp file |
| 91 | ); |
| 92 | done < <(find_flist "$dir_in" | shuf); |
| 93 | }; # main program |
| 94 | export -f yell die must find_flist; |
| 95 | |
| 96 | main "$@"; |
| 97 | |
| 98 | |