#!/bin/bash
# Desc: Runs OpenAI Whisper on the media files found under an input directory
# Usage: ./transcribe_whisper.sh [dir] [cuda_num]
# Example: ./transcribe_whisper.sh ./media 3
# Input: arg1  input dir
#        arg2  CUDA graphics card number (zero-indexed)
# Version: 0.4.0
# Depends: whisper ( https://github.com/openai/whisper )

# Find settings
# firegex: pattern handed to `find -iregex` (case-insensitive, matched against
#          the whole path, find's default emacs-style syntax). The literal
#          `\.` before the group makes only real file extensions match, so a
#          name that merely ends in the same letters (e.g. `cmp3`) is ignored.
#          Update the list according to:
#            find . -type f | grep -Eo "\.([[:alnum:]])+$" | sort -u
firegex='.+\.\(aac\|aif\|aiff\|flac\|m4a\|m4b\|mkv\|mp3\|mp4\|ogg\|opus\|wav\)$';
fsize="10k";   # minimum file size for `find -size +N`; default: "10k"
fdepth="10";   # find depth (`find -maxdepth`)

yell() {
  # Desc: Prints script path ($0) and all args, joined into one line, to stderr
  # Usage: yell [arg...]
  printf '%s\n' "$0: $*" >&2;
}
die() {
  # Desc: Same as yell() but then exits the current shell with status 111
  # Usage: die [arg...]
  yell "$*";
  exit 111;
}
must() {
  # Desc: Runs args as a command; if it fails, reports the args via die()
  # Usage: must cmd [arg...]
  "$@" || die "cannot $*";
}
checkInt() {
  # Desc: Checks if arg is integer (one or more decimal digits, no sign)
  # Usage: checkInt arg
  # Input: arg: integer
  # Output: - return code 0 (if arg is integer)
  #         - return code 1 (if arg is not integer)
  #         - calls die() if not given exactly one argument
  # Example: if ! checkInt $arg; then echo "not int"; fi;
  # Version: 0.0.3
  local reInt;

  #===Process Arg===
  if [[ $# -ne 1 ]]; then
    die "ERROR:Invalid number of arguments:$#";
  fi;

  #===Determine function return code===
  # The pattern lives in a local variable so it is used as a regex by `=~`
  # (unquoted on the right-hand side) without leaking into the caller's scope.
  reInt='^[0-9]+$';
  if [[ "$1" =~ $reInt ]]; then
    return 0;
  fi;
  return 1;
} # Checks if arg is integer
find_flist() {
  # Desc: Prints file list to stdout via `find` using script parameters
  # Usage: find_flist dir
  # Input: arg1: path to dir
  #        var: fdepth   find depth           (fallback if unset/empty: 10)
  #        var: firegex  pattern find iregex  (fallback if unset/empty: below)
  #        var: fsize    find size            (fallback if unset/empty: 10k)
  # Output: stdout: one matching file path per line
  #         return code 1 and a message on stderr if arg1 is not a directory
  # Depends: must, yell, find (with -iregex support)
  local dir depth regex size;
  dir="$1";
  if [[ ! -d "$dir" ]]; then
    yell "ERROR:Not a directory:$dir";
    return 1;
  fi;
  # A start path beginning with "-" would be read by find as an expression.
  if [[ "$dir" == -* ]]; then dir="./$dir"; fi;
  # Fallbacks keep the function usable where only the function (and not the
  # settings variables) has been brought into scope.
  depth="${fdepth:-10}";
  regex="${firegex:-.+\\.\\(aac\\|aif\\|aiff\\|flac\\|m4a\\|m4b\\|mkv\\|mp3\\|mp4\\|ogg\\|opus\\|wav\\)\$}";
  size="${fsize:-10k}";
  must find "$dir" -maxdepth "$depth" -type f -iregex "$regex" -size +"$size";
}; # print file list to stdout from dir with script parameters
main() {
  # Desc: Runs whisper on every file listed by find_flist for the input dir
  # Usage: main dir_in cuda_num
  # Input: arg1: dir_in    input dir
  #        arg2: cuda_num  cuda GPU index
  #        var: fdepth     (find_flist) find depth
  #        var: firegex    (find_flist) pattern find iregex
  #        var: fsize      (find_flist) find size
  # Output: whisper output files written to the directory of each input file
  #         status messages on stderr
  #         exits 111 (via die) if cuda_num is not an integer or dir_in is
  #         not a directory
  # Depends: checkInt, die, yell, find_flist, whisper, shuf, dirname
  local dir_in cuda_num line dir_out ftmp stem conflict skip;
  dir_in="$1";
  cuda_num="$2";
  if ! checkInt "$cuda_num"; then die "FATAL:No graphics card selected."; fi;
  if [[ ! -d "$dir_in" ]]; then die "FATAL:Not a directory:$dir_in"; fi;

  # IFS= keeps leading/trailing whitespace of each path intact.
  while IFS= read -r line; do
    echo "STATUS:Processing:$line" 1>&2;
    SECONDS=0; # bash counts seconds from this assignment
    dir_out="$(dirname -- "$line")";
    ftmp="$line".tmp;   # marker file: present while (or since) a run took this input
    stem="${line%.*}";  # input path without its last extension
    #declare -p line dir_out ftmp; # debug

    # Skip the input if its marker file or any whisper output already exists.
    skip="false";
    for conflict in "$ftmp" "$stem".{srt,vtt,txt,tsv,json}; do
      if [[ -f "$conflict" ]]; then skip="true"; break; fi;
    done;
    if [[ "$skip" == "true" ]]; then
      yell "STATUS:Skipping:$line";
      continue;
    fi;
    touch -- "$ftmp";
    yell "STATUS:No conflicts detected.";

    # stdin is redirected so nothing started here can consume the file list
    # that the enclosing loop is reading.
    if whisper "$line" \
      --model large-v3 \
      --output_format all \
      --output_dir "$dir_out" \
      --language en \
      --device cuda:"$cuda_num" < /dev/null; then
      echo "STATUS:$SECONDS:Finished:$line" 1>&2;
      rm -- "$ftmp"; # remove .tmp file
    else
      # The marker file is left in place, so this input is skipped on later
      # runs until the .tmp file is removed.
      yell "ERROR:whisper failed; marker file kept:$ftmp";
    fi;
  done < <(find_flist "$dir_in" | shuf);
}; # main program
# Make the helper functions available to child bash processes.
export -f yell die must find_flist;

# Run the program with the script's own arguments.
main "$@";
