feat(user/transcribe_whisper.sh):Apply to various media files
[BK-2020-03.git] / user / transcribe_whisper.sh
CommitLineData
#!/bin/bash
# Desc: Runs OpenAI Whisper on working directory media files
# Usage: ./transcribe_whisper.sh 3
# Input: arg1  CUDA graphics card number (zero-indexed)
# Version: 0.1.0
# Depends: whisper ( https://github.com/openai/whisper )
7
1deac7e5
SBS
# Find settings (read by find_flist)
# firegex: emacs-style regex matched case-insensitively (find -iregex) against
#   the whole path. The literal `\.` before the extension group keeps
#   extension-less names such as `foomp3` from matching.
#   Update the extension list according to:
#   `find . -type f | grep -Eo "\.([[:alnum:]])+$" | sort -u`
firegex='.+\.\(aac\|aif\|aiff\|flac\|m4a\|m4b\|mkv\|mp3\|mp4\|ogg\|opus\|wav\)$';
fsize="10k"; # default: minimum "10k"; passed to find as `-size +$fsize`
fdepth="10"; # find depth; passed to find as `-maxdepth $fdepth`
12
ddcb78b7
SBS
yell() {
    # Desc: Prints script path and all args to stderr
    # Usage: yell arg...
    # Output: stderr: "<script path>: <args joined by spaces>"
    # Note: printf is used so the message is emitted verbatim regardless of
    #   the shell's echo option/escape handling.
    printf '%s\n' "$0: $*" >&2;
} # print script path and all args to stderr
die() {
    # Desc: Same as yell() but terminates the shell with a non-zero status
    # Usage: die arg...
    # Output: stderr: message via yell(); exit status 111
    # Depends: yell()
    yell "$*";
    exit 111;
} # same as yell() but non-zero exit status
must() {
    # Desc: Runs args as a command; on failure reports the command via die()
    # Usage: must cmd [arg...]
    # Output: whatever cmd outputs; on failure die() is called with
    #   "cannot <cmd and args>", which exits the current (sub)shell
    # Depends: die()
    "$@" || die "cannot $*";
} # runs args as command, reports args if command fails
checkInt() {
    # Desc: Checks if arg is integer
    # Usage: checkInt arg
    # Input: arg: string to test
    # Output: - return code 0 (if arg consists solely of decimal digits)
    #         - return code 1 (if arg is anything else, including the empty
    #           string, signed values and decimals)
    # Depends: die() (called only when the argument count is not exactly 1)
    # Example: if ! checkInt "$arg"; then echo "not int"; fi;
    # Version: 0.0.2
    local re_uint='^[0-9]+$'; # kept local so the pattern does not leak to callers

    #===Process Arg===
    if [[ $# -ne 1 ]]; then
        die "ERROR:Invalid number of arguments:$#";
    fi;

    #===Determine function return code===
    # The status of the regex test is the function's return code (0 or 1).
    [[ "$1" =~ $re_uint ]];
} # Checks if arg is integer
1deac7e5
SBS
find_flist() {
    # Desc: print file list to stdout via `find` using script parameters
    # Usage: find_flist dir
    # Input: arg1: path to dir
    #        var: fdepth   find depth (-maxdepth)
    #        var: firegex  pattern find iregex (-iregex, whole-path match)
    #        var: fsize    find size (-size +fsize, i.e. strictly larger)
    # Output: stdout: one matching regular-file path per line
    #         return code 1 (with a note on stderr) if arg1 is not a directory
    # Depends: must(), find (-iregex is a GNU findutils extension)
    local dir_root="$1";

    if [[ ! -d "$dir_root" ]]; then
        printf '%s\n' "$0: find_flist: not a directory: $dir_root" >&2;
        return 1;
    fi;
    must find "$dir_root" -maxdepth "$fdepth" -type f -iregex "$firegex" -size +"$fsize";
}; # print file list to stdout from dir with script parameters
main() {
    # Desc: Runs whisper on every media file that find_flist reports for the
    #   working directory, skipping files that already have output or a lock.
    # Usage: main cuda_num
    # Input: arg1: CUDA graphics card number (must pass checkInt)
    #        var: fdepth  (find_flist) find depth
    #        var: firegex (find_flist) pattern find iregex
    #        var: fsize   (find_flist) find size
    # Output: stderr: STATUS/ERROR progress lines
    #         files: whisper output written next to each input file
    #         exits 111 via die() if arg1 is not an integer
    # Depends: checkInt(), die(), yell(), find_flist(), whisper
    local cuda_num dir_in line dir_out stem ftmp ext has_output;
    cuda_num="${1-}";
    dir_in="$(pwd)";

    if ! checkInt "$cuda_num"; then die "FATAL:No graphics card selected."; fi;

    # `IFS=` keeps leading/trailing whitespace in file names intact.
    while IFS= read -r line; do
        echo "STATUS:Processing:$line" 1>&2;
        SECONDS=0; # bash timer; read back when the file finishes
        dir_out="$(dirname -- "$line")";
        stem="${line%.*}"; # path without its final extension
        ftmp="$line".tmp;  # lock marker for this input file
        #declare -p line dir_out ftmp; # debug

        # Skip when any whisper output for this file already exists.
        has_output="false";
        for ext in srt vtt txt tsv json; do
            if [[ -f "$stem.$ext" ]]; then has_output="true"; break; fi;
        done;

        # Take the lock atomically: with noclobber the redirection fails if
        # the .tmp file already exists, so two instances scanning the same
        # directory cannot both claim one file.
        if [[ "$has_output" == "true" ]] || \
           ! ( set -o noclobber; : > "$ftmp" ) 2>/dev/null; then
            yell "STATUS:Skipping:$line";
            continue;
        fi;
        yell "STATUS:No conflicts detected.";

        # stdin is redirected so the child cannot consume the file list
        # that feeds this loop.
        if whisper "$line" \
                --model large-v3 \
                --output_format all \
                --output_dir "$dir_out" \
                --language en \
                --device cuda:"$cuda_num" < /dev/null; then
            echo "STATUS:$SECONDS:Finished:$line" 1>&2;
        else
            yell "ERROR:$SECONDS:whisper failed:$line";
        fi;
        # Release the lock on success and on failure; a lock left behind by
        # a failed run would make every later run skip this file.
        rm -f -- "$ftmp"; # remove .tmp file
    done < <(find_flist "$dir_in");
}; # main program
# Mark the helper functions for export to child bash processes.
# NOTE(review): no child shell that relies on these is visible in this file.
export -f yell die must find_flist;

# Entry point: forward all command-line arguments to main().
main "$@";
94
95