feat(user/transcribe_whisper.sh):Add mp3 transcription script
[BK-2020-03.git] / user / transcribe_whisper.sh
diff --git a/user/transcribe_whisper.sh b/user/transcribe_whisper.sh
new file mode 100644 (file)
index 0000000..a6fdad5
--- /dev/null
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Usage: ./transcribe_whisper.sh 3
+# Input: arg1  CUDA graphics card number (zero-indexed)
+# Version: 0.0.1
+# Depends: whisper ( https://github.com/openai/whisper )
+
+yell() { printf '%s\n' "$0: $*" 1>&2; } # write script path followed by all args to stderr
+die() { yell "$@"; exit 111; } # report like yell(), then terminate with exit status 111
+must() { if ! "$@"; then die "cannot $*"; fi; } # run args as a command; die naming them if it fails
+checkInt() {
+    # Desc: Checks if arg is integer
+    # Usage: checkInt arg
+    # Input: arg: integer
+    # Output: - return code 0 (if arg is integer)
+    #         - return code 1 (if arg is not integer)
+    # Example: if ! checkInt $arg; then echo "not int"; fi;
+    # Version: 0.0.2
+    local returnState
+
+    #===Process Arg===
+    if [[ $# -ne 1 ]]; then
+       die "ERROR:Invalid number of arguments:$#";
+    fi;
+    
+    RETEST1='^[0-9]+$'; # Regular Expression to test
+    if [[ ! "$1" =~ $RETEST1 ]] ; then
+       returnState="false";
+    else
+       returnState="true";
+    fi;
+
+    #===Determine function return code===
+    if [ "$returnState" = "true" ]; then
+       return 0;
+    else
+       return 1;
+    fi;
+} # Checks if arg is integer
+main() {
+    cuda_num="$1";
+    if ! checkInt "$cuda_num"; then die "FATAL:No graphics card selected."; fi;
+    while read -r line; do
+        echo "STATUS:Processing:$line" 1>&2;
+        SECONDS=0;        
+        dir_out="$(dirname "$line"; )";
+        ftmp="$line".tmp;
+        #declare -p line dir_out ftmp; # debug
+        if [[ ! -f "$ftmp" ]] && \
+               [[ ! -f "${line%.*}".srt ]] && \
+               [[ ! -f "${line%.*}".vtt ]] && \
+               [[ ! -f "${line%.*}".txt ]] && \
+               [[ ! -f "${line%.*}".tsv ]] && \
+               [[ ! -f "${line%.*}".json ]]; then
+            touch "$ftmp";
+            yell "STATUS:No conflicts detected.";
+        else
+            yell "STATUS:Skipping:$line";
+            continue;
+        fi;
+        whisper "$line" \
+                --model large-v3 \
+                --output_format all \
+                --output_dir "$dir_out" \
+                --language en \
+                --device cuda:"$cuda_num" && \
+            (
+                echo "STATUS:$SECONDS:Finished:$line" 1>&2;
+                rm "$ftmp"; # remove .tmp file
+            );
+    done < <(find . -type f -name "*.mp3" | shuf )
+}; # main program
+
+main "$@"; # run main program, forwarding all script arguments unchanged
+
+