feat(user/transcribe_whisper.sh):Add mp3 transcription script
[BK-2020-03.git] / user / transcribe_whisper.sh
CommitLineData
ddcb78b7
SBS
#!/bin/bash
# Usage: ./transcribe_whisper.sh 3
# Input: arg1  CUDA graphics card number (zero-indexed)
# Version: 0.0.1
# Depends: whisper ( https://github.com/openai/whisper )

yell() {
  # Desc: Prints the script path ($0) followed by all args to stderr
  # Usage: yell message...
  # Output: stderr: "<script path>: <args joined by spaces>"
  # printf is used instead of echo so the text is emitted verbatim.
  printf '%s\n' "$0: $*" >&2
} # print script path and all args to stderr
die() {
  # Desc: Reports a message via yell() and terminates the shell
  # Usage: die message...
  # Output: stderr (through yell); exit status 111
  yell "$*"
  exit 111
} # same as yell() but non-zero exit status
must() {
  # Desc: Runs its args as a command; aborts through die() if the command fails
  # Usage: must command [arg...]
  # Output: whatever the command outputs; on failure die() reports "cannot <args>"
  if ! "$@"; then
    die "cannot $*"
  fi
} # runs args as command, reports args if command fails
checkInt() {
  # Desc: Checks if arg is an unsigned decimal integer (one or more digits 0-9)
  # Usage: checkInt arg
  # Input: arg: string to test
  # Output: - return code 0 (if arg consists only of digits)
  #         - return code 1 (if arg is empty, signed, or contains any non-digit)
  #         - calls die() if not given exactly one argument
  # Example: if ! checkInt "$arg"; then echo "not int"; fi;
  # Version: 0.0.3
  local -r re_uint='^[0-9]+$' # local so the pattern does not leak into the caller's scope

  #===Process Arg===
  if [[ $# -ne 1 ]]; then
    die "ERROR:Invalid number of arguments:$#"
  fi

  #===Determine function return code===
  # The status of the regex test is the function's return code (0 = match).
  [[ "$1" =~ $re_uint ]]
} # Checks if arg is integer
main() {
  # Desc: Transcribes every *.mp3 file found below the current directory with
  #       whisper, visiting the files in random order.
  # Usage: main cuda_num
  # Input: arg1: CUDA graphics card number; must pass checkInt()
  # Output: - whisper output files, written next to each input file
  #         - status messages on stderr
  #         - "<file>.tmp" marker file while a file is being processed
  # Depends: yell(), die(), checkInt(), whisper, find, shuf
  # Notes: A file is skipped when its marker file or any whisper output
  #        (.srt .vtt .txt .tsv .json) already exists.
  local cuda_num="${1-}"
  local line dir_out ftmp stem ext has_output

  if ! checkInt "$cuda_num"; then die "FATAL:No graphics card selected."; fi
  # Fail before touching anything: without this, a missing whisper binary
  # would leave a marker file behind for every mp3.
  if ! command -v whisper >/dev/null 2>&1; then
    die "FATAL:whisper not found."
  fi

  # NUL-delimited list so that unusual characters in file names survive.
  while IFS= read -r -d '' line; do
    printf '%s\n' "STATUS:Processing:$line" >&2
    SECONDS=0 # bash counts SECONDS up by itself; restart the per-file timer
    dir_out="$(dirname -- "$line")"
    stem="${line%.*}"
    ftmp="$line.tmp"
    #declare -p line dir_out ftmp; # debug

    has_output="false"
    for ext in srt vtt txt tsv json; do
      if [[ -f "$stem.$ext" ]]; then
        has_output="true"
        break
      fi
    done

    # With noclobber set, the redirection fails if the marker already exists,
    # so testing for the marker and creating it happen in a single step.
    if [[ "$has_output" == "true" ]] \
      || ! ( set -o noclobber; : > "$ftmp" ) 2>/dev/null; then
      yell "STATUS:Skipping:$line"
      continue
    fi
    yell "STATUS:No conflicts detected."

    # stdin comes from /dev/null so that nothing started here can read from
    # the file list feeding this loop.
    if whisper "$line" \
      --model large-v3 \
      --output_format all \
      --output_dir "$dir_out" \
      --language en \
      --device "cuda:$cuda_num" < /dev/null; then
      printf '%s\n' "STATUS:$SECONDS:Finished:$line" >&2
    else
      yell "ERROR:$SECONDS:Failed:$line"
    fi
    # Remove the marker in both cases; after a failure this lets a later run
    # retry the file instead of skipping it.
    rm -f -- "$ftmp"
  done < <(find . -type f -name "*.mp3" -print0 | shuf -z)
} # main program

# Entry point: forward all command-line arguments to main().
main "$@"