#!/bin/bash
# Desc: Transcribes every mp3 file found below the working directory
#   with whisper, running on the selected CUDA device.
# Usage: ./transcribe_whisper.sh 3
# Input: arg1  CUDA graphics card number (zero-indexed)
# Output: whisper output files (all formats) written into the directory
#   of each mp3 file; status messages on stderr
# Exit: 0    every processed file was transcribed (or skipped)
#       1    at least one whisper run failed
#       111  invalid arguments or whisper not available
# Version: 0.0.2
# Depends: whisper ( https://github.com/openai/whisper ),
#   GNU find, GNU shuf
# Source: user/transcribe_whisper.sh, repository BK-2020-03,
#   commit ddcb78b71ab49dc48d484a63bba85a4a827b71cd
#   (Steven Baltakatei Sandoval, Mon, 29 Apr 2024 22:26:56 +0000,
#   "feat(user/transcribe_whisper.sh):Add mp3 transcription script")

yell() { echo "$0: $*" >&2; } # print script path and all args to stderr
die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status
must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails
checkInt() {
    # Desc: Checks if arg is integer (one or more decimal digits, no sign)
    # Usage: checkInt arg
    # Input: arg: integer
    # Output: - return code 0 (if arg is integer)
    #         - return code 1 (if arg is not integer)
    #         - exits 111 via die() if not called with exactly one arg
    # Example: if ! checkInt $arg; then echo "not int"; fi;
    # Version: 0.0.3
    local re_int='^[0-9]+$'; # Regular Expression to test

    #===Process Arg===
    if [[ $# -ne 1 ]]; then
        die "ERROR:Invalid number of arguments:$#";
    fi;

    #===Determine function return code===
    # [[ =~ ]] already yields 0 on match and 1 otherwise.
    [[ "$1" =~ $re_int ]];
} # Checks if arg is integer
main() {
    # Desc: Transcribes all mp3 files below the working directory
    # Usage: main cuda_num
    # Input: arg1: CUDA graphics card number (zero-indexed)
    # Output: - return code 0   (no whisper run failed)
    #         - return code 1   (at least one whisper run failed)
    #         - return code 111 (bad argument or whisper not found)
    # Note: A "<file>.tmp" lock file marks an mp3 file as being worked
    #   on so that several instances (e.g. one per graphics card) can
    #   run in the same directory tree; files are shuffled to make
    #   collisions between instances less likely.
    local cuda_num file dir_out ftmp base ext has_output;
    local -i n_failed=0;

    cuda_num="${1:-}";
    # Return instead of exit: main is the last command of the script,
    # so its status still becomes the exit status of the script.
    if ! checkInt "$cuda_num"; then
        yell "FATAL:No graphics card selected.";
        return 111;
    fi;
    # Fail early; otherwise every mp3 file would be locked and then
    # fail one by one.
    if ! command -v whisper >/dev/null 2>&1; then
        yell "FATAL:whisper not found.";
        return 111;
    fi;

    # The file list arrives NUL-delimited on fd 3: names containing
    # newlines stay intact and stdin is left alone for child processes.
    while IFS= read -r -d '' -u 3 file; do
        printf '%s\n' "STATUS:Processing:$file" 1>&2;
        SECONDS=0;
        dir_out="$(dirname -- "$file")";
        ftmp="$file".tmp;
        base="${file%.*}";
        #declare -p file dir_out ftmp; # debug

        # Skip files which already have any whisper output or which
        # are locked by another instance.
        has_output="false";
        for ext in srt vtt txt tsv json; do
            if [[ -f "$base.$ext" ]]; then has_output="true"; break; fi;
        done;
        if [[ "$has_output" = "true" ]] || [[ -f "$ftmp" ]]; then
            yell "STATUS:Skipping:$file";
            continue;
        fi;

        # Create the lock atomically (noclobber refuses to overwrite an
        # existing file), so that two instances passing the check above
        # at the same moment cannot both claim the file.
        if ! ( set -o noclobber; : > "$ftmp" ); then
            yell "STATUS:Skipping:$file";
            continue;
        fi;
        yell "STATUS:No conflicts detected.";

        if whisper "$file" \
                   --model large-v3 \
                   --output_format all \
                   --output_dir "$dir_out" \
                   --language en \
                   --device cuda:"$cuda_num"; then
            printf '%s\n' "STATUS:$SECONDS:Finished:$file" 1>&2;
        else
            yell "ERROR:$SECONDS:whisper failed:$file";
            n_failed+=1;
        fi;
        # Remove the lock in either case; a lock left behind after a
        # failure would exclude the file from all later runs.
        rm -f -- "$ftmp";
    done 3< <(find . -type f -name "*.mp3" -print0 | shuf -z );

    if (( n_failed > 0 )); then return 1; fi;
    return 0;
}; # main program

main "$@";