feat(user/get_ytpljson.sh):Add script to get YT playlist metadata
authorSteven Baltakatei Sandoval <baltakatei@gmail.com>
Mon, 20 Feb 2023 12:39:14 +0000 (12:39 +0000)
committerSteven Baltakatei Sandoval <baltakatei@gmail.com>
Mon, 20 Feb 2023 12:39:14 +0000 (12:39 +0000)
- Note: Requests YouTube playlist metadata in JSON format from
  `https://www.googleapis.com`.

doc/user/get_ytpljson.sh.org [new file with mode: 0644]
user/get_ytpljson.sh [new file with mode: 0755]

diff --git a/doc/user/get_ytpljson.sh.org b/doc/user/get_ytpljson.sh.org
new file mode 100644 (file)
index 0000000..5136424
--- /dev/null
@@ -0,0 +1,36 @@
+* get_ytpljson.sh Information
+
+#+TITLE: get_ytpljson.sh Information
+#+AUTHOR: Steven Baltakatei Sandoval
+#+DATE:2023-02-20
+#+EMAIL:baltakatei@gmail.com
+#+LANGUAGE: en
+#+OPTIONS: toc:nil
+
+Created by [[https://baltakatei.com][Steven Baltakatei Sandoval]] on
+2023-02-20T12:38+00
+under a [[https://creativecommons.org/licenses/by-sa/4.0/][CC BY-SA 4.0]] (🅭🅯🄎4.0) license and last updated on
+2023-02-20T12:38+00.
+
+** Summary
+This script downloads and saves YouTube playlist metadata as a JSON
+file in the current working directory.
+
+** Versions
+| Version | Description                                     |
+|---------+-------------------------------------------------|
+|   0.0.1 | Initial version compatible with YouTube API v3. |
+|         |                                                 |
+
+** Background
+Google provides an API for downloading playlist metadata in response
+to receiving a correctly formatted URL containing the playlist ID and
+an API key.
+
+** References
+- "Obtaining authorization credentials"
+  https://developers.google.com/youtube/registering_an_application
+- "Implementation: Playlists"
+  https://developers.google.com/youtube/v3/guides/implementation/playlists
+- "Implementation: Pagination",
+  https://developers.google.com/youtube/v3/guides/implementation/pagination
diff --git a/user/get_ytpljson.sh b/user/get_ytpljson.sh
new file mode 100755 (executable)
index 0000000..4f72265
--- /dev/null
@@ -0,0 +1,140 @@
+#!/bin/bash
+# Usage: get_ytpljson.sh arg1 arg2
+# Input: posargs: arg1: YouTube playlist ID
+#                 arg2: Google API key
+# Output: file: JSON file
+# Version: 0.0.1
+
+max_api_calls="100"; # limit that main() compares its API call counter against before dying
+
+yell() { echo "$0: $*" >&2; } # print script path ($0) followed by all args to stderr
+die() { yell "$*"; exit 111; } # same as yell() but then exits with status 111
+must() { "$@" || die "cannot $*"; } # runs args as a command; if it fails, die() reporting the args
+get_response() {
+    # Input: arg1: YouTube playlist ID
+    #        arg2: Google API key
+    #        arg3: pageToken (optional)
+    # Output: stdout: JSON response from googleapis.com
+    # Depends: curl 7.81.0
+    #          BK-2020-03: die()
+    
+    local PLAYLIST_ID API_KEY PAGE_TOKEN URL;
+
+    # Set the playlist ID and API key
+    PLAYLIST_ID="$1";
+    API_KEY="$2";
+    PAGE_TOKEN="$3";
+
+    # Check inputs
+    if [[ $# -lt 2 ]]; then die "FATAL:Incorrect arg count:$#"; fi;
+    
+    # Base URL
+    URL="https://www.googleapis.com/youtube/v3/playlistItems?part=snippet";
+
+    # Append playlist ID
+    URL="$URL""&playlistId=""$PLAYLIST_ID";
+
+    # Append API key
+    URL="$URL""&key=""$API_KEY";
+
+    # Append page token if it exists
+    if [[ -n "$PAGE_TOKEN" ]]; then
+        URL="$URL""&pageToken=""$PAGE_TOKEN";
+    fi;
+    
+    curl -s "$URL";
+    #curl -s "https://www.googleapis.com/youtube/v3/playlistItems?part=snippet&playlistId=$PLAYLIST_ID&key=$API_KEY"; # example
+    
+}; # Stdout: JSON from YouTube v3 API
+check_next_page() {
+    # Input: arg1: json string
+    # Depends: jq 1.6
+
+    # Checks if key ".nextPageToken" present
+    if jq -e '.nextPageToken' < <(printf "%s" "$1") 1>/dev/random 2>&1; then
+        return 0;
+    else
+        return 1;
+    fi;
+
+}; # returns true if '.nextPageToken' present
+get_next_page() {
+    # Input: arg1: json string containing the key 'nextPageToken'
+    # Output: stdout: the value of the first 'nextPageToken' key
+    #         exit code: 0: key '.nextPageToken' detected
+    #                    1: key '.nextPageToken' not detected
+    # Depends: jq 1.6
+    local output;
+    if [[ $# -ne 1 ]]; then die "Incorrect arg count:$#"; fi;
+
+    if jq -e '.nextPageToken' < <(printf "%s" "$1") 1>/dev/random 2>&1; then
+        output="$(jq -r '.nextPageToken' < <(printf "%s" "$1") | head -n1)";
+        printf "%s" "$output";
+        return 0;
+    else
+        return 1;
+    fi;
+}; # stdout: value from key "pageToken"
+
+main() {
+    # Depends: bash 5.1.16, GNU Coreutils 8.32 (date)
+    #          BK-2020-03: yell()
+    # Ref/Attrib: [1]: "Obtaining authorization credentials" https://developers.google.com/youtube/registering_an_application
+    #             [2]: "Implementation: Playlists" https://developers.google.com/youtube/v3/guides/implementation/playlists
+    #             [3]: "Implementation: Pagination", https://developers.google.com/youtube/v3/guides/implementation/pagination
+    
+    local n out_path;
+    declare -a out_list;
+    
+    # Check input
+    if [[ $# -ne 2 ]]; then die "FATAL:Incorrect number of args:$#"; fi;
+
+    # Set the playlist ID and API key
+    playlistId="$1"; # See ref [2]
+    apiKey="$2"; # See ref [1]
+
+    # Set dynamic variables according to environment
+    out_dir="$(pwd)"; # output to present working directory
+    out_filename="$(date +%Y%m%dT%H%M%S%z)"_"$playlistId"..playlist_items.json;
+    out_path="$out_dir"/"$out_filename";
+    
+    # Make initial curl request to the YouTube Data API
+    response="$(get_response "$playlistId" "$apiKey")";
+
+    # # debug
+    # if check_next_page "$response"; then
+    #     yell "DEBUG:nextPageToken detected";
+    # fi;
+
+    # Make follow-up requests. See ref [3]
+    n=0;
+    while check_next_page "$response"; do        
+        # Get page token from response
+        pageToken="$(get_next_page "$response")";
+        # Update response
+        response="$(get_response "$playlistId" "$apiKey" "$pageToken")";
+        # Record response
+        out_list+=("$response");
+        
+        # Sanity check
+        if [[ $n -gt $max_api_calls ]]; then die "FATAL:Too many API calls:$n"; fi;
+        ((n++));
+    done;
+
+    # Write results
+    printf "%s\n" "${out_list[@]}" > "$out_path";
+
+    # Print stats
+    yell "STATUS:Performed $n API calls."
+    out_lc="$(printf "%s\n" "${out_list[@]}" | wc -l)";
+    yell "STATUS:Wrote $out_lc lines to $out_path";
+    
+    # Use jq to extract the publishedAt field for each playlist item
+    #PUBLISHED_AT=$(echo "$response" | jq -r '.items[].snippet.publishedAt')
+
+}; # main program
+
+main "$@";
+
+# Author: Steven Baltakatei Sandoval
+# License: GPLv3+