From f9779d87f3ff8a10737c3032c8594fce56761379 Mon Sep 17 00:00:00 2001 From: Steven Baltakatei Sandoval Date: Mon, 20 Feb 2023 12:39:14 +0000 Subject: [PATCH] feat(user/get_ytpljson.sh):Add script to get YT playlist metadata - Note: Requests YouTube playlist metadata in JSON format from `https://www.googleapis.com`. --- doc/user/get_ytpljson.sh.org | 36 +++++++++ user/get_ytpljson.sh | 140 +++++++++++++++++++++++++++++++++++ 2 files changed, 176 insertions(+) create mode 100644 doc/user/get_ytpljson.sh.org create mode 100755 user/get_ytpljson.sh diff --git a/doc/user/get_ytpljson.sh.org b/doc/user/get_ytpljson.sh.org new file mode 100644 index 0000000..5136424 --- /dev/null +++ b/doc/user/get_ytpljson.sh.org @@ -0,0 +1,36 @@ +* get_ytpljson.sh Information + +#+TITLE: get_ytpljson.sh Information +#+AUTHOR: Steven Baltakatei Sandoval +#+DATE:2023-02-20 +#+EMAIL:baltakatei@gmail.com +#+LANGUAGE: en +#+OPTIONS: toc:nil + +Created by [[https://baltakatei.com][Steven Baltakatei Sandoval]] on +2023-02-20T12:38+00 +under a [[https://creativecommons.org/licenses/by-sa/4.0/][CC BY-SA 4.0]] (🅭🅯🄎4.0) license and last updated on +2023-02-20T12:38+00. + +** Summary +This script downloads and saves YouTube playlist metadata as a JSON +file in the current working directory. + +** Versions +| Version | Description | +|---------+-------------------------------------------------| +| 0.0.1 | Initial version compatible with YouTube API v3. | +| | | + +** Background +Google provides an API for downloading playlist metadata in response +to receiving a correctly formatted URL containing the playlist ID and +an API key. 
#!/bin/bash
# Usage: get_ytpljson.sh arg1 arg2
# Input:  posargs: arg1: YouTube playlist ID
#                  arg2: Google API key
# Output: file: JSON file written to the current working directory. Each
#               API response page is written in request order, one page
#               after another.
#         stderr: status and error messages
# Version: 0.0.1
# Depends: bash 5.1.16, curl 7.81.0, jq 1.6, GNU Coreutils 8.32 (date)
# Ref/Attrib: [1]: "Obtaining authorization credentials"
#                  https://developers.google.com/youtube/registering_an_application
#             [2]: "Implementation: Playlists"
#                  https://developers.google.com/youtube/v3/guides/implementation/playlists
#             [3]: "Implementation: Pagination",
#                  https://developers.google.com/youtube/v3/guides/implementation/pagination

# Upper bound on the total number of API requests made in one run.
max_api_calls="100";

yell() { echo "$0: $*" >&2; } # print script path and all args to stderr
die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status
must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails

get_response() {
    # Desc: Requests one page of playlist items from the YouTube Data API.
    # Input: arg1: YouTube playlist ID
    #        arg2: Google API key
    #        arg3: pageToken (optional; empty string means "first page")
    # Output: stdout: JSON response from googleapis.com
    #         exit code: curl's exit status (non-zero on transport failure)
    # Depends: curl 7.81.0
    #          BK-2020-03: die()
    # Note: When called inside $(...), die() only terminates the subshell;
    #       callers must check this function's exit status themselves.
    # Note: The API key is passed on curl's command line and is therefore
    #       visible to other local users through the process list.
    local playlist_id api_key page_token;
    local -a curl_args;

    # Validate the argument count before touching the arguments
    if [[ $# -lt 2 ]]; then die "FATAL:Incorrect arg count:$#"; fi;
    playlist_id="$1";
    api_key="$2";
    page_token="${3:-}";

    # Let curl build the query string so every value is URL-encoded instead
    # of being pasted into the URL verbatim.
    curl_args=(
        --silent --show-error --get
        --data-urlencode "part=snippet"
        --data-urlencode "playlistId=$playlist_id"
        --data-urlencode "key=$api_key"
    );

    # Append page token only if one was supplied
    if [[ -n "$page_token" ]]; then
        curl_args+=(--data-urlencode "pageToken=$page_token");
    fi;

    curl "${curl_args[@]}" "https://www.googleapis.com/youtube/v3/playlistItems";
}; # Stdout: JSON from YouTube v3 API

check_response() {
    # Desc: Tells a usable API response apart from an error payload or
    #       something that is not JSON at all.
    # Input: arg1: json string
    # Output: exit code: 0: arg1 is a JSON object without an 'error' key
    #                    1: anything else (error object, non-JSON, empty)
    # Depends: jq 1.6
    if jq -e 'type == "object" and (has("error") | not)' \
          < <(printf "%s" "$1") 1>/dev/null 2>&1; then
        return 0;
    else
        return 1;
    fi;
}; # returns true if response is a non-error JSON object

check_next_page() {
    # Input: arg1: json string
    # Output: exit code: 0: key '.nextPageToken' present
    #                    1: key absent, null, or arg1 is not valid JSON
    # Depends: jq 1.6

    # jq output is irrelevant here; only its exit status matters.
    if jq -e '.nextPageToken' < <(printf "%s" "$1") 1>/dev/null 2>&1; then
        return 0;
    else
        return 1;
    fi;
}; # returns true if '.nextPageToken' present

get_next_page() {
    # Input: arg1: json string containing the key 'nextPageToken'
    # Output: stdout: the value of the first 'nextPageToken' key
    #         exit code: 0: key '.nextPageToken' detected
    #                    1: key '.nextPageToken' not detected
    # Depends: jq 1.6
    #          BK-2020-03: die()
    local token;
    if [[ $# -ne 1 ]]; then die "Incorrect arg count:$#"; fi;

    # A single jq run both detects the key (-e) and extracts its raw value (-r)
    if ! token="$(jq -er '.nextPageToken' < <(printf "%s" "$1") 2>/dev/null)"; then
        return 1;
    fi;

    # Emit only the first line, without a trailing newline
    printf "%s" "${token%%$'\n'*}";
    return 0;
}; # stdout: value from key "pageToken"

main() {
    # Desc: Downloads every page of a playlist's item metadata and saves the
    #       pages to a timestamped JSON file in the working directory.
    # Input: arg1: YouTube playlist ID (see ref [2])
    #        arg2: Google API key      (see ref [1])
    #        var:  max_api_calls
    # Output: file: <timestamp>_<playlist ID>..playlist_items.json
    #         stderr: status messages
    #         exit code: 0 on success; 111 on any fatal error
    # Depends: bash 5.1.16, GNU Coreutils 8.32 (date)
    #          BK-2020-03: yell(), die()
    local playlist_id api_key out_dir out_filename out_path;
    local response page_token api_msg out_lc dep n;
    local -a out_list;

    # Check input
    if [[ $# -ne 2 ]]; then die "FATAL:Incorrect number of args:$#"; fi;
    for dep in curl jq; do
        if ! command -v "$dep" 1>/dev/null 2>&1; then
            die "FATAL:Missing dependency:$dep";
        fi;
    done;

    # Set the playlist ID and API key
    playlist_id="$1"; # See ref [2]
    api_key="$2";     # See ref [1]

    # Set dynamic variables according to environment
    out_dir="$(pwd)"; # output to present working directory
    out_filename="$(date +%Y%m%dT%H%M%S%z)"_"$playlist_id"..playlist_items.json;
    out_path="$out_dir"/"$out_filename";

    # Request pages until the API stops returning a nextPageToken.
    # See ref [3]. The first request is made with an empty page token.
    out_list=();
    page_token="";
    n=0;
    while true; do
        # Sanity check: refuse to exceed the request budget
        if [[ $n -ge $max_api_calls ]]; then die "FATAL:Too many API calls:$n"; fi;

        # get_response runs in a subshell, so its failure must be caught here
        if ! response="$(get_response "$playlist_id" "$api_key" "$page_token")"; then
            die "FATAL:Request failed on API call $((n + 1))";
        fi;
        n=$((n + 1));

        # Reject API error payloads and non-JSON replies
        if ! check_response "$response"; then
            api_msg="$(jq -r '.error.message // empty' < <(printf "%s" "$response") 2>/dev/null)";
            die "FATAL:API returned an error or invalid response:$api_msg";
        fi;

        # Record every page, including the first one
        out_list+=("$response");

        # Stop when there is no further page
        if ! check_next_page "$response"; then break; fi;
        if ! page_token="$(get_next_page "$response")"; then
            die "FATAL:Could not read nextPageToken after API call $n";
        fi;
    done;

    # Write results
    if ! printf "%s\n" "${out_list[@]}" > "$out_path"; then
        die "FATAL:Could not write to $out_path";
    fi;

    # Print stats
    yell "STATUS:Performed $n API calls."
    out_lc="$(printf "%s\n" "${out_list[@]}" | wc -l)";
    yell "STATUS:Wrote $out_lc lines to $out_path";
}; # main program

main "$@";

# Author: Steven Baltakatei Sandoval
# License: GPLv3+