feat(unitproc/bkt-replace_contractions):Add Bash function
[BK-2020-03.git] / user / get_ytpljson.sh
1 #!/bin/bash
2 # Usage: get_ytpljson.sh arg1 arg2
3 # Input: posargs: arg1: YouTube playlist ID
4 # arg2: Google API key
5 # Output: file: JSON file
6 # Version: 0.0.1
7
# Upper bound on YouTube API requests per run; main() aborts with a
# "Too many API calls" error when this limit is passed.
max_api_calls=100
9
yell() {
  # Desc: Prints the script path followed by all args to stderr
  # Input: args: message words (joined into one string via "$*")
  # Output: stderr: "<script path>: <message>"
  printf '%s: %s\n' "$0" "$*" 1>&2
}
die() {
  # Desc: Reports a message via yell(), then terminates with status 111
  # Input: args: message words
  # Output: stderr: message (as formatted by yell())
  #         exit code: 111
  # Depends: BK-2020-03: yell()
  yell "$*"
  exit 111
}
must() {
  # Desc: Runs its args as a command; on failure, reports the command
  #   line through die() (which exits the shell).
  # Input: args: command and its arguments
  # Depends: BK-2020-03: die()
  if ! "$@"; then
    die "cannot $*"
  fi
}
get_response() {
  # Desc: Requests one page of playlist items (part=snippet) from the
  #   YouTube Data API v3 playlistItems endpoint.
  # Input: arg1: YouTube playlist ID
  #        arg2: Google API key
  #        arg3: pageToken (optional)
  # Output: stdout: JSON response from googleapis.com
  #         exit code: exit status of curl
  # Depends: curl 7.81.0
  #          BK-2020-03: die()
  local playlist_id api_key page_token endpoint;
  local -a curl_args;

  # Check inputs before touching any positional parameter
  if [[ $# -lt 2 ]]; then die "FATAL:Incorrect arg count:$#"; fi;

  playlist_id="$1";
  api_key="$2";
  page_token="${3:-}";

  endpoint="https://www.googleapis.com/youtube/v3/playlistItems";

  # Let curl build the query string (--get) and percent-encode each value
  # (--data-urlencode) so that characters such as '&', '#' or spaces in an
  # argument cannot corrupt the request URL.
  # --show-error keeps transport errors visible despite --silent.
  curl_args=(
    --silent --show-error --get
    --data-urlencode "part=snippet"
    --data-urlencode "playlistId=${playlist_id}"
    --data-urlencode "key=${api_key}"
  );

  # Append page token only if one was supplied
  if [[ -n "$page_token" ]]; then
    curl_args+=(--data-urlencode "pageToken=${page_token}");
  fi;

  curl "${curl_args[@]}" "$endpoint";
}; # Stdout: JSON from YouTube v3 API
49 check_next_page() {
50 # Input: arg1: json string
51 # Depends: jq 1.6
52
53 # Checks if key ".nextPageToken" present
54 if jq -e '.nextPageToken' < <(printf "%s" "$1") 1>/dev/random 2>&1; then
55 return 0;
56 else
57 return 1;
58 fi;
59
60 }; # returns true if '.nextPageToken' present
get_next_page() {
  # Desc: Extracts the page token needed to request the next result page
  # Input: arg1: json string containing the key 'nextPageToken'
  # Output: stdout: the value of the first 'nextPageToken' key
  #         exit code: 0: key '.nextPageToken' detected
  #                    1: key '.nextPageToken' not detected
  # Depends: jq 1.6
  #          BK-2020-03: die()
  local token;
  if [[ $# -ne 1 ]]; then die "Incorrect arg count:$#"; fi;

  # A single jq run both tests for the key (-e: non-zero exit status on
  # null/false or error) and yields its raw value (-r). jq diagnostics go
  # to /dev/null; the original sent output to /dev/random by mistake.
  if token="$(jq -er '.nextPageToken' <<< "$1" 2> /dev/null)"; then
    # Keep only the first output line (same effect as 'head -n1')
    printf "%s" "${token%%$'\n'*}";
    return 0;
  fi;
  return 1;
}; # stdout: value from key "pageToken"
78
main() {
  # Desc: Downloads every page of a YouTube playlist's items and writes
  #   the JSON responses, one per API call, to a timestamped file in the
  #   present working directory.
  # Input: arg1: YouTube playlist ID  (see ref [2])
  #        arg2: Google API key       (see ref [1])
  #        var: max_api_calls (maximum number of API requests per run)
  # Output: file: <pwd>/<timestamp>_<playlist ID>..playlist_items.json
  #         stderr: status messages
  # Depends: bash 5.1.16, GNU Coreutils 8.32 (date)
  #          BK-2020-03: yell(), die()
  #          get_response(), check_next_page(), get_next_page()
  # Ref/Attrib: [1]: "Obtaining authorization credentials" https://developers.google.com/youtube/registering_an_application
  #             [2]: "Implementation: Playlists" https://developers.google.com/youtube/v3/guides/implementation/playlists
  #             [3]: "Implementation: Pagination", https://developers.google.com/youtube/v3/guides/implementation/pagination

  local n out_dir out_filename out_path out_lc;
  local playlistId apiKey response pageToken;
  local -a out_list=();

  # Check input
  if [[ $# -ne 2 ]]; then die "FATAL:Incorrect number of args:$#"; fi;

  # Set the playlist ID and API key
  playlistId="$1"; # See ref [2]
  apiKey="$2"; # See ref [1]

  # Set dynamic variables according to environment
  out_dir="$(pwd)"; # output to present working directory
  out_filename="$(date +%Y%m%dT%H%M%S%z)"_"$playlistId"..playlist_items.json;
  out_path="$out_dir"/"$out_filename";

  # Make initial request to the YouTube Data API.
  # The failure check sits on a plain assignment so that the status of
  # the command substitution is not masked.
  response="$(get_response "$playlistId" "$apiKey")" \
    || die "FATAL:API request failed.";
  n=1; # the initial request counts as an API call

  # Record the first page; otherwise page 1 of every playlist is lost
  out_list+=("$response");

  # Make follow-up requests. See ref [3]
  while check_next_page "$response"; do
    # Sanity check, performed before spending another API call
    if [[ $n -ge ${max_api_calls:-100} ]]; then
      die "FATAL:Too many API calls:$n";
    fi;

    # Get page token from response
    pageToken="$(get_next_page "$response")";
    # Update response
    response="$(get_response "$playlistId" "$apiKey" "$pageToken")" \
      || die "FATAL:API request failed.";
    # Record response
    out_list+=("$response");
    n=$((n + 1));
  done;

  # Write results
  printf "%s\n" "${out_list[@]}" > "$out_path";

  # Print stats
  yell "STATUS:Performed $n API calls.";
  out_lc="$(printf "%s\n" "${out_list[@]}" | wc -l)";
  yell "STATUS:Wrote $out_lc lines to $out_path";

}; # main program
136
# Entry point: hand all command-line arguments to main() unchanged.
main "$@"
138
139 # Author: Steven Baltakatei Sandoval
140 # License: GPLv3+