]> zdv2.bktei.com Git - BK-2020-03.git/blob - user/mw_wc2sp.sh
feat(user/mw_wc2sp.sh):Use top and bottom nav links
[BK-2020-03.git] / user / mw_wc2sp.sh
1 #!/bin/bash
2 # Desc: Convert wikicode to subpages
3 # Usage: mw_wc2sp.sh [path file]
4 # Input: arg1 path input wikicode file
5 # Output: files wikicode file tree
6 # Depends: Bash 5.1.16, GNU Coreutils 8.32
7 # Version: 0.4.0
8
9 re_sp='^(<!-- @subpage:)(.*)([ ]*-->)$'; # subpage marker pattern
10 d_out=./wikicode/; # default output dir
11 f_spl="subpage_list.txt"; # subpage title list
12 p_spl="${d_out}/subpages/${f_spl}";
13 f_splv="subpage_list_validated.txt"; # subpage title list (validated)
14 p_splv="${d_out}/subpages/${f_splv}";
15 f_splwc="subpages.wc"; # subpage list wikicode
16 p_splwc="${d_out}/${f_splwc}";
17
yell() {
    # Desc: Print script path ($0) and all args to stderr
    # Input: args  str  message words
    # Output: stderr
    echo "$0: $*" >&2;
};
die() {
    # Desc: Report args via yell(), then terminate with exit status 111
    # Depends: yell()
    yell "$*";
    exit 111;
};
must() {
    # Desc: Run args as a command; on failure report the command and exit
    # Input: args  command and its arguments
    # Depends: die()
    if ! "$@"; then
        die "cannot $*";
    fi;
};
get_path_fork_level() {
    # Desc: Get fork level from two paths, i.e. the number of leading
    #   `/`-delimited elements that both paths have in common.
    # Usage: get_path_fork_level path1 path2
    # Example: get_path_fork_level "a/b/c" "a/b/d"; # prints 2
    # Input: arg1  str  path
    #        arg2  str  path
    # Output: stdout  int  fork level
    #         stderr  error message (mixed absolute/relative paths)
    # Return: 0 on success
    #         1 if exactly one of the paths is absolute
    # Depends: GNU Coreutils 8.32 (tr), GNU sed 4.8
    # Version: 0.0.2
    local path1="$1";
    local path2="$2";
    local -a parts1 parts2;
    local i fork_level;

    # Squeeze multiple slashes and remove trailing slashes
    path1="$(printf '%s\n' "$path1" | tr -s '/' | sed 's:/*$::' )";
    path2="$(printf '%s\n' "$path2" | tr -s '/' | sed 's:/*$::' )";

    # Check for mixed absolute/relative paths
    if [[ "$path1" == /* ]] && [[ "$path2" == /* ]]; then
        # Both absolute: remove initial / so element 0 is the first name
        path1="${path1#/}";
        path2="${path2#/}";
    elif [[ "$path1" == /* ]] || [[ "$path2" == /* ]]; then
        # Diagnostics go to stderr only; stdout is reserved for the
        # result because callers capture it with $(...).
        echo "FATAL:Mixed relative and absolute paths not supported." 1>&2;
        return 1;
    fi;

    # Save paths as arrays with `/` as element delimiter. IFS is set for
    # the `read` commands only, so it does not affect anything else.
    IFS='/' read -ra parts1 <<< "$path1";
    IFS='/' read -ra parts2 <<< "$path2";

    # Get fork level by counting identical path elements from rootside
    fork_level=0;
    for (( i=0; i<${#parts1[@]} && i<${#parts2[@]}; i++ )); do
        if [[ "${parts1[i]}" != "${parts2[i]}" ]]; then break; fi;
        fork_level=$((fork_level + 1));
    done;

    printf '%s\n' "$fork_level";
    return 0;
}; # Get fork level int from two paths
prune_path_rootside() {
    # Desc: Prunes a path from the root-side to a specified prune level,
    #   i.e. drops the first N `/`-delimited elements.
    # Usage: prune_path_rootside path level
    # Example: prune_path_rootside "a/b/c" 1; # prints b/c
    # Input: arg1  str  path
    #        arg2  int  prune level (0-indexed, non-negative; default 0)
    # Output: stdout  str  pruned path (empty if level exceeds depth)
    #         stderr  error message (invalid prune level)
    # Return: 0 on success
    #         1 if prune level is not a non-negative integer
    # Version: 0.0.2
    local path="$1";
    local prune_level="${2:-0}";
    local flag_root=false;
    local -a parts;
    local i;
    local pruned_path="";

    # Reject anything that is not a plain non-negative integer; such a
    # value would otherwise be evaluated as an arithmetic expression.
    if [[ ! "$prune_level" =~ ^[0-9]+$ ]]; then
        echo "FATAL:Prune level must be a non-negative integer:${prune_level}" 1>&2;
        return 1;
    fi;
    prune_level=$((10#$prune_level)); # force base 10 (e.g. "08")

    # Check for absolute or relative path
    if [[ "$path" == /* ]]; then
        flag_root=true;
        # Remove initial /
        path="${path#/}";
    fi;

    # Save path as array with `/` as element delimiter
    IFS='/' read -ra parts <<< "$path";

    # Assemble pruned path from prune_level
    for (( i=prune_level; i<${#parts[@]}; i++ )); do
        pruned_path+="${parts[i]}/";
    done;

    # Trim trailing `/` delimiter(s)
    while [[ "$pruned_path" == */ ]]; do
        pruned_path="${pruned_path%/}";
    done;

    # Restore initial / only if nothing was pruned from an absolute path
    if [[ "$flag_root" == "true" ]] && (( prune_level == 0 )); then
        pruned_path="/${pruned_path}";
    fi;

    # Output pruned path
    printf '%s\n' "$pruned_path";
    return 0;
}; # prune path rootside to int specified level
get_path_hierarchy_level() {
    # Desc: Outputs hierarchy level of input paths. The level is the
    #   number of `/` separators left after squeezing repeated slashes
    #   and dropping trailing ones; for absolute paths the leading `/`
    #   is not counted.
    # Example: $ cat lines.txt | get_path_hierarchy_level
    # Input: stdin  str  lines with /-delimited paths
    # Output: stdout  int  hierarchy level of each path, one per line
    #         stderr  error message (mixed absolute/relative paths)
    # Return: 0 on success
    #         1 if absolute and relative paths are mixed (nothing is
    #           written to stdout in that case)
    # Depends: GNU Coreutils 8.32 (tr), GNU sed 4.8, awk
    # Version: 0.0.2

    local line level;
    local flag_root="";
    local n=0;
    local -a output=();

    # `|| [[ -n "$line" ]]` keeps a final line that lacks a newline.
    while read -r line || [[ -n "$line" ]]; do
        if (( n == 0 )); then
            # The first line decides whether the list is absolute or
            # relative; every later line must agree with it.
            if [[ "$line" =~ ^/ ]]; then
                flag_root=true;
            else
                flag_root=false;
            fi;
        elif { [[ "$flag_root" == "true" ]] && [[ ! "$line" =~ ^/ ]]; } || \
             { [[ "$flag_root" == "false" ]] && [[ "$line" =~ ^/ ]]; }; then
            echo "FATAL:Mixed relative and absolute paths not supported." 1>&2; return 1;
        fi;

        # Squeeze multiple slashes and remove trailing slashes
        line="$(printf '%s\n' "$line" | tr -s '/' | sed 's:/*$::' )";

        # Count the number of slashes to determine hierarchy level
        # (awk reports NF-1, which is -1 for an empty line)
        level="$(awk -F'/' '{print NF-1}' <<< "$line" )";
        if [[ "$flag_root" == "true" ]]; then level=$((level - 1)); fi;

        # Append to output
        output+=("$level");
        n=$((n + 1));
    done;

    # Print output (nothing at all if no lines were read)
    if (( ${#output[@]} > 0 )); then
        printf "%s\n" "${output[@]}";
    fi;
}; # return hierarchy level of lines as integers
validate_subpage_list() {
    # Desc: Check for illegal characters in subpage titles and escape
    #   `&`, `"` and `'` as numeric character references.
    # Input: stdin  unvalidated subpage list, one title per line
    # Output: stdout  validated subpage list, one title per line
    #         stderr  error messages
    # Return: 1 if sed fails or the read loop ends with an error;
    #         exits via die() (status 111) on an illegal title
    # Depends: BK-2020-03 die()
    #          GNU sed v4.8
    local line;
    local re_illegal='[][><|}{#_]'; # match illegal page names chars #, <, >, [, ], _, {, |, }
    local re_ts=' $'; # match trailing space

    while read -r line; do

        # Reject chars illegal in Mediawiki page titles.
        if [[ "$line" =~ $re_illegal ]]; then
            die "FATAL:Illegal char. Not allowed: #, <, >, [, ], _, {, |, }:$line";
        fi;

        # Reject trailing spaces.
        # NOTE(review): `read` with the default IFS already strips
        # leading/trailing blanks from $line, so this check cannot fire
        # for titles read here; kept as a guard. Switch to `IFS= read`
        # if trailing spaces should really be rejected.
        if [[ "$line" =~ $re_ts ]]; then
            die "FATAL:Trailing spaces not allowed:$line";
        fi;

        # Replace some chars with HTML-style codes
        ## replace ampersand & with &#38; # must be first
        ## replace double quote " with &#34;
        ## replace single quote ' with &#39;
        # On failure report to stderr (stdout is the validated list) and
        # stop, so a half-processed list is never taken for valid.
        line="$(sed \
                  -e 's/&/\&#38;/g' \
                  -e 's/"/\&#34;/g' \
                  -e "s/'/\&#39;/g" \
                  <<< "$line" )" || { echo "FATAL:Error running sed." 1>&2; return 1; };
        printf "%s\n" "$line";
    done || {
        echo "FATAL:Error reading stdin." 1>&2; return 1; };
};
check_input() {
    # Desc: Abort the script unless arg1 is the path of an existing
    #   regular file.
    # Input: arg1  str  path of input file
    # Depends: die()
    local candidate="$1";
    [[ -f "$candidate" ]] || die "FATAL:Not a file path:$1";
}; # check input
assemble_subpage_ftree() {
    # Desc: Identify subpage markers in input wikicode file to create
    #   subpage list and subpage content files. Every input line
    #   (marker lines included) is appended verbatim to the content
    #   file of the most recently seen marker; lines before the first
    #   marker go to `presubpage.content`.
    # Input: var  fp_in  path input file
    #        var  re_sp  regex for identifying subpage markers
    #        var  d_out  path directory for output
    #        var  p_spl  path subpage list file
    # Output: files  ${d_out}/subpages/<subpage path>.content
    #         file   ${p_spl} (one subpage path appended per marker)
    #         stderr status messages
    # Return: exits via die() if a content file already exists or a
    #   file/directory cannot be written
    # Depends: yell(), die(), must(), GNU Coreutils 8.32 (mkdir, touch, dirname)
    local line marker sp_path spc_path spc_dir;

    yell "STATUS:Running assemble_subpage_ftree()."; # debug

    spc_path="${d_out}/subpages/presubpage.content"; # default destination for content before subpage detected
    spc_dir="$(dirname "$spc_path"; )";

    ## Process input line-by-line
    ## `IFS=` keeps leading/trailing whitespace of content lines (it is
    ## significant in wikicode); `|| [[ -n ... ]]` keeps a final line
    ## that lacks a newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        ### Check for subpage marker
        ### Markers are matched against a whitespace-trimmed copy so an
        ### indented marker line is still recognised.
        read -r marker <<< "$line";
        if [[ "$marker" =~ $re_sp ]]; then
            #### Identify new subpage path (group 2, minus trailing spaces)
            sp_path="${BASH_REMATCH[2]}";
            sp_path="${sp_path%"${sp_path##*[! ]}"}";
            #### Update subpage content file path
            spc_path="${d_out}/subpages/${sp_path}.content";
            spc_dir="$(dirname "$spc_path"; )";
            #### Prepare file destination
            if [[ ! -d "$spc_dir" ]]; then
                must mkdir -p "$spc_dir" && \
                    yell "STATUS:Created dir:${spc_dir}";
            fi;
            if [[ -f "$spc_path" ]]; then
                die "FATAL:File already exists:${spc_path}";
            else
                must touch "$spc_path";
            fi;
            #### Append subpage path to subpage list
            printf "%s\n" "$sp_path" >> "$p_spl" || \
                die "FATAL:Cannot append to subpage list:${p_spl}";
        fi;
        ### Write subpage content
        ### The directory may not exist yet when content precedes the
        ### first marker. A failed redirection would not be seen by
        ### must(), so the write is checked directly.
        if [[ ! -d "$spc_dir" ]]; then must mkdir -p "$spc_dir"; fi;
        printf "%s\n" "$line" >> "$spc_path" || \
            die "FATAL:Cannot write subpage content:${spc_path}";
    done < "${fp_in}" || die "FATAL:Cannot read input file:${fp_in}";

    yell "STATUS:Finished assemble_subpage_ftree()."; # debug
}; # process input wikicode into subpage content files and subpage list
print_wc_content() {
    # Desc: Print the subpage content file wrapped between an opening
    #   `----<onlyinclude>` line and a closing `</onlyinclude>----` line.
    # Input: var  p_spc  path subpage content
    # Output: stdout
    local wc_open="\n----<onlyinclude>\n";
    local wc_close="\n</onlyinclude>----\n";

    printf -- '%b' "$wc_open";
    cat "$p_spc";
    printf -- '%b' "$wc_close";
}; # print wikicode content
print_wc_footer() {
    # Desc: Print the page footer: References, Footnotes and Comments
    #   sections, an end-of-page comment and a final blank line.
    # Output: stdout
    # Note: `%b` is reused for each argument, so the sections are
    #   emitted back to back with their `\n` escapes expanded.
    printf -- '%b' \
           "\n==References==\n<references />\n" \
           "\n==Footnotes==\n<references group=fn />\n" \
           "\n==Comments==\n<references group=cmt />\n" \
           "\n<!-- End of Page -->\n" \
           "\n";
}; # print wikicode footer
print_wc_nav() {
    # Desc: Print navigation wikilinks as one line preceded by a blank
    #   line. A Next link is printed only if a next page exists, a
    #   Previous link only if a previous page exists; the Up link is
    #   always printed. A page with neither neighbour (single-subpage
    #   document) therefore gets only `[[../|Up]]` instead of a Next
    #   link with an empty target.
    # Input: var  lprev      previous subpage title (empty if none)
    #        var  lnext      next subpage title (empty if none)
    #        var  link_prev  link target for previous subpage
    #        var  link_next  link target for next subpage
    # Output: stdout
    local nav="";

    # Assemble navigation link wikicode in the order Next, Previous, Up
    if [[ -n "$lnext" ]]; then nav+="[[${link_next}|Next]], "; fi;
    if [[ -n "$lprev" ]]; then nav+="[[${link_prev}|Previous]], "; fi;
    nav+="[[../|Up]]";

    printf "\n%s\n" "$nav";
}; # print wikicode navigation links
create_output_wikicode() {
    # Desc: Use subpage list and subpage content files to create
    #   output subpage wikicode. For every listed subpage a `.wc` file
    #   is written containing navigation links, the subpage content,
    #   navigation links again and a footer; a link to each subpage is
    #   also appended to the subpage list wikicode file.
    # Input: var  p_spl   path subpage list file
    #        var  p_splv  path subpage list file (validated)
    #        var  p_splwc path subpage list wikicode file
    #        file ${p_spl}  subpage list file
    #        file ${p_splv} subpage list file (validated)
    #        var  d_out   path directory for output
    # Depends: get_path_fork_level()
    #          prune_path_rootside()
    #          get_path_hierarchy_level()
    #          validate_subpage_list()
    #          print_wc_nav(), print_wc_content(), print_wc_footer()
    #          yell(), die(), must()
    # Output: files subpages in $d_out
    #         stdout debug output of the `declare -p` / printf lines
    # Note: Apart from the two arrays, no variable here is declared
    #   local. lprev/lcurr/lnext and the *_hier trackers are read before
    #   they are first assigned, so they are expected to be unset or
    #   empty on entry. print_wc_nav() and print_wc_content() read
    #   lprev, lnext, link_prev, link_next and p_spc set in this loop.

    yell "Running create_output_wikicode()."; # debug

    # Read subpage list files into arrays.
    local -a lines_spl lines_splv;
    mapfile -t lines_spl < "$p_spl";
    mapfile -t lines_splv < "$p_splv";
    ## Add extra blank lines for couple line comparisons
    ## (the empty last element is the "next" of the final subpage)
    lines_spl+=('');
    lines_splv+=('');
    declare -p lines_spl; # debug

    # Check that subpage list files have same line counts
    lc_spl="${#lines_spl[@]}";
    lc_splv="${#lines_splv[@]}";
    if [[ ! "$lc_spl" -eq "$lc_splv" ]]; then
        die "FATAL:Different line counts for subpage lists:$(declare -p lc_spl lc_splv;)";
    fi;
    declare -p lc_spl lc_splv; # debug

    # Initialize subpage list wikicode file
    # (the header is appended with `>>`, so an existing file is extended)
    must touch "$p_splwc";
    printf "==Stats==\n\n==Subpages==\n" >> "$p_splwc";

    # Read content files according to subpage list file
    #   Note: $i corresponds to “next” line ($lnext). Therefore, use
    #     $((i-1)) to access the “current” ($lcurr) line. This offset is
    #     because subpage list lines are compared using lagging line
    #     comparison.
    for i in "${!lines_spl[@]}"; do
        declare -p i; # debug;

        # Check subpage content files
        # At i=0 the subscript i-1 is -1; bash resolves a negative
        # subscript relative to the end of the array, i.e. the empty
        # element appended above. The values computed for i=0 are not
        # used: every use below is guarded by `"$i" -gt 0` or comes
        # after the first-iteration `continue`.
        f_spc="${lines_spl[i-1]}.content";
        p_spc="${d_out}/subpages/${f_spc}";
        declare -p f_spc p_spc;
        ## Exit if subpage content file missing
        if [[ "$i" -gt 0 ]] && [[ ! -f "$p_spc" ]]; then
            die "FATAL:Subpage content file missing:$p_spc"; fi;

        # Prepare output subpage wikicode files
        f_spwc="${lines_splv[i-1]}.wc";
        p_spwc="${d_out}/subpages/${f_spwc}"; # use validated subpage name
        declare -p f_spwc p_spwc; # debug
        if [[ "$i" -gt 0 ]]; then
            must touch "$p_spwc";
            ## Append subpage list wikicode file
            printf "* [[/%s]]\n" "${lines_splv[i-1]}" >> "$p_splwc";
        fi;

        # Advance input lines
        # (shift the three-line window: after this, lcurr is the title
        # at index i-1 and lnext the title at index i)
        lprev="$lcurr";
        lcurr="$lnext";
        lnext="${lines_splv[i]}";
        declare -p lprev lcurr lnext; # debug

        # Update hierarchy tracker states
        lprev_hier="$lcurr_hier";
        lcurr_hier="$lnext_hier";
        lnext_hier="$(echo "$lnext" | get_path_hierarchy_level)";

        # Skip first iteration
        # (it only primes lnext/lnext_hier; there is no current page yet)
        if [[ "$i" -eq 0 ]]; then
            yell "$i:DEBUG:Skipping first iteration."; # debug
            printf -- "----\n" 1>&2; # debug
            continue; fi;

        # Get path fork levels
        # (count of leading path elements shared with the current page)
        fork_level_next="$(get_path_fork_level "$lcurr" "$lnext")";
        fork_level_prev="$(get_path_fork_level "$lcurr" "$lprev")";

        # Count relative ups needed (`../`)
        relups_next="$((lcurr_hier - fork_level_next + 1))";
        relups_prev="$((lcurr_hier - fork_level_prev + 1))";

        # Initialize Next and Prev links with relative ups to fork.
        link_next="";
        for (( j=0; j<relups_next; j++ )); do link_next+="../"; done;
        # Zero ups means the fork covers the whole current path, so the
        # Next link gets a leading `/` instead of `../`.
        if [[ "$relups_next" -eq 0 ]]; then link_next+="/"; fi; # handle new subpage path dive
        link_prev="";
        for (( j=0; j<relups_prev; j++ )); do link_prev+="../"; done;

        # Append branchs from fork to Next and Prev targets
        link_next+="$(prune_path_rootside "$lnext" "$fork_level_next")";
        link_prev+="$(prune_path_rootside "$lprev" "$fork_level_prev")";

        # Print subpage content
        # (navigation links are written both above and below the content)
        print_wc_nav >> "$p_spwc";
        print_wc_content >> "$p_spwc";
        print_wc_nav >> "$p_spwc";
        print_wc_footer >> "$p_spwc";

        declare -p i lprev lcurr lnext lprev_hier lcurr_hier lnext_hier; # debug
        declare -p fork_level_next fork_level_prev relups_next relups_prev; # debug
        declare -p link_next link_prev; # debug
        printf "====================\n" # debug
    done;

    # Add footer to subpage list wikicode file
    print_wc_footer >> "$p_splwc";

    yell "STATUS:Finished create_output_wikicode()."; # debug
}; # generate output subpage wikicode
main() {
    # Desc: Run the conversion: check the input path, split the input
    #   wikicode into subpage content files, validate the subpage list
    #   and generate the output wikicode. Stops at the first step that
    #   fails instead of continuing with missing intermediate files.
    # Input: arg1  str  path input wikicode file
    #        var   p_spl   path subpage list file
    #        var   p_splv  path subpage list file (validated)
    # Output: var  fp_in  (global) input file path
    # Depends: check_input(), assemble_subpage_ftree(),
    #          validate_subpage_list(), create_output_wikicode(), die()
    check_input "$@";
    declare -g fp_in="$1"; # input file path
    assemble_subpage_ftree || die "FATAL:assemble_subpage_ftree() failed.";
    # The list file is only created when a subpage marker is found.
    if [[ ! -f "$p_spl" ]]; then
        die "FATAL:Subpage list not found (no subpage markers in input?):${p_spl}";
    fi;
    validate_subpage_list < "$p_spl" > "$p_splv" || \
        die "FATAL:Could not validate subpage list:${p_spl}";
    create_output_wikicode || die "FATAL:create_output_wikicode() failed.";
}; # main program
382
383 main "$@";
384
385 # Author: Steven Baltakatei Sandoval
386 # License: GPLv3+
387
388
389 # Example input:
390 # ```
391 # <!-- @subpage:Introduction -->
392 # This is an introduction.
393 # <!-- @subpage:Foreword -->
394 # This is a foreword.
395
396 # <!-- @subpage:Part 1/Chapter 1 -->
397 # Blah.
398 # <!-- @subpage:Part 1/Chapter 2 -->
399 # Blah.
400 # <!-- @subpage:Part 1/Chapter 2/Section A -->
401 # Blabbity blah.
402 # <!-- @subpage:Part 2/ -->
403 # Blah.
404 # <!-- @subpage:Part 2/Chapter 1 -->
405 # More blah.
406 # ```