4973f243a7f50a5a3d153498f10176a732773c47
[BK-2020-03.git] / user / mw_wc2sp.sh
1 #!/bin/bash
2 # Desc: Convert wikicode to subpages
3 # Usage: mw_wc2sp.sh [path file]
4 # Input: arg1 path input wikicode file
5 # Output: files wikicode file tree
6 # Depends: Bash 5.1.16, GNU Coreutils 8.32
7 # Version: 0.1.0
8
# Regex that recognises a subpage marker line. Capture group 2 holds the
# subpage path (possibly followed by spaces).
re_sp='^(<!-- @subpage:)(.*)([ ]*-->)$'

# Default output directory.
d_out='./wikicode/'

# Subpage title list, exactly as found in the input.
f_spl='subpage_list.txt'
p_spl="$d_out/$f_spl"

# Subpage title list after validation.
f_splv='subpage_list_validated.txt'
p_splv="$d_out/$f_splv"

# Wikicode page that lists every subpage.
f_splwc='subpage_list.wc'
p_splwc="$d_out/$f_splwc"
17
yell() {
  # Desc: Print the script path ($0) followed by all args to stderr
  # Input: args  str  message words (joined with spaces)
  # Output: stderr  "<script path>: <message>"
  printf '%s\n' "$0: $*" >&2
}
die() {
  # Desc: Report all args through yell(), then terminate the shell
  # Input: args  str  message words
  # Exit: always exits with status 111
  # Depends: yell()
  yell "$*"
  exit 111
}
must() {
  # Desc: Run the args as a command; if it fails, die() naming the command
  # Input: args  command and its arguments
  # Depends: die()
  if ! "$@"; then
    die "cannot $*"
  fi
}
get_path_fork_level() {
  # Desc: Get fork level from two paths, i.e. the number of leading path
  #   elements (counted from the root side) that both paths share.
  # Input: arg1  str  path
  #        arg2  str  path
  # Output: stdout  int  fork level
  #         stderr  diagnostics when absolute and relative paths are mixed
  # Return: 0 on success; 1 if exactly one of the paths is absolute
  # Example: get_path_fork_level "a/b/c" "a/b/d"   # prints 2
  # Depends: GNU Coreutils 8.32 (tr)
  # Version: 0.0.2
  local path1="$1"
  local path2="$2"
  local abs1=false abs2=false
  local -a parts1=() parts2=()
  local i fork_level=0

  # Squeeze runs of slashes so that `a//b` and `a/b` compare equal.
  path1="$(printf '%s\n' "$path1" | tr -s '/')"
  path2="$(printf '%s\n' "$path2" | tr -s '/')"

  # Classify BEFORE stripping slashes so that a bare `/` still counts as
  # an absolute path.
  if [[ "$path1" == /* ]]; then abs1=true; fi
  if [[ "$path2" == /* ]]; then abs2=true; fi
  if [[ "$abs1" != "$abs2" ]]; then
    # Diagnostics go to stderr only; stdout is reserved for the result.
    declare -p path1 path2 >&2
    echo "FATAL:Mixed relative and absolute paths not supported." 1>&2
    return 1
  fi

  # After squeezing there is at most one trailing and one leading slash.
  path1="${path1%/}"
  path1="${path1#/}"
  path2="${path2%/}"
  path2="${path2#/}"

  # Split into path elements; IFS is changed for the read commands only.
  IFS='/' read -ra parts1 <<< "$path1"
  IFS='/' read -ra parts2 <<< "$path2"

  # Count identical path elements from the root side.
  for (( i = 0; i < ${#parts1[@]} && i < ${#parts2[@]}; i++ )); do
    [[ "${parts1[i]}" == "${parts2[i]}" ]] || break
    fork_level=$(( fork_level + 1 ))
  done

  printf '%s\n' "$fork_level"
  return 0
}
prune_path_rootside() {
  # Desc: Prunes a path from the root-side to a specified prune level,
  #   i.e. drops the first <prune level> path elements.
  # Input: arg1  str  path
  #        arg2  int  prune level (0-indexed, non-negative; default 0)
  # Output: stdout  str  pruned path (no trailing slash); the leading `/`
  #                      of an absolute path is kept only at prune level 0
  # Return: 0 on success; 1 if the prune level is not a non-negative int
  # Example: prune_path_rootside "a/b/c" 1   # prints b/c
  # Depends: GNU Coreutils 8.32 (tr)
  # Version: 0.0.2
  local path="$1"
  local prune_level="${2:-0}"
  local is_abs=false
  local -a parts=()
  local i pruned_path=""

  # A negative level would index the array from its end; reject it.
  if [[ ! "$prune_level" =~ ^[0-9]+$ ]]; then
    echo "FATAL:Prune level must be a non-negative integer:$prune_level" 1>&2
    return 1
  fi
  prune_level=$(( 10#$prune_level )) # force base 10 (e.g. "08")

  # Squeeze runs of slashes so element indices match those counted by
  # get_path_fork_level() for the same path.
  path="$(printf '%s\n' "$path" | tr -s '/')"

  # Check for absolute or relative path; remove the initial `/`.
  if [[ "$path" == /* ]]; then
    is_abs=true
    path="${path#/}"
  fi

  # Split into path elements; IFS is changed for this read only.
  IFS='/' read -ra parts <<< "$path"

  # Assemble pruned path from prune_level onwards.
  for (( i = prune_level; i < ${#parts[@]}; i++ )); do
    pruned_path+="${parts[i]}/"
  done
  pruned_path="${pruned_path%/}" # trim trailing delimiter

  # Restore initial `/` if nothing was pruned from an absolute path.
  if [[ "$is_abs" == "true" ]] && (( prune_level == 0 )); then
    pruned_path="/${pruned_path}"
  fi

  printf '%s\n' "$pruned_path"
  return 0
}
get_path_hierarchy_level() {
  # Desc: Outputs hierarchy level of input paths
  # Example: $ cat lines.txt | get_path_hierarchy_level
  # Input: stdin  str  lines with /-delimited paths
  # Output: stdout  int  hierarchy level of each path, one per line
  #           (`a` is 0, `a/b` is 1, `/a` is 0; an empty line yields -1)
  #         stderr  error message if absolute/relative paths are mixed
  # Return: 0 on success; 1 (with no stdout output) on mixed paths
  # Depends: GNU Coreutils 8.32 (tr), GNU sed 4.8, awk
  # Version: 0.0.2
  local line level is_abs
  local flag_root="" # set once, from the first line read
  local -a output=()

  # `|| [[ -n "$line" ]]` keeps a final line lacking a trailing newline.
  while read -r line || [[ -n "$line" ]]; do
    # The first line decides whether the list is absolute or relative;
    # every later line must agree with it.
    if [[ "$line" == /* ]]; then is_abs=true; else is_abs=false; fi
    if [[ -z "$flag_root" ]]; then flag_root="$is_abs"; fi
    if [[ "$is_abs" != "$flag_root" ]]; then
      echo "FATAL:Mixed relative and absolute paths not supported." 1>&2
      return 1
    fi

    # Squeeze multiple slashes and remove trailing slashes
    line="$(printf '%s\n' "$line" | tr -s '/' | sed 's:/*$::')"

    # Count the number of slashes to determine hierarchy level
    level="$(printf '%s\n' "$line" | awk -F'/' '{print NF-1}')"
    # The leading `/` of an absolute path is not a hierarchy step.
    if [[ "$flag_root" == "true" ]]; then level=$(( level - 1 )); fi

    output+=("$level")
  done

  # Print output; print nothing (not a blank line) when there was no input.
  if (( ${#output[@]} > 0 )); then
    printf "%s\n" "${output[@]}"
  fi
}
validate_subpage_list() {
  # Desc: Check for illegal characters in subpage titles and escape
  #   ampersands and quotes as numeric HTML character references.
  # Input: stdin  unvalidated subpage list, one title per line
  # Output: stdout  validated subpage list
  #         stderr  error messages
  # Return: 1 if sed fails or the read loop fails; calls die() (which
  #   exits) on an illegal character or on trailing blanks
  # Depends: BK-2020-03 yell(), die()
  #          GNU sed v4.8
  local line
  local re_illegal='[][><|}{#_]' # match chars #, <, >, [, ], _, {, |, }
  local re_ts='[[:blank:]]$'     # match trailing space or tab

  # `IFS=` stops read from silently trimming the line: with the default
  # IFS, trailing blanks are removed before they can ever be rejected.
  while IFS= read -r line || [[ -n "$line" ]]; do

    # Drop leading blanks (what a plain `read` would have done).
    line="${line#"${line%%[![:blank:]]*}"}"

    # Reject chars illegal in Mediawiki page titles.
    if [[ "$line" =~ $re_illegal ]]; then
      die "FATAL:Illegal char. Not allowed: #, <, >, [, ], _, {, |, }:$line"
    fi

    # Reject trailing spaces.
    if [[ "$line" =~ $re_ts ]]; then
      die "FATAL:Trailing spaces not allowed:$line"
    fi

    # Replace some chars with HTML-style codes
    ## replace ampersand & with &#38; # must be first
    ## replace double quote " with &#34;
    ## replace single quote ' with &#39;
    line="$(sed \
      -e 's/&/\&#38;/g' \
      -e 's/"/\&#34;/g' \
      -e "s/'/\&#39;/g" \
      <<< "$line")" || {
      echo "FATAL:Error running sed." 1>&2
      return 1
    }
    printf "%s\n" "$line"
  done || {
    echo "FATAL:Error reading stdin." 1>&2
    return 1
  }
}
check_input() {
  # Desc: Abort via die() unless the first argument names an existing
  #   regular file
  # Input: arg1  str  path of the input file
  # Depends: die()
  local candidate="$1"
  [[ -f "$candidate" ]] || die "FATAL:Not a file path:$1"
}
assemble_subpage_ftree() {
  # Desc: Identify subpage markers in input wikicode file to create
  #   subpage list and subpage content files. Every input line (marker
  #   lines included) is appended verbatim to the content file of the
  #   most recently seen marker; lines before the first marker go to
  #   ${d_out}/presubpage.content.
  # Input: var  fp_in  path input file
  #        var  re_sp  regex for identifying subpage markers; capture
  #                    group 2 must hold the subpage path
  #        var  d_out  path directory for output
  #        var  p_spl  path subpage list file (appended to)
  # Output: files  ${d_out}/<subpage path>.content, ${p_spl}
  #         stderr  status messages
  # Exit: die()s if a content file already exists, a marker has an empty
  #   title, or a write fails
  # Depends: yell(), die(), must()
  local line marker sp_path spc_path spc_dir

  yell "STATUS:Running assemble_subpage_ftree()."; # debug

  # The output dir must exist before anything is written, otherwise
  # content preceding the first marker cannot be saved.
  if [[ ! -d "$d_out" ]]; then
    must mkdir -p "$d_out" &&
      yell "STATUS:Created dir:${d_out}"
  fi

  # Default destination for content before a subpage marker is detected.
  spc_path="${d_out}/presubpage.content"

  ## Process input line-by-line. `IFS=` preserves leading/trailing
  ## whitespace of the wikicode; `|| [[ -n "$line" ]]` keeps a final line
  ## that lacks a trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    ### Marker detection ignores blanks around the marker.
    marker="${line#"${line%%[![:blank:]]*}"}"
    marker="${marker%"${marker##*[![:blank:]]}"}"

    ### Check for subpage marker
    if [[ "$marker" =~ $re_sp ]]; then
      #### Identify new subpage path (group 2, minus trailing spaces)
      sp_path="${BASH_REMATCH[2]}"
      sp_path="${sp_path%"${sp_path##*[! ]}"}"
      if [[ -z "$sp_path" ]]; then
        die "FATAL:Empty subpage title in marker:${line}"
      fi

      #### Update subpage content file path
      spc_path="${d_out}/${sp_path}.content"
      spc_dir="$(dirname "$spc_path")"

      #### Prepare file destination
      if [[ ! -d "$spc_dir" ]]; then
        must mkdir -p "$spc_dir" &&
          yell "STATUS:Created dir:${spc_dir}"
      fi
      if [[ -f "$spc_path" ]]; then
        die "FATAL:File already exists:${spc_path}"
      fi
      must touch "$spc_path"

      #### Append subpage path to subpage list
      printf "%s\n" "$sp_path" >> "$p_spl" ||
        die "FATAL:Cannot write subpage list:${p_spl}"
    fi

    ### Write subpage content. The check sits outside must() because a
    ### failed redirection would prevent must() from running at all.
    printf "%s\n" "$line" >> "$spc_path" ||
      die "FATAL:Cannot write subpage content:${spc_path}"
  done < "${fp_in}"

  yell "STATUS:Finished assemble_subpage_ftree()."; # debug
}
print_wc_content() {
  # Desc: Emit the subpage content file wrapped in <onlyinclude> tags
  #   and horizontal rules
  # Input: var  p_spc  path subpage content
  # Output: stdout
  printf -- '\n%s\n' '----<onlyinclude>'
  cat "$p_spc"
  printf -- '\n%s\n' '</onlyinclude>----'
}
print_wc_footer() {
  # Desc: Emit the page footer: References, Footnotes and Comments
  #   sections, each preceded by a blank line, then a final blank line
  # Output: stdout
  printf -- '%s\n' \
    '' '==References==' '<references />' \
    '' '==Footnotes==' '<references group=fn />' \
    '' '==Comments==' '<references group=cmt />' \
    ''
}
_wc_relative_link() {
  # Desc: Build a relative wiki link from one subpage to another
  # Input: arg1  str  path of the linking subpage
  #        arg2  str  path of the link target subpage
  #        arg3  int  hierarchy level of the linking subpage
  # Output: stdout  str  link: one `../` per level that must be climbed to
  #   reach the fork between the paths, or a leading `/` when no climb is
  #   needed (target is below the linking page), then the target's branch
  # Return: non-zero if a path helper fails
  # Depends: get_path_fork_level(), prune_path_rootside()
  local from="$1" to="$2" from_hier="$3"
  local fork relups j link=""

  fork="$(get_path_fork_level "$from" "$to")" || return 1
  relups=$(( from_hier - fork + 1 ))

  if (( relups == 0 )); then
    link="/" # target is a descendant: link as a subpage
  else
    for (( j = 0; j < relups; j++ )); do link+="../"; done
  fi

  link+="$(prune_path_rootside "$to" "$fork")" || return 1
  printf '%s\n' "$link"
}
create_output_wikicode() {
  # Desc: Use subpage list and subpage content files to create
  #   output subpage wikicode. Each subpage gets a navigation line
  #   (Next / Previous / Up), its content and a footer.
  # Input: var  p_spl    path subpage list file
  #        var  p_splv   path subpage list file (validated)
  #        var  p_splwc  path subpage list wikicode file (appended to)
  #        file ${p_spl}   subpage list file
  #        file ${p_splv}  subpage list file (validated)
  #        var  d_out    path directory for output
  # Depends: get_path_fork_level()
  #          prune_path_rootside()
  #          get_path_hierarchy_level()
  #          print_wc_content(), print_wc_footer()
  #          yell(), die(), must()
  # Output: files  subpages in $d_out (<validated title>.wc)
  #         stderr  status and debug messages
  # Note: p_spc is deliberately left global; print_wc_content() reads it.
  local -a lines_spl=() lines_splv=() nav=()
  local lc_spl lc_splv i
  local p_spwc d_spwc
  local lprev lcurr lnext lcurr_hier
  local link_next link_prev nav_line

  yell "Running create_output_wikicode()."; # debug

  # Read subpage list files into arrays.
  mapfile -t lines_spl < "$p_spl" ||
    die "FATAL:Cannot read subpage list:${p_spl}"
  mapfile -t lines_splv < "$p_splv" ||
    die "FATAL:Cannot read subpage list:${p_splv}"

  # Check that subpage list files have same line counts
  lc_spl="${#lines_spl[@]}"
  lc_splv="${#lines_splv[@]}"
  if [[ "$lc_spl" -ne "$lc_splv" ]]; then
    die "FATAL:Different line counts for subpage lists:$(declare -p lc_spl lc_splv;)"
  fi

  # Initialize subpage list wikicode file
  must touch "$p_splwc"
  printf "==Stats==\n\n==Subpages==\n" >> "$p_splwc"

  # Walk the list; neighbours are empty strings at either end.
  for (( i = 0; i < lc_spl; i++ )); do
    lcurr="${lines_splv[i]}"
    lprev=""
    lnext=""
    if (( i > 0 )); then lprev="${lines_splv[i - 1]}"; fi
    if (( i + 1 < lc_spl )); then lnext="${lines_splv[i + 1]}"; fi
    link_next=""
    link_prev=""

    # Check subpage content file (named after the unvalidated title)
    p_spc="${d_out}/${lines_spl[i]}.content"
    if [[ ! -f "$p_spc" ]]; then
      die "FATAL:Subpage content file missing:$p_spc"
    fi

    # Prepare output subpage wikicode file (named after validated title).
    # Its directory may not exist when validation changed the title.
    p_spwc="${d_out}/${lcurr}.wc"
    d_spwc="$(dirname "$p_spwc")"
    if [[ ! -d "$d_spwc" ]]; then must mkdir -p "$d_spwc"; fi
    must touch "$p_spwc"

    ## Append subpage list wikicode file
    printf "* [[/%s]]\n" "$lcurr" >> "$p_splwc"

    # Hierarchy level of the current subpage
    lcurr_hier="$(printf '%s\n' "$lcurr" | get_path_hierarchy_level)" ||
      die "FATAL:Cannot get hierarchy level:${lcurr}"

    # Assemble navigation links; a link is only emitted when the
    # corresponding neighbour exists.
    nav=()
    if [[ -n "$lnext" ]]; then
      link_next="$(_wc_relative_link "$lcurr" "$lnext" "$lcurr_hier")" ||
        die "FATAL:Cannot build Next link:${lcurr}"
      nav+=("[[${link_next}|Next]]")
    fi
    if [[ -n "$lprev" ]]; then
      link_prev="$(_wc_relative_link "$lcurr" "$lprev" "$lcurr_hier")" ||
        die "FATAL:Cannot build Previous link:${lcurr}"
      nav+=("[[${link_prev}|Previous]]")
    fi
    nav+=("[[../|Up]]")

    # Print navigation link wikicode, links joined by ", "
    nav_line="$(printf '%s, ' "${nav[@]}")"
    nav_line="${nav_line%, }"
    printf '%s\n' "$nav_line" >> "$p_spwc"

    # Print subpage content
    print_wc_content >> "$p_spwc"
    print_wc_footer >> "$p_spwc"

    declare -p i lprev lcurr lnext lcurr_hier link_next link_prev >&2; # debug
  done

  # Add footer to subpage list wikicode file
  print_wc_footer >> "$p_splwc"

  yell "STATUS:Finished create_output_wikicode()."; # debug
}
main() {
  # Desc: Main program. Splits the input wikicode file into subpage
  #   content files, validates the subpage titles and writes the output
  #   subpage wikicode.
  # Input: arg1  str  path input wikicode file
  #        var   p_spl   path subpage list file
  #        var   p_splv  path subpage list file (validated)
  # Output: var  fp_in  (global) input file path
  # Exit: die()s if the input has no subpage markers or validation fails
  # Depends: check_input(), assemble_subpage_ftree(),
  #          validate_subpage_list(), create_output_wikicode(), die()
  check_input "$@"
  declare -g fp_in="$1" # input file path
  assemble_subpage_ftree

  # The subpage list only exists if at least one marker was found.
  if [[ ! -f "$p_spl" ]]; then
    die "FATAL:No subpage list produced (no subpage markers found?):${fp_in}"
  fi

  validate_subpage_list < "$p_spl" > "$p_splv" ||
    die "FATAL:Subpage list validation failed:${p_spl}"

  create_output_wikicode
}
371
372 main "$@";
373
374 # Author: Steven Baltakatei Sandoval
375 # License: GPLv3+
376
377
378 # Example input:
379 # ```
380 # <!-- @subpage:Introduction -->
381 # This is an introduction.
382 # <!-- @subpage:Foreword -->
383 # This is a foreword.
384
385 # <!-- @subpage:Part 1/Chapter 1 -->
386 # Blah.
387 # <!-- @subpage:Part 1/Chapter 2 -->
388 # Blah.
389 # <!-- @subpage:Part 1/Chapter 2/Section A -->
390 # Blabbity blah.
391 # <!-- @subpage:Part 2/ -->
392 # Blah.
393 # <!-- @subpage:Part 2/Chapter 1 -->
394 # More blah.
395 # ```