421adb702cc9be53a603c30c7aa0b3215b73b60d
[BK-2020-03.git] / user / mw_wc2sp.sh
1 #!/bin/bash
2 # Desc: Convert wikicode to subpages
3 # Usage: mw_wc2sp.sh [path file]
4 # Input: arg1 path input wikicode file
5 # Output: files wikicode file tree
6 # Depends: Bash 5.1.16, GNU Coreutils 8.32
7 # Version: 0.0.1
8
# Script-wide settings, read by the functions defined further down.
# Regex for a subpage marker line; capture group 2 holds the subpage path.
re_sp='^(<!-- @subpage:)(.*)([ ]*-->)$'
# Default output directory.
d_out='./wikicode/'
# File names of the raw and the validated subpage lists.
f_spl='subpage_list.txt'
f_splv='subpage_list_validated.txt'
# Full paths of both subpage lists inside the output directory.
p_spl="${d_out}/${f_spl}"
p_splv="${d_out}/${f_splv}"
15
# Write the script path, a colon, and all arguments to stderr.
yell() {
    printf '%s\n' "$0: $*" >&2
}
# Report the arguments like yell(), then end the script with status 111.
die() {
    yell "$*"
    exit 111
}
# Run the arguments as a command; if it fails, die() naming the command.
must() {
    if ! "$@"; then
        die "cannot $*"
    fi
}
get_path_fork_level() {
    # Desc: Get fork level from two paths, i.e. the number of leading
    #   path elements that both paths have in common
    # Input: arg1  str  path
    #        arg2  str  path
    # Output: stdout  int  fork level
    #         stderr  diagnostics when the paths cannot be compared
    # Return: 0 on success
    #         1 if one path is absolute and the other is relative
    # Depends: GNU tr
    # Version: 0.0.1
    local path1="$1";
    local path2="$2";
    local -a parts1 parts2;
    local i;
    local fork_level=0;

    # Squeeze multiple slashes and remove trailing slash
    path1="$(printf '%s\n' "$path1" | tr -s '/')";
    path2="$(printf '%s\n' "$path2" | tr -s '/')";
    path1="${path1%/}";
    path2="${path2%/}";

    # Check for mixed absolute/relative paths
    if [[ "$path1" == /* && "$path2" == /* ]]; then
        # Both absolute: remove initial / so it does not yield an empty element
        path1="${path1#/}";
        path2="${path2#/}";
    elif [[ "$path1" == /* || "$path2" == /* ]]; then
        # Diagnostics go to stderr so stdout only ever carries the result
        declare -p path1 path2 1>&2;
        echo "FATAL:Mixed relative and absolute paths not supported." 1>&2;
        return 1;
    fi;

    # Save paths as arrays with `/` as element delimiter
    local IFS='/';
    read -ra parts1 <<< "$path1";
    read -ra parts2 <<< "$path2";

    # Get fork level by counting identical path elements from rootside
    for (( i=0; i<${#parts1[@]} && i<${#parts2[@]}; i++ )); do
        if [[ "${parts1[i]}" != "${parts2[i]}" ]]; then break; fi;
        fork_level=$((fork_level + 1));
    done;

    printf '%s\n' "$fork_level";
    return 0;
}; # Get fork level int from two paths
prune_path_rootside() {
    # Desc: Prunes a path from the root-side to a specified prune level,
    #   i.e. drops that many leading path elements.
    # Input: arg1  str  path
    #        arg2  int  prune level (0-indexed; empty is treated as 0)
    # Output: stdout  str  pruned path (empty if every element was pruned)
    #         stderr  diagnostics for an invalid prune level
    # Return: 0 on success
    #         1 if prune level is not a non-negative integer
    # Depends: GNU tr
    # Version: 0.0.1
    local path="$1";
    local prune_level="${2:-0}";
    local flag_root=false;
    local -a parts;
    local pruned_path;

    # Reject anything but a plain non-negative integer before it reaches
    # arithmetic evaluation
    if [[ ! "$prune_level" =~ ^[0-9]+$ ]]; then
        echo "FATAL:Prune level must be a non-negative integer:$prune_level" 1>&2;
        return 1;
    fi;
    prune_level="$((10#$prune_level))"; # force base 10 (e.g. `08`)

    # Squeeze multiple slashes so element indices agree with fork levels
    # computed by get_path_fork_level(), which squeezes slashes too
    path="$(printf '%s\n' "$path" | tr -s '/')";

    # Check for absolute or relative path
    if [[ "$path" == /* ]]; then
        flag_root=true;
        path="${path#/}"; # remove initial /
    fi;

    # Save path as array with `/` as element delimiter
    local IFS='/';
    read -ra parts <<< "$path";

    # Assemble pruned path from prune_level; "${parts[*]}" joins the
    # remaining elements with `/` (first char of IFS)
    pruned_path="${parts[*]:prune_level}";

    # Restore initial / if appropriate
    if [[ "$flag_root" == "true" ]] && [[ "$prune_level" -eq 0 ]]; then
        pruned_path="/${pruned_path}";
    fi;

    # Output pruned path
    printf '%s\n' "$pruned_path";
    return 0;
}; # prune path rootside to int specified level
get_path_hierarchy_level() {
    # Desc: Outputs hierarchy level of input paths
    # Example: $ cat lines.txt | get_path_hierarchy_level
    # Input: stdin  str  lines with /-delimited paths
    # Output: stdout  int  hierarchy level of each path, one per line;
    #           nothing is printed if stdin has no lines or if a
    #           mixed absolute/relative list is detected
    #         stderr  diagnostics
    # Return: 0 on success
    #         1 if absolute and relative paths are mixed
    # Note: A relative path with a single element has level 0. An
    #   absolute path is counted without its leading slash. An empty
    #   line has no elements and is reported as -1.
    # Version: 0.0.1

    local line level no_slash;
    local flag_root=false;
    local dbl='//';
    local n=0;
    local -a output=();

    # `|| [[ -n "$line" ]]` keeps a final line lacking a newline
    while read -r line || [[ -n "$line" ]]; do
        # Check for mixed absolute/relative paths. Only the first line
        # decides whether the list is absolute; later lines must agree.
        if [[ "$n" -eq 0 ]]; then
            if [[ "$line" == /* ]]; then flag_root=true; fi;
        elif { [[ "$flag_root" == "true" ]] && [[ "$line" != /* ]]; } || \
             { [[ "$flag_root" == "false" ]] && [[ "$line" == /* ]]; } then
            echo "FATAL:Mixed relative and absolute paths not supported." 1>&2; return 1;
        fi;

        # Squeeze multiple slashes and remove trailing slash
        while [[ "$line" == *//* ]]; do line="${line//"$dbl"//}"; done;
        line="${line%/}";

        # Count the number of slashes to determine hierarchy level
        if [[ -z "$line" ]]; then
            level=-1;
        else
            no_slash="${line//\//}";
            level="$(( ${#line} - ${#no_slash} ))";
        fi;
        # Leading slash of an absolute path is not a hierarchy step
        if [[ "$flag_root" == "true" ]]; then level="$((level - 1))"; fi;

        # Append to output
        output+=("$level");
        n="$((n + 1))";
    done;

    # Print output
    if [[ "${#output[@]}" -gt 0 ]]; then
        printf "%s\n" "${output[@]}";
    fi;
    return 0;
}; # return hierarchy level of lines as integers
validate_subpage_list() {
    # Desc: Check for illegal characters in subpage titles
    # Input: stdin  unvalidated subpage list, one title per line
    # Output: stdout  validated subpage list with &, " and ' replaced
    #           by numeric character references
    #         stderr  diagnostics
    # Return: 1 if sed or writing to stdout fails
    # Exit: 111 via die() on an illegal character or a trailing space
    # Depends: die()
    #          GNU sed v4.8
    local line;
    local re_illegal='[][><|}{#_]'; # match illegal page names chars #, <, >, [, ], _, {, |, }
    local re_ts=' $'; # match trailing space

    # `IFS=` keeps leading/trailing whitespace so the trailing space check
    # below can actually see it; `|| [[ -n ... ]]` keeps a final line that
    # lacks a newline.
    while IFS= read -r line || [[ -n "$line" ]]; do

        # Reject chars illegal in Mediawiki page titles.
        if [[ "$line" =~ $re_illegal ]]; then
            die "FATAL:Illegal char. Not allowed: #, <, >, [, ], _, {, |, }:$line";
        fi;

        # Reject trailing spaces.
        if [[ "$line" =~ $re_ts ]]; then
            die "FATAL:Trailing spaces not allowed:$line";
        fi;

        # Replace some chars with HTML-style codes
        ## replace ampersand & with &#38;  # must be first, the other
        ##   replacements introduce ampersands themselves
        ## replace double quote " with &#34;
        ## replace single quote ' with &#39;
        line="$(sed \
                   -e 's/&/\&#38;/g' \
                   -e 's/"/\&#34;/g' \
                   -e "s/'/\&#39;/g" \
                   <<< "$line" )" || {
            echo "FATAL:Error running sed." 1>&2; return 1; };

        printf "%s\n" "$line" || {
            echo "FATAL:Error writing stdout." 1>&2; return 1; };
    done;
    return 0;
};
check_input() {
    # Desc: Abort the script unless arg1 names an existing regular file
    # Input: arg1  str  path of input file
    # Depends: die()
    local path_in="$1"
    [[ -f "$path_in" ]] || die "FATAL:Not a file path:$1"
}; # check input
assemble_subpage_ftree() {
    # Desc: Identify subpage markers in input wikicode file to create
    #   subpage list and subpage content files
    # Input: var  fp_in  path input file
    #        var  re_sp  regex for identifying subpage markers
    #        var  d_out  path directory for output
    #        var  p_spl  path subpage list file
    # Output: file  ${d_out}/presubpage.content  lines found before the
    #           first subpage marker (only created if there are any)
    #         files ${d_out}/<subpage path>.content  marker line plus
    #           all following lines up to the next marker
    #         file  ${p_spl}  one subpage path appended per marker
    # Exit: 111 via die() if a content file already exists or a
    #   directory or file cannot be written
    # Depends: yell(), die(), must()
    local line marker sp_path spc_path spc_dir;

    yell "STATUS:Running assemble_subpage_ftree()."; # debug

    # Lines before the first marker are written straight into $d_out,
    # so the directory has to exist before the loop starts
    must mkdir -p "$d_out";
    spc_path="${d_out}/presubpage.content"; # default destination for content before subpage detected

    ## Process input line-by-line
    ## `IFS=` preserves leading/trailing whitespace of the wikicode;
    ## `|| [[ -n ... ]]` keeps a final line that lacks a newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        ### Check for subpage marker on a whitespace-trimmed copy, so an
        ### indented marker or one followed by blanks is still detected
        read -r marker <<< "$line";
        if [[ "$marker" =~ $re_sp ]]; then
            #### Identify new subpage path (capture group 2) and strip
            #### the spaces between the path and the closing `-->`
            sp_path="${BASH_REMATCH[2]}";
            sp_path="${sp_path%"${sp_path##*[! ]}"}";
            #### Update subpage content file path
            spc_path="${d_out}/${sp_path}.content";
            spc_dir="$(dirname "$spc_path"; )";
            #### Prepare file destination
            if [[ ! -d "$spc_dir" ]]; then
                must mkdir -p "$spc_dir" && \
                    yell "STATUS:Created dir:${spc_dir}";
            fi;
            if [[ -f "$spc_path" ]]; then
                die "FATAL:File already exists:${spc_path}";
            else
                must touch "$spc_path";
            fi;
            #### Append subpage path to subpage list
            printf "%s\n" "$sp_path" >> "$p_spl" || \
                die "FATAL:Cannot write subpage list:${p_spl}";
        fi;
        ### Write subpage content (the marker line itself included)
        printf "%s\n" "$line" >> "$spc_path" || \
            die "FATAL:Cannot write subpage content:${spc_path}";
    done < "${fp_in}" || die "FATAL:Cannot read input file:${fp_in}";

    yell "STATUS:Finished assemble_subpage_ftree()."; # debug
}; # process input wikicode into subpage content files and subpage list
create_output_wikicode() {
    # Desc: Use subpage list and subpage content files to create
    #   output subpage wikicode.
    # Input: var   p_spl      path subpage list file
    #        var   p_splv     path subpage list file (validated)
    #        file  ${p_spl}   subpage list file
    #        file  ${p_splv}  subpage list file (validated)
    #        var   d_out      path directory for output
    # Depends: get_path_fork_level()
    #          prune_path_rootside()
    #          get_path_hierarchy_level()
    #          yell(), die(), must()
    # Output: files  ${d_out}/<validated subpage path>.wc, each holding
    #           a navigation link line, the subpage content wrapped in
    #           <onlyinclude>, and References/Footnotes/Comments sections
    #         stderr  status and debug output
    # Exit: 111 via die() on missing input files, differing list
    #   lengths, incomparable paths or write failures
    local -a lines_spl lines_splv;
    local lc_spl lc_splv i j;
    local f_spc p_spc f_spwc p_spwc;
    local lprev="" lcurr="" lnext="";
    local lprev_hier="" lcurr_hier="" lnext_hier="";
    local fork_level_next fork_level_prev relups_next relups_prev;
    local link_next link_prev;

    yell "Running create_output_wikicode()."; # debug

    # Read subpage list files into arrays.
    [[ -f "$p_spl" ]] || die "FATAL:Subpage list file missing:$p_spl";
    [[ -f "$p_splv" ]] || die "FATAL:Subpage list file missing:$p_splv";
    mapfile -t lines_spl < "$p_spl";
    mapfile -t lines_splv < "$p_splv";
    ## Add extra blank lines for couple line comparisons
    lines_spl+=('');
    lines_splv+=('');
    declare -p lines_spl 1>&2; # debug

    # Check that subpage list files have same line counts
    lc_spl="${#lines_spl[@]}";
    lc_splv="${#lines_splv[@]}";
    if [[ "$lc_spl" -ne "$lc_splv" ]]; then
        die "FATAL:Different line counts for subpage lists:$(declare -p lc_spl lc_splv;)";
    fi;
    declare -p lc_spl lc_splv 1>&2; # debug

    # Read content files according to subpage list file
    # Note: $i corresponds to "next" line ($lnext). Therefore, use
    #   $((i-1)) to access the "current" ($lcurr) line. This offset is
    #   because subpage list lines are compared using lagging line
    #   comparison. The blank entry appended above makes the loop run
    #   once more so the last real entry also becomes "current".
    for i in "${!lines_spl[@]}"; do
        declare -p i 1>&2; # debug

        # Advance input lines
        lprev="$lcurr";
        lcurr="$lnext";
        lnext="${lines_splv[i]}";
        declare -p lprev lcurr lnext 1>&2; # debug

        # Update hierarchy tracker states
        lprev_hier="$lcurr_hier";
        lcurr_hier="$lnext_hier";
        lnext_hier="$(printf '%s\n' "$lnext" | get_path_hierarchy_level)";

        # Skip first iteration (there is no "current" line yet)
        if [[ "$i" -eq 0 ]]; then
            yell "$i:DEBUG:Skipping first iteration."; # debug
            printf -- "----\n" 1>&2; # debug
            continue;
        fi;

        # Check subpage content files
        f_spc="${lines_spl[i-1]}.content";
        p_spc="${d_out}/${f_spc}";
        declare -p f_spc p_spc 1>&2; # debug
        ## Exit if subpage content file missing
        if [[ ! -f "$p_spc" ]]; then
            die "FATAL:Subpage content file missing:$p_spc"; fi;

        # Prepare output subpage wikicode files
        f_spwc="${lines_splv[i-1]}.wc";
        p_spwc="${d_out}/${f_spwc}"; # use validated subpage name
        declare -p f_spwc p_spwc 1>&2; # debug
        ## Validation may have rewritten a parent path element (e.g. `&`),
        ## in which case the directory does not exist yet
        must mkdir -p "$(dirname "$p_spwc")";
        must touch "$p_spwc";

        # Build Next link: relative ups (`../`) from the current page to
        # the fork, then the branch from the fork to the target. Zero
        # ups means the target lies below the current page, which is
        # written as `/branch`.
        link_next=""; fork_level_next=""; relups_next="";
        if [[ -n "$lnext" ]]; then
            fork_level_next="$(get_path_fork_level "$lcurr" "$lnext")" || \
                die "FATAL:Cannot compare subpage paths:$lcurr:$lnext";
            relups_next="$((lcurr_hier - fork_level_next + 1))";
            for (( j=0; j<relups_next; j++ )); do link_next+="../"; done;
            if [[ "$relups_next" -eq 0 ]]; then link_next+="/"; fi; # handle new subpage path dive
            link_next+="$(prune_path_rootside "$lnext" "$fork_level_next")" || \
                die "FATAL:Cannot prune subpage path:$lnext";
        fi;

        # Build Prev link the same way
        link_prev=""; fork_level_prev=""; relups_prev="";
        if [[ -n "$lprev" ]]; then
            fork_level_prev="$(get_path_fork_level "$lcurr" "$lprev")" || \
                die "FATAL:Cannot compare subpage paths:$lcurr:$lprev";
            relups_prev="$((lcurr_hier - fork_level_prev + 1))";
            for (( j=0; j<relups_prev; j++ )); do link_prev+="../"; done;
            if [[ "$relups_prev" -eq 0 ]]; then link_prev+="/"; fi;
            link_prev+="$(prune_path_rootside "$lprev" "$fork_level_prev")" || \
                die "FATAL:Cannot prune subpage path:$lprev";
        fi;

        # Write navigation links, content and footer sections
        {
            # Print navigation link wikicode
            if [[ -z "$lprev" ]] && [[ -z "$lnext" ]]; then
                printf "[[../|Up]]\n";
            elif [[ -z "$lprev" ]]; then
                printf "[[%s|Next]], [[../|Up]]\n" "$link_next";
            elif [[ -n "$lnext" ]]; then
                printf "[[%s|Next]], [[%s|Previous]], [[../|Up]]\n" "$link_next" "$link_prev";
            else
                printf "[[%s|Previous]], [[../|Up]]\n" "$link_prev";
            fi;

            # Print subpage content
            printf -- "\n----<onlyinclude>\n";
            must cat "$p_spc";
            printf -- "\n</onlyinclude>----\n";
            printf -- "\n==References==\n<references />\n";
            printf -- "\n==Footnotes==\n<references group=fn />\n";
            printf -- "\n==Comments==\n<references group=cmt />\n";
            printf -- "\n";
        } >> "$p_spwc" || die "FATAL:Cannot write subpage wikicode:$p_spwc";

        declare -p i lprev lcurr lnext lprev_hier lcurr_hier lnext_hier 1>&2; # debug
        declare -p fork_level_next fork_level_prev relups_next relups_prev 1>&2; # debug
        declare -p link_next link_prev 1>&2; # debug
        printf "====================\n" 1>&2; # debug
    done;

    yell "STATUS:Finished create_output_wikicode()."; # debug
}; # generate output subpage wikicode
main() {
    # Desc: Main program; converts one wikicode file into subpage files
    # Input: arg1   str  path input wikicode file
    #        var    p_spl   path subpage list file
    #        var    p_splv  path subpage list file (validated)
    # Output: var   fp_in  (global) path input file, read by
    #           assemble_subpage_ftree()
    #         file  ${p_splv}  validated subpage list
    # Exit: 111 via die() if the subpage list cannot be validated
    # Depends: check_input(), assemble_subpage_ftree(),
    #          validate_subpage_list(), create_output_wikicode(), die()
    check_input "$@";
    declare -g fp_in="$1"; # input file path
    assemble_subpage_ftree;
    # Stop here if the list is unreadable or validation failed, so no
    # output is generated from a missing or partial validated list
    validate_subpage_list < "$p_spl" > "$p_splv" || \
        die "FATAL:Could not validate subpage list:$p_spl";
    create_output_wikicode;
}; # main program
349
350 main "$@";
351
352 # Author: Steven Baltakatei Sandoval
353 # License: GPLv3+
354
355
356 # Example input:
357 # ```
358 # <!-- @subpage:Introduction -->
359 # This is an introduction.
360 # <!-- @subpage:Foreword -->
361 # This is a foreword.
362
363 # <!-- @subpage:Part 1/Chapter 1 -->
364 # Blah.
365 # <!-- @subpage:Part 1/Chapter 2 -->
366 # Blah.
367 # <!-- @subpage:Part 1/Chapter 2/Section A -->
368 # Blabbity blah.
369 # <!-- @subpage:Part 2/ -->
370 # Blah.
371 # <!-- @subpage:Part 2/Chapter 1 -->
372 # More blah.
373 # ```