2 # Desc: Convert wikicode to subpages
3 # Usage: mw_wc2sp.sh [path file]
4 # Input: arg1 path input wikicode file
5 # Output: files wikicode file tree
6 # Depends: Bash 5.1.16, GNU Coreutils 8.32
# Script-wide configuration. The path variables are derived from d_out, so
# d_out must be assigned before them.
# Note: d_out ends in `/`, so the derived paths contain `//`; the kernel
# treats that the same as a single `/`.
re_sp='^(<!-- @subpage:)(.*)([ ]*-->)$'; # subpage marker pattern
d_out=./wikicode/; # default output dir
f_spl="subpage_list.txt"; # subpage title list
p_spl="${d_out}/subpages/${f_spl}";
f_splv="subpage_list_validated.txt"; # subpage title list (validated)
p_splv="${d_out}/subpages/${f_splv}";
f_splwc="subpages.wc"; # subpage list wikicode
p_splwc="${d_out}/${f_splwc}";
yell() {
    # Desc: Print script path and all args to stderr
    # Input: args  str  message words (joined with single spaces)
    # Output: stderr  "<script path>: <message>"
    printf '%s\n' "$0: $*" >&2;
}; # print script path and all args to stderr
die() {
    # Desc: Report a message through yell(), then terminate the shell
    # Input: args  str  message words
    # Output: stderr  whatever yell() prints
    # Exit: 111 (always)
    # Depends: yell()
    yell "$*";
    exit 111;
}; # same as yell() but non-zero exit status
must() {
    # Desc: Run the arguments as a command; abort through die() if it fails
    # Input: args  command and its arguments
    # Output: whatever the command prints; on failure die() reports
    #         "cannot <command line>"
    # Depends: die()
    "$@" || die "cannot $*";
}; # runs args as command, reports args if command fails
get_path_fork_level() {
    # Desc: Get fork level from two paths, i.e. the number of leading
    #       `/`-delimited elements the two paths have in common
    # Example: get_path_fork_level "a/b/c" "a/b/d"   # prints 2
    # Input: arg1  str  path
    #        arg2  str  path
    # Output: stdout  int  fork level
    #         stderr  diagnostics when the paths cannot be compared
    # Return: 0 on success; 1 if one path is absolute and the other relative
    # Depends: Bash (arrays, [[ ]]), tr
    local path1="$1" path2="$2";
    local flag_root="";
    local -a parts1 parts2;
    local -i i fork_level=0;

    # Squeeze multiple slashes and remove trailing slashes
    path1="$(printf '%s\n' "$path1" | tr -s '/')"; path1="${path1%/}";
    path2="$(printf '%s\n' "$path2" | tr -s '/')"; path2="${path2%/}";

    # Check for mixed absolute/relative paths
    if [[ "$path1" == /* ]] && [[ "$path2" == /* ]]; then
        flag_root="true";
        # Drop the leading `/` so it does not become an empty first element
        path1="${path1#/}";
        path2="${path2#/}";
    elif [[ "$path1" != /* ]] && [[ "$path2" != /* ]]; then
        flag_root="false";
    else
        # The dump goes to stderr: callers capture stdout as the result
        declare -p path1 path2 flag_root 1>&2;
        echo "FATAL:Mixed relative and absolute paths not supported." 1>&2;
        return 1;
    fi;

    # Save path as arrays with `/` as element delimiter
    IFS='/' read -ra parts1 <<< "$path1";
    IFS='/' read -ra parts2 <<< "$path2";

    # Get fork level by counting identical path elements from rootside
    for (( i=0; i<${#parts1[@]} && i<${#parts2[@]}; i++ )); do
        if [[ "${parts1[i]}" != "${parts2[i]}" ]]; then break; fi;
        fork_level=$((fork_level + 1));
    done;

    printf '%s\n' "$fork_level";
    #declare -p path1 path2 flag_root parts1 parts2 fork_level 1>&2; # debug
    return 0;
}; # Get fork level int from two paths
prune_path_rootside() {
    # Desc: Prunes a path from the root-side to a specified prune level.
    # Example: prune_path_rootside "a/b/c" 1   # prints b/c
    # Input: arg1  str  path
    #        arg2  int  prune level (0-indexed)
    # Output: stdout  str  path with the first <prune level> elements removed
    #         (empty if the prune level is past the last element)
    # Depends: Bash (arrays, [[ ]], parameter expansion)
    local path="$1";
    local prune_level="$2";
    local flag_root="false" pruned_path="";
    local -a parts;
    local -i i;

    # Check for absolute or relative path
    if [[ "$path" == /* ]]; then
        flag_root="true";
        # Drop the leading `/` so it does not become an empty first element
        path="${path#/}";
    fi;

    # Save path as array with `/` as element delimiter
    IFS='/' read -ra parts <<< "$path";

    # Assemble pruned path from prune_level
    for (( i=prune_level; i<${#parts[@]}; i++ )); do
        pruned_path+="${parts[i]}/";
    done;

    # Trim trailing `/` delimiter(s)
    while [[ "$pruned_path" == */ ]]; do
        pruned_path="${pruned_path%/}";
    done;

    # Restore initial / if appropriate: only an unpruned absolute path is
    # still anchored at the root
    if [[ "$flag_root" == "true" ]] && [[ "$prune_level" -eq 0 ]]; then
        pruned_path=/"$pruned_path";
    fi;

    printf '%s\n' "$pruned_path";
    #declare -p path prune_level parts pruned_path 1>&2 && printf "========\n" 1>&2; # debug
    return 0;
}; # prune path rootside to int specified level
get_path_hierarchy_level() {
    # Desc: Outputs hierarchy level of input paths
    # Example: $ cat lines.txt | get_path_hierarchy_level
    # Input: stdin  str  lines with /-delimited paths
    # Output: stdout  int  hierarchy level of each path, one per line
    #         (0 for a single element; an empty relative line yields -1
    #         because awk reports zero fields for it)
    # Return: 0 on success; 1 if absolute and relative paths are mixed
    #         (nothing is written to stdout in that case)
    # Depends: Bash (arrays, [[ ]]), tr, awk
    local line level flag_root="";
    local -i n=0;
    local -a output=();

    while read -r line; do
        # The first line decides whether the input is absolute or relative
        if [[ $n -le 0 ]]; then
            if [[ "$line" == /* ]]; then
                flag_root="true";
            else
                flag_root="false";
            fi;
        fi;

        # Check for mixed absolute/relative paths.
        if { [[ "$flag_root" == "true" ]] && [[ "$line" != /* ]]; } || \
           { [[ "$flag_root" == "false" ]] && [[ "$line" == /* ]]; }; then
            echo "FATAL:Mixed relative and absolute paths not supported." 1>&2; return 1;
        fi;

        # Squeeze multiple slashes and remove trailing slashes
        line="$(printf '%s\n' "$line" | tr -s '/')";
        line="${line%/}";

        # Count the number of slashes to determine hierarchy level
        level="$(awk -F'/' '{print NF-1}' <<< "$line")";
        # The leading `/` of an absolute path is not a level separator
        if [[ "$flag_root" == "true" ]]; then level=$((level - 1)); fi;

        output+=("$level");
        n=$((n + 1));
        #declare -p flag_root level 1>&2; # debug
    done;

    printf "%s\n" "${output[@]}";
}; # return hierarchy level of lines as integers
144 validate_subpage_list
() {
# Reads subpage titles from stdin one line at a time. A title matching
# re_illegal or re_ts aborts the script through die(); every other title is
# written to stdout by the printf near the end of the loop.
145 # Desc: Check for illegal characters in subpage titles
146 # Input: stdin unvalidated subpage list
147 # Output: stdout validated subpage list
148 # Depends: BK-2020-03 read_stdin(), yell(), die()
150 while read -r line
; do
152 # Reject chars illegal in Mediawiki page titles.
153 re_illegal
='[][><|}{#_]'; # match illegal page names chars #, <, >, [, ], _, {, |, }
154 if [[ "$line" =~
$re_illegal ]]; then
155 die
"FATAL:Illegal char. Not allowed: #, <, >, [, ], _, {, |, }:$line";
158 # Reject trailing spaces.
159 re_ts
=' $'; # match trailing space
160 if [[ "$line" =~
$re_ts ]]; then
161 die
"FATAL:Trailing spaces not allowed:$line";
164 # Replace some chars with HTML-style codes
165 ## replace ampersand & with & # must be first
166 ## replace double quote " with "
167 ## replace single quote ' with '
# NOTE(review): the sed command that performs these substitutions is not
# visible here; only its here-string input and its error handler remain, and
# the replacement codes in the three comments above appear to have been
# decoded. Confirm the exact substitutions before relying on this step.
# NOTE(review): the "Error running sed" handler below only echoes to stdout
# and does not return, so the message would end up in the validated list.
# Confirm whether it should go to stderr and abort.
172 <<< "$line" )" || { echo "FATAL
:Error running
sed.
"; };
173 printf "%s
\n" "$line";
175 echo "FATAL
:Error reading stdin.
" 1>&2; return 1; };
179 if [[ ! -f "$path_in" ]]; then die "FATAL
:Not a
file path
:$1"; fi;
assemble_subpage_ftree() {
    # Desc: Identify subpage markers in input wikicode file to create
    #       subpage list and subpage content files
    # Input: var fp_in  path input file
    #        var re_sp  regex for identifying subpage markers
    #                   (capture group 2 is the subpage title)
    #        var d_out  path directory for output
    #        var p_spl  path subpage list file
    # Output: files  ${d_out}/subpages/<title>.content (one per marker; the
    #                marker line itself is the first line of the file)
    #         file   ${d_out}/subpages/presubpage.content (lines that come
    #                before the first marker, if any)
    #         file   ${p_spl} (one title appended per marker)
    # Depends: yell(), die(), must(), dirname, mkdir, touch
    #declare -p re_sp d_out f_spl p_spl fp_in; # debug
    local line sp_path spc_path spc_dir;

    yell "STATUS:Running assemble_subpage_ftree()."; # debug

    spc_path="${d_out}/subpages/presubpage.content"; # default destination for content before subpage detected

    # Both the default content file and the subpage list are appended to
    # below, so their directories must exist before the first line is read.
    must mkdir -p "$(dirname "$spc_path")" "$(dirname "$p_spl")";

    ## Process input line-by-line
    while read -r line; do
        #declare -p line re_sp; # debug
        ### Check for subpage marker
        if [[ "$line" =~ $re_sp ]]; then
            #### Identify new subpage path
            # Group 2 of re_sp is the title; taking it from BASH_REMATCH
            # avoids splicing the regex into a sed program.
            sp_path="${BASH_REMATCH[2]}";
            # Strip trailing spaces left between the title and `-->`
            sp_path="${sp_path%"${sp_path##*[! ]}"}";
            # declare -p sp_path; # debug
            #### Update subpage content file path
            spc_path="${d_out}/subpages/${sp_path}.content";
            spc_dir="$(dirname "$spc_path")";
            #declare -p spc_path spc_dir; # debug
            #### Prepare file destination
            if [[ ! -d "$spc_dir" ]]; then
                must mkdir -p "$spc_dir" && \
                    yell "STATUS:Created dir:${spc_dir}";
            fi;
            # A second marker with the same title would silently merge two
            # subpages, so treat an existing content file as fatal.
            if [[ -f "$spc_path" ]]; then
                die "FATAL:File already exists:${spc_path}";
            fi;
            must touch "$spc_path";

            #### Append subpage path to subpage list
            printf "%s\n" "$sp_path" >> "$p_spl";
        fi;
        ### Write subpage content
        must printf "%s\n" "$line" >> "$spc_path";
    done < "$fp_in";

    yell "STATUS:Finished assemble_subpage_ftree()."; # debug
}; # process input wikicode into subpage content files and subpage list
print_wc_content() {
    # Desc: Print subpage content wrapped in <onlyinclude> tags
    # Input: var p_spc  path subpage content
    # Output: stdout  wikicode
    # NOTE(review): the function header and the statement between the two
    # printf calls were not visible; the name comes from the call site and
    # emitting the file named by p_spc is inferred from the documented
    # input. Confirm against the original.
    printf -- "\n----<onlyinclude>\n";
    cat -- "$p_spc";
    printf -- "\n</onlyinclude>----\n";
}; # print wikicode content
print_wc_footer() {
    # Desc: Print the wikicode footer: References, Footnotes and Comments
    #       sections, each followed by its <references> tag
    # Input: none
    # Output: stdout  wikicode
    # NOTE(review): the function header was not visible (name taken from the
    # call sites) and the space before each `/>` is inferred. Confirm
    # against the original.
    printf -- "\n==References==\n<references />\n";
    printf -- "\n==Footnotes==\n<references group=fn />\n";
    printf -- "\n==Comments==\n<references group=cmt />\n";
}; # print wikicode footer
239 create_output_wikicode() {
240 # Desc: Use subpage list and subpage content files to create
241 # output subpage wikicode.
242 # Input: var p_spl path subpage list file
243 # var p_splv path subpage list file (validated)
244 # file ${p_spl} subpage list file
245 # file ${p_splv} subpage list file (validated)
246 # var d_out path directory for output
247 # Depends: get_path_fork_level()
248 # prune_path_rootside()
249 # get_path_hierarchy_level()
250 # validate_subpage_list()
251 # Output: files subpages in $d_out
# Also reads p_splwc (path of the subpage list wikicode file) and calls
# print_wc_content(), print_wc_footer(), yell(), die() and must().
# Side effects: appends a heading, one `* [[/title]]` entry per processed
# index and a footer to ${p_splwc}; for each index greater than 0 it appends
# a navigation line, the content and the footer to a `.wc` file under
# ${d_out}/subpages named after the validated title.
# NOTE(review): the `declare -p` statements and the `printf "===="` marked
# "debug" below write to stdout, not stderr.
253 yell "Running create_output_wikicode
().
"; # debug
255 # Read subpage list files into arrays.
256 local -a lines_spl lines_splv;
257 mapfile -t lines_spl < "$p_spl";
258 mapfile -t lines_splv < "$p_splv";
259 ## Add extra blank lines for couple line comparisons
# NOTE(review): the statements that add those blank lines are not visible
# here; the loop below relies on a final empty "next" entry. Confirm.
262 declare -p lines_spl; # debug
264 # Check that subpage list files have same line counts
265 lc_spl="${#lines_spl[@]}";
266 lc_splv="${#lines_splv[@]}";
267 if [[ ! "$lc_spl" -eq "$lc_splv" ]]; then
268 die "FATAL
:Different line counts
for subpage lists
:$
(declare -p lc_spl lc_splv
;)";
270 declare -p lc_spl lc_splv; # debug
272 # Initialize subpage list wikicode file
273 must touch "$p_splwc";
274 printf "==Stats
==\n\n==Subpages
==\n" >> "$p_splwc";
276 # Read content files according to subpage list file
277 # Note: $i corresponds to “next” line ($lnext). Therefore, use
278 # $((i-1)) to access the “current” ($lcurr) line. This offset is
279 # because subpage list lines are compared using lagging line
281 for i in "${!lines_spl[@]}"; do
282 declare -p i; # debug;
284 # Check subpage content files
285 f_spc="${lines_spl[i-1]}.content
";
286 p_spc="${d_out}/subpages
/${f_spc}";
287 declare -p f_spc p_spc;
288 ## Exit if subpage content file missing
289 if [[ "$i" -gt 0 ]] && [[ ! -f "$p_spc" ]]; then
290 die "FATAL
:Subpage content
file missing
:$p_spc"; fi;
292 # Prepare output subpage wikicode files
293 f_spwc="${lines_splv[i-1]}.
wc";
294 p_spwc="${d_out}/subpages
/${f_spwc}"; # use validated subpage name
295 declare -p f_spwc p_spwc; # debug
296 if [[ "$i" -gt 0 ]]; then
297 must touch "$p_spwc";
298 ## Append subpage list wikicode file
299 printf "* [[/%s
]]\n" "${lines_splv[i-1]}" >> "$p_splwc";
302 # Advance input lines
# NOTE(review): only the lnext assignment is visible here; lprev and lcurr
# are read below, so their updates presumably belong to this step. Confirm.
305 lnext="${lines_splv[i]}";
306 declare -p lprev lcurr lnext; # debug
308 # Update hierarchy tracker states
309 lprev_hier="$lcurr_hier";
310 lcurr_hier="$lnext_hier";
311 lnext_hier="$
(echo "$lnext" | get_path_hierarchy_level
)";
313 # Skip first iteration
314 if [[ "$i" -eq 0 ]]; then
315 yell "$i:DEBUG
:Skipping first iteration.
"; # debug
316 printf -- "----\n" 1>&2; # debug
319 # Get path fork levels
320 fork_level_next="$
(get_path_fork_level
"$lcurr" "$lnext")";
321 fork_level_prev="$
(get_path_fork_level
"$lcurr" "$lprev")";
323 # Count relative ups needed (`../`)
# Each count is the current hierarchy level minus the fork level, plus one.
324 relups_next="$
((lcurr_hier
- fork_level_next
+ 1))";
325 relups_prev="$
((lcurr_hier
- fork_level_prev
+ 1))";
327 # Initialize Next and Prev links with relative ups to fork.
329 for (( j=0; j<relups_next; j++ )); do link_next+="..
/"; done;
330 if [[ "$relups_next" -eq 0 ]]; then link_next+="/"; fi; # handle new subpage path dive
332 for (( j=0; j<relups_prev; j++ )); do link_prev+="..
/"; done;
334 # Append branchs from fork to Next and Prev targets
335 link_next+="$
(prune_path_rootside
"$lnext" "$fork_level_next")";
336 link_prev+="$
(prune_path_rootside
"$lprev" "$fork_level_prev")";
338 # Print navigation link wikicode
# Empty lprev: Next and Up only. Non-empty lprev and lnext: Next, Previous
# and Up. Empty lnext: Previous and Up only.
339 if [[ -z "$lprev" ]]; then
340 printf "[[%s|Next
]], [[..
/|Up
]]\n" "$link_next" >> "$p_spwc";
341 elif [[ -n "$lnext" ]]; then
342 printf "[[%s|Next
]], [[%s|Previous
]], [[..
/|Up
]]\n" "$link_next" "$link_prev" >> "$p_spwc";
343 elif [[ -z "$lnext" ]]; then
344 printf "[[%s|Previous
]], [[..
/|Up
]]\n" "$link_prev" >> "$p_spwc";
346 yell "FATAL
:Here be dragons.
";
349 # Print subpage content
350 print_wc_content >> "$p_spwc";
351 print_wc_footer >> "$p_spwc";
353 declare -p i lprev lcurr lnext lprev_hier lcurr_hier lnext_hier; # debug
354 declare -p fork_level_next fork_level_prev relups_next relups_prev; # debug
355 declare -p link_next link_prev; # debug
356 printf "====================\n" # debug
359 # Add footer to subpage list wikicode file
360 print_wc_footer >> "$p_splwc";
362 yell "STATUS
:Finished create_output_wikicode
().
"; # debug
363 }; # generate output subpage wikicode
# Entry sequence: record the input path, split the input wikicode into
# subpage content files plus a title list, validate that list into
# ${p_splv}, then generate the output wikicode.
# NOTE(review): `declare -g` and "$1" suggest these statements sit inside a
# function (presumably main) whose header and invocation are not visible
# here. Confirm.
366 declare -g fp_in="$1"; # input file path
367 assemble_subpage_ftree;
368 validate_subpage_list < "$p_spl" > "$p_splv";
369 create_output_wikicode;
374 # Author: Steven Baltakatei Sandoval
380 # <!-- @subpage:Introduction -->
381 # This is an introduction.
382 # <!-- @subpage:Foreword -->
383 # This is a foreword.
385 # <!-- @subpage:Part 1/Chapter 1 -->
387 # <!-- @subpage:Part 1/Chapter 2 -->
389 # <!-- @subpage:Part 1/Chapter 2/Section A -->
391 # <!-- @subpage:Part 2/ -->
393 # <!-- @subpage:Part 2/Chapter 1 -->