Commit | Line | Data |
---|---|---|
fea1bafc SBS |
#!/bin/bash
# Desc: Convert wikicode to subpages
# Usage: mw_wc2sp.sh [path file]
# Input: arg1 path input wikicode file
# Output: files wikicode file tree
# Depends: Bash 5.1.16, GNU Coreutils 8.32
# Version: 0.1.0
fea1bafc SBS |
8 | |
# Global settings read by the functions below.
# Note: d_out ends with `/`, so the derived paths contain `//`.
re_sp='^(<!-- @subpage:)(.*)([ ]*-->)$'; # subpage marker pattern; group 2 is the subpage path
d_out=./wikicode/; # default output dir
f_spl="subpage_list.txt"; # subpage title list
p_spl="${d_out}/${f_spl}";
f_splv="subpage_list_validated.txt"; # subpage title list (validated)
p_splv="${d_out}/${f_splv}";
f_splwc="subpage_list.wc"; # subpage list wikicode
p_splwc="${d_out}/${f_splwc}";
fea1bafc SBS |
17 | |
# Minimal error-reporting helpers.
yell() { printf '%s: %s\n' "$0" "$*" >&2; } # print script path and all args to stderr
die() { yell "$*"; exit 111; } # same as yell() but exits the script with status 111
must() { "$@" || die "cannot $*"; } # runs args as command; reports args and exits if command fails
get_path_fork_level() {
    # Desc: Get fork level from two paths, i.e. the number of leading
    #   path elements the two paths have in common.
    # Input: arg1  str  path
    #        arg2  str  path
    # Output: stdout  int  fork level
    #         stderr  str  diagnostics when the paths cannot be compared
    # Return: 0 on success
    #         1 if one path is absolute and the other is relative
    # Depends: GNU tr, GNU sed 4.8
    # Version: 0.0.2
    local path1="$1";
    local path2="$2";
    local -a parts1 parts2;
    local i;
    local fork_level=0;

    # Squeeze multiple slashes and remove trailing slashes
    path1="$(printf '%s\n' "$path1" | tr -s '/' | sed 's:/*$::' )";
    path2="$(printf '%s\n' "$path2" | tr -s '/' | sed 's:/*$::' )";

    # Check for mixed absolute/relative paths
    if [[ "$path1" == /* ]] && [[ "$path2" == /* ]]; then
        # Both absolute: remove initial /
        path1="${path1#/}";
        path2="${path2#/}";
    elif [[ "$path1" == /* ]] || [[ "$path2" == /* ]]; then
        # Diagnostics go to stderr so that stdout only ever carries the result.
        declare -p path1 path2 1>&2;
        echo "FATAL:Mixed relative and absolute paths not supported." 1>&2;
        return 1;
    fi;

    # Save paths as arrays with `/` as element delimiter.
    # IFS is set for the `read` commands only.
    IFS='/' read -ra parts1 <<< "$path1";
    IFS='/' read -ra parts2 <<< "$path2";

    # Get fork level by counting identical path elements from rootside
    for (( i=0; i<${#parts1[@]} && i<${#parts2[@]}; i++ )); do
        if [[ "${parts1[i]}" != "${parts2[i]}" ]]; then break; fi;
        fork_level=$((fork_level + 1));
    done;

    echo "$fork_level";
    return 0;
}; # Get fork level int from two paths
prune_path_rootside() {
    # Desc: Prunes a path from the root-side to a specified prune level,
    #   i.e. drops the first `prune level` path elements.
    # Input: arg1  str  path
    #        arg2  int  prune level (0-indexed)
    # Output: stdout  str  pruned path, without trailing `/`
    # Example: prune_path_rootside "a/b/c" 1  # prints "b/c"
    # Depends: Bash only
    # Version: 0.0.2
    local path="$1";
    local prune_level="$2";
    local flag_root=false;
    local -a parts;
    local pruned_path="";
    local i;

    # Check for absolute or relative path
    if [[ "$path" == /* ]]; then
        flag_root=true;
        # Remove initial /
        path="${path#/}";
    fi;

    # Save path as array with `/` as element delimiter.
    # IFS is set for the `read` command only.
    IFS='/' read -ra parts <<< "$path";

    # Assemble pruned path from prune_level
    for (( i=prune_level; i<${#parts[@]}; i++ )); do
        pruned_path+="${parts[i]}/";
    done;

    # Trim trailing `/` delimiter(s)
    while [[ "$pruned_path" == */ ]]; do
        pruned_path="${pruned_path%/}";
    done;

    # Restore initial / only when nothing was pruned from an absolute path
    if [[ "$flag_root" == "true" ]] && [[ "$prune_level" -eq 0 ]]; then
        pruned_path="/${pruned_path}";
    fi;

    # Output pruned path
    echo "$pruned_path";
    return 0;
}; # prune path rootside to int specified level
get_path_hierarchy_level() {
    # Desc: Outputs hierarchy level of input paths
    # Example: $ cat lines.txt | get_path_hierarchy_level
    # Input: stdin  str  lines with /-delimited paths
    # Output: stdout  int  hierarchy level of each path, one per line
    #           (a relative path without `/` is level 0; an empty line is -1)
    # Return: 1 if the input mixes absolute and relative paths
    # Depends: GNU tr, GNU sed, awk
    # Version: 0.0.2

    local line level;
    local flag_root="";
    local -a output=();
    local n=0;

    while read -r line || [[ -n "$line" ]]; do
        # The first line alone decides whether the input is absolute or
        # relative; every later line must agree with it.
        if [[ "$n" -eq 0 ]]; then
            if [[ "$line" == /* ]]; then
                flag_root=true;
            else
                flag_root=false;
            fi;
        fi;
        if { [[ "$flag_root" == "true" ]] && [[ "$line" != /* ]]; } || \
               { [[ "$flag_root" == "false" ]] && [[ "$line" == /* ]]; } then
            echo "FATAL:Mixed relative and absolute paths not supported." 1>&2; return 1;
        fi;

        # Squeeze multiple slashes and remove trailing slashes
        line="$(printf '%s\n' "$line" | tr -s '/' | sed 's:/*$::' )";

        # Count the number of slashes to determine hierarchy level
        level="$(printf '%s\n' "$line" | awk -F'/' '{print NF-1}' )";
        # The leading `/` of an absolute path does not add a level
        if [[ "$flag_root" == "true" ]]; then level=$((level - 1)); fi;

        # Append to output
        output+=("$level");
        n=$((n + 1));
    done;

    # Print output (nothing at all for empty input)
    if [[ "${#output[@]}" -gt 0 ]]; then
        printf "%s\n" "${output[@]}";
    fi;
}; # return hierarchy level of lines as integers
validate_subpage_list() {
    # Desc: Check for illegal characters in subpage titles and replace
    #   some characters with HTML-style codes.
    # Input: stdin  unvalidated subpage list (one title per line)
    # Output: stdout  validated subpage list
    # Return: 1 if sed fails or the read loop ends with an error
    # Exit: via die() on an illegal character
    # Depends: die()
    #          GNU sed v4.8
    local line;
    local re_illegal='[][><|}{#_]'; # match illegal page name chars #, <, >, [, ], _, {, |, }
    local re_ts=' $';               # match trailing space

    # Note: `read` without `IFS=` trims leading and trailing whitespace,
    # so the trailing-space check below cannot fire as written.
    while read -r line; do

        # Reject chars illegal in Mediawiki page titles.
        if [[ "$line" =~ $re_illegal ]]; then
            die "FATAL:Illegal char. Not allowed: #, <, >, [, ], _, {, |, }:$line";
        fi;

        # Reject trailing spaces.
        if [[ "$line" =~ $re_ts ]]; then
            die "FATAL:Trailing spaces not allowed:$line";
        fi;

        # Replace some chars with HTML-style codes
        ## replace ampersand    & with &amp;   # must be first
        ## replace double quote " with &quot;
        ## replace single quote ' with &#39;
        # `\&` is a literal ampersand in a sed replacement; a bare `&`
        # would insert the matched text.
        line="$(sed \
                   -e 's/&/\&amp;/g' \
                   -e 's/"/\&quot;/g' \
                   -e "s/'/\&#39;/g" \
                   <<< "$line" )" || {
            # Report on stderr: stdout carries the validated list.
            echo "FATAL:Error running sed." 1>&2; return 1; };
        printf "%s\n" "$line";
    done || {
        echo "FATAL:Error reading stdin." 1>&2; return 1; };
};
check_input() {
    # Desc: Verify that the input argument is a path to an existing regular file
    # Input: arg1  str  path of input wikicode file
    # Exit: via die() if the argument is missing or is not a file
    # Depends: die()
    local path_in="$1";
    if [[ -z "$path_in" ]]; then
        die "FATAL:No input file path given. Usage: mw_wc2sp.sh [path file]";
    fi;
    if [[ ! -f "$path_in" ]]; then die "FATAL:Not a file path:${path_in}"; fi;
}; # check input
assemble_subpage_ftree() {
    # Desc: Identify subpage markers in input wikicode file to create
    #   subpage list and subpage content files. Each input line is
    #   appended to the content file of the most recent marker; lines
    #   before the first marker go to `presubpage.content`. The marker
    #   line itself is also written to its content file.
    # Input: var  fp_in  path input file
    #        var  re_sp  regex for identifying subpage markers
    #                    (group 2 must capture the subpage path)
    #        var  d_out  path directory for output
    #        var  p_spl  path subpage list file
    # Output: files  ${d_out}/<subpage path>.content
    #         file   ${p_spl} (one subpage path appended per marker)
    # Exit: via die() if a content file already exists or a write fails
    # Depends: yell(), die(), must()
    local line marker;
    local sp_path spc_path spc_dir;

    yell "STATUS:Running assemble_subpage_ftree()."; # debug

    # Make sure the output dir exists so that content preceding the first
    # subpage marker can be written.
    must mkdir -p "$d_out";
    spc_path="${d_out}/presubpage.content"; # default destination for content before subpage detected

    ## Process input line-by-line
    ## `IFS=` keeps leading/trailing whitespace of content lines; the `||`
    ## clause keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        ### Check for subpage marker
        ### Markers are matched on a whitespace-trimmed copy, so a marker
        ### surrounded by blanks is still recognised.
        read -r marker <<< "$line";
        if [[ "$marker" =~ $re_sp ]]; then
            #### Identify new subpage path (regex group 2, trailing spaces removed)
            sp_path="${BASH_REMATCH[2]}";
            while [[ "$sp_path" == *' ' ]]; do sp_path="${sp_path% }"; done;
            #### Update subpage content file path
            spc_path="${d_out}/${sp_path}.content";
            spc_dir="$(dirname -- "$spc_path")";
            #### Prepare file destination
            if [[ ! -d "$spc_dir" ]]; then
                must mkdir -p "$spc_dir" && \
                    yell "STATUS:Created dir:${spc_dir}";
            fi;
            if [[ -f "$spc_path" ]]; then
                die "FATAL:File already exists:${spc_path}";
            else
                must touch "$spc_path";
            fi;
            #### Append subpage path to subpage list
            printf "%s\n" "$sp_path" >> "$p_spl" || \
                die "FATAL:Cannot append to subpage list:${p_spl}";
        fi;
        ### Write subpage content
        printf "%s\n" "$line" >> "$spc_path" || \
            die "FATAL:Cannot write subpage content:${spc_path}";
    done < "${fp_in}";

    yell "STATUS:Finished assemble_subpage_ftree()."; # debug
}; # process input wikicode into subpage content files and subpage list
a80b26ff SBS |
print_wc_content() {
    # Desc: Print subpage content wrapped in <onlyinclude> tags
    # Input: arg1  path  subpage content file (optional)
    #        var   p_spc path subpage content, used when arg1 is absent
    # Output: stdout
    # Return: exit status of `cat` (non-zero if the content file is unreadable)
    local p_content="${1:-${p_spc}}";
    local rc=0;
    printf -- "\n----<onlyinclude>\n";
    cat -- "$p_content" || rc=$?;
    # Always close the tag, even if reading the content failed.
    printf -- "\n</onlyinclude>----\n";
    return "$rc";
}; # print wikicode content
print_wc_footer() {
    # Desc: Print the wikicode footer: References, Footnotes and Comments
    #   sections, each preceded by a blank line, plus a final blank line.
    # Output: stdout
    printf '%s\n' \
           '' '==References==' '<references />' \
           '' '==Footnotes==' '<references group=fn />' \
           '' '==Comments==' '<references group=cmt />' \
           '';
}; # print wikicode footer
fea1bafc SBS |
create_output_wikicode() {
    # Desc: Use subpage list and subpage content files to create
    #   output subpage wikicode. Each subpage file gets a navigation
    #   line (Next / Previous / Up), its content and a footer. A subpage
    #   list wikicode file linking to every subpage is written as well.
    # Input: var   p_spl     path subpage list file
    #        var   p_splv    path subpage list file (validated)
    #        var   p_splwc   path subpage list wikicode file
    #        file  ${p_spl}  subpage list file
    #        file  ${p_splv} subpage list file (validated)
    #        var   d_out     path directory for output
    # Depends: get_path_fork_level()
    #          prune_path_rootside()
    #          get_path_hierarchy_level()
    #          print_wc_content() (reads $p_spc set here)
    #          print_wc_footer()
    #          yell(), die(), must()
    # Output: files   subpages in $d_out (named after validated titles)
    #         stderr  debug dumps
    # Exit: via die() on unreadable lists, differing list lengths or a
    #       missing content file
    local -a lines_spl lines_splv;
    local lc_spl lc_splv;
    local k j;
    local p_spc p_spwc;
    local lprev lcurr lnext lcurr_hier;
    local fork_level_next fork_level_prev relups_next relups_prev;
    local link_next link_prev;

    yell "Running create_output_wikicode()."; # debug

    # Read subpage list files into arrays.
    mapfile -t lines_spl < "$p_spl" || die "FATAL:Cannot read subpage list:${p_spl}";
    mapfile -t lines_splv < "$p_splv" || die "FATAL:Cannot read subpage list:${p_splv}";

    # Check that subpage list files have same line counts
    lc_spl="${#lines_spl[@]}";
    lc_splv="${#lines_splv[@]}";
    if [[ ! "$lc_spl" -eq "$lc_splv" ]]; then
        die "FATAL:Different line counts for subpage lists:$(declare -p lc_spl lc_splv;)";
    fi;
    declare -p lc_spl lc_splv 1>&2; # debug

    # Initialize subpage list wikicode file
    must touch "$p_splwc";
    printf "==Stats==\n\n==Subpages==\n" >> "$p_splwc";

    # Process each subpage together with its neighbours in the list.
    # $lprev / $lnext are empty for the first / last subpage.
    for (( k=0; k<lc_splv; k++ )); do
        # Check subpage content files (named after the unvalidated title)
        p_spc="${d_out}/${lines_spl[k]}.content";
        ## Exit if subpage content file missing
        if [[ ! -f "$p_spc" ]]; then
            die "FATAL:Subpage content file missing:$p_spc"; fi;

        # Prepare output subpage wikicode files
        lcurr="${lines_splv[k]}";
        p_spwc="${d_out}/${lcurr}.wc"; # use validated subpage name
        ## The validated name may sit in a directory that differs from
        ## the content file's directory, so create it if needed.
        must mkdir -p "$(dirname -- "$p_spwc")";
        must touch "$p_spwc";
        ## Append subpage list wikicode file
        printf "* [[/%s]]\n" "$lcurr" >> "$p_splwc";

        # Identify neighbouring lines
        lprev=""; lnext="";
        if (( k > 0 )); then lprev="${lines_splv[k-1]}"; fi;
        if (( k + 1 < lc_splv )); then lnext="${lines_splv[k+1]}"; fi;

        # Hierarchy level of current subpage
        lcurr_hier="$(printf '%s\n' "$lcurr" | get_path_hierarchy_level)";

        # Get path fork levels
        fork_level_next="$(get_path_fork_level "$lcurr" "$lnext")";
        fork_level_prev="$(get_path_fork_level "$lcurr" "$lprev")";

        # Count relative ups needed (`../`)
        relups_next="$((lcurr_hier - fork_level_next + 1))";
        relups_prev="$((lcurr_hier - fork_level_prev + 1))";

        # Initialize Next and Prev links with relative ups to fork.
        link_next="";
        for (( j=0; j<relups_next; j++ )); do link_next+="../"; done;
        if [[ "$relups_next" -eq 0 ]]; then link_next+="/"; fi; # handle new subpage path dive
        link_prev="";
        for (( j=0; j<relups_prev; j++ )); do link_prev+="../"; done;

        # Append branches from fork to Next and Prev targets
        link_next+="$(prune_path_rootside "$lnext" "$fork_level_next")";
        link_prev+="$(prune_path_rootside "$lprev" "$fork_level_prev")";

        # Print navigation link wikicode
        if [[ -z "$lprev" ]] && [[ -z "$lnext" ]]; then
            # Only subpage in the list: there is nothing to link to.
            printf "[[../|Up]]\n" >> "$p_spwc";
        elif [[ -z "$lprev" ]]; then
            printf "[[%s|Next]], [[../|Up]]\n" "$link_next" >> "$p_spwc";
        elif [[ -n "$lnext" ]]; then
            printf "[[%s|Next]], [[%s|Previous]], [[../|Up]]\n" "$link_next" "$link_prev" >> "$p_spwc";
        else
            printf "[[%s|Previous]], [[../|Up]]\n" "$link_prev" >> "$p_spwc";
        fi;

        # Print subpage content
        print_wc_content >> "$p_spwc";
        print_wc_footer >> "$p_spwc";

        {
            declare -p k lprev lcurr lnext lcurr_hier; # debug
            declare -p fork_level_next fork_level_prev relups_next relups_prev; # debug
            declare -p link_next link_prev; # debug
            printf "====================\n"; # debug
        } 1>&2;
    done;

    # Add footer to subpage list wikicode file
    print_wc_footer >> "$p_splwc";

    yell "STATUS:Finished create_output_wikicode()."; # debug
}; # generate output subpage wikicode
main() {
    # Desc: Main program: split the input wikicode file into subpage
    #   content files, validate the subpage titles and generate the
    #   output subpage wikicode.
    # Input: arg1   path  input wikicode file
    #        var    p_spl   path subpage list file
    #        var    p_splv  path subpage list file (validated)
    # Output: var   fp_in (global) input file path
    # Depends: check_input(), assemble_subpage_ftree(),
    #          validate_subpage_list(), create_output_wikicode(), die()
    check_input "$@";
    declare -g fp_in="$1"; # input file path
    assemble_subpage_ftree;
    # The subpage list is only created when a subpage marker was found.
    if [[ ! -f "$p_spl" ]]; then
        die "FATAL:No subpage markers found; subpage list missing:${p_spl}";
    fi;
    validate_subpage_list < "$p_spl" > "$p_splv" || \
        die "FATAL:Subpage list validation failed:${p_spl}";
    create_output_wikicode;
}; # main program
371 | ||
# Run the program with the script's command-line arguments.
main "$@";
373 | ||
# Author: Steven Baltakatei Sandoval
# License: GPLv3+
376 | ||
377 | ||
# Example input:
# ```
# <!-- @subpage:Introduction -->
# This is an introduction.
# <!-- @subpage:Foreword -->
# This is a foreword.

# <!-- @subpage:Part 1/Chapter 1 -->
# Blah.
# <!-- @subpage:Part 1/Chapter 2 -->
# Blah.
# <!-- @subpage:Part 1/Chapter 2/Section A -->
# Blabbity blah.
# <!-- @subpage:Part 2/ -->
# Blah.
# <!-- @subpage:Part 2/Chapter 1 -->
# More blah.
# ```