Commit | Line | Data |
---|---|---|
fea1bafc SBS |
1 | #!/bin/bash |
2 | # Desc: Convert wikicode to subpages | |
3 | # Usage: mw_wc2sp.sh [path file] | |
4 | # Input: arg1 path input wikicode file | |
5 | # Output: files wikicode file tree | |
6 | # Depends: Bash 5.1.16, GNU Coreutils 8.32 | |
7 | # Version: 0.0.1 | |
8 | ||
# Subpage marker pattern; capture group 2 holds the subpage path.
re_sp='^(<!-- @subpage:)(.*)([ ]*-->)$'

# Default output directory.
d_out=./wikicode/

# File names of the raw and the validated subpage lists.
f_spl="subpage_list.txt"
f_splv="subpage_list_validated.txt"

# Full paths of both lists inside the output directory.
p_spl="${d_out}/${f_spl}"
p_splv="${d_out}/${f_splv}"
15 | ||
# Print the script path followed by all arguments to stderr.
yell() {
    echo "$0: $*" >&2
}

# Report like yell(), then terminate the script with exit status 111.
die() {
    yell "$*"
    exit 111
}

# Run the arguments as a command; if it fails, die() reporting the command.
must() {
    if ! "$@"; then
        die "cannot $*"
    fi
}
get_path_fork_level() {
    # Desc: Get fork level from two paths, i.e. the number of identical
    #   leading path elements counted from the root side.
    # Input:  arg1  str  path
    #         arg2  str  path
    # Output: stdout  int  fork level
    #         stderr  diagnostics if one path is absolute and the other relative
    # Return: 0 on success; 1 if absolute and relative paths are mixed
    # Depends: tr, sed
    # Version: 0.0.1
    local path1="$1";
    local path2="$2";
    local rooted1=false;
    local rooted2=false;
    local -a parts1=();
    local -a parts2=();
    local i;
    local fork_level=0;

    # Classify paths before normalizing so that a bare `/` counts as absolute
    if [[ "$path1" == /* ]]; then rooted1=true; fi;
    if [[ "$path2" == /* ]]; then rooted2=true; fi;

    # Check for mixed absolute/relative paths; diagnostics go to stderr
    # only so that stdout never carries anything but the fork level.
    if [[ "$rooted1" != "$rooted2" ]]; then
        {
            declare -p path1 path2;
            echo "FATAL:Mixed relative and absolute paths not supported.";
        } 1>&2;
        return 1;
    fi;

    # Squeeze multiple slashes, remove trailing slashes, remove initial /
    path1="$(printf '%s\n' "$path1" | tr -s '/' | sed -e 's:/*$::' -e 's:^/::' )";
    path2="$(printf '%s\n' "$path2" | tr -s '/' | sed -e 's:/*$::' -e 's:^/::' )";

    # Save paths as arrays with `/` as element delimiter
    local IFS='/';
    read -ra parts1 <<< "$path1";
    read -ra parts2 <<< "$path2";

    # Get fork level by counting identical path elements from rootside
    for (( i=0; i<${#parts1[@]} && i<${#parts2[@]}; i++ )); do
        if [[ "${parts1[i]}" != "${parts2[i]}" ]]; then break; fi;
        fork_level=$((fork_level + 1));
    done;

    printf '%s\n' "$fork_level";
    return 0;
}; # Get fork level int from two paths
prune_path_rootside() {
    # Desc: Prunes a path from the root-side to a specified prune level,
    #   i.e. drops the first `prune level` path elements.
    # Input:  arg1  str  path
    #         arg2  int  prune level (0-indexed, non-negative; empty means 0)
    # Output: stdout  str  pruned path without trailing slashes; the
    #           initial `/` of an absolute path is kept only at level 0
    #         stderr  diagnostics on an invalid prune level
    # Return: 0 on success; 1 if the prune level is not a non-negative integer
    # Version: 0.0.1
    local path="$1";
    local prune_level="${2:-0}";
    local is_rooted=false;
    local -a parts=();
    local i;
    local pruned_path="";

    # Reject prune levels that would index the array from the end or
    # be evaluated as arbitrary arithmetic expressions.
    if [[ ! "$prune_level" =~ ^[0-9]+$ ]]; then
        echo "FATAL:Prune level must be a non-negative integer:$prune_level" 1>&2;
        return 1;
    fi;
    prune_level=$((10#$prune_level)); # force base 10 (e.g. `08`)

    # Check for absolute or relative path
    if [[ "$path" == /* ]]; then
        is_rooted=true;
        path="${path#/}"; # remove initial /
    fi;

    # Save path as array with `/` as element delimiter
    local IFS='/';
    read -ra parts <<< "$path";

    # Assemble pruned path from prune_level
    for (( i=prune_level; i<${#parts[@]}; i++ )); do
        pruned_path+="${parts[i]}/";
    done;

    # Trim trailing `/` delimiters
    while [[ "$pruned_path" == */ ]]; do
        pruned_path="${pruned_path%/}";
    done;

    # Restore initial / if appropriate
    if [[ "$is_rooted" == "true" ]] && (( prune_level == 0 )); then
        pruned_path="/${pruned_path}";
    fi;

    # Output pruned path
    printf '%s\n' "$pruned_path";
    return 0;
}; # prune path rootside to int specified level
get_path_hierarchy_level() {
    # Desc: Outputs hierarchy level of input paths
    # Example: $ cat lines.txt | get_path_hierarchy_level
    # Input:  stdin   str  lines with /-delimited paths
    # Output: stdout  int  hierarchy level of each path, one per line
    #         stderr  diagnostics if absolute and relative paths are mixed
    # Return: 0 on success; 1 if absolute and relative paths are mixed
    #           (nothing is written to stdout in that case)
    # Depends: tr, sed, awk
    # Version: 0.0.1

    local line level;
    local flag_root="";
    local -a output=();
    local n=0;

    # `|| [[ -n "$line" ]]` keeps a last line that lacks a newline
    while read -r line || [[ -n "$line" ]]; do
        if (( n == 0 )); then
            # The first line decides whether the input is absolute or relative
            if [[ "$line" == /* ]]; then
                flag_root=true;
            else
                flag_root=false;
            fi;
        elif { [[ "$flag_root" == "true" ]] && [[ "$line" != /* ]]; } || \
             { [[ "$flag_root" == "false" ]] && [[ "$line" == /* ]]; } then
            # Every later line must agree with the first one
            echo "FATAL:Mixed relative and absolute paths not supported." 1>&2; return 1;
        fi;

        # Squeeze multiple slashes and remove trailing slashes
        line="$(printf '%s\n' "$line" | tr -s '/' | sed 's:/*$::' )";

        # Count the number of slashes to determine hierarchy level
        # (an empty line yields -1 because awk sees zero fields)
        level="$(printf '%s\n' "$line" | awk -F'/' '{print NF-1}' )";
        if [[ "$flag_root" == "true" ]]; then level=$((level - 1)); fi;

        # Append to output
        output+=("$level");
        n=$((n + 1));
    done;

    # Print output; print nothing at all for empty input
    if (( ${#output[@]} > 0 )); then
        printf "%s\n" "${output[@]}";
    fi;
}; # return hierarchy level of lines as integers
validate_subpage_list() {
    # Desc: Check for illegal characters in subpage titles and encode
    #   ampersands and quotes as HTML-style codes
    # Input:  stdin   unvalidated subpage list, one title per line
    # Output: stdout  validated subpage list, one title per line
    #         stderr  diagnostics
    # Return: 1 if sed or writing to stdout fails; exits via die() on an
    #   illegal character or a trailing space
    # Depends: die(), sed
    local line;
    local re_illegal='[][><|}{#_]'; # match illegal page names chars #, <, >, [, ], _, {, |, }
    local re_ts=' $';               # match trailing space

    # `IFS=` keeps trailing blanks so the trailing-space check below can
    # see them; `|| [[ -n "$line" ]]` keeps an unterminated last line.
    while IFS= read -r line || [[ -n "$line" ]]; do

        # Reject chars illegal in Mediawiki page titles.
        if [[ "$line" =~ $re_illegal ]]; then
            die "FATAL:Illegal char. Not allowed: #, <, >, [, ], _, {, |, }:$line";
        fi;

        # Reject trailing spaces.
        if [[ "$line" =~ $re_ts ]]; then
            die "FATAL:Trailing spaces not allowed:$line";
        fi;

        # Drop remaining leading/trailing blanks (spaces, tabs), which a
        # plain `read` would have stripped as well.
        line="${line#"${line%%[![:blank:]]*}"}";
        line="${line%"${line##*[![:blank:]]}"}";

        # Replace some chars with HTML-style codes
        ## replace ampersand & with &amp;    # must be first
        ## replace double quote " with &quot;
        ## replace single quote ' with &#39;
        ## NOTE(review): the exact codes are an assumption; confirm
        ## against the wiki these titles are destined for.
        ## (`\&` is a literal ampersand in a sed replacement.)
        line="$(sed \
                    -e 's/&/\&amp;/g' \
                    -e 's/"/\&quot;/g' \
                    -e "s/'/\\&#39;/g" \
                    <<< "$line" )" || {
            echo "FATAL:Error running sed." 1>&2; return 1; };

        printf "%s\n" "$line" || {
            echo "FATAL:Error writing stdout." 1>&2; return 1; };
    done;
};
check_input() {
    # Desc: Check that the input file argument names a readable regular file
    # Input:  arg1  path  input wikicode file
    # Output: none on success; exits via die() otherwise
    # Depends: die()
    local path_in="${1:-}";

    # Give a usage hint instead of an empty path in the error message
    if [[ -z "$path_in" ]]; then
        die "FATAL:No input file specified. Usage: mw_wc2sp.sh [path file]";
    fi;
    if [[ ! -f "$path_in" ]]; then die "FATAL:Not a file path:$path_in"; fi;
    if [[ ! -r "$path_in" ]]; then die "FATAL:File not readable:$path_in"; fi;
}; # check input
assemble_subpage_ftree() {
    # Desc: Identify subpage markers in input wikicode file to create
    #   subpage list and subpage content files
    # Input:  var  fp_in   path input file
    #         var  re_sp   regex for identifying subpage markers
    #                      (capture group 2 is the subpage path)
    #         var  d_out   path directory for output
    #         var  p_spl   path subpage list file
    # Output: files  ${d_out}/<subpage path>.content, one per marker; each
    #                starts with its marker line
    #         file   ${d_out}/presubpage.content, content before the first
    #                marker (only created if there is any)
    #         file   ${p_spl}, one subpage path appended per marker
    # Depends: yell(), die(), must()
    local line marker_line sp_path spc_path spc_dir;

    yell "STATUS:Running assemble_subpage_ftree()."; # debug

    # The output dir must exist before any pre-subpage content is written
    must mkdir -p "$d_out";
    spc_path="${d_out}/presubpage.content"; # default destination for content before subpage detected

    ## Process input line-by-line
    ## `IFS=` preserves leading/trailing whitespace of content lines;
    ## `|| [[ -n "$line" ]]` keeps a last line that lacks a newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        ### Check for subpage marker on a blank-trimmed copy so that
        ### indented markers are still recognized
        marker_line="${line#"${line%%[![:blank:]]*}"}";
        marker_line="${marker_line%"${marker_line##*[![:blank:]]}"}";
        if [[ "$marker_line" =~ $re_sp ]]; then
            #### Identify new subpage path (trailing spaces removed)
            sp_path="${BASH_REMATCH[2]}";
            sp_path="${sp_path%"${sp_path##*[! ]}"}";
            #### Update subpage content file path
            spc_path="${d_out}/${sp_path}.content";
            spc_dir="$(dirname "$spc_path"; )";
            #### Prepare file destination
            if [[ ! -d "$spc_dir" ]]; then
                must mkdir -p "$spc_dir" && \
                    yell "STATUS:Created dir:${spc_dir}";
            fi;
            if [[ -f "$spc_path" ]]; then
                die "FATAL:File already exists:${spc_path}";
            else
                must touch "$spc_path";
            fi;
            #### Append subpage path to subpage list
            printf "%s\n" "$sp_path" >> "$p_spl" || \
                die "FATAL:Cannot append to subpage list:${p_spl}";
        fi;
        ### Write subpage content (a failed redirect is caught here; it
        ### would not reach a must() wrapper)
        printf "%s\n" "$line" >> "$spc_path" || \
            die "FATAL:Cannot write subpage content:${spc_path}";
    done < "${fp_in}";

    yell "STATUS:Finished assemble_subpage_ftree()."; # debug
}; # process input wikicode into subpage content files and subpage list
create_output_wikicode() {
    # Desc: Use subpage list and subpage content files to create
    #   output subpage wikicode.
    # Input:  var   p_spl     path subpage list file
    #         var   p_splv    path subpage list file (validated)
    #         file  ${p_spl}  subpage list file
    #         file  ${p_splv} subpage list file (validated)
    #         var   d_out     path directory for output
    # Depends: get_path_fork_level()
    #          prune_path_rootside()
    #          get_path_hierarchy_level()
    #          yell(), die(), must()
    # Output: files   ${d_out}/<validated subpage path>.wc: navigation
    #                 links, the subpage content wrapped in <onlyinclude>,
    #                 and reference sections
    #         stderr  status and debug messages (stdout stays empty)
    local -a lines_spl lines_splv;
    local lc_spl lc_splv i j;
    local f_spc p_spc f_spwc p_spwc;
    local lprev="" lcurr="" lnext="";
    local lprev_hier="" lcurr_hier="" lnext_hier="";
    local fork_level_next fork_level_prev relups_next relups_prev;
    local link_next link_prev;

    yell "Running create_output_wikicode()."; # debug

    # Read subpage list files into arrays.
    mapfile -t lines_spl < "$p_spl" || die "FATAL:Cannot read subpage list:$p_spl";
    mapfile -t lines_splv < "$p_splv" || die "FATAL:Cannot read subpage list:$p_splv";
    ## Add extra blank lines for couple line comparisons
    lines_spl+=('');
    lines_splv+=('');
    declare -p lines_spl 1>&2; # debug

    # Check that subpage list files have same line counts
    lc_spl="${#lines_spl[@]}";
    lc_splv="${#lines_splv[@]}";
    if [[ ! "$lc_spl" -eq "$lc_splv" ]]; then
        die "FATAL:Different line counts for subpage lists:$(declare -p lc_spl lc_splv;)";
    fi;
    declare -p lc_spl lc_splv 1>&2; # debug

    # Read content files according to subpage list file
    #   Note: $i corresponds to “next” line ($lnext). Therefore, use
    #     $((i-1)) to access the “current” ($lcurr) line. This offset is
    #     because subpage list lines are compared using lagging line
    #     comparison.
    for i in "${!lines_spl[@]}"; do
        declare -p i 1>&2; # debug

        # Advance input lines
        lprev="$lcurr";
        lcurr="$lnext";
        lnext="${lines_splv[i]}";
        declare -p lprev lcurr lnext 1>&2; # debug

        # Update hierarchy tracker states
        lprev_hier="$lcurr_hier";
        lcurr_hier="$lnext_hier";
        lnext_hier="$(printf '%s\n' "$lnext" | get_path_hierarchy_level)";

        # Skip first iteration (there is no “current” line yet)
        if [[ "$i" -eq 0 ]]; then
            yell "$i:DEBUG:Skipping first iteration."; # debug
            printf -- "----\n" 1>&2; # debug
            continue; fi;

        # Check subpage content files
        f_spc="${lines_spl[i-1]}.content";
        p_spc="${d_out}/${f_spc}";
        declare -p f_spc p_spc 1>&2; # debug
        ## Exit if subpage content file missing
        if [[ ! -f "$p_spc" ]]; then
            die "FATAL:Subpage content file missing:$p_spc"; fi;

        # Prepare output subpage wikicode files
        f_spwc="${lcurr}.wc"; # use validated subpage name
        p_spwc="${d_out}/${f_spwc}";
        declare -p f_spwc p_spwc 1>&2; # debug
        ## The validated name may differ from the raw name, so its
        ## directory may not have been created with the content files.
        must mkdir -p "$(dirname "$p_spwc")";
        must touch "$p_spwc";

        # Get path fork levels
        fork_level_next="$(get_path_fork_level "$lcurr" "$lnext")";
        fork_level_prev="$(get_path_fork_level "$lcurr" "$lprev")";

        # Count relative ups needed (`../`)
        relups_next="$((lcurr_hier - fork_level_next + 1))";
        relups_prev="$((lcurr_hier - fork_level_prev + 1))";

        # Initialize Next and Prev links with relative ups to fork.
        # With zero ups the target lies below the current page, so the
        # link starts with `/`.
        link_next="";
        for (( j=0; j<relups_next; j++ )); do link_next+="../"; done;
        if [[ "$relups_next" -eq 0 ]]; then link_next+="/"; fi;
        link_prev="";
        for (( j=0; j<relups_prev; j++ )); do link_prev+="../"; done;
        if [[ "$relups_prev" -eq 0 ]]; then link_prev+="/"; fi;

        # Append branchs from fork to Next and Prev targets
        link_next+="$(prune_path_rootside "$lnext" "$fork_level_next")";
        link_prev+="$(prune_path_rootside "$lprev" "$fork_level_prev")";

        # Print navigation link wikicode
        if [[ -z "$lprev" ]] && [[ -z "$lnext" ]]; then
            # Only one subpage: neither Next nor Previous exists
            printf "[[../|Up]]\n" >> "$p_spwc";
        elif [[ -z "$lprev" ]]; then
            printf "[[%s|Next]], [[../|Up]]\n" "$link_next" >> "$p_spwc";
        elif [[ -n "$lnext" ]]; then
            printf "[[%s|Next]], [[%s|Previous]], [[../|Up]]\n" "$link_next" "$link_prev" >> "$p_spwc";
        else
            printf "[[%s|Previous]], [[../|Up]]\n" "$link_prev" >> "$p_spwc";
        fi;

        # Print subpage content
        {
            printf -- "\n----<onlyinclude>\n";
            must cat "$p_spc";
            printf -- "\n</onlyinclude>----\n";
            printf -- "\n==References==\n<references />\n";
            printf -- "\n==Footnotes==\n<references group=fn />\n";
            printf -- "\n==Comments==\n<references group=cmt />\n";
            printf -- "\n";
        } >> "$p_spwc" || die "FATAL:Cannot write subpage wikicode:$p_spwc";

        {
            declare -p i lprev lcurr lnext lprev_hier lcurr_hier lnext_hier;
            declare -p fork_level_next fork_level_prev relups_next relups_prev;
            declare -p link_next link_prev;
            printf "====================\n";
        } 1>&2; # debug
    done;

    yell "STATUS:Finished create_output_wikicode()."; # debug
}; # generate output subpage wikicode
main() {
    # Desc: Run the conversion: check the input, split it into subpage
    #   content files, validate the subpage list, write subpage wikicode
    # Input:  arg1  path  input wikicode file
    #         var   p_spl   path subpage list file
    #         var   p_splv  path subpage list file (validated)
    # Output: var   fp_in   (global) input file path
    # Depends: check_input(), assemble_subpage_ftree(),
    #          validate_subpage_list(), create_output_wikicode(), die()
    check_input "$@";
    declare -g fp_in="$1"; # input file path
    assemble_subpage_ftree;

    # Without any subpage marker no subpage list is written; stop here
    # instead of failing later on the missing file.
    if [[ ! -f "$p_spl" ]]; then
        die "FATAL:No subpage list created (no subpage markers found?):$fp_in";
    fi;

    validate_subpage_list < "$p_spl" > "$p_splv" || \
        die "FATAL:Subpage list validation failed:$p_spl";
    create_output_wikicode;
}; # main program
349 | ||
# Entry point: hand all command-line arguments to main().
main "$@"
351 | ||
352 | # Author: Steven Baltakatei Sandoval | |
353 | # License: GPLv3+ | |
354 | ||
355 | ||
356 | # Example input: | |
357 | # ``` | |
358 | # <!-- @subpage:Introduction --> | |
359 | # This is an introduction. | |
360 | # <!-- @subpage:Foreword --> | |
361 | # This is a foreword. | |
362 | ||
363 | # <!-- @subpage:Part 1/Chapter 1 --> | |
364 | # Blah. | |
365 | # <!-- @subpage:Part 1/Chapter 2 --> | |
366 | # Blah. | |
367 | # <!-- @subpage:Part 1/Chapter 2/Section A --> | |
368 | # Blabbity blah. | |
369 | # <!-- @subpage:Part 2/ --> | |
370 | # Blah. | |
371 | # <!-- @subpage:Part 2/Chapter 1 --> | |
372 | # More blah. | |
373 | # ``` |