# Desc: Convert wikicode to subpages
# Usage: mw_wc2sp.sh [path file]
# Input:  arg1  path  input wikicode file
# Output: files       wikicode file tree
# Depends: Bash 5.1.16, GNU Coreutils 8.32
   9 re_sp
='^(<!-- @subpage:)(.*)([ ]*-->)$'; # subpage marker pattern 
  10 d_out
=.
/wikicode
/; # default output dir 
  11 f_spl
="subpage_list.txt"; # subpage title list 
  12 p_spl
="${d_out}/subpages/${f_spl}"; 
  13 f_splv
="subpage_list_validated.txt"; # subpage title list (validated) 
  14 p_splv
="${d_out}/subpages/${f_splv}"; 
  15 f_splwc
="subpages.wc"; # subpage list wikicode 
  16 p_splwc
="${d_out}/${f_splwc}"; 
  18 yell
() { echo "$0: $*" >&2; } # print script path and all args to stderr 
  19 die
() { yell 
"$*"; exit 111; } # same as yell() but non-zero exit status 
  20 must
() { "$@" || die 
"cannot $*"; } # runs args as command, reports args if command fails 
  21 get_path_fork_level
() { 
  22     # Desc: Get fork level from two paths 
  23     # Input:  arg1    str  path 
  25     # Output: stdout  int  fork level 
  30     # Squeeze multiple slashes and remove trailing slashes 
  31     path1
="$(echo "$path1" | tr -s '/' | sed 's:/*$::' )"; 
  32     path2
="$(echo "$path2" | tr -s '/' | sed 's:/*$::' )"; 
  34     # Check for mixed absolute/relative paths 
  35     if [[ "$path1" =~ ^
/ ]] && [[ "$path2" =~ ^
/ ]]; then 
  38         path1
="$(echo "$path1" | sed -e 's:^/::' )"; 
  39         path2
="$(echo "$path2" | sed -e 's:^/::' )"; 
  40     elif [[ ! "$path1" =~ ^
/ ]] && [[ ! "$path2" =~ ^
/ ]]; then 
  43         declare -p path1 path2 flag_root
; 
  44         echo "FATAL:Mixed relative and absolute paths not supported." 1>&2; 
  48     # Save path as arrays with `/` as element delimiter 
  50     read -ra parts1 
<<< "$path1"; 
  51     read -ra parts2 
<<< "$path2"; 
  53     # Get fork level by counting identical path elements from rootside 
  55     for (( i
=0; i
<${#parts1[@]} && i
<${#parts2[@]}; i
++ )); do 
  56         if [[ "${parts1[i]}" != "${parts2[i]}" ]]; then break; fi; 
  61     #declare -p path1 path2 flag_root parts1 parts2 fork_level; # debug 
  63 }; # Get fork level int from two paths 
  64 prune_path_rootside
() { 
  65     # Desc: Prunes a path from the root-side to a specified prune level. 
  66     # Input: arg1  str  path 
  67     #        arg2  int  prune level (0-indexed) 
  68     # Depends: GNU sed 4.8 
  71     local prune_level
="$2"; 
  73     # Check for absolute or relative path 
  74     if [[ "$path" =~ ^
/ ]]; then 
  77         path
="$(echo "$path" | sed -e 's:^/::' )"; 
  82     # Save path as array with `/` as element delimiter 
  84     read -ra parts 
<<< "$path"; 
  86     # Assemble pruned path from prune_level 
  88     for (( i
=prune_level
; i
<${#parts[@]}; i
++ )); do 
  89         pruned_path
+="${parts[i]}/"; 
  92     # Trim trailing `/` delimiter 
  93     pruned_path
=$
(echo "$pruned_path" | 
sed 's:/*$::'); 
  95     # Restore initial / if appropriate 
  96     if [[ "$flag_root" == "true" ]] && [[ "$prune_level" -eq 0 ]]; then 
  97         pruned_path
=/"$pruned_path"; 
 102     #declare -p path prune_level parts pruned_path && printf "========\n"; # debug 
 104 }; # prune path rootside to int specified level 
 105 get_path_hierarchy_level
() { 
 106     # Desc: Outputs hierarchy level of input paths 
 107     # Example: $ cat lines.txt | get_path_hierarchy_level 
 108     # Input: stdin    str  lines with /-delimited paths 
 109     # Output: stdout  int  hierarchy level of each path 
 117     while read -r line
; do 
 118         # Check for mixed absolute/relative paths. 
 119         if [[ $n -le 0 ]] && [[ "$line" =~ ^
/ ]]; then 
 124         if { [[ "$flag_root" == "true" ]] && [[ ! "$line" =~ ^
/ ]]; } || \
 
 125            { [[ "$flag_root" == "false" ]] && [[ "$line" =~ ^
/ ]]; } then 
 126             echo "FATAL:Mixed relative and absolute paths not supported." 1>&2; return 1; 
 129         # Squeeze multiple slashes and remove trailing slashes 
 130         line
="$(echo "$line" | tr -s '/' | sed 's:/*$::' )"; 
 132         # Count the number of slashes to determine hierarchy level 
 133         level
="$(echo "$line" | awk -F'/' '{print NF-1}' )"; 
 134         if [[ "$flag_root" == "true" ]]; then ((level--
)); fi; 
 138         #declare -p flag_root level; # debug 
 142     printf "%s\n" "${output[@]}"; 
 143 }; # return hierarchy level of lines as integers 
 144 validate_subpage_list
() { 
 145     # Desc: Check for illegal characters in subpage titles 
 146     # Input:  stdin   unvalidated subpage list 
 147     # Output: stdout  validated subpage list 
 148     # Depends: BK-2020-03 read_stdin(), yell(), die() 
 150     while read -r line
; do 
 152         # Reject chars illegal in Mediawiki page titles. 
 153         re_illegal
='[][><|}{#_]'; #  match illegal page names chars #, <, >, [, ], _, {, |, } 
 154         if [[ "$line" =~ 
$re_illegal ]]; then 
 155             die 
"FATAL:Illegal char. Not allowed: #, <, >, [, ], _, {, |, }:$line"; 
 158         # Reject trailing spaces. 
 159         re_ts
=' $';  # match trailing space 
 160         if [[ "$line" =~ 
$re_ts ]]; then 
 161             die 
"FATAL:Trailing spaces not allowed:$line"; 
 164         # Replace some chars with HTML-style codes 
 165         ## replace ampersand    & with &  # must be first 
 166         ## replace double quote " with " 
 167         ## replace single quote ' with ' 
 172                   <<< "$line" )" || { echo "FATAL
:Error running 
sed.
"; }; 
 173         printf "%s
\n" "$line"; 
 175         echo "FATAL
:Error reading stdin.
" 1>&2; return 1; }; 
 179     if [[ ! -f "$path_in" ]]; then die "FATAL
:Not a 
file path
:$1"; fi; 
 181 assemble_subpage_ftree() { 
 182     # Desc: Identify subpage markers in input wikicode file to create 
 183     #   subpage list and subpage content files 
 184     # Input: var  fp_in  path   input file 
 185     #        var  re_sp  regex  for identifying subpage markers 
 186     #        var  d_out  path   directory for output 
 187     #        var  p_spl  path   subpage list file 
 188     #declare -p re_sp d_out f_spl p_spl fp_in; # debug 
 190     yell "STATUS
:Running assemble_subpage_ftree
().
"; # debug 
 192     spc_path="${d_out}/subpages
/presubpage.content
"; # default destination for content before subpage detected 
 194     ## Process input line-by-line 
 195     while read -r line; do 
 196         # declare -p line re_sp; # debug 
 197         ### Check for subpage marker 
 198         if [[ "$line" =~ $re_sp ]]; then 
 199             #### Identify new subpage path 
 200             sp_path="$
(echo "$line" | 
sed -E -e "s/${re_sp}/\2/" -e 's/[ ]*$//'; )"; 
 201             # declare -p sp_path; # debug 
 202             #### Update subpage content file path 
 203             spc_path="${d_out}/subpages
/${sp_path}.content
"; 
 204             spc_dir="$
(dirname "$spc_path"; )"; 
 205             #declare -p spc_path spc_dir; # debug 
 206             #### Prepare file destination 
 207             if [[ ! -d "$spc_dir" ]]; then 
 208                 must mkdir -p "$spc_dir" && \ 
 209                     yell "STATUS
:Created dir
:${spc_dir}"; 
 211             if [[ -f "$spc_path" ]]; then 
 212                 die "FATAL
:File already exists
:${spc_path}"; 
 214                 must touch "$spc_path"; 
 216             #### Append subpage path to subpage list 
 217             printf "%s
\n" "$sp_path" >> "$p_spl"; 
 219         ### Write subpage content 
 220         must printf "%s
\n" "$line" >> "$spc_path"; 
 223     yell "STATUS
:Finished assemble_subpage_ftree
().
"; # debug 
 224 }; # process input wikicode into subpage content files and subpage list 
 226     # Input: var  p_spc  path  subpage content 
 228     printf -- "\n----<onlyinclude
>\n"; 
 230     printf -- "\n</onlyinclude
>----\n"; 
 231 }; # print wikicode content 
 234     printf -- "\n==References
==\n<references 
/>\n" 
 235     printf -- "\n==Footnotes
==\n<references group
=fn 
/>\n"; 
 236     printf -- "\n==Comments
==\n<references group
=cmt 
/>\n"; 
 238 }; # print wikicode footer 
 239 create_output_wikicode() { 
 240     # Desc: Use subpage list and subpage content files to create 
 241     #   output subpage wikicode. 
 242     # Input: var    p_spl   path   subpage list file 
 243     #        var    p_splv  path   subpage list file (validated) 
 244     #        file   ${p_spl}       subpage list file 
 245     #        file   ${p_splv}      subpage list file (validated) 
 246     #        var    d_out   path   directory for output 
 247     # Depends: get_path_fork_level() 
 248     #          prune_path_rootside() 
 249     #          get_path_hierarchy_level() 
 250     #          validate_subpage_list() 
 251     # Output: files  subpages in $d_out 
 253     yell "Running create_output_wikicode
().
"; # debug 
 255     # Read subpage list files into arrays. 
 256     local -a lines_spl lines_splv; 
 257     mapfile -t lines_spl < "$p_spl"; 
 258     mapfile -t lines_splv < "$p_splv"; 
 259     ## Add extra blank lines for couple line comparisons 
 262     declare -p lines_spl; # debug 
 264     # Check that subpage list files have same line counts 
 265     lc_spl="${#lines_spl[@]}"; 
 266     lc_splv="${#lines_splv[@]}"; 
 267     if [[ ! "$lc_spl" -eq "$lc_splv" ]]; then 
 268         die "FATAL
:Different line counts 
for subpage lists
:$
(declare -p lc_spl lc_splv
;)"; 
 270     declare -p lc_spl lc_splv; # debug 
 272     # Initialize subpage list wikicode file 
 273     must touch "$p_splwc"; 
 274     printf "==Stats
==\n\n==Subpages
==\n" >> "$p_splwc"; 
 276     # Read content files according to subpage list file     
 277     #   Note: $i corresponds to “next” line ($lnext). Therefore, use 
 278     #   $((i-1)) to access the “current” ($lcurr) line. This offset is 
 279     #   because subpage list lines are compared using lagging line 
 281     for i in "${!lines_spl[@]}"; do 
 282         declare -p i; # debug; 
 284         # Check subpage content files 
 285         f_spc="${lines_spl[i-1]}.content
";  
 286         p_spc="${d_out}/subpages
/${f_spc}"; 
 287         declare -p f_spc p_spc; 
 288         ## Exit if subpage content file missing 
 289         if [[ "$i" -gt 0 ]] && [[ ! -f "$p_spc" ]]; then 
 290             die "FATAL
:Subpage content 
file missing
:$p_spc"; fi; 
 292         # Prepare output subpage wikicode files 
 293         f_spwc="${lines_splv[i-1]}.
wc"; 
 294         p_spwc="${d_out}/subpages
/${f_spwc}"; # use validated subpage name 
 295         declare -p f_spwc p_spwc; # debug 
 296         if [[ "$i" -gt 0 ]]; then 
 297             must touch "$p_spwc"; 
 298             ## Append subpage list wikicode file 
 299             printf "* [[/%s
]]\n" "${lines_splv[i-1]}" >> "$p_splwc"; 
 302         # Advance input lines 
 305         lnext="${lines_splv[i]}"; 
 306         declare -p lprev lcurr lnext;  # debug 
 308         # Update hierarchy tracker states 
 309         lprev_hier="$lcurr_hier"; 
 310         lcurr_hier="$lnext_hier"; 
 311         lnext_hier="$
(echo "$lnext" | get_path_hierarchy_level
)"; 
 313         # Skip first iteration 
 314         if [[ "$i" -eq 0 ]]; then 
 315             yell "$i:DEBUG
:Skipping first iteration.
";  # debug 
 316             printf -- "----\n" 1>&2;  # debug 
 319         # Get path fork levels 
 320         fork_level_next="$
(get_path_fork_level 
"$lcurr" "$lnext")"; 
 321         fork_level_prev="$
(get_path_fork_level 
"$lcurr" "$lprev")"; 
 323         # Count relative ups needed (`../`) 
 324         relups_next="$
((lcurr_hier 
- fork_level_next 
+ 1))"; 
 325         relups_prev="$
((lcurr_hier 
- fork_level_prev 
+ 1))"; 
 327         # Initialize Next and Prev links with relative ups to fork. 
 329         for (( j=0; j<relups_next; j++ )); do link_next+="..
/"; done; 
 330         if [[ "$relups_next" -eq 0 ]]; then link_next+="/"; fi; # handle new subpage path dive 
 332         for (( j=0; j<relups_prev; j++ )); do link_prev+="..
/"; done; 
 334         # Append branchs from fork to Next and Prev targets 
 335         link_next+="$
(prune_path_rootside 
"$lnext" "$fork_level_next")"; 
 336         link_prev+="$
(prune_path_rootside 
"$lprev" "$fork_level_prev")"; 
 338         # Print navigation link wikicode 
 339         if [[ -z "$lprev" ]]; then 
 340             printf "[[%s|Next
]], [[..
/|Up
]]\n" "$link_next" >> "$p_spwc"; 
 341         elif [[ -n "$lnext" ]]; then 
 342             printf "[[%s|Next
]], [[%s|Previous
]], [[..
/|Up
]]\n" "$link_next" "$link_prev" >> "$p_spwc"; 
 343         elif [[ -z "$lnext" ]]; then 
 344             printf "[[%s|Previous
]], [[..
/|Up
]]\n" "$link_prev" >> "$p_spwc"; 
 346             yell "FATAL
:Here be dragons.
"; 
 349         # Print subpage content 
 350         print_wc_content >> "$p_spwc"; 
 351         print_wc_footer >> "$p_spwc"; 
 353         declare -p i lprev lcurr lnext lprev_hier lcurr_hier lnext_hier; # debug 
 354         declare -p fork_level_next fork_level_prev relups_next relups_prev; # debug 
 355         declare -p link_next link_prev; # debug 
 356         printf "====================\n" # debug 
 359     # Add footer to subpage list wikicode file 
 360     print_wc_footer >> "$p_splwc"; 
 362     yell "STATUS
:Finished create_output_wikicode
().
"; # debug 
 363 }; # generate output subpage wikicode 
 366     declare -g fp_in="$1"; # input file path 
 367     assemble_subpage_ftree; 
 368     validate_subpage_list < "$p_spl" > "$p_splv"; 
 369     create_output_wikicode; 
# Author: Steven Baltakatei Sandoval
# <!-- @subpage:Introduction -->
# This is an introduction.
# <!-- @subpage:Foreword -->
# This is a foreword.
# <!-- @subpage:Part 1/Chapter 1 -->
# <!-- @subpage:Part 1/Chapter 2 -->
# <!-- @subpage:Part 1/Chapter 2/Section A -->
# <!-- @subpage:Part 2/ -->
# <!-- @subpage:Part 2/Chapter 1 -->