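#!/bin/bash
# Desc: Convert a wikicode file into a tree of subpage wikicode files,
#   split at `<!-- @subpage:PATH -->` marker lines
# Usage: mw_wc2sp.sh [path file]
# Input: arg1 path input wikicode file
# Output: files wikicode file tree (written below ./wikicode)
# Depends: Bash 5.1.16, GNU Coreutils 8.32, GNU sed 4.8
# Version: 0.0.1
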
+re_sp='^(<!-- @subpage:)(.*)([ ]*-->)$'; # subpage marker pattern
+d_out=./wikicode/; # default output dir
+f_spl="subpage_list.txt";
+p_spl="${d_out}/${f_spl}";
+f_splv="subpage_list_validated.txt";
+p_splv="${d_out}/${f_splv}";
+
+yell() { echo "$0: $*" >&2; } # print script path and all args to stderr
+die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status
+must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails
+get_path_fork_level() {
+ # Desc: Get fork level from two paths
+ # Input: arg1 str path
+ # arg2 str path
+ # Output: stdout int fork level
+ # Version: 0.0.1
+ local path1="$1";
+ local path2="$2";
+
+ # Squeeze multiple slashes and remove trailing slashes
+ path1="$(echo "$path1" | tr -s '/' | sed 's:/*$::' )";
+ path2="$(echo "$path2" | tr -s '/' | sed 's:/*$::' )";
+
+ # Check for mixed absolute/relative paths
+ if [[ "$path1" =~ ^/ ]] && [[ "$path2" =~ ^/ ]]; then
+ flag_root=true;
+ # Remove initial /
+ path1="$(echo "$path1" | sed -e 's:^/::' )";
+ path2="$(echo "$path2" | sed -e 's:^/::' )";
+ elif [[ ! "$path1" =~ ^/ ]] && [[ ! "$path2" =~ ^/ ]]; then
+ flag_root=false;
+ else
+ declare -p path1 path2 flag_root;
+ echo "FATAL:Mixed relative and absolute paths not supported." 1>&2;
+ return 1;
+ fi;
+
+ # Save path as arrays with `/` as element delimiter
+ local IFS='/';
+ read -ra parts1 <<< "$path1";
+ read -ra parts2 <<< "$path2";
+
+ # Get fork level by counting identical path elements from rootside
+ local fork_level=0;
+ for (( i=0; i<${#parts1[@]} && i<${#parts2[@]}; i++ )); do
+ if [[ "${parts1[i]}" != "${parts2[i]}" ]]; then break; fi;
+ ((fork_level++));
+ done;
+
+ echo "$fork_level";
+ #declare -p path1 path2 flag_root parts1 parts2 fork_level; # debug
+ return 0;
+}; # Get fork level int from two paths
+prune_path_rootside() {
+ # Desc: Prunes a path from the root-side to a specified prune level.
+ # Input: arg1 str path
+ # arg2 int prune level (0-indexed)
+ # Depends: GNU sed 4.8
+ # Version: 0.0.1
+ local path="$1";
+ local prune_level="$2";
+
+ # Check for absolute or relative path
+ if [[ "$path" =~ ^/ ]]; then
+ flag_root=true;
+ # Remove initial /
+ path="$(echo "$path" | sed -e 's:^/::' )";
+ else
+ flag_root=false;
+ fi;
+
+ # Save path as array with `/` as element delimiter
+ local IFS='/';
+ read -ra parts <<< "$path";
+
+ # Assemble pruned path from prune_level
+ local pruned_path="";
+ for (( i=prune_level; i<${#parts[@]}; i++ )); do
+ pruned_path+="${parts[i]}/";
+ done;
+
+ # Trim trailing `/` delimiter
+ pruned_path=$(echo "$pruned_path" | sed 's:/*$::');
+
+ # Restore initial / if appropriate
+ if [[ "$flag_root" == "true" ]] && [[ "$prune_level" -eq 0 ]]; then
+ pruned_path=/"$pruned_path";
+ fi;
+
+ # Output pruned path
+ echo "$pruned_path";
+ #declare -p path prune_level parts pruned_path && printf "========\n"; # debug
+ return 0;
+}; # prune path rootside to int specified level
+get_path_hierarchy_level() {
+ # Desc: Outputs hierarchy level of input paths
+ # Example: $ cat lines.txt | get_path_hierarchy_level
+ # Input: stdin str lines with /-delimited paths
+ # Output: stdout int hierarchy level of each path
+ # Version: 0.0.1
+
+ local line level;
+ local flag_root;
+ local -a output;
+
+ n=0;
+ while read -r line; do
+ # Check for mixed absolute/relative paths.
+ if [[ $n -le 0 ]] && [[ "$line" =~ ^/ ]]; then
+ flag_root=true;
+ else
+ flag_root=false;
+ fi;
+ if { [[ "$flag_root" == "true" ]] && [[ ! "$line" =~ ^/ ]]; } || \
+ { [[ "$flag_root" == "false" ]] && [[ "$line" =~ ^/ ]]; } then
+ echo "FATAL:Mixed relative and absolute paths not supported." 1>&2; return 1;
+ fi;
+
+ # Squeeze multiple slashes and remove trailing slashes
+ line="$(echo "$line" | tr -s '/' | sed 's:/*$::' )";
+
+ # Count the number of slashes to determine hierarchy level
+ level="$(echo "$line" | awk -F'/' '{print NF-1}' )";
+ if [[ "$flag_root" == "true" ]]; then ((level--)); fi;
+
+ # Append to output
+ output+=("$level");
+ #declare -p flag_root level; # debug
+ ((n++));
+ done;
+ # Print output
+ printf "%s\n" "${output[@]}";
+}; # return hierarchy level of lines as integers
+validate_subpage_list() {
+ # Desc: Check for illegal characters in subpage titles
+ # Input: stdin unvalidated subpage list
+ # Output: stdout validated subpage list
+ # Depends: BK-2020-03 read_stdin(), yell(), die()
+ # GNU sed v4.8
+ while read -r line; do
+
+ # Reject chars illegal in Mediawiki page titles.
+ re_illegal='[][><|}{#_]'; # match illegal page names chars #, <, >, [, ], _, {, |, }
+ if [[ "$line" =~ $re_illegal ]]; then
+ die "FATAL:Illegal char. Not allowed: #, <, >, [, ], _, {, |, }:$line";
+ fi;
+
+ # Reject trailing spaces.
+ re_ts=' $'; # match trailing space
+ if [[ "$line" =~ $re_ts ]]; then
+ die "FATAL:Trailing spaces not allowed:$line";
+ fi;
+
+ # Replace some chars with HTML-style codes
+ ## replace ampersand & with & # must be first
+ ## replace double quote " with "
+ ## replace single quote ' with '
+ line="$(sed \
+ -e 's/&/\&/g' \
+ -e 's/"/\"/g' \
+ -e "s/'/\'/g" \
+ <<< "$line" )" || { echo "FATAL:Error running sed."; };
+ printf "%s\n" "$line";
+ done || {
+ echo "FATAL:Error reading stdin." 1>&2; return 1; };
+};
+check_input() {
+ local path_in="$1";
+ if [[ ! -f "$path_in" ]]; then die "FATAL:Not a file path:$1"; fi;
+}; # check input
+assemble_subpage_ftree() {
+ # Desc: Identify subpage markers in input wikicode file to create
+ # subpage list and subpage content files
+ # Input: var fp_in path input file
+ # var re_sp regex for identifying subpage markers
+ # var d_out path directory for output
+ # var p_spl path subpage list file
+ #declare -p re_sp d_out f_spl p_spl fp_in; # debug
+
+ yell "STATUS:Running assemble_subpage_ftree()."; # debug
+
+ spc_path="${d_out}/presubpage.content"; # default destination for content before subpage detected
+
+ ## Process input line-by-line
+ while read -r line; do
+ #declare -p line re_sp; # debug
+ ### Check for subpage marker
+ if [[ "$line" =~ $re_sp ]]; then
+ #### Identify new subpage path
+ sp_path="$(echo "$line" | sed -E -e "s/${re_sp}/\2/" -e 's/[ ]*$//'; )";
+ # declare -p sp_path; # debug
+ #### Update subpage content file path
+ spc_path="${d_out}/${sp_path}.content";
+ spc_dir="$(dirname "$spc_path"; )";
+ #declare -p spc_path spc_dir; # debug
+ #### Prepare file destination
+ if [[ ! -d "$spc_dir" ]]; then
+ must mkdir -p "$spc_dir" && \
+ yell "STATUS:Created dir:${spc_dir}";
+ fi;
+ if [[ -f "$spc_path" ]]; then
+ die "FATAL:File already exists:${spc_path}";
+ else
+ must touch "$spc_path";
+ fi;
+ #### Append subpage path to subpage list
+ printf "%s\n" "$sp_path" >> "$p_spl";
+ fi;
+ ### Write subpage content
+ must printf "%s\n" "$line" >> "$spc_path";
+ done < "${fp_in}";
+
+ yell "STATUS:Finished assemble_subpage_ftree()."; # debug
+}; # process input wikicode into subpage content files and subpage list
+create_output_wikicode() {
+ # Desc: Use subpage list and subpage content files to create
+ # output subpage wikicode.
+ # Input: var p_spl path subpage list file
+ # var p_splv path subpage list file (validated)
+ # file ${p_spl} subpage list file
+ # file ${p_splv} subpage list file (validated)
+ # var d_out path directory for output
+ # Depends: get_path_fork_level()
+ # prune_path_rootside()
+ # get_path_hierarchy_level()
+ # validate_subpage_list()
+ # Output: files subpages in $d_out
+
+ yell "Running create_output_wikicode()."; # debug
+
+ # Read subpage list files into arrays.
+ local -a lines_spl lines_splv;
+ mapfile -t lines_spl < "$p_spl";
+ mapfile -t lines_splv < "$p_splv";
+ ## Add extra blank lines for couple line comparisons
+ lines_spl+=('');
+ lines_splv+=('');
+ declare -p lines_spl; # debug
+
+ # Check that subpage list files have same line counts
+ lc_spl="${#lines_spl[@]}";
+ lc_splv="${#lines_splv[@]}";
+ if [[ ! "$lc_spl" -eq "$lc_splv" ]]; then
+ die "FATAL:Different line counts for subpage lists:$(declare -p lc_spl lc_splv;)";
+ fi;
+ declare -p lc_spl lc_splv; # debug
+
+ # Read content files according to subpage list file
+ # Note: $i corresponds to “next” line ($lnext). Therefore, use
+ # $((i-1)) to access the “current” ($lcurr) line. This offset is
+ # because subpage list lines are compared using lagging line
+ # comparison.
+ for i in "${!lines_spl[@]}"; do
+ declare -p i; # debug;
+
+ # Check subpage content files
+ f_spc="${lines_spl[i-1]}.content";
+ p_spc="${d_out}/${f_spc}";
+ declare -p f_spc p_spc;
+ ## Exit if subpage content file missing
+ if [[ "$i" -gt 0 ]] && [[ ! -f "$p_spc" ]]; then
+ die "FATAL:Subpage content file missing:$p_spc"; fi;
+
+ # Prepare output subpage wikicode files
+ f_spwc="${lines_splv[i-1]}.wc";
+ p_spwc="${d_out}/${f_spwc}"; # use validated subpage name
+ declare -p f_spwc p_spwc; # debug
+ if [[ "$i" -gt 0 ]]; then must touch "$p_spwc"; fi;
+
+ # Advance input lines
+ lprev="$lcurr";
+ lcurr="$lnext";
+ lnext="${lines_splv[i]}";
+ declare -p lprev lcurr lnext; # debug
+
+ # Update hierarchy tracker states
+ lprev_hier="$lcurr_hier";
+ lcurr_hier="$lnext_hier";
+ lnext_hier="$(echo "$lnext" | get_path_hierarchy_level)";
+
+ # Skip first iteration
+ if [[ "$i" -eq 0 ]]; then
+ yell "$i:DEBUG:Skipping first iteration."; # debug
+ printf -- "----\n" 1>&2; # debug
+ continue; fi;
+
+ # Get path fork levels
+ fork_level_next="$(get_path_fork_level "$lcurr" "$lnext")";
+ fork_level_prev="$(get_path_fork_level "$lcurr" "$lprev")";
+
+ # Count relative ups needed (`../`)
+ relups_next="$((lcurr_hier - fork_level_next + 1))";
+ relups_prev="$((lcurr_hier - fork_level_prev + 1))";
+
+ # Initialize Next and Prev links with relative ups to fork.
+ link_next="";
+ for (( j=0; j<relups_next; j++ )); do link_next+="../"; done;
+ if [[ "$relups_next" -eq 0 ]]; then link_next+="/"; fi; # handle new subpage path dive
+ link_prev="";
+ for (( j=0; j<relups_prev; j++ )); do link_prev+="../"; done;
+
+ # Append branchs from fork to Next and Prev targets
+ link_next+="$(prune_path_rootside "$lnext" "$fork_level_next")";
+ link_prev+="$(prune_path_rootside "$lprev" "$fork_level_prev")";
+
+ # Print navigation link wikicode
+ if [[ -z "$lprev" ]]; then
+ printf "[[%s|Next]], [[../|Up]]\n" "$link_next" >> "$p_spwc";
+ elif [[ -n "$lnext" ]]; then
+ printf "[[%s|Next]], [[%s|Previous]], [[../|Up]]\n" "$link_next" "$link_prev" >> "$p_spwc";
+ elif [[ -z "$lnext" ]]; then
+ printf "[[%s|Previous]], [[../|Up]]\n" "$link_prev" >> "$p_spwc";
+ else
+ yell "FATAL:Here be dragons.";
+ fi;
+
+ # Print subpage content
+ printf -- "\n----<onlyinclude>\n" >> "$p_spwc";
+ cat "$p_spc" >> "$p_spwc";
+ printf -- "\n</onlyinclude>----\n" >> "$p_spwc";
+ printf -- "\n==References==\n<references />\n" >> "$p_spwc";
+ printf -- "\n==Footnotes==\n<references group=fn />\n" >> "$p_spwc";
+ printf -- "\n==Comments==\n<references group=cmt />\n" >> "$p_spwc";
+ printf -- "\n"; >> "$p_spwc";
+
+ declare -p i lprev lcurr lnext lprev_hier lcurr_hier lnext_hier; # debug
+ declare -p fork_level_next fork_level_prev relups_next relups_prev; # debug
+ declare -p link_next link_prev; # debug
+ printf "====================\n" # debug
+ done;
+
+ yell "STATUS:Finished create_output_wikicode()."; # debug
+}; # generate output subpage wikicode
+main() {
+ check_input "$@";
+ declare -g fp_in="$1"; # input file path
+ assemble_subpage_ftree;
+ validate_subpage_list < "$p_spl" > "$p_splv";
+ create_output_wikicode;
+}; # main program
+
+main "$@";
+
# Author: Steven Baltakatei Sandoval
# License: GPLv3+


# Example input:
# ```
# <!-- @subpage:Introduction -->
# This is an introduction.
# <!-- @subpage:Foreword -->
# This is a foreword.
#
# <!-- @subpage:Part 1/Chapter 1 -->
# Blah.
# <!-- @subpage:Part 1/Chapter 2 -->
# Blah.
# <!-- @subpage:Part 1/Chapter 2/Section A -->
# Blabbity blah.
# <!-- @subpage:Part 2/ -->
# Blah.
# <!-- @subpage:Part 2/Chapter 1 -->
# More blah.
# ```