#!/bin/bash
# Desc: Convert wikicode to subpages
# Usage: mw_wc2sp.sh [path file]
# Input: arg1  path  input wikicode file
#        env   MW_WC2SP_RE_SP  (optional) extended regex overriding the
#              subpage marker pattern; capture group 2 must hold the
#              subpage path
# Output: files  wikicode file tree (written below ./wikicode/)
# Depends: Bash 5.1.16, GNU Coreutils 8.32, GNU sed 4.8, awk
# Version: 0.0.1
# Origin: BK-2020-03 commit fea1bafcec18f9858dccbca7326fe60ca5f88816
#         by Steven Baltakatei Sandoval, Tue, 6 Aug 2024 14:24:20 +0000
#         "feat(user/mw_wc2sp.sh):Add script to convert wikicode to mw subpages"
#         https://zdv2.bktei.com/gitweb/BK-2020-03.git/commitdiff_plain/fea1bafcec18f9858dccbca7326fe60ca5f88816?hp=bad61b0dc42d1bfb92892abcff241e7f51b3021d

# Subpage marker pattern. Capture group 2 is the subpage path.
# NOTE(review): the angle-bracketed part of the original pattern was lost
# when this file was extracted (only '^()$' survived). The HTML-comment
# marker syntax below is a reconstruction -- confirm against upstream, or
# override it with MW_WC2SP_RE_SP.
re_sp='^(<!-- @subpage:[ ]*)(.*)(-->)$';
if [[ -n "${MW_WC2SP_RE_SP:-}" ]]; then re_sp="$MW_WC2SP_RE_SP"; fi;
d_out=./wikicode/; # default output dir
f_spl="subpage_list.txt";
p_spl="${d_out}/${f_spl}";
f_splv="subpage_list_validated.txt";
p_splv="${d_out}/${f_splv}";

yell() { echo "$0: $*" >&2; } # print script path and all args to stderr
die() { yell "$*"; exit 111; } # same as yell() but non-zero exit status
must() { "$@" || die "cannot $*"; } # runs args as command, reports args if command fails

get_path_fork_level() {
    # Desc: Get fork level from two paths, i.e. the number of leading
    #       path elements both paths have in common.
    # Input: arg1  str  path
    #        arg2  str  path
    # Output: stdout  int  fork level
    #         return 1 if one path is absolute and the other relative
    # Depends: tr, GNU sed 4.8
    local path1="$1";
    local path2="$2";
    local i fork_level=0;
    local -a parts1=() parts2=();

    # Squeeze multiple slashes and remove trailing slashes
    path1="$(printf '%s\n' "$path1" | tr -s '/' | sed 's:/*$::')";
    path2="$(printf '%s\n' "$path2" | tr -s '/' | sed 's:/*$::')";

    # Check for mixed absolute/relative paths
    if [[ "$path1" == /* ]] && [[ "$path2" == /* ]]; then
        # Remove initial / so it does not become an empty first element
        path1="${path1#/}";
        path2="${path2#/}";
    elif [[ "$path1" == /* ]] || [[ "$path2" == /* ]]; then
        # Diagnostics go to stderr; stdout is captured by callers.
        declare -p path1 path2 1>&2;
        echo "FATAL:Mixed relative and absolute paths not supported." 1>&2;
        return 1;
    fi;

    # Save paths as arrays with `/` as element delimiter
    local IFS='/';
    read -ra parts1 <<< "$path1";
    read -ra parts2 <<< "$path2";

    # Get fork level by counting identical path elements from rootside
    for (( i=0; i<${#parts1[@]} && i<${#parts2[@]}; i++ )); do
        if [[ "${parts1[i]}" != "${parts2[i]}" ]]; then break; fi;
        fork_level=$((fork_level + 1));
    done;

    echo "$fork_level";
    return 0;
}; # Get fork level int from two paths

prune_path_rootside() {
    # Desc: Prunes a path from the root-side to a specified prune level.
    # Input: arg1  str  path
    #        arg2  int  prune level (0-indexed), non-negative
    # Output: stdout  str  path with the first `prune level` elements removed
    #         return 1 if the prune level is not a non-negative integer
    local path="$1";
    local prune_level="$2";
    local flag_root i pruned_path="";
    local -a parts=();

    if [[ ! "$prune_level" =~ ^[0-9]+$ ]]; then
        echo "FATAL:Prune level not a non-negative integer:${prune_level}" 1>&2;
        return 1;
    fi;

    # Check for absolute or relative path
    if [[ "$path" == /* ]]; then
        flag_root=true;
        path="${path#/}"; # remove initial /
    else
        flag_root=false;
    fi;

    # Save path as array with `/` as element delimiter
    local IFS='/';
    read -ra parts <<< "$path";

    # Assemble pruned path from prune_level
    for (( i=prune_level; i<${#parts[@]}; i++ )); do
        pruned_path+="${parts[i]}/";
    done;

    # Trim trailing `/` delimiters
    while [[ "$pruned_path" == */ ]]; do pruned_path="${pruned_path%/}"; done;

    # Restore initial / only when nothing was pruned from an absolute path
    if [[ "$flag_root" == "true" ]] && [[ "$prune_level" -eq 0 ]]; then
        pruned_path=/"$pruned_path";
    fi;

    echo "$pruned_path";
    return 0;
}; # prune path rootside to int specified level

get_path_hierarchy_level() {
    # Desc: Outputs hierarchy level of input paths
    # Example: $ cat lines.txt | get_path_hierarchy_level
    # Input: stdin  str  lines with /-delimited paths
    # Output: stdout  int  hierarchy level of each path (one per line);
    #                      an empty line yields -1
    #         return 1 if absolute and relative paths are mixed
    # Depends: tr, GNU sed 4.8, awk
    local line level;
    local flag_root=""; # decided once, by the first line
    local -a output=();

    while IFS= read -r line || [[ -n "$line" ]]; do
        # The first line decides whether the list is absolute or relative.
        # (Deciding this on every line made every second absolute path
        # look like a mixed list.)
        if [[ -z "$flag_root" ]]; then
            if [[ "$line" == /* ]]; then flag_root=true; else flag_root=false; fi;
        fi;

        # Check for mixed absolute/relative paths.
        if { [[ "$flag_root" == "true" ]] && [[ "$line" != /* ]]; } || \
           { [[ "$flag_root" == "false" ]] && [[ "$line" == /* ]]; }; then
            echo "FATAL:Mixed relative and absolute paths not supported." 1>&2;
            return 1;
        fi;

        # Squeeze multiple slashes and remove trailing slashes
        line="$(printf '%s\n' "$line" | tr -s '/' | sed 's:/*$::')";

        # Count the number of slashes to determine hierarchy level
        level="$(printf '%s\n' "$line" | awk -F'/' '{print NF-1}')";
        if [[ "$flag_root" == "true" ]]; then level=$((level - 1)); fi;

        output+=("$level");
    done;

    # Print output (nothing at all for empty input)
    if [[ "${#output[@]}" -gt 0 ]]; then printf "%s\n" "${output[@]}"; fi;
    return 0;
}; # return hierarchy level of lines as integers

validate_subpage_list() {
    # Desc: Check for illegal characters in subpage titles and encode
    #       ampersands and quotes as HTML-style codes.
    # Input: stdin  unvalidated subpage list
    # Output: stdout  validated subpage list
    #         exits via die() on an illegal title
    # Depends: yell(), die(), GNU sed v4.8
    local line;
    local re_illegal='[][><|}{#_]'; # match illegal page name chars #, <, >, [, ], _, {, |, }
    local re_ts=' $';               # match trailing space

    # `IFS=` keeps trailing blanks so the trailing-space check can see them.
    while IFS= read -r line || [[ -n "$line" ]]; do

        # Reject chars illegal in Mediawiki page titles.
        if [[ "$line" =~ $re_illegal ]]; then
            die "FATAL:Illegal char. Not allowed: #, <, >, [, ], _, {, |, }:$line";
        fi;

        # Reject trailing spaces.
        if [[ "$line" =~ $re_ts ]]; then
            die "FATAL:Trailing spaces not allowed:$line";
        fi;

        # Replace some chars with HTML-style codes
        ## replace ampersand & with &amp;   # must be first
        ## replace double quote " with &quot;
        ## replace single quote ' with &#39;
        line="$(sed \
                   -e 's/&/\&amp;/g' \
                   -e 's/"/\&quot;/g' \
                   -e "s/'/\&#39;/g" \
                   <<< "$line")" || die "FATAL:Error running sed.";
        printf "%s\n" "$line";
    done;
    return 0;
}; # validate subpage titles

check_input() {
    # Desc: Abort unless arg1 is the path of an existing regular file.
    local path_in="$1";
    if [[ ! -f "$path_in" ]]; then die "FATAL:Not a file path:$1"; fi;
}; # check input

assemble_subpage_ftree() {
    # Desc: Identify subpage markers in input wikicode file to create
    #       subpage list and subpage content files
    # Input: var  fp_in  path input file
    #        var  re_sp  regex for identifying subpage markers
    #        var  d_out  path directory for output
    #        var  p_spl  path subpage list file
    # Output: file  ${p_spl}                  one subpage path per marker
    #         file  ${d_out}/<path>.content   lines from each marker
    #                                         (inclusive) to the next
    #         file  ${d_out}/presubpage.content  lines before first marker
    local line line_trim sp_path spc_path spc_dir;

    yell "STATUS:Running assemble_subpage_ftree().";

    # The output dir must exist before pre-marker content can be written.
    must mkdir -p "$d_out";
    # A stale list would be appended to and no longer match the content files.
    if [[ -e "$p_spl" ]]; then die "FATAL:File already exists:${p_spl}"; fi;

    spc_path="${d_out}/presubpage.content"; # default destination for content before subpage detected

    ## Process input line-by-line. `IFS=` preserves leading whitespace,
    ## which is significant in wikicode; the `||` clause keeps a final
    ## line that lacks a trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        ### Check for subpage marker (surrounding whitespace is ignored)
        line_trim="${line#"${line%%[![:space:]]*}"}";
        line_trim="${line_trim%"${line_trim##*[![:space:]]}"}";
        if [[ "$line_trim" =~ $re_sp ]]; then
            #### Identify new subpage path, minus trailing spaces
            sp_path="${BASH_REMATCH[2]}";
            sp_path="${sp_path%"${sp_path##*[! ]}"}";
            if [[ -z "$sp_path" ]]; then
                die "FATAL:Empty subpage path in marker:${line}";
            fi;
            #### Update subpage content file path
            spc_path="${d_out}/${sp_path}.content";
            spc_dir="$(dirname "$spc_path")";
            #### Prepare file destination
            if [[ ! -d "$spc_dir" ]]; then
                must mkdir -p "$spc_dir";
                yell "STATUS:Created dir:${spc_dir}";
            fi;
            if [[ -f "$spc_path" ]]; then
                die "FATAL:File already exists:${spc_path}";
            else
                must touch "$spc_path";
            fi;
            #### Append subpage path to subpage list
            printf "%s\n" "$sp_path" >> "$p_spl" || \
                die "FATAL:Cannot write:${p_spl}";
        fi;
        ### Write subpage content (the marker line itself is included)
        printf "%s\n" "$line" >> "$spc_path" || \
            die "FATAL:Cannot write:${spc_path}";
    done < "${fp_in}";

    if [[ ! -f "$p_spl" ]]; then
        die "FATAL:No subpage markers found in:${fp_in}";
    fi;

    yell "STATUS:Finished assemble_subpage_ftree().";
}; # process input wikicode into subpage content files and subpage list

get_relative_subpage_link() {
    # Desc: Build a relative Mediawiki subpage link from one subpage to
    #       another: one `../` per level from the source page up to the
    #       point where both paths fork, then the remainder of the target.
    # Input: arg1  str  path of the page containing the link
    #        arg2  str  path of the link target
    # Output: stdout  str  relative link target
    # Depends: get_path_hierarchy_level(), get_path_fork_level(),
    #          prune_path_rootside()
    # NOTE(review): the original link-building statements were lost in
    #   extraction; this follows the surviving relups/fork arithmetic and
    #   the "relative ups to fork" comment. Confirm against upstream.
    local from="$1";
    local to="$2";
    local hier fork relups j branch link="";

    hier="$(printf '%s\n' "$from" | get_path_hierarchy_level)" || return 1;
    fork="$(get_path_fork_level "$from" "$to")" || return 1;

    # Count relative ups needed (`../`)
    relups="$((hier - fork + 1))";
    for (( j=0; j<relups; j++ )); do link+="../"; done;

    # Append branch beyond the fork
    branch="$(prune_path_rootside "$to" "$fork")" || return 1;

    # NOTE(review): a target below the source page needs a leading `/`
    # to be a subpage link instead of a link to a top-level page.
    if [[ "$relups" -le 0 ]] && [[ -n "$branch" ]]; then link="/"; fi;

    printf "%s\n" "${link}${branch}";
    return 0;
}; # relative wikilink between two subpages

create_output_wikicode() {
    # Desc: Use subpage list and subpage content files to create
    #       output subpage wikicode.
    # Input: var   p_spl      path subpage list file
    #        var   p_splv     path subpage list file (validated)
    #        file  ${p_spl}   subpage list file
    #        file  ${p_splv}  subpage list file (validated)
    #        var   d_out      path directory for output
    # Depends: get_relative_subpage_link(), yell(), die(), must()
    # Output: files  subpages in $d_out (`<validated subpage path>.wc`)
    local -a lines_spl=() lines_splv=();
    local lc_spl lc_splv k;
    local p_spc p_spwc;
    local lprev lcurr lnext link_next link_prev nav;

    yell "Running create_output_wikicode().";

    # Read subpage list files into arrays.
    mapfile -t lines_spl < "$p_spl" || die "FATAL:Cannot read:${p_spl}";
    mapfile -t lines_splv < "$p_splv" || die "FATAL:Cannot read:${p_splv}";

    # Check that subpage list files have same line counts
    lc_spl="${#lines_spl[@]}";
    lc_splv="${#lines_splv[@]}";
    if [[ "$lc_spl" -ne "$lc_splv" ]]; then
        die "FATAL:Different line counts for subpage lists:$(declare -p lc_spl lc_splv;)";
    fi;

    for (( k=0; k<lc_spl; k++ )); do
        # Check subpage content file (named after the raw subpage path)
        p_spc="${d_out}/${lines_spl[k]}.content";
        if [[ ! -f "$p_spc" ]]; then
            die "FATAL:Subpage content file missing:$p_spc";
        fi;

        # Prepare output subpage wikicode file (validated subpage name).
        # Its dir can differ from the content file's when the title
        # contained characters that were encoded during validation.
        p_spwc="${d_out}/${lines_splv[k]}.wc";
        must mkdir -p "$(dirname "$p_spwc")";

        # Neighbouring subpages; empty at either end of the list
        lcurr="${lines_splv[k]}";
        lprev=""; lnext="";
        if [[ "$k" -gt 0 ]]; then lprev="${lines_splv[k-1]}"; fi;
        if [[ "$((k + 1))" -lt "$lc_spl" ]]; then lnext="${lines_splv[k+1]}"; fi;

        # Build Next and Prev links
        link_next=""; link_prev="";
        if [[ -n "$lnext" ]]; then
            link_next="$(get_relative_subpage_link "$lcurr" "$lnext")" || \
                die "FATAL:Cannot build Next link:${lcurr}";
        fi;
        if [[ -n "$lprev" ]]; then
            link_prev="$(get_relative_subpage_link "$lcurr" "$lprev")" || \
                die "FATAL:Cannot build Previous link:${lcurr}";
        fi;

        # Assemble navigation line.
        # NOTE(review): the branch for the first subpage was lost in
        # extraction and is reconstructed by symmetry with the surviving
        # last-subpage branch; the single-subpage case is an addition.
        if [[ -z "$lprev" ]] && [[ -z "$lnext" ]]; then
            nav="[[../|Up]]";
        elif [[ -z "$lprev" ]]; then
            printf -v nav "[[%s|Next]], [[../|Up]]" "$link_next";
        elif [[ -n "$lnext" ]]; then
            printf -v nav "[[%s|Next]], [[%s|Previous]], [[../|Up]]" "$link_next" "$link_prev";
        else
            printf -v nav "[[%s|Previous]], [[../|Up]]" "$link_prev";
        fi;

        # Write navigation, subpage content and standard trailing sections
        {
            printf "%s\n" "$nav";
            printf -- "\n----\n";
            cat "$p_spc" || die "FATAL:Cannot read:${p_spc}";
            printf -- "\n----\n";
            printf -- "\n==References==\n\n";
            printf -- "\n==Footnotes==\n\n";
            printf -- "\n==Comments==\n\n";
            printf -- "\n";
        } > "$p_spwc" || die "FATAL:Cannot write:${p_spwc}";
    done;

    yell "STATUS:Finished create_output_wikicode().";
}; # generate output subpage wikicode

main() {
    # Desc: Run all conversion steps on the input file given as arg1.
    # Returns (rather than exits) on a usage error; as the last command of
    # the script this still yields exit status 111.
    if [[ "$#" -lt 1 ]]; then
        yell "FATAL:Missing input file. Usage: mw_wc2sp.sh [path file]";
        return 111;
    fi;
    check_input "$@";
    declare -g fp_in="$1"; # input file path
    assemble_subpage_ftree;
    validate_subpage_list < "$p_spl" > "$p_splv" || \
        die "FATAL:Cannot validate subpage list:${p_spl}";
    create_output_wikicode;
}; # main program

main "$@";

# Author: Steven Baltakatei Sandoval
# License: GPLv3+


# Example input:
# NOTE(review): the marker lines of the original example were lost in
# extraction; the markers and subpage names below are placeholders that
# match the default marker pattern above.
# ```
# <!-- @subpage: Introduction -->
# This is an introduction.
# <!-- @subpage: Foreword -->
# This is a foreword.
#
# <!-- @subpage: Chapter 1 -->
# Blah.
# <!-- @subpage: Chapter 1/Section 1 -->
# Blah.
# <!-- @subpage: Chapter 1/Section 2 -->
# Blabbity blah.
# <!-- @subpage: Chapter 2 -->
# Blah.
# <!-- @subpage: Chapter 2/Section 1 -->
# More blah.
# ```