#!/bin/bash
# Desc: Convert wikicode to subpages
# Usage: mw_wc2sp.sh [path file]
# Input:  arg1  path  input wikicode file
# Output: files       wikicode file tree
# Depends: Bash 5.1.16, GNU Coreutils 8.32
# Version: 0.4.0

# Subpage marker pattern. Capture group 2 is the subpage title.
re_sp='^(<!-- @subpage:)(.*)([ ]*-->)$'

# Default output directory.
d_out='./wikicode/'

# File names: raw subpage title list, validated subpage title list,
# and the wikicode page that lists all subpages.
f_spl='subpage_list.txt'
f_splv='subpage_list_validated.txt'
f_splwc='subpages.wc'

# Full paths of the files named above.
p_spl="${d_out}/subpages/${f_spl}"
p_splv="${d_out}/subpages/${f_splv}"
p_splwc="${d_out}/${f_splwc}"

yell() {
    # Desc: Print the script path ($0) followed by all args to stderr
    # Input:  args    str  message words (joined with spaces)
    # Output: stderr  str  "<script path>: <message>"
    printf '%s: %s\n' "$0" "$*" 1>&2;
}
die() {
    # Desc: Report all args through yell(), then terminate the shell
    #   with exit status 111.
    # Input: args  str  message words
    # Depends: yell()
    yell "$*";
    exit 111;
}
must() {
    # Desc: Run the args as a command. If the command fails, hand the
    #   text "cannot <args>" to die().
    # Input: args  command and its arguments
    # Depends: die()
    if ! "$@"; then
        die "cannot $*";
    fi;
}
get_path_fork_level() {
    # Desc: Get fork level from two paths, i.e. the number of leading
    #   `/`-delimited path elements (counted from the root side) that
    #   are identical in both paths.
    # Input:  arg1    str  path
    #         arg2    str  path
    # Output: stdout  int  fork level (nothing is printed on error)
    #         stderr  str  error message
    # Return: 0 on success; 1 if exactly one of the paths is absolute
    # Example: get_path_fork_level "a/b/c" "a/b/d"   # prints 2
    # Note: Paths are normalized first (repeated slashes squeezed,
    #   trailing slashes removed), so a path made only of slashes
    #   becomes empty and is then treated as a relative path.
    # Depends: GNU Coreutils 8.32 (tr), GNU sed 4.8
    # Version: 0.0.2
    local path1="$1";
    local path2="$2";
    local -a parts1 parts2;
    local i;
    local fork_level=0;

    # Squeeze multiple slashes and remove trailing slashes.
    # printf (not echo) so a path such as `-n` is not eaten as an option.
    path1="$(printf '%s\n' "$path1" | tr -s '/' | sed 's:/*$::' )";
    path2="$(printf '%s\n' "$path2" | tr -s '/' | sed 's:/*$::' )";

    # Check for mixed absolute/relative paths
    if [[ "$path1" == /* ]] && [[ "$path2" == /* ]]; then
        # Both absolute: remove the initial `/` so that it does not
        # yield an empty first element when splitting.
        path1="${path1#/}";
        path2="${path2#/}";
    elif [[ "$path1" == /* ]] || [[ "$path2" == /* ]]; then
        # Diagnostics go to stderr only; stdout is reserved for the result.
        echo "FATAL:Mixed relative and absolute paths not supported." 1>&2;
        return 1;
    fi;

    # Save paths as arrays with `/` as element delimiter.
    # IFS is changed for the `read` commands only.
    IFS='/' read -ra parts1 <<< "$path1";
    IFS='/' read -ra parts2 <<< "$path2";

    # Get fork level by counting identical path elements from rootside
    for (( i=0; i<${#parts1[@]} && i<${#parts2[@]}; i++ )); do
        if [[ "${parts1[i]}" != "${parts2[i]}" ]]; then break; fi;
        fork_level=$((fork_level + 1));
    done;

    printf '%s\n' "$fork_level";
    #declare -p path1 path2 parts1 parts2 fork_level 1>&2; # debug
    return 0;
}; # Get fork level int from two paths
prune_path_rootside() {
    # Desc: Prunes a path from the root-side to a specified prune level,
    #   i.e. drops the first <prune level> `/`-delimited elements.
    # Input:  arg1    str  path
    #         arg2    int  prune level (0-indexed, non-negative;
    #                      defaults to 0 when empty)
    # Output: stdout  str  pruned path without trailing slashes. The
    #                      leading `/` of an absolute path is kept only
    #                      when the prune level is 0.
    #         stderr  str  error message
    # Return: 0 on success; 1 if the prune level is not a non-negative
    #         integer
    # Example: prune_path_rootside "a/b/c" 1   # prints b/c
    # Version: 0.0.2
    local path="$1";
    local prune_level="${2:-0}";
    local flag_root=false;
    local -a parts;
    local i;
    local pruned_path="";

    # Reject anything but a plain non-negative integer; a negative
    # index would otherwise address the array from its end.
    if [[ ! "$prune_level" =~ ^[0-9]+$ ]]; then
        echo "FATAL:Prune level must be a non-negative integer:$prune_level" 1>&2;
        return 1;
    fi;
    prune_level=$((10#$prune_level)); # force base 10 (e.g. `08`)

    # Check for absolute or relative path
    if [[ "$path" == /* ]]; then
        flag_root=true;
        # Remove initial /
        path="${path#/}";
    fi;

    # Save path as array with `/` as element delimiter.
    # IFS is changed for the `read` command only.
    IFS='/' read -ra parts <<< "$path";

    # Assemble pruned path from prune_level
    for (( i=prune_level; i<${#parts[@]}; i++ )); do
        pruned_path+="${parts[i]}/";
    done;

    # Trim trailing `/` delimiters
    while [[ "$pruned_path" == */ ]]; do
        pruned_path="${pruned_path%/}";
    done;

    # Restore initial / if appropriate
    if [[ "$flag_root" == "true" ]] && [[ "$prune_level" -eq 0 ]]; then
        pruned_path="/${pruned_path}";
    fi;

    # Output pruned path
    printf '%s\n' "$pruned_path";
    #declare -p path prune_level parts pruned_path 1>&2; # debug
    return 0;
}; # prune path rootside to int specified level
get_path_hierarchy_level() {
    # Desc: Outputs hierarchy level of input paths. The level is the
    #   number of `/` delimiters left after squeezing repeated slashes
    #   and removing trailing slashes; for absolute paths the leading
    #   `/` is not counted. An empty line yields -1.
    # Example: $ cat lines.txt | get_path_hierarchy_level
    # Input:  stdin   str  lines with /-delimited paths
    # Output: stdout  int  hierarchy level of each path, one per line
    #                      (nothing is printed on error or empty input)
    #         stderr  str  error message
    # Return: 0 on success; 1 if absolute and relative paths are mixed
    # Note: Leading and trailing whitespace of each line is ignored.
    # Depends: GNU Coreutils 8.32 (tr), GNU sed 4.8, awk
    # Version: 0.0.2

    local line level line_root;
    local flag_root=""; # empty until the first line has been seen
    local -a output=();

    # `|| [[ -n "$line" ]]` also processes a last line that lacks a
    # terminating newline.
    while read -r line || [[ -n "$line" ]]; do
        # Check for mixed absolute/relative paths. The first line
        # decides which kind is expected; every later line must agree.
        if [[ "$line" == /* ]]; then line_root=true; else line_root=false; fi;
        if [[ -z "$flag_root" ]]; then
            flag_root="$line_root";
        elif [[ "$line_root" != "$flag_root" ]]; then
            echo "FATAL:Mixed relative and absolute paths not supported." 1>&2; return 1;
        fi;

        # Squeeze multiple slashes, remove trailing slashes, then count
        # the remaining slashes (fields minus one).
        level="$(printf '%s\n' "$line" \
                   | tr -s '/' \
                   | sed 's:/*$::' \
                   | awk -F'/' '{print NF-1}' )";
        if [[ "$flag_root" == "true" ]]; then level=$((level - 1)); fi;

        # Append to output
        output+=("$level");
        #declare -p flag_root level 1>&2; # debug
    done;

    # Print output
    if [[ "${#output[@]}" -gt 0 ]]; then
        printf "%s\n" "${output[@]}";
    fi;
    return 0;
}; # return hierarchy level of lines as integers
validate_subpage_list() {
    # Desc: Check for illegal characters in subpage titles and replace
    #   some characters with HTML-style codes.
    # Input:  stdin   unvalidated subpage list (one title per line)
    # Output: stdout  validated subpage list (one title per line)
    #         stderr  error messages
    # Return: 0 on success; 1 if sed or writing to stdout fails.
    #   Calls die() on an illegal character or a trailing space.
    # Depends: die()
    #          GNU sed v4.8
    local raw line;
    local re_illegal='[][><|}{#_]'; # match illegal page name chars #, <, >, [, ], _, {, |, }
    local re_ts=' $';               # match trailing space

    # Read each line verbatim (IFS=). A plain `read` would strip
    # surrounding whitespace before the trailing-space check could see
    # it. `|| [[ -n "$raw" ]]` keeps an unterminated last line.
    while IFS= read -r raw || [[ -n "$raw" ]]; do
        # Title with leading/trailing whitespace removed; this is the
        # form that gets validated and printed.
        read -r line <<< "$raw";

        # Reject chars illegal in Mediawiki page titles.
        if [[ "$line" =~ $re_illegal ]]; then
            die "FATAL:Illegal char. Not allowed: #, <, >, [, ], _, {, |, }:$line";
        fi;

        # Reject trailing spaces (checked against the verbatim line).
        if [[ "$raw" =~ $re_ts ]]; then
            die "FATAL:Trailing spaces not allowed:$raw";
        fi;

        # Replace some chars with HTML-style codes
        ## replace ampersand    & with &#38;  # must be first
        ## replace double quote " with &#34;
        ## replace single quote ' with &#39;
        line="$(sed \
                  -e 's/&/\&#38;/g' \
                  -e 's/"/\&#34;/g' \
                  -e "s/'/\&#39;/g" \
                  <<< "$line" )" || {
            # Keep the message out of the validated list on stdout.
            echo "FATAL:Error running sed." 1>&2; return 1; };

        printf "%s\n" "$line" || {
            echo "FATAL:Error writing stdout." 1>&2; return 1; };
    done;
    return 0;
};
check_input() {
    # Desc: Stop via die() unless arg1 is the path of an existing
    #   regular file.
    # Input: arg1  path  input file
    # Depends: die()
    local path_in="$1";
    [[ -f "$path_in" ]] || die "FATAL:Not a file path:$1";
}; # check input
assemble_subpage_ftree() {
    # Desc: Identify subpage markers in input wikicode file to create
    #   subpage list and subpage content files. Every input line is
    #   appended verbatim to the content file of the most recent marker
    #   (the marker line included). Lines before the first marker go to
    #   `presubpage.content`.
    # Input: var  fp_in  path   input file
    #        var  re_sp  regex  for identifying subpage markers
    #                           (capture group 2 is the subpage title)
    #        var  d_out  path   directory for output
    #        var  p_spl  path   subpage list file
    # Output: files  ${d_out}/subpages/<title>.content
    #         file   ${p_spl} (one title appended per marker)
    # Return: Calls die() if a content file already exists, a title is
    #   empty, or a directory/file cannot be written.
    # Depends: yell(), die(), must()
    #          GNU Coreutils 8.32 (mkdir, touch, dirname)
    #declare -p re_sp d_out f_spl p_spl fp_in; # debug
    local line marker sp_path spc_path spc_dir;
    local d_sub="${d_out}/subpages";

    yell "STATUS:Running assemble_subpage_ftree()."; # debug

    # Create the base directory up front so that content found before
    # the first subpage marker has somewhere to go.
    if [[ ! -d "$d_sub" ]]; then
        must mkdir -p "$d_sub" && \
            yell "STATUS:Created dir:${d_sub}";
    fi;
    spc_path="${d_sub}/presubpage.content"; # default destination for content before subpage detected

    ## Process input line-by-line
    ##   `IFS=` keeps leading/trailing whitespace of content lines.
    ##   `|| [[ -n "$line" ]]` keeps a last line without a newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
        ### Check for subpage marker (surrounding whitespace ignored)
        read -r marker <<< "$line";
        if [[ "$marker" =~ $re_sp ]]; then
            #### Identify new subpage path, minus trailing spaces
            sp_path="${BASH_REMATCH[2]}";
            sp_path="${sp_path%"${sp_path##*[! ]}"}";
            if [[ -z "$sp_path" ]]; then
                die "FATAL:Empty subpage title:${marker}";
            fi;
            #### Update subpage content file path
            spc_path="${d_sub}/${sp_path}.content";
            spc_dir="$(dirname "$spc_path"; )";
            #declare -p sp_path spc_path spc_dir; # debug
            #### Prepare file destination
            if [[ ! -d "$spc_dir" ]]; then
                must mkdir -p "$spc_dir" && \
                    yell "STATUS:Created dir:${spc_dir}";
            fi;
            if [[ -f "$spc_path" ]]; then
                die "FATAL:File already exists:${spc_path}";
            fi;
            must touch "$spc_path";
            #### Append subpage path to subpage list
            printf "%s\n" "$sp_path" >> "$p_spl" || \
                die "FATAL:Cannot write file:${p_spl}";
        fi;
        ### Write subpage content
        ###   `|| die` also catches a failing redirection, which a
        ###   `must` prefix would not.
        printf "%s\n" "$line" >> "$spc_path" || \
            die "FATAL:Cannot write file:${spc_path}";
    done < "${fp_in}";

    yell "STATUS:Finished assemble_subpage_ftree()."; # debug
}; # process input wikicode into subpage content files and subpage list
print_wc_content() {
    # Desc: Print the subpage content file wrapped in an
    #   <onlyinclude> section delimited by `----` rules.
    # Input:  var     p_spc  path  subpage content
    # Output: stdout
    printf '%s\n' '' '----<onlyinclude>';
    cat "$p_spc";
    printf '%s\n' '' '</onlyinclude>----';
}; # print wikicode content
print_wc_footer() {
    # Desc: Print the page footer: References, Footnotes and Comments
    #   sections, an end-of-page marker and a final blank line.
    # Output: stdout
    printf '%s\n' \
           '' '==References==' '<references />' \
           '' '==Footnotes==' '<references group=fn />' \
           '' '==Comments==' '<references group=cmt />' \
           '' '<!-- End of Page -->' \
           '';
}; # print wikicode footer
print_wc_nav() {
    # Desc: Print navigation wikilinks. A Next link is printed only if
    #   there is a next subpage, a Previous link only if there is a
    #   previous subpage; the Up link (`../`) is always printed.
    # Input:  var  lprev      str  previous subpage title (empty: none)
    #         var  lnext      str  next subpage title (empty: none)
    #         var  link_prev  str  wikilink target of previous subpage
    #         var  link_next  str  wikilink target of next subpage
    # Output: stdout  blank line followed by one line of wikilinks

    # Print navigation link wikicode
    if [[ -n "$lprev" ]] && [[ -n "$lnext" ]]; then
        printf "\n[[%s|Next]], [[%s|Previous]], [[../|Up]]\n" "$link_next" "$link_prev";
    elif [[ -n "$lnext" ]]; then
        # First subpage: nothing to go back to
        printf "\n[[%s|Next]], [[../|Up]]\n" "$link_next";
    elif [[ -n "$lprev" ]]; then
        # Last subpage: nothing to advance to
        printf "\n[[%s|Previous]], [[../|Up]]\n" "$link_prev";
    else
        # Only subpage: neither neighbour exists
        printf "\n[[../|Up]]\n";
    fi;
}; # print wikicode navigation links
create_output_wikicode() {
    # Desc: Use subpage list and subpage content files to create
    #   output subpage wikicode. For every subpage a `.wc` file is
    #   written holding navigation links, the content, the navigation
    #   links again and a footer. A wikicode page listing all subpages
    #   is written as well.
    # Input: var    p_spl   path   subpage list file
    #        var    p_splv  path   subpage list file (validated)
    #        file   ${p_spl}       subpage list file
    #        file   ${p_splv}      subpage list file (validated)
    #        var    d_out   path   directory for output
    #        var    p_splwc path   subpage list wikicode file
    # Depends: get_path_fork_level()
    #          prune_path_rootside()
    #          get_path_hierarchy_level()
    #          print_wc_nav(), print_wc_content(), print_wc_footer()
    #          yell(), die(), must()
    # Output: files   subpages in $d_out
    #         stdout  debug dumps (`declare -p`)
    # Note: The variables p_spc, lprev, lnext, link_prev and link_next
    #   are local here but are read by the print_wc_*() functions
    #   called below (bash locals are visible to called functions).

    local -a lines_spl lines_splv;
    local lc_spl lc_splv;
    local i j;
    local f_spc p_spc f_spwc p_spwc d_spwc;
    # Sliding window over the validated titles; starts out empty.
    local lprev="" lcurr="" lnext="";
    local lprev_hier="" lcurr_hier="" lnext_hier="";
    local fork_level_next fork_level_prev relups_next relups_prev;
    local link_next link_prev;

    yell "Running create_output_wikicode()."; # debug

    # Read subpage list files into arrays.
    mapfile -t lines_spl < "$p_spl";
    mapfile -t lines_splv < "$p_splv";
    ## Add extra blank lines for couple line comparisons
    lines_spl+=('');
    lines_splv+=('');
    declare -p lines_spl; # debug

    # Check that subpage list files have same line counts
    lc_spl="${#lines_spl[@]}";
    lc_splv="${#lines_splv[@]}";
    if [[ ! "$lc_spl" -eq "$lc_splv" ]]; then
        die "FATAL:Different line counts for subpage lists:$(declare -p lc_spl lc_splv;)";
    fi;
    declare -p lc_spl lc_splv; # debug

    # Initialize subpage list wikicode file
    must touch "$p_splwc";
    printf "==Stats==\n\n==Subpages==\n" >> "$p_splwc";

    # Read content files according to subpage list file
    #   Note: $i corresponds to “next” line ($lnext). Therefore, use
    #   $((i-1)) to access the “current” ($lcurr) line. This offset is
    #   because subpage list lines are compared using lagging line
    #   comparison. At i=0 the index i-1 is -1, which addresses the
    #   blank element appended above; those values are not used.
    for i in "${!lines_spl[@]}"; do
        declare -p i; # debug;

        # Check subpage content files
        f_spc="${lines_spl[i-1]}.content";
        p_spc="${d_out}/subpages/${f_spc}";
        declare -p f_spc p_spc;
        ## Exit if subpage content file missing
        if [[ "$i" -gt 0 ]] && [[ ! -f "$p_spc" ]]; then
            die "FATAL:Subpage content file missing:$p_spc"; fi;

        # Prepare output subpage wikicode files
        f_spwc="${lines_splv[i-1]}.wc";
        p_spwc="${d_out}/subpages/${f_spwc}"; # use validated subpage name
        declare -p f_spwc p_spwc; # debug
        if [[ "$i" -gt 0 ]]; then
            ## A validated title may differ from the raw title, so its
            ## parent directory may not exist yet.
            d_spwc="$(dirname "$p_spwc")";
            if [[ ! -d "$d_spwc" ]]; then must mkdir -p "$d_spwc"; fi;
            must touch "$p_spwc";
            ## Append subpage list wikicode file
            printf "* [[/%s]]\n" "${lines_splv[i-1]}" >> "$p_splwc";
        fi;

        # Advance input lines
        lprev="$lcurr";
        lcurr="$lnext";
        lnext="${lines_splv[i]}";
        declare -p lprev lcurr lnext;  # debug

        # Update hierarchy tracker states
        lprev_hier="$lcurr_hier";
        lcurr_hier="$lnext_hier";
        lnext_hier="$(echo "$lnext" | get_path_hierarchy_level)";

        # Skip first iteration
        if [[ "$i" -eq 0 ]]; then
            yell "$i:DEBUG:Skipping first iteration.";  # debug
            printf -- "----\n" 1>&2;  # debug
            continue; fi;

        # Get path fork levels
        fork_level_next="$(get_path_fork_level "$lcurr" "$lnext")";
        fork_level_prev="$(get_path_fork_level "$lcurr" "$lprev")";

        # Count relative ups needed (`../`)
        relups_next="$((lcurr_hier - fork_level_next + 1))";
        relups_prev="$((lcurr_hier - fork_level_prev + 1))";

        # Initialize Next and Prev links with relative ups to fork.
        #   Zero ups means the target lies below the current page, so
        #   the link must start with `/` (subpage link).
        link_next="";
        for (( j=0; j<relups_next; j++ )); do link_next+="../"; done;
        if [[ "$relups_next" -eq 0 ]]; then link_next+="/"; fi; # handle new subpage path dive
        link_prev="";
        for (( j=0; j<relups_prev; j++ )); do link_prev+="../"; done;
        if [[ "$relups_prev" -eq 0 ]]; then link_prev+="/"; fi; # previous page is below current page

        # Append branchs from fork to Next and Prev targets
        link_next+="$(prune_path_rootside "$lnext" "$fork_level_next")";
        link_prev+="$(prune_path_rootside "$lprev" "$fork_level_prev")";

        # Print subpage content
        print_wc_nav >> "$p_spwc";
        print_wc_content >> "$p_spwc";
        print_wc_nav >> "$p_spwc";
        print_wc_footer >> "$p_spwc";

        declare -p i lprev lcurr lnext lprev_hier lcurr_hier lnext_hier; # debug
        declare -p fork_level_next fork_level_prev relups_next relups_prev; # debug
        declare -p link_next link_prev; # debug
        printf "====================\n" # debug
    done;

    # Add footer to subpage list wikicode file
    print_wc_footer >> "$p_splwc";

    yell "STATUS:Finished create_output_wikicode()."; # debug
}; # generate output subpage wikicode
main() {
    # Desc: Program entry point. Checks the input path, splits the
    #   input into subpage content files, validates the subpage title
    #   list and generates the output wikicode.
    # Input: arg1  path  input wikicode file
    #        var   p_spl   path  subpage list file
    #        var   p_splv  path  subpage list file (validated)
    # Output: var  fp_in   path  input file (global)

    # Step 1: refuse anything that is not an existing file
    check_input "$@";

    # Step 2: publish the input path for assemble_subpage_ftree()
    declare -g fp_in="$1"; # input file path
    assemble_subpage_ftree;

    # Step 3: derive the validated title list from the raw list
    validate_subpage_list \
        < "$p_spl" \
        > "$p_splv";

    # Step 4: write subpage wikicode files
    create_output_wikicode;
}; # main program

# Run the main program, passing on all command-line arguments.
main "$@";

# Author: Steven Baltakatei Sandoval
# License: GPLv3+


# Example input:
# ```
# <!-- @subpage:Introduction -->
# This is an introduction.
# <!-- @subpage:Foreword -->
# This is a foreword.

# <!-- @subpage:Part 1/Chapter 1 -->
# Blah.
# <!-- @subpage:Part 1/Chapter 2 -->
# Blah.
# <!-- @subpage:Part 1/Chapter 2/Section A -->
# Blabbity blah.
# <!-- @subpage:Part 2/ -->
# Blah.
# <!-- @subpage:Part 2/Chapter 1 -->
# More blah.
# ```
