From: Steven Baltakatei Sandoval Date: Sat, 3 May 2025 02:36:19 +0000 (+0000) Subject: feat(user/htmlz_to_cbz):Deduplicate consecutive repeat img urls X-Git-Url: https://zdv2.bktei.com/gitweb/BK-2020-03.git/commitdiff_plain/2d390fa04b24112a85c49d07025484dce82d8bd7 feat(user/htmlz_to_cbz):Deduplicate consecutive repeat img urls --- diff --git a/user/htmlz_to_cbz.sh b/user/htmlz_to_cbz.sh index bf535f0..8826e9e 100755 --- a/user/htmlz_to_cbz.sh +++ b/user/htmlz_to_cbz.sh @@ -1,13 +1,13 @@ #!/bin/bash # Desc: Collects .jpg/jpeg files from a Calibre .htmlz file into .cbz files -# Version: 0.0.1 +# Version: 0.0.2 for fin in ./*.htmlz; do ( dout="${fin%.*}"; unzip "$fin" -x / -d "$dout"; pushd "$dout"; - mapfile -t images < <(cat index.html | grep -E "(.jpg|.jpeg)" | sed -E -e 's#.+(images/[0-9]+.(jpeg|jpg)).+#\1#'); + mapfile -t images < <(cat index.html | grep -E "(.jpg|.jpeg)" | sed -E -e 's#.+(images/[0-9]+.(jpeg|jpg)).+#\1#' | uniq; ); dout="./output"; if [[ -d "$dout" ]]; then rm -r "$dout";