#!/usr/bin/env bash VERSION="1.0.0" CONFIG_DIR="$UORSS_DIR" set -u set +H # This will be removed, dont rely on it [ -z "$CONFIG_DIR" ] && echo "main: UORSS_DIR is not defined, assuming working directory. This is potentially unsafe" [ -z "$CONFIG_DIR" ] && CONFIG_DIR="$PWD" CONFIGD_DIR="$CONFIG_DIR/config.d" CACHE_DIR="$CONFIG_DIR/cache" echo "main: Running as $(whoami). Version $VERSION. Directory at $CONFIG_DIR" die () { echo "$@" 1>&2; exit 1; } stripCtrlChars () { sed 's/\x1b/\\x1b/g' /dev/stdin; return $?; } [ -d "$CONFIG_DIR" ] || die "main: No uorss directory defined! See readme." [ -d "$CONFIGD_DIR" ] || die "main: No config.d directory defined! See readme." str_startswith () { # str_startsswith: prefix string local alen local blen local end alen=${#1} blen=${#2} if [ "$alen" -gt "$blen" ]; then return 1 else if [ "$1" == "${2:0:$alen}" ]; then return 0 else return 1 fi fi } str_endswith () { # str_endswith: affix string local alen local blen local end alen=${#1} blen=${#2} if [ "$alen" -gt "$blen" ]; then return 1 else end=$((blen - alen)) if [ "$1" == "${2:$end:$alen}" ]; then return 0 else return 1 fi fi } # Merge all files in config.d/ createConfig () { # createConfig: local filepath local filebase local json local code local config config="{}" for filepath in "$CONFIGD_DIR/"*; do filebase="$(basename "$filepath")" if [ -d "$filepath" ]; then echo "createConfig: Ignoring $filebase, Is a directory." 1>&2 elif str_startswith "~" "$filebase" || str_endswith "~" "$filebase"; then echo "createConfig: Ignoring $filebase. Is a temporary file!" 1>&2 elif str_startswith "." "$filebase"; then echo "createConfig: Ignoring $filebase. Is a hidden file!" 1>&2 elif str_endswith ".yml" "$filebase" || str_endswith ".yaml" "$filebase" || str_endswith ".json" "$filebase"; then json="$(yq . "$filepath" -c)" code="$?" if [ $code -eq 0 ]; then config="$(jq --argjson a "$config" --argjson b "$json" \ '$a * $b * { artists: ($a.artists + $b.artists) }' -nc)" || die "createConfig: Failed to merge $filebase with in-memory config. Exited with code $?" else echo "createConfig: Failed to load $filebase. Exited with code $code" 1>&2 exit $code fi else echo "createConfig: Ignoring $(basename "$filebase")." 1>&2 fi done jq . -c <<< "$config" } CONFIG="$(createConfig)" || die "main: Failed to load config.d! Exited with $?" SLOW_MODE=""; jq -e .slow_mode 1>/dev/null <<< "$CONFIG" && SLOW_MODE="1" STALE_MODE=""; jq -e .stale_mode 1>/dev/null <<< "$CONFIG" && STALE_MODE="1" CACHE_ENTRY_FILES=""; jq -e .cache_entry_files 1>/dev/null <<< "$CONFIG" && CACHE_ENTRY_FILES="1" DEPTH="$(jq .depth -r <<< "$CONFIG")" BASE_URL="$(jq .base_url -r <<< "$CONFIG")" PIXIV_BASE_URL="$(jq .pixiv_base_url -r <<< "$CONFIG")" SESSION_ID="$(jq .session_id -r <<< "$CONFIG")" USER_AGENT="$(jq .user_agent -r <<< "$CONFIG")" COOKIES="Cookie: PHPSESSID=$SESSION_ID" [ "$BASE_URL" == "null" ] && die "main: base_url cannot be null!" [ "$PIXIV_BASE_URL" == "null" ] && die "main: pixiv_base_url cannot be null!" [ "$SESSION_ID" == "null" ] && die "main: session_id cannot be null!" [ -n "$STALE_MODE" ] && echo "main: Stale mode is on. New posts by artists will not be fetched!" 1>&2 arrayIndexList () { # arrayIndexList: array local length length="$(jq 'length' -r <<< "$1")" if [ -n "$length" ]; then seq 1 "$length" fi } getArtworkData () { # getArtworkData: artwork_id artist_id local json_file local json json_file="$CACHE_DIR/artwork/$1.pages.json" if [ -f "$json_file" ]; then echo "getArtworkData: Cache HIT for $2:$1" 1>&2 else echo "getArtworkData: Cache MISS for $2:$1" 1>&2 curl -sf \ -A "$USER_AGENT" \ -H "$COOKIES" \ -H "Referer: https://www.pixiv.net/" \ "https://www.pixiv.net/ajax/illust/$1/pages" 2>/dev/null > "$json_file" fi cat "$json_file" } getArtworkInfo () { # getArtworkInfo: artwork_id artist_id local json_file local json json_file="$CACHE_DIR/artwork/$1.info.json" if [ -f "$json_file" ]; then echo "getArtworkInfo: Cache HIT for $2:$1" 1>&2 else echo "getArtworkInfo: Cache MISS for $2:$1" 1>&2 curl -sf \ -A "$USER_AGENT" \ -H "$COOKIES" \ -H "Referer: https://www.pixiv.net/" \ "https://www.pixiv.net/ajax/illust/$1" 2>/dev/null > "$json_file" fi cat "$json_file" } getArtistInfo () { # getArtistInfo: artist_id local json_file local json json_file="$CACHE_DIR/users/$1.info.json" if [ -f "$json_file" ]; then echo "getArtworkInfo: Cache HIT for $1" 1>&2 else echo "getArtworkInfo: Cache MISS for $1" 1>&2 curl -sf \ -A "$USER_AGENT" \ -H "$COOKIES" \ -H "Referer: https://www.pixiv.net/" \ "https://www.pixiv.net/ajax/user/$1" 2>/dev/null > "$json_file" fi cat "$json_file" } hoistArtworkPage () { # hoistArtworkPage: url artwork_id artist_id local artist local pages local page artist="$CACHE_DIR/pages/$3" pages="$artist/$2" page="$pages/$(basename "$1")" json_file="$CACHE_DIR/artwork/$1.info.json" [ -d "$artist" ] || mkdir "$artist" [ -d "$pages" ] || mkdir "$pages" if [ -f "$page" ]; then echo "hoistArtworkPage: Cache HIT for $3:$2 $1" 1>&2 else echo "hoistArtworkPage: Cache MISS for $3:$2 $1" 1>&2 curl -sf \ -A "$USER_AGENT" \ -H "$COOKIES" \ -H "Referer: https://www.pixiv.net/" \ "$1" 2>/dev/null > "$page~" mv "$page~" "$page" return $? fi } hoistArtworkPages () { # hoistArtworkPages: response artwork_id artist_id local page_index local page_json local page_url for page_index in $(arrayIndexList "$(jq .body -c <<< "$1")"); do page_json="$(jq ".body[$((page_index-1))]" -r <<< "$1")" || return 1 page_url="$(jq .urls.original -r <<< "$page_json")" || return 2 if [ "$page_url" == "null" ]; then echo "$page_json" return 3 fi if ! hoistArtworkPage "$page_url" "$2" "$3"; then echo "hoistArtworkPages: Failed for page $page_index." 1>&2 rm "$CACHE_DIR/pages/$3/$2/$(basename "$page_url")" fi if [ -n "$SLOW_MODE" ]; then sleep .3; fi #ingestArtist "$artist_id" || die "init[$artist_id]: Unexpected exit code when processing artist. $?" done return 0 } hoistArtworkEntry () { # hoistArtworkEntry: artwork_id artist_id artwork_info_json artwork_pages_json local file local id local href local pages local page_json local page_url local page_file local tags file="$CACHE_DIR/artwork/$1.entry.xml" [ -n "$CACHE_ENTRY_FILES" ] && [ -f "$file" ] && return 0 id="$PIXIV_BASE_URL/artworks/$(jq '.body.illustId | @uri' -r <<< "$3")" href="$(jq '.body.extraData.meta.canonical | @html' -r <<< "$3")" tags="$(jq '.body.tags.tags | map("" + (.tag | @html) + "") | join(", ")' -r <<< "$3")" pages="" for page_index in $(arrayIndexList "$(jq .body -c <<< "$4")"); do page_json="$(jq ".body[$((page_index-1))]" -r <<< "$4")" || return 2 page_url="$(jq .urls.original -r <<< "$page_json")" || return 2 page_file="$(jq @html -Rr <<< "$BASE_URL/pages/$2/$1/$(basename "$page_url")")" pages="$pages"'
' done echo ''"$tags"'
'"$pages"'