#!/usr/bin/env bash
#
# uorss: builds one Atom feed per configured Pixiv artist.
#
# Environment:
#   UORSS_DIR  Directory holding config.d/ (merged YAML/JSON configuration)
#              and cache/ (downloaded JSON, artwork pages and generated feeds).
#
# External tools used by this script: jq, yq, curl, sed, seq, date.
#
# Config keys read below: artists, depth, base_url, pixiv_base_url,
# session_id, user_agent, slow_mode, stale_mode, cache_entry_files.

VERSION="1.0.0"
CONFIG_DIR="${UORSS_DIR:-}"

set -u
# Disable history expansion so "!" inside strings is never rewritten.
set +H

# This will be removed, dont rely on it
if [ -z "$CONFIG_DIR" ]; then
  echo "main: UORSS_DIR is not defined, assuming working directory. This is potentially unsafe"
  CONFIG_DIR="$PWD"
fi

CONFIGD_DIR="$CONFIG_DIR/config.d"
CACHE_DIR="$CONFIG_DIR/cache"

echo "main: Running as $(whoami). Version $VERSION. Directory at $CONFIG_DIR"

# die: message...
# Prints the message to stderr and terminates the (sub)shell with status 1.
die () {
  echo "$@" 1>&2
  exit 1
}

# stripCtrlChars:
# Filters stdin to stdout, replacing each ESC byte with the literal text "\x1b".
stripCtrlChars () {
  sed 's/\x1b/\\x1b/g' /dev/stdin
  return $?
}

[ -d "$CONFIG_DIR" ] || die "main: No uorss directory defined! See readme."
[ -d "$CONFIGD_DIR" ] || die "main: No config.d directory defined! See readme."

# str_startswith: prefix string
# Returns 0 when string begins with prefix (an empty prefix always matches).
str_startswith () {
  [[ "$2" == "$1"* ]]
}

# str_endswith: affix string
# Returns 0 when string ends with affix (an empty affix always matches).
str_endswith () {
  [[ "$2" == *"$1" ]]
}

# escapeHtml: text
# Prints text with HTML/XML special characters escaped (jq's @html filter).
escapeHtml () {
  jq -Rr @html <<< "$1"
}

# fetchToFile: url destination
# Downloads url with the configured user agent and session cookie.
# The body is written to "destination~" first and only moved into place when
# curl succeeds with a non-empty response, so a failed request can never leave
# behind a file that later runs would mistake for a valid cache entry.
# Returns 0 on success, 1 on failure (destination is left untouched).
fetchToFile () {
  local tmp
  tmp="$2~"
  if curl -sf \
    -A "$USER_AGENT" \
    -H "$COOKIES" \
    -H "Referer: https://www.pixiv.net/" \
    "$1" 2>/dev/null > "$tmp" && [ -s "$tmp" ]; then
    mv -- "$tmp" "$2"
    return $?
  fi
  rm -f -- "$tmp"
  return 1
}

# Merge all files in config.d/
# createConfig:
# Prints the merged configuration as one compact JSON document on stdout.
# Directories, temporary files ("~" prefix/suffix), hidden files and files
# without a .yml/.yaml/.json extension are skipped with a notice on stderr.
# Objects are merged with jq's "*" operator; the "artists" arrays of all files
# are concatenated instead of being overwritten.
# Exits (this function is run inside a command substitution) when a file
# cannot be parsed or merged.
createConfig () {
  local filepath
  local filebase
  local json
  local code
  local config
  config="{}"
  for filepath in "$CONFIGD_DIR/"*; do
    filebase="$(basename "$filepath")"
    if [ -d "$filepath" ]; then
      echo "createConfig: Ignoring $filebase, Is a directory." 1>&2
    elif str_startswith "~" "$filebase" || str_endswith "~" "$filebase"; then
      echo "createConfig: Ignoring $filebase. Is a temporary file!" 1>&2
    elif str_startswith "." "$filebase"; then
      echo "createConfig: Ignoring $filebase. Is a hidden file!" 1>&2
    elif str_endswith ".yml" "$filebase" ||
      str_endswith ".yaml" "$filebase" ||
      str_endswith ".json" "$filebase"; then
      json="$(yq . "$filepath" -c)"
      code="$?"
      if [ "$code" -eq 0 ]; then
        config="$(jq --argjson a "$config" --argjson b "$json" \
          '$a * $b * { artists: ($a.artists + $b.artists) }' -nc)" ||
          die "createConfig: Failed to merge $filebase with in-memory config. Exited with code $?"
      else
        echo "createConfig: Failed to load $filebase. Exited with code $code" 1>&2
        exit "$code"
      fi
    else
      echo "createConfig: Ignoring $filebase." 1>&2
    fi
  done
  jq . -c <<< "$config"
}

CONFIG="$(createConfig)" || die "main: Failed to load config.d! Exited with $?"

# Boolean switches: non-empty means enabled (jq -e fails on false/null).
SLOW_MODE=""
jq -e .slow_mode 1>/dev/null <<< "$CONFIG" && SLOW_MODE="1"
STALE_MODE=""
jq -e .stale_mode 1>/dev/null <<< "$CONFIG" && STALE_MODE="1"
CACHE_ENTRY_FILES=""
jq -e .cache_entry_files 1>/dev/null <<< "$CONFIG" && CACHE_ENTRY_FILES="1"

DEPTH="$(jq .depth -r <<< "$CONFIG")"
BASE_URL="$(jq .base_url -r <<< "$CONFIG")"
PIXIV_BASE_URL="$(jq .pixiv_base_url -r <<< "$CONFIG")"
SESSION_ID="$(jq .session_id -r <<< "$CONFIG")"
USER_AGENT="$(jq .user_agent -r <<< "$CONFIG")"
COOKIES="Cookie: PHPSESSID=$SESSION_ID"

[ "$BASE_URL" == "null" ] && die "main: base_url cannot be null!"
[ "$PIXIV_BASE_URL" == "null" ] && die "main: pixiv_base_url cannot be null!"
[ "$SESSION_ID" == "null" ] && die "main: session_id cannot be null!"
[ -n "$STALE_MODE" ] &&
  echo "main: Stale mode is on. New posts by artists will not be fetched!" 1>&2

# Every function below writes into one of these cache subdirectories.
mkdir -p \
  "$CACHE_DIR/artwork" \
  "$CACHE_DIR/users" \
  "$CACHE_DIR/pages" \
  "$CACHE_DIR/feeds" ||
  die "main: Failed to create cache directories in $CACHE_DIR"

# arrayIndexList: array
# Prints the 1-based indices (one per line) of the given JSON array.
# Prints nothing for an empty array or for input jq cannot take a length of.
arrayIndexList () {
  local length
  length="$(jq 'length' -r <<< "$1")"
  # Guard against length 0: "seq 1 0" counts downwards on BSD seq.
  if [ -n "$length" ] && [ "$length" -gt 0 ] 2>/dev/null; then
    seq 1 "$length"
  fi
}

# getArtworkData: artwork_id artist_id
# Prints the JSON response of the illust "pages" endpoint, served from
# cache/artwork/<artwork_id>.pages.json when a non-empty copy exists.
# artist_id is only used in log messages. Returns 1 when the fetch fails.
getArtworkData () {
  local json_file
  json_file="$CACHE_DIR/artwork/$1.pages.json"
  if [ -s "$json_file" ]; then
    echo "getArtworkData: Cache HIT for $2:$1" 1>&2
  else
    echo "getArtworkData: Cache MISS for $2:$1" 1>&2
    fetchToFile "https://www.pixiv.net/ajax/illust/$1/pages" "$json_file" ||
      return 1
  fi
  cat "$json_file"
}

# getArtworkInfo: artwork_id artist_id
# Prints the JSON response of the illust endpoint, served from
# cache/artwork/<artwork_id>.info.json when a non-empty copy exists.
# artist_id is only used in log messages. Returns 1 when the fetch fails.
getArtworkInfo () {
  local json_file
  json_file="$CACHE_DIR/artwork/$1.info.json"
  if [ -s "$json_file" ]; then
    echo "getArtworkInfo: Cache HIT for $2:$1" 1>&2
  else
    echo "getArtworkInfo: Cache MISS for $2:$1" 1>&2
    fetchToFile "https://www.pixiv.net/ajax/illust/$1" "$json_file" ||
      return 1
  fi
  cat "$json_file"
}

# getArtistInfo: artist_id
# Prints the JSON response of the user endpoint, served from
# cache/users/<artist_id>.info.json when a non-empty copy exists.
# Returns 1 when the fetch fails.
getArtistInfo () {
  local json_file
  json_file="$CACHE_DIR/users/$1.info.json"
  if [ -s "$json_file" ]; then
    echo "getArtistInfo: Cache HIT for $1" 1>&2
  else
    echo "getArtistInfo: Cache MISS for $1" 1>&2
    fetchToFile "https://www.pixiv.net/ajax/user/$1" "$json_file" ||
      return 1
  fi
  cat "$json_file"
}

# hoistArtworkPage: url artwork_id artist_id
# Downloads one artwork page to
# cache/pages/<artist_id>/<artwork_id>/<basename of url> unless a non-empty
# copy is already there. Returns non-zero when the directory cannot be created
# or the download fails.
hoistArtworkPage () {
  local pages
  local page
  pages="$CACHE_DIR/pages/$3/$2"
  page="$pages/$(basename "$1")"
  mkdir -p "$pages" || return 1
  if [ -s "$page" ]; then
    echo "hoistArtworkPage: Cache HIT for $3:$2 $1" 1>&2
    return 0
  fi
  echo "hoistArtworkPage: Cache MISS for $3:$2 $1" 1>&2
  fetchToFile "$1" "$page"
  return $?
}

# hoistArtworkPages: response artwork_id artist_id
# Downloads every page listed in .body[] of the "pages" response.
# A page that fails to download is logged and skipped.
# Returns 1/2 when jq cannot read a page object / its URL, 3 when a page has
# no .urls.original (the page object is dumped to stderr), 0 otherwise.
hoistArtworkPages () {
  local page_index
  local page_json
  local page_url
  for page_index in $(arrayIndexList "$(jq .body -c <<< "$1")"); do
    page_json="$(jq ".body[$((page_index-1))]" -r <<< "$1")" || return 1
    page_url="$(jq .urls.original -r <<< "$page_json")" || return 2
    if [ "$page_url" == "null" ]; then
      echo "$page_json" 1>&2
      return 3
    fi
    if ! hoistArtworkPage "$page_url" "$2" "$3"; then
      echo "hoistArtworkPages: Failed for page $page_index." 1>&2
      # Make sure no partial file survives to be treated as cached.
      rm -f "$CACHE_DIR/pages/$3/$2/$(basename "$page_url")"
    fi
    if [ -n "$SLOW_MODE" ]; then
      sleep .3
    fi
  done
  return 0
}

# hoistArtworkEntry: artwork_id artist_id artwork_info_json artwork_pages_json
# Writes the feed entry for one artwork to
# cache/artwork/<artwork_id>.entry.xml. When cache_entry_files is enabled and
# the file already exists it is left untouched.
# Returns 2 when a page object cannot be read from artwork_pages_json.
#
# NOTE(review): the XML markup of this template was missing from the source
# this was restored from (only the interpolated values survived). The element
# names below are a reconstruction following Atom 1.0 (RFC 4287); confirm
# against the upstream template, in particular how tags were wrapped.
# The .body.alt field was commented out in the original template and is still
# not emitted.
hoistArtworkEntry () {
  local file
  local id
  local href
  local pages
  local page_index
  local page_json
  local page_url
  local page_file
  local tags
  local title
  local published
  local updated
  local author
  file="$CACHE_DIR/artwork/$1.entry.xml"
  if [ -n "$CACHE_ENTRY_FILES" ] && [ -f "$file" ]; then
    return 0
  fi
  id="$(escapeHtml "$PIXIV_BASE_URL/artworks/$(jq '.body.illustId | @uri' -r <<< "$3")")"
  href="$(jq '.body.extraData.meta.canonical | @html' -r <<< "$3")"
  tags="$(jq '.body.tags.tags | map(.tag | @html) | join(", ")' -r <<< "$3")"
  title="$(jq '.body.illustTitle | @html' -r <<< "$3")"
  published="$(jq '.body.uploadDate | @html' -r <<< "$3")"
  updated="$(date -Is | jq @html -rR)"
  author="$(jq '.body.userName | @html' -r <<< "$3")"
  pages=""
  for page_index in $(arrayIndexList "$(jq .body -c <<< "$4")"); do
    page_json="$(jq ".body[$((page_index-1))]" -r <<< "$4")" || return 2
    page_url="$(jq .urls.original -r <<< "$page_json")" || return 2
    # Pages are referenced at the location hoistArtworkPage stored them under.
    page_file="$(escapeHtml "$BASE_URL/pages/$2/$1/$(basename "$page_url")")"
    pages="$pages"'<img src="'"$page_file"'"/>
'
  done
  printf '%s\n' \
    '<entry>' \
    "<title>$title</title>" \
    "<id>$id</id>" \
    "<link rel=\"alternate\" href=\"$href\"/>" \
    "<published>$published</published>" \
    "<updated>$updated</updated>" \
    '<content type="xhtml">' \
    '<div xmlns="http://www.w3.org/1999/xhtml">' \
    "<p>$tags</p>" \
    "$pages</div>" \
    '</content>' \
    "<author><name>$author</name></author>" \
    '</entry>' > "$file"
}

# ingestArtist: artist_id
# Fetches the artist's latest works, downloads the pages of the first DEPTH
# artworks, builds their entries and writes cache/feeds/<artist_id>.xml.
# In stale mode an existing works list is reused instead of being re-fetched.
# Returns 1 when the artist data cannot be fetched or reports an error,
# 2-4 when an artwork's id/info/pages cannot be read, 5 when the feed cannot
# be written. Exits via die when an artwork id is not numerical.
#
# NOTE(review): the feed-level markup is reconstructed like the entry template
# in hoistArtworkEntry; confirm against the upstream template.
ingestArtist () {
  local artist_file
  local artist_json
  local artist_info_json
  local artwork_keys
  local artwork_id
  local artwork_index
  local artwork_pages_json
  local artwork_info_json
  local feed_url
  local feed_file
  local entries_file
  local artist_url
  local artist_name
  local updated
  artist_file="$CACHE_DIR/users/$1.json"
  artist_url="$PIXIV_BASE_URL/users/$1"
  feed_url="$BASE_URL/feeds/$1.xml"
  feed_file="$CACHE_DIR/feeds/$1.xml"
  entries_file="$CACHE_DIR/feeds/$1.entries.xml~"

  if [ -n "$STALE_MODE" ] && [ -s "$artist_file" ]; then
    echo "ingestArtist: Cache HIT for $1" 1>&2
  else
    echo "ingestArtist: Cache MISS for $1" 1>&2
    if ! fetchToFile \
      "https://www.pixiv.net/ajax/user/$1/works/latest?lang=en" \
      "$artist_file"; then
      echo "ingestArtist: Failed to fetch latest works of artist $1" 1>&2
      return 1
    fi
  fi

  artist_info_json="$(getArtistInfo "$1")" || return 1
  artist_json="$(cat "$artist_file")"

  if jq -e '.error' 1>/dev/null <<< "$artist_info_json"; then
    echo "ingestArtist: Error while reading artist $1 info. Message: $(jq .message -r <<< "$artist_info_json")" 1>&2
    return 1
  fi
  if jq -e '.error' 1>/dev/null <<< "$artist_json"; then
    echo "ingestArtist: Error while reading artist $1. Message: $(jq .message -r <<< "$artist_json")" 1>&2
    return 1
  fi

  artwork_keys="$(jq ".body.illusts | keys_unsorted | .[0:$DEPTH]" -c <<< "$artist_json")"

  # Start from an empty entries file so leftovers of an aborted run are not
  # duplicated and the file exists even when there are no artworks.
  : > "$entries_file"

  for artwork_index in $(arrayIndexList "$artwork_keys"); do
    artwork_id="$(jq ".[$((artwork_index-1))]" -r <<< "$artwork_keys")" || return 2
    # Validate before the id is used in URLs and cache paths.
    [[ "$artwork_id" =~ ^[0-9]+$ ]] ||
      die "ingestArtist: Exception while handling artist $1, artwork with index of $artwork_index. Illustration ID is not numerical!"
    artwork_info_json="$(getArtworkInfo "$artwork_id" "$1")" || return 3
    artwork_pages_json="$(getArtworkData "$artwork_id" "$1")" || return 4
    if [ -n "$SLOW_MODE" ]; then
      sleep .3
    fi
    hoistArtworkPages "$artwork_pages_json" "$artwork_id" "$1" ||
      echo "ingestArtist: Error while running hoistArtworkPages. Exit code $?" 1>&2
    if hoistArtworkEntry "$artwork_id" "$1" "$artwork_info_json" "$artwork_pages_json"; then
      cat "$CACHE_DIR/artwork/$artwork_id.entry.xml" >> "$entries_file"
    else
      echo "ingestArtist: Error while running hoistArtworkEntry. Exit code $?" 1>&2
    fi
  done

  artist_name="$(jq '.body.name | @html' -r <<< "$artist_info_json")"
  updated="$(date -Is | jq @html -rR)"

  # Assemble the feed in a "~" file and move it into place when complete, so
  # readers never see a half-written feed.
  {
    printf '%s\n' \
      '<?xml version="1.0" encoding="utf-8"?>' \
      '<feed xmlns="http://www.w3.org/2005/Atom">' \
      "<title>$artist_name</title>" \
      "<subtitle>$artist_name on Pixiv</subtitle>" \
      "<id>$(escapeHtml "$artist_url")</id>" \
      "<link rel=\"alternate\" href=\"$(escapeHtml "$artist_url")\"/>" \
      "<link rel=\"self\" href=\"$(escapeHtml "$feed_url")\"/>" \
      "<updated>$updated</updated>" \
      "<author><name>$artist_name</name></author>"
    cat "$entries_file"
    printf '%s\n' '</feed>'
  } > "$feed_file~" || return 5
  rm -f "$entries_file"
  mv "$feed_file~" "$feed_file"
}

# init:
# Processes every artist id listed in the merged configuration.
# Exits via die when an id is not numerical or an artist fails to process.
init () {
  local artist_id
  local artists_index
  for artists_index in $(arrayIndexList "$(jq '.artists' -c <<< "$CONFIG")"); do
    artist_id="$(jq ".artists[$((artists_index-1))]" -r <<< "$CONFIG")" || return 1
    [[ "$artist_id" =~ ^[0-9]+$ ]] ||
      die "init: Exception while handling artist $artist_id. Artist ID is not numerical!"
    ingestArtist "$artist_id" ||
      die "init[$artist_id]: Unexpected exit code when processing artist. $?"
    if [ -n "$SLOW_MODE" ]; then
      sleep 1
    fi
  done
  echo "init: End"
}

init