Initial public commit

This commit is contained in:
Jon 2022-02-27 10:32:02 +00:00
commit bb6553b00c
4 changed files with 449 additions and 0 deletions

5
.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
config.d
cache
*~
~*
.#*

23
license.md Normal file
View File

@ -0,0 +1,23 @@
# MIT<small>+NOMORG</small> License
Copyright (c) 2022 Jonathan Hyde
Permission is hereby granted on condition, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
You may not use the Software, or any portions of the Software, for and/or on behalf of the Matrix Foundation or any associated entities.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

47
readme.md Normal file
View File

@ -0,0 +1,47 @@
# uorss
Pixiv to RSS bridge written in bash.
## Setup
1. `$ cd $UORSS_DIR && mkdir -p cache/artwork cache/feeds cache/pages cache/users config.d`
2. Add appropriate settings in config.d. `touch config.d/0_secrets.yaml config.d/1_config.yaml config.d/2_watchlist.yaml`
3. Add cronjob or systemd scheduled service. Set `$UORSS_DIR`!
4. Expose `cache/feeds/` and `cache/pages/` as `feeds/` and `pages/` under your `base_url` with nginx or httpd. You should use a dedicated root with symlinks to each; don't blindly expose your `$UORSS_DIR` directory!
### `config`
```yaml
required: [ user_agent, session_id, base_url, pixiv_base_url, depth ]
properties:
user_agent:
comment: 'User agent sent with all requests. Must be a browser user agent, otherwise requests will not go through'
type: string
session_id:
comment: 'Contents of PHPSESSID for a logged in pixiv session. You should put this in its own config file'
type: string
base_url:
comment: 'The base url where you will expose feeds/ and pages/ on your server'
example: 'http://10.0.0.2/static/uorss'
type: string
pixiv_base_url:
comment: 'The base url for pixiv links that uorss generates (tags)'
example: 'https://www.pixiv.net/en'
type: string
depth:
comment: 'How many illustrations you want uorss to pull. Suggested value is 3. Note that uorss will not clean up stale illustrations it downloads!'
type: number
stale_mode:
comment: "Always use cached pixiv user responses. This is for testing so you don't spam the pixiv api with artist lookups"
suggested: false
type: boolean
cache_entry_files:
comment: 'Whether to reuse cached entry files instead of regenerating them on every run. After updating uorss, set this to false for the first run so entries are rebuilt'
suggested: true
type: boolean
slow_mode:
comment: "Adds various delays while pulling from pixiv. Use if you're worried about being rate limited or while syncing a lot of artists"
type: boolean
artists:
comment: 'An array of pixiv artist ids. Uorss will not check for duplicates'
type: array
items:
type: string
```

374
uorss.sh Executable file
View File

@ -0,0 +1,374 @@
#!/usr/bin/env bash
# uorss entry script: resolve the directories every later step works in.
VERSION="1.0.0"
# UORSS_DIR is read before nounset is enabled so an unset variable is tolerated.
CONFIG_DIR="$UORSS_DIR"
set -u
# Turn off history expansion.
set +H
# Fall back to the working directory. This will be removed, don't rely on it.
if [ -z "$CONFIG_DIR" ]; then
  echo "main: UORSS_DIR is not defined, assuming working directory. This is potentially unsafe"
  CONFIG_DIR="$PWD"
fi
CACHE_DIR="$CONFIG_DIR/cache"
CONFIGD_DIR="$CONFIG_DIR/config.d"
echo "main: Running as $(whoami). Version $VERSION. Directory at $CONFIG_DIR"
die () {
  # die: message...
  # Write the message to stderr and terminate the script with status 1.
  echo "$@" >&2
  exit 1
}
stripCtrlChars () {
  # stripCtrlChars: (filters stdin to stdout)
  # Replace every ESC byte with the literal text \x1b.
  # The function's status is sed's exit status.
  sed -e 's/\x1b/\\x1b/g' /dev/stdin
}
# Refuse to continue unless the uorss directory and its config.d both exist.
if [ ! -d "$CONFIG_DIR" ]; then
  die "main: No uorss directory defined! See readme."
fi
if [ ! -d "$CONFIGD_DIR" ]; then
  die "main: No config.d directory defined! See readme."
fi
str_startswith () {
  # str_startswith: prefix string
  # Succeeds (0) when string begins with prefix, fails (1) otherwise.
  local prefix_len
  prefix_len=${#1}
  # A prefix longer than the string can never match.
  [ "$prefix_len" -le "${#2}" ] || return 1
  # Compare the prefix against the leading slice of the same length.
  [ "$1" == "${2:0:$prefix_len}" ]
}
str_endswith () {
  # str_endswith: affix string
  # Succeeds (0) when string ends with affix, fails (1) otherwise.
  local affix_len
  local offset
  affix_len=${#1}
  # Position in string where the affix would have to start.
  offset=$(( ${#2} - affix_len ))
  # A negative offset means the affix is longer than the string.
  [ "$offset" -ge 0 ] || return 1
  [ "$1" == "${2:$offset:$affix_len}" ]
}
# Merge all files in config.d/
createConfig () {
  # createConfig:
  # Merges every .yml/.yaml/.json file found in $CONFIGD_DIR, in glob order,
  # into a single JSON object and prints it compactly on stdout. Keys from
  # later files override earlier ones, except `artists`, whose values are
  # added together. Directories, hidden files and editor temporaries are
  # skipped with a note on stderr.
  # Returns the loader's exit code when a file cannot be parsed.
  local filepath
  local filebase
  local json
  local code
  local config
  config="{}"
  for filepath in "$CONFIGD_DIR/"*; do
    filebase="$(basename "$filepath")"
    if [ -d "$filepath" ]; then
      echo "createConfig: Ignoring $filebase, Is a directory." 1>&2
    elif str_startswith "~" "$filebase" || str_endswith "~" "$filebase"; then
      echo "createConfig: Ignoring $filebase. Is a temporary file!" 1>&2
    elif str_startswith "." "$filebase"; then
      echo "createConfig: Ignoring $filebase. Is a hidden file!" 1>&2
    elif str_endswith ".yml" "$filebase" || str_endswith ".yaml" "$filebase" || str_endswith ".json" "$filebase"; then
      json="$(yq . "$filepath" -c)"
      code="$?"
      if [ "$code" -ne 0 ]; then
        echo "createConfig: Failed to load $filebase. Exited with code $code" 1>&2
        return "$code"
      fi
      # An empty file (for example one just created with touch) produces no
      # document or a null one; treat both as an empty object so the merge
      # below does not fail.
      [ -n "$json" ] || json="null"
      config="$(jq --argjson a "$config" --argjson b "$json" \
        '($b // {}) as $b | $a * $b * {
          artists: ($a.artists + $b.artists)
        }' -nc)" || die "createConfig: Failed to merge $filebase with in-memory config. Exited with code $?"
    else
      echo "createConfig: Ignoring $filebase." 1>&2
    fi
  done
  jq . -c <<< "$config"
}
# Load the merged configuration once; every setting below is read from it.
CONFIG="$(createConfig)" || die "main: Failed to load config.d! Exited with $?"
# Boolean flags: "1" when the key is truthy, empty otherwise
# (jq -e exits non-zero when the result is false or null).
SLOW_MODE=""; jq -e .slow_mode 1>/dev/null <<< "$CONFIG" && SLOW_MODE="1"
STALE_MODE=""; jq -e .stale_mode 1>/dev/null <<< "$CONFIG" && STALE_MODE="1"
CACHE_ENTRY_FILES=""; jq -e .cache_entry_files 1>/dev/null <<< "$CONFIG" && CACHE_ENTRY_FILES="1"
DEPTH="$(jq .depth -r <<< "$CONFIG")"
BASE_URL="$(jq .base_url -r <<< "$CONFIG")"
PIXIV_BASE_URL="$(jq .pixiv_base_url -r <<< "$CONFIG")"
SESSION_ID="$(jq .session_id -r <<< "$CONFIG")"
USER_AGENT="$(jq .user_agent -r <<< "$CONFIG")"
COOKIES="Cookie: PHPSESSID=$SESSION_ID"
# jq -r prints the string "null" for a missing key. All five settings below
# are required; a null depth in particular would make the artwork slice
# unbounded.
[ "$BASE_URL" == "null" ] && die "main: base_url cannot be null!"
[ "$PIXIV_BASE_URL" == "null" ] && die "main: pixiv_base_url cannot be null!"
[ "$SESSION_ID" == "null" ] && die "main: session_id cannot be null!"
[ "$USER_AGENT" == "null" ] && die "main: user_agent cannot be null!"
[ "$DEPTH" == "null" ] && die "main: depth cannot be null!"
[ -n "$STALE_MODE" ] && echo "main: Stale mode is on. New posts by artists will not be fetched!" 1>&2
arrayIndexList () {
  # arrayIndexList: array
  # Prints the 1-based indexes of a JSON array, one per line (1..length).
  # Prints nothing for an empty array or when jq cannot report a whole-number
  # length for the input.
  local length
  local index
  length="$(jq 'length' -r <<< "$1")"
  # Only count upwards from a non-negative integer. `seq 1 0` counts down on
  # some seq implementations, so the loop is done in the shell instead.
  [[ "$length" =~ ^[0-9]+$ ]] || return 0
  for (( index = 1; index <= length; index++ )); do
    echo "$index"
  done
  return 0
}
getArtworkData () {
  # getArtworkData: artwork_id artist_id
  # Prints the pages response for an artwork, fetching it on a cache miss.
  # artist_id is only used in log messages.
  # Returns non-zero, and caches nothing, when the download fails.
  local json_file
  local tmp_file
  json_file="$CACHE_DIR/artwork/$1.pages.json"
  tmp_file="$json_file~"
  # -s rather than -f: an empty file left behind by an earlier failed
  # download is not a usable cache entry.
  if [ -s "$json_file" ]; then
    echo "getArtworkData: Cache HIT for $2:$1" 1>&2
  else
    echo "getArtworkData: Cache MISS for $2:$1" 1>&2
    # Download to a temporary name so a failed request never becomes a cache hit.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/illust/$1/pages" 2>/dev/null > "$tmp_file"; then
      rm -f "$tmp_file"
      echo "getArtworkData: Download failed for $2:$1" 1>&2
      return 1
    fi
    mv "$tmp_file" "$json_file" || return 1
  fi
  cat "$json_file"
}
getArtworkInfo () {
  # getArtworkInfo: artwork_id artist_id
  # Prints the info response for an artwork, fetching it on a cache miss.
  # artist_id is only used in log messages.
  # Returns non-zero, and caches nothing, when the download fails.
  local json_file
  local tmp_file
  json_file="$CACHE_DIR/artwork/$1.info.json"
  tmp_file="$json_file~"
  # -s rather than -f: an empty file left behind by an earlier failed
  # download is not a usable cache entry.
  if [ -s "$json_file" ]; then
    echo "getArtworkInfo: Cache HIT for $2:$1" 1>&2
  else
    echo "getArtworkInfo: Cache MISS for $2:$1" 1>&2
    # Download to a temporary name so a failed request never becomes a cache hit.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/illust/$1" 2>/dev/null > "$tmp_file"; then
      rm -f "$tmp_file"
      echo "getArtworkInfo: Download failed for $2:$1" 1>&2
      return 1
    fi
    mv "$tmp_file" "$json_file" || return 1
  fi
  cat "$json_file"
}
getArtistInfo () {
  # getArtistInfo: artist_id
  # Prints the user info response for an artist, fetching it on a cache miss.
  # Returns non-zero, and caches nothing, when the download fails.
  local json_file
  local tmp_file
  json_file="$CACHE_DIR/users/$1.info.json"
  tmp_file="$json_file~"
  # -s rather than -f: an empty file left behind by an earlier failed
  # download is not a usable cache entry.
  if [ -s "$json_file" ]; then
    echo "getArtistInfo: Cache HIT for $1" 1>&2
  else
    echo "getArtistInfo: Cache MISS for $1" 1>&2
    # Download to a temporary name so a failed request never becomes a cache hit.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/user/$1" 2>/dev/null > "$tmp_file"; then
      rm -f "$tmp_file"
      echo "getArtistInfo: Download failed for $1" 1>&2
      return 1
    fi
    mv "$tmp_file" "$json_file" || return 1
  fi
  cat "$json_file"
}
hoistArtworkPage () {
  # hoistArtworkPage: url artwork_id artist_id
  # Downloads one page image into $CACHE_DIR/pages/<artist_id>/<artwork_id>/,
  # named after the last path segment of url. Does nothing when the file is
  # already cached.
  # Returns non-zero when the directory cannot be created or the download
  # fails; a failed download leaves no file behind.
  local artist
  local pages
  local page
  artist="$CACHE_DIR/pages/$3"
  pages="$artist/$2"
  page="$pages/$(basename "$1")"
  mkdir -p "$pages" || return 1
  if [ -f "$page" ]; then
    echo "hoistArtworkPage: Cache HIT for $3:$2 $1" 1>&2
    return 0
  fi
  echo "hoistArtworkPage: Cache MISS for $3:$2 $1" 1>&2
  # Download to a temporary name and only publish it when curl succeeded, so
  # a broken download is never treated as a cached page.
  if ! curl -sf \
    -A "$USER_AGENT" \
    -H "$COOKIES" \
    -H "Referer: https://www.pixiv.net/" \
    "$1" 2>/dev/null > "$page~"; then
    rm -f "$page~"
    return 1
  fi
  mv "$page~" "$page"
}
hoistArtworkPages () {
  # hoistArtworkPages: response artwork_id artist_id
  # Downloads every page listed under .body of the pages response.
  # Returns 1 or 2 when jq cannot read a page entry, 3 when an entry has no
  # .urls.original (the entry is printed on stderr), and 0 otherwise.
  # A page that fails to download is logged and skipped, not fatal.
  local page_index
  local page_json
  local page_url
  for page_index in $(arrayIndexList "$(jq .body -c <<< "$1")"); do
    page_json="$(jq ".body[$((page_index-1))]" -r <<< "$1")" || return 1
    page_url="$(jq .urls.original -r <<< "$page_json")" || return 2
    if [ "$page_url" == "null" ]; then
      echo "$page_json" 1>&2
      return 3
    fi
    if ! hoistArtworkPage "$page_url" "$2" "$3"; then
      echo "hoistArtworkPages: Failed for page $page_index." 1>&2
      # Drop whatever may have been left at the final path; -f because the
      # file usually does not exist after a failure.
      rm -f "$CACHE_DIR/pages/$3/$2/$(basename "$page_url")"
    fi
    if [ -n "$SLOW_MODE" ]; then sleep .3; fi
  done
  return 0
}
hoistArtworkEntry () {
  # hoistArtworkEntry: artwork_id artist_id artwork_info_json artwork_pages_json
  # Writes the Atom <entry> for one artwork to
  # $CACHE_DIR/artwork/<artwork_id>.entry.xml. When CACHE_ENTRY_FILES is set
  # and the file already exists it is left untouched.
  # Returns 2 when a page entry cannot be read from artwork_pages_json.
  local file
  local id
  local href
  local pages
  local page_index
  local page_json
  local page_url
  local page_file
  local tags
  file="$CACHE_DIR/artwork/$1.entry.xml"
  [ -n "$CACHE_ENTRY_FILES" ] && [ -f "$file" ] && return 0
  id="$PIXIV_BASE_URL/artworks/$(jq '.body.illustId | @uri' -r <<< "$3")"
  href="$(jq '.body.extraData.meta.canonical | @html' -r <<< "$3")"
  # The base url is passed as a jq variable rather than spliced into the
  # program text, so characters in it cannot change the jq program.
  tags="$(jq --arg base "$PIXIV_BASE_URL" '.body.tags.tags | map("<a href=\"" + $base + "/tags/" + (.tag | @uri) + "\" title=\"" + (.romaji | @html) + " (" + (.translation?.en | @html) + ")\">" + (.tag | @html) + "</a>") | join(", ")' -r <<< "$3")"
  pages=""
  # One linked <img> paragraph per page, pointing at the locally served copy.
  for page_index in $(arrayIndexList "$(jq .body -c <<< "$4")"); do
    page_json="$(jq ".body[$((page_index-1))]" -r <<< "$4")" || return 2
    page_url="$(jq .urls.original -r <<< "$page_json")" || return 2
    page_file="$(jq @html -Rr <<< "$BASE_URL/pages/$2/$1/$(basename "$page_url")")"
    pages="$pages"'<p><a download="" href="'"$page_file"'"><img src="'"$page_file"'" /></a></p>
'
  done
  echo '<entry>
<title>'"$(jq '.body.illustTitle | @html' -r <<< "$3")"'</title>
<link href="'"$href"'"/>
<link rel="alternate" type="text/html" href="'"$href"'"/>
<id>'"$id"'</id>
<published>'"$(jq '.body.uploadDate | @html' -r <<< "$3")"'</published>
<updated>'"$(date -Is | jq @html -rR)"'</updated>
<content type="xhtml" xml:base="'"$href"'">
<div xmlns="http://www.w3.org/1999/xhtml">
<p>'"$tags"'</p>
'"$pages"'</div>
</content>
<author><name>'"$(jq '.body.userName | @html' -r <<< "$3")"'</name></author>
</entry>' > "$file"
  # <summary>'"$(jq '.body.alt | @html' -r <<< "$3")"'</summary>
}
ingestArtist () {
  # ingestArtist: artist_id
  # Builds $CACHE_DIR/feeds/<artist_id>.xml for one artist: fetches the
  # artist's latest works, caches the newest $DEPTH artworks with their pages,
  # and assembles an Atom feed from the per-artwork entry files.
  # Returns 1 when the artist cannot be fetched or the response reports an
  # error, and 2/3/4 when an artwork id, its info or its pages cannot be read.
  # Dies when an artwork id is not numerical.
  local artist_file
  local artist_json
  local artist_info_json
  local artwork_keys
  local artwork_id
  local artwork_index
  local artwork_pages_json
  local artwork_info_json
  local feed_url
  local feed_file
  local entries_file
  local artist_url
  local artist_name
  artist_file="$CACHE_DIR/users/$1.json"
  artist_url="$PIXIV_BASE_URL/users/$1"
  feed_url="$BASE_URL/feeds/$1.xml"
  feed_file="$CACHE_DIR/feeds/$1.xml"
  entries_file="$CACHE_DIR/feeds/$1.entries.xml~"
  if [ -n "$STALE_MODE" ] && [ -f "$artist_file" ]; then
    echo "ingestArtist: Cache HIT for $1" 1>&2
  else
    echo "ingestArtist: Cache MISS for $1" 1>&2
    # Fetch to a temporary name: a failed request must neither clobber the
    # previous response nor lead to an empty feed being published.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/user/$1/works/latest?lang=en" 2>/dev/null > "$artist_file~"; then
      rm -f "$artist_file~"
      echo "ingestArtist: Failed to fetch latest works for artist $1" 1>&2
      return 1
    fi
    mv "$artist_file~" "$artist_file" || return 1
  fi
  artist_info_json="$(getArtistInfo "$1")" || return 1
  artist_json="$(cat "$artist_file")"
  if jq -e '.error' 1>/dev/null <<< "$artist_info_json"; then
    echo "ingestArtist: Error while reading artist $1 info. Message: $(jq .message -r <<< "$artist_info_json")" 1>&2
    return 1
  fi
  if jq -e '.error' 1>/dev/null <<< "$artist_json"; then
    echo "ingestArtist: Error while reading artist $1. Message: $(jq .message -r <<< "$artist_json")" 1>&2
    return 1
  fi
  # keys are strings sorted by codepoint, so ids of different lengths would
  # be misordered; sort numerically before taking the newest $DEPTH.
  artwork_keys="$(jq --argjson depth "$DEPTH" '.body.illusts | keys | sort_by(tonumber? // 0) | reverse | .[0:$depth]' -c <<< "$artist_json")"
  # Start from an empty entries file so leftovers from an aborted run are not
  # repeated and the file exists even when there are no artworks.
  : > "$entries_file"
  for artwork_index in $(arrayIndexList "$artwork_keys"); do
    artwork_id="$(jq ".[$((artwork_index-1))]" -r <<< "$artwork_keys")" || return 2
    # Validate before the id is used to build file paths and request urls.
    grep -Eq '^[0-9]+$' <<< "$artwork_id" || \
      die "ingestArtist: Exception while handling artist $1, artwork with index of $artwork_index. Illustration ID is not numerical!"
    artwork_info_json="$(getArtworkInfo "$artwork_id" "$1")" || return 3
    artwork_pages_json="$(getArtworkData "$artwork_id" "$1")" || return 4
    [ -n "$SLOW_MODE" ] && sleep .3
    hoistArtworkPages "$artwork_pages_json" "$artwork_id" "$1" || echo "ingestArtist: Error while running hoistArtworkPages. Exit code $?" 1>&2
    if hoistArtworkEntry "$artwork_id" "$1" "$artwork_info_json" "$artwork_pages_json"; then
      cat "$CACHE_DIR/artwork/$artwork_id.entry.xml" >> "$entries_file"
    else
      echo "ingestArtist: Error while running hoistArtworkEntry. Exit code $?" 1>&2
    fi
  done
  artist_name="$(jq '.body.name | @html' -r <<< "$artist_info_json")"
  # Assemble under a temporary name and rename at the end so the served feed
  # is replaced in one step.
  echo '<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>'"$artist_name"'</title>
<subtitle>'"$artist_name on Pixiv"'</subtitle>
<link href="'"$feed_url"'" rel="self" type="application/atom+xml"/>
<link href="'"$artist_url"'" rel="alternate"/>
<id>'"$artist_url"'</id>
<updated>'"$(date -Is)"'</updated>
<author><name>'"$artist_name"'</name></author>
' > "$feed_file~"
  cat "$entries_file" >> "$feed_file~"
  rm "$entries_file"
  echo '
</feed>' >> "$feed_file~"
  mv "$feed_file~" "$feed_file"
}
init () {
  # init:
  # Walks the `artists` array of the merged config and builds a feed for each
  # id. Dies when an id is not numerical or when an artist fails to process.
  # NOTE: artist_id is visible to callees through bash dynamic scoping; check
  # ingestArtist before renaming it.
  local artist_id
  local position
  local artist_list
  artist_list="$(jq '.artists' -c <<< "$CONFIG")"
  for position in $(arrayIndexList "$artist_list"); do
    if ! artist_id="$(jq ".artists[$((position-1))]" -r <<< "$CONFIG")"; then
      return 1
    fi
    if ! grep -Eq '^[0-9]+$' <<< "$artist_id"; then
      die "init: Exception while handling artist $artist_id. Artist ID is not numerical!"
    fi
    ingestArtist "$artist_id" || die "init[$artist_id]: Unexpected exit code when processing artist. $?"
    if [ -n "$SLOW_MODE" ]; then
      sleep 1
    fi
  done
  echo "init: End"
}
init