Initial public commit
This commit is contained in:
commit
bb6553b00c
5
.gitignore
vendored
Normal file
5
.gitignore
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
config.d
|
||||
cache
|
||||
*~
|
||||
~*
|
||||
.#*
|
23
license.md
Normal file
23
license.md
Normal file
@ -0,0 +1,23 @@
|
||||
# MIT<small>+NOMORG</small> License
|
||||
|
||||
Copyright (c) 2022 Jonathan Hyde
|
||||
|
||||
Permission is hereby granted on condition, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
You may not use the Software, or any portions of the Software, for and/or on behalf of the Matrix Foundation or any associated entities.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
47
readme.md
Normal file
47
readme.md
Normal file
@ -0,0 +1,47 @@
|
||||
# uorss
|
||||
Pixiv to RSS bridge written in bash.
|
||||
|
||||
## Setup
|
||||
1. `$ cd $UORSS_DIR && mkdir -p cache/artwork cache/feeds cache/pages cache/users config.d`
|
||||
2. Add appropriate settings in config.d. `touch config.d/0_secrets.yaml config.d/1_config.yaml config.d/2_watchlist.yaml`
|
||||
3. Add cronjob or systemd scheduled service. Set `$UORSS_DIR`!
|
||||
4. Expose `cache/feeds/` and `cache/pages/` with nginx or httpd. You should use a dedicated root with symlinks to each; don't blindly expose your `$UORSS_DIR` directory!
|
||||
|
||||
### `config`
|
||||
```yaml
|
||||
required: [ user_agent, session_id, base_url, pixiv_base_url, depth ]
|
||||
properties:
|
||||
user_agent:
|
||||
comment: 'User agent sent with all requests. Must be a browser user agent, otherwise requests will not go through'
|
||||
type: string
|
||||
session_id:
|
||||
comment: 'Contents of PHPSESSID for a logged in pixiv session. You should put this in its own config file'
|
||||
type: string
|
||||
base_url:
|
||||
comment: 'The base url where you will expose feeds/ and pages/ on your server'
|
||||
example: 'http://10.0.0.2/static/uorss'
|
||||
type: string
|
||||
pixiv_base_url:
|
||||
comment: 'The base url for pixiv links that uorss generates (tags)'
|
||||
example: 'https://www.pixiv.net/en'
|
||||
type: string
|
||||
depth:
|
||||
comment: 'How many illustrations you want uorss to pull. Suggested value is 3. Note that uorss will not clean up stale illustrations it downloads!'
|
||||
type: number
|
||||
stale_mode:
|
||||
comment: "Always use cached pixiv user responses. This is for testing so you don't spam the pixiv api with artist lookups"
|
||||
suggested: false
|
||||
type: boolean
|
||||
cache_entry_files:
|
||||
comment: 'Reuse previously generated entry files instead of regenerating them on every run. After updating uorss you should set this to false for the first run'
|
||||
suggested: true
|
||||
type: boolean
|
||||
slow_mode:
|
||||
comment: "Adds various delays while pulling from pixiv. Use if you're worried about being rate limited or while syncing a lot of artists"
|
||||
type: boolean
|
||||
artists:
|
||||
comment: 'An array of pixiv artist ids. Uorss will not check for duplicates'
|
||||
type: array
|
||||
items:
|
||||
type: string
|
||||
```
|
374
uorss.sh
Executable file
374
uorss.sh
Executable file
@ -0,0 +1,374 @@
|
||||
#!/usr/bin/env bash
# uorss: Pixiv to RSS bridge.
# Settings are read from $UORSS_DIR/config.d, generated data lives in
# $UORSS_DIR/cache.
VERSION="1.0.0"
# ${UORSS_DIR:-} keeps this lookup safe no matter where `set -u` is placed.
CONFIG_DIR="${UORSS_DIR:-}"
set -u  # expanding an unset variable is an error from here on
set +H  # no history expansion, so '!' in strings is always literal

# This fallback will be removed, don't rely on it.
if [ -z "$CONFIG_DIR" ]; then
  echo "main: UORSS_DIR is not defined, assuming working directory. This is potentially unsafe" 1>&2
  CONFIG_DIR="$PWD"
fi

CONFIGD_DIR="$CONFIG_DIR/config.d"
CACHE_DIR="$CONFIG_DIR/cache"

echo "main: Running as $(whoami). Version $VERSION. Directory at $CONFIG_DIR"
|
||||
# die: message...
# Prints all arguments, joined by single spaces, to stderr and exits the
# current shell with status 1.
# printf is used instead of echo so a message that starts with -n or -e is
# printed verbatim rather than being parsed as an echo option.
die () { printf '%s\n' "$*" 1>&2; exit 1; }
|
||||
# stripCtrlChars:
# Filter. Copies stdin to stdout, replacing every ESC byte (0x1b) with the
# four literal characters \x1b. Returns sed's exit status.
stripCtrlChars () {
  # The ESC byte is produced by bash itself; "\x1b" inside a sed regex is a
  # GNU extension that other sed implementations do not understand.
  local esc=$'\x1b'
  sed "s/${esc}/\\\\x1b/g"
}
|
||||
|
||||
# Abort early when the directories the rest of the script relies on are missing.
if [ ! -d "$CONFIG_DIR" ]; then
  die "main: No uorss directory defined! See readme."
fi
if [ ! -d "$CONFIGD_DIR" ]; then
  die "main: No config.d directory defined! See readme."
fi
|
||||
|
||||
str_startswith () {
  # str_startswith: prefix string
  # Returns 0 when string ($2) begins with prefix ($1), 1 otherwise.
  # An empty prefix matches every string.
  # The prefix is quoted inside the pattern, so glob characters in it are
  # compared literally.
  [[ "$2" == "$1"* ]]
}
|
||||
|
||||
str_endswith () {
  # str_endswith: suffix string
  # Returns 0 when string ($2) ends with suffix ($1), 1 otherwise.
  # An empty suffix matches every string.
  # The suffix is quoted inside the pattern, so glob characters in it are
  # compared literally.
  [[ "$2" == *"$1" ]]
}
|
||||
|
||||
|
||||
# Merge all files in config.d/
createConfig () {
  # createConfig:
  # Walks $CONFIGD_DIR in glob order and merges every *.yml, *.yaml and
  # *.json file into one JSON object, which is printed (compact) on stdout.
  # Directories, hidden files and editor temp files (leading or trailing ~)
  # are skipped with a note on stderr.
  # Merge rule: later files override earlier ones recursively (jq `*`),
  # except `artists`, which is concatenated across files.
  # Exits with yq's status when a file cannot be parsed; dies when the merge
  # itself fails.
  local filepath
  local filebase
  local json
  local code
  local config
  config="{}"

  for filepath in "$CONFIGD_DIR/"*; do
    filebase="$(basename "$filepath")"
    if [ -d "$filepath" ]; then
      echo "createConfig: Ignoring $filebase, Is a directory." 1>&2
    elif str_startswith "~" "$filebase" || str_endswith "~" "$filebase"; then
      echo "createConfig: Ignoring $filebase. Is a temporary file!" 1>&2
    elif str_startswith "." "$filebase"; then
      echo "createConfig: Ignoring $filebase. Is a hidden file!" 1>&2
    elif str_endswith ".yml" "$filebase" || str_endswith ".yaml" "$filebase" || str_endswith ".json" "$filebase"; then
      json="$(yq . "$filepath" -c)"
      code="$?"
      if [ "$code" -ne 0 ]; then
        echo "createConfig: Failed to load $filebase. Exited with code $code" 1>&2
        exit "$code"
      fi
      # A file with no content (e.g. freshly created with `touch`, as the
      # readme suggests) produces no document or `null`; neither can be
      # merged with `*`, so treat it as an empty object.
      if [ -z "$json" ] || [ "$json" == "null" ]; then
        json="{}"
      fi
      config="$(jq --argjson a "$config" --argjson b "$json" \
        '$a * $b * {
          artists: ($a.artists + $b.artists)
        }' -nc)" || die "createConfig: Failed to merge $filebase with in-memory config. Exited with code $?"
    else
      echo "createConfig: Ignoring $filebase." 1>&2
    fi
  done

  jq . -c <<< "$config"
}
|
||||
|
||||
CONFIG="$(createConfig)" || die "main: Failed to load config.d! Exited with $?"

# Boolean switches: "1" when the config value is truthy, empty otherwise
# (jq -e exits non-zero for false and null).
SLOW_MODE=""; jq -e .slow_mode 1>/dev/null <<< "$CONFIG" && SLOW_MODE="1"
STALE_MODE=""; jq -e .stale_mode 1>/dev/null <<< "$CONFIG" && STALE_MODE="1"
CACHE_ENTRY_FILES=""; jq -e .cache_entry_files 1>/dev/null <<< "$CONFIG" && CACHE_ENTRY_FILES="1"

# Scalar settings; a missing key comes back from jq as the string "null".
DEPTH="$(jq .depth -r <<< "$CONFIG")"
BASE_URL="$(jq .base_url -r <<< "$CONFIG")"
PIXIV_BASE_URL="$(jq .pixiv_base_url -r <<< "$CONFIG")"
SESSION_ID="$(jq .session_id -r <<< "$CONFIG")"
USER_AGENT="$(jq .user_agent -r <<< "$CONFIG")"
COOKIES="Cookie: PHPSESSID=$SESSION_ID"

[ "$BASE_URL" == "null" ] && die "main: base_url cannot be null!"
[ "$PIXIV_BASE_URL" == "null" ] && die "main: pixiv_base_url cannot be null!"
[ "$SESSION_ID" == "null" ] && die "main: session_id cannot be null!"
[ "$USER_AGENT" == "null" ] && die "main: user_agent cannot be null!"
# depth ends up inside a jq slice expression, so only a plain non-negative
# integer is acceptable (this also rejects a missing value, i.e. "null").
[[ "$DEPTH" =~ ^[0-9]+$ ]] || die "main: depth must be a non-negative integer!"

[ -n "$STALE_MODE" ] && echo "main: Stale mode is on. New posts by artists will not be fetched!" 1>&2
|
||||
|
||||
arrayIndexList () {
  # arrayIndexList: array
  # Prints the 1-based indices of the JSON array in $1, one per line.
  # Prints nothing for an empty array, or when jq does not report a plain
  # non-negative integer length (e.g. invalid JSON).
  local length
  local index
  length="$(jq 'length' -r <<< "$1")"
  # Counting in the shell instead of calling `seq 1 "$length"` avoids seq
  # implementations that count downwards (1, 0) when the length is 0.
  if [[ "$length" =~ ^[0-9]+$ ]]; then
    for (( index = 1; index <= length; index++ )); do
      printf '%s\n' "$index"
    done
  fi
}
|
||||
|
||||
getArtworkData () {
  # getArtworkData: artwork_id artist_id
  # Prints the JSON answer of https://www.pixiv.net/ajax/illust/<artwork_id>/pages.
  # The answer is cached in $CACHE_DIR/artwork/<artwork_id>.pages.json and
  # served from there on later calls. artist_id is only used in log lines.
  # Returns non-zero when the download fails; no cache file is left behind
  # in that case.
  local json_file
  json_file="$CACHE_DIR/artwork/$1.pages.json"

  if [ -f "$json_file" ]; then
    echo "getArtworkData: Cache HIT for $2:$1" 1>&2
  else
    echo "getArtworkData: Cache MISS for $2:$1" 1>&2
    # Download to a temporary name and rename only on success, so a failed
    # request never leaves an empty file that would count as a cache hit.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/illust/$1/pages" 2>/dev/null > "$json_file~"; then
      rm -f -- "$json_file~"
      echo "getArtworkData: Download failed for $2:$1" 1>&2
      return 1
    fi
    mv -- "$json_file~" "$json_file" || return 1
  fi
  cat "$json_file"
}
|
||||
|
||||
getArtworkInfo () {
  # getArtworkInfo: artwork_id artist_id
  # Prints the JSON answer of https://www.pixiv.net/ajax/illust/<artwork_id>.
  # The answer is cached in $CACHE_DIR/artwork/<artwork_id>.info.json and
  # served from there on later calls. artist_id is only used in log lines.
  # Returns non-zero when the download fails; no cache file is left behind
  # in that case.
  local json_file
  json_file="$CACHE_DIR/artwork/$1.info.json"

  if [ -f "$json_file" ]; then
    echo "getArtworkInfo: Cache HIT for $2:$1" 1>&2
  else
    echo "getArtworkInfo: Cache MISS for $2:$1" 1>&2
    # Download to a temporary name and rename only on success, so a failed
    # request never leaves an empty file that would count as a cache hit.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/illust/$1" 2>/dev/null > "$json_file~"; then
      rm -f -- "$json_file~"
      echo "getArtworkInfo: Download failed for $2:$1" 1>&2
      return 1
    fi
    mv -- "$json_file~" "$json_file" || return 1
  fi
  cat "$json_file"
}
|
||||
|
||||
getArtistInfo () {
  # getArtistInfo: artist_id
  # Prints the JSON answer of https://www.pixiv.net/ajax/user/<artist_id>.
  # The answer is cached in $CACHE_DIR/users/<artist_id>.info.json and
  # served from there on later calls.
  # Returns non-zero when the download fails; no cache file is left behind
  # in that case.
  local json_file
  json_file="$CACHE_DIR/users/$1.info.json"

  if [ -f "$json_file" ]; then
    echo "getArtistInfo: Cache HIT for $1" 1>&2
  else
    echo "getArtistInfo: Cache MISS for $1" 1>&2
    # Download to a temporary name and rename only on success, so a failed
    # request never leaves an empty file that would count as a cache hit.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/user/$1" 2>/dev/null > "$json_file~"; then
      rm -f -- "$json_file~"
      echo "getArtistInfo: Download failed for $1" 1>&2
      return 1
    fi
    mv -- "$json_file~" "$json_file" || return 1
  fi
  cat "$json_file"
}
|
||||
|
||||
hoistArtworkPage () {
  # hoistArtworkPage: url artwork_id artist_id
  # Downloads the file at url into
  # $CACHE_DIR/pages/<artist_id>/<artwork_id>/<basename of url>,
  # creating the directories as needed. An existing file is never
  # downloaded again.
  # Returns 0 when the file is present afterwards, non-zero when the
  # directory cannot be created or the download fails (no partial file is
  # left behind).
  local artist
  local pages
  local page
  artist="$CACHE_DIR/pages/$3"
  pages="$artist/$2"
  page="$pages/$(basename "$1")"

  mkdir -p -- "$pages" || return 1

  if [ -f "$page" ]; then
    echo "hoistArtworkPage: Cache HIT for $3:$2 $1" 1>&2
    return 0
  fi

  echo "hoistArtworkPage: Cache MISS for $3:$2 $1" 1>&2
  # Write to "<name>~" first and rename only after curl succeeded, so an
  # aborted or rejected download can never be mistaken for a cached page.
  if ! curl -sf \
    -A "$USER_AGENT" \
    -H "$COOKIES" \
    -H "Referer: https://www.pixiv.net/" \
    "$1" 2>/dev/null > "$page~"; then
    rm -f -- "$page~"
    return 1
  fi
  mv -- "$page~" "$page"
}
|
||||
|
||||
hoistArtworkPages () {
  # hoistArtworkPages: response artwork_id artist_id
  # Downloads every page listed in response (the JSON printed by
  # getArtworkData) through hoistArtworkPage.
  # A page that fails to download is logged and skipped.
  # Returns 1 or 2 when jq cannot read a page, 3 when a page has no
  # .urls.original (the offending page JSON goes to stderr), 0 otherwise.
  local page_index
  local page_json
  local page_url

  for page_index in $(arrayIndexList "$(jq .body -c <<< "$1")"); do
    page_json="$(jq ".body[$((page_index-1))]" -r <<< "$1")" || return 1
    page_url="$(jq .urls.original -r <<< "$page_json")" || return 2
    if [ "$page_url" == "null" ]; then
      echo "$page_json" 1>&2
      return 3
    fi

    if ! hoistArtworkPage "$page_url" "$2" "$3"; then
      echo "hoistArtworkPages: Failed for page $page_index." 1>&2
      # Drop whatever the failed download may have left behind. -f because
      # there may be nothing to remove.
      rm -f -- "$CACHE_DIR/pages/$3/$2/$(basename "$page_url")"
    fi

    if [ -n "$SLOW_MODE" ]; then sleep .3; fi
  done
  return 0
}
|
||||
|
||||
hoistArtworkEntry () {
  # hoistArtworkEntry: artwork_id artist_id artwork_info_json artwork_pages_json
  # Writes the Atom <entry> element for one artwork to
  # $CACHE_DIR/artwork/<artwork_id>.entry.xml.
  # artwork_info_json supplies title, tags, dates and author;
  # artwork_pages_json supplies the page list, which is linked via
  # $BASE_URL/pages/<artist_id>/<artwork_id>/<file>.
  # When CACHE_ENTRY_FILES is set and the entry file already exists, nothing
  # is regenerated.
  # Returns 2 when jq cannot read a page, otherwise the status of the write.
  local file
  local id
  local href
  local pages
  local page_index
  local page_json
  local page_url
  local page_file
  local tags
  local title
  local published
  local updated
  local author
  file="$CACHE_DIR/artwork/$1.entry.xml"
  if [ -n "$CACHE_ENTRY_FILES" ] && [ -f "$file" ]; then
    return 0
  fi

  id="$PIXIV_BASE_URL/artworks/$(jq '.body.illustId | @uri' -r <<< "$3")"
  href="$(jq '.body.extraData.meta.canonical | @html' -r <<< "$3")"
  # The base url is handed to jq as a variable instead of being pasted into
  # the program text, so quotes or backslashes in it cannot break the filter.
  tags="$(jq --arg base "$PIXIV_BASE_URL" \
    '.body.tags.tags | map("<a href=\"" + $base + "/tags/" + (.tag | @uri) + "\" title=\"" + (.romaji | @html) + " (" + (.translation?.en | @html) + ")\">" + (.tag | @html) + "</a>") | join(", ")' -r <<< "$3")"

  pages=""
  for page_index in $(arrayIndexList "$(jq .body -c <<< "$4")"); do
    page_json="$(jq ".body[$((page_index-1))]" -r <<< "$4")" || return 2
    page_url="$(jq .urls.original -r <<< "$page_json")" || return 2
    page_file="$(jq @html -Rr <<< "$BASE_URL/pages/$2/$1/$(basename "$page_url")")"
    pages="$pages"'<p><a download="" href="'"$page_file"'"><img src="'"$page_file"'" /></a></p>'$'\n'
  done

  title="$(jq '.body.illustTitle | @html' -r <<< "$3")"
  published="$(jq '.body.uploadDate | @html' -r <<< "$3")"
  updated="$(date -Is | jq @html -rR)"
  author="$(jq '.body.userName | @html' -r <<< "$3")"

  # Not emitted at the moment:
  # <summary>$(jq '.body.alt | @html' -r <<< "$3")</summary>
  cat > "$file" <<EOF
<entry>
<title>${title}</title>
<link href="${href}"/>
<link rel="alternate" type="text/html" href="${href}"/>
<id>${id}</id>
<published>${published}</published>
<updated>${updated}</updated>
<content type="xhtml" xml:base="${href}">
<div xmlns="http://www.w3.org/1999/xhtml">
<p>${tags}</p>
${pages}</div>
</content>
<author><name>${author}</name></author>
</entry>
EOF
}
|
||||
|
||||
ingestArtist () {
  # ingestArtist: artist_id
  # Builds the Atom feed $CACHE_DIR/feeds/<artist_id>.xml for one artist:
  #   1. fetches the artist's latest works (or reuses the cached answer in
  #      $CACHE_DIR/users/<artist_id>.json when STALE_MODE is set),
  #   2. for the newest DEPTH illustrations downloads info, pages and images
  #      and generates one entry file each,
  #   3. assembles the feed in "<feed>~" and renames it into place.
  # Returns 1 when the artist data cannot be fetched or pixiv reports an
  # error, 2-4 when an artwork cannot be read. Dies when an illustration id
  # is not numerical.
  local artist_file
  local artist_json
  local artist_info_json
  local artwork_keys
  local artwork_id
  local artwork_index
  local artwork_pages_json
  local artwork_info_json
  local feed_url
  local feed_file
  local entries_file
  local artist_url
  local artist_name
  artist_file="$CACHE_DIR/users/$1.json"
  artist_url="$PIXIV_BASE_URL/users/$1"
  feed_url="$BASE_URL/feeds/$1.xml"
  feed_file="$CACHE_DIR/feeds/$1.xml"
  entries_file="$CACHE_DIR/feeds/$1.entries.xml~"

  if [ -n "$STALE_MODE" ] && [ -f "$artist_file" ]; then
    echo "ingestArtist: Cache HIT for $1" 1>&2
  else
    echo "ingestArtist: Cache MISS for $1" 1>&2
    # Fetch into a temporary file so a failed request neither destroys the
    # previous answer nor results in an empty feed being published.
    if ! curl -sf \
      -A "$USER_AGENT" \
      -H "$COOKIES" \
      -H "Referer: https://www.pixiv.net/" \
      "https://www.pixiv.net/ajax/user/$1/works/latest?lang=en" 2>/dev/null > "$artist_file~"; then
      rm -f -- "$artist_file~"
      echo "ingestArtist: Failed to fetch latest works of artist $1" 1>&2
      return 1
    fi
    mv -- "$artist_file~" "$artist_file" || return 1
  fi

  artist_info_json="$(getArtistInfo "$1")" || return 1
  artist_json="$(cat "$artist_file")"

  if jq -e '.error' 1>/dev/null <<< "$artist_info_json"; then
    echo "ingestArtist: Error while reading artist $1 info. Message: $(jq .message -r <<< "$artist_info_json")" 1>&2
    return 1
  fi

  if jq -e '.error' 1>/dev/null <<< "$artist_json"; then
    echo "ingestArtist: Error while reading artist $1. Message: $(jq .message -r <<< "$artist_json")" 1>&2
    return 1
  fi

  # Illustration ids are object keys, i.e. strings; sort them by numeric
  # value so "100" counts as newer than "99". DEPTH is passed as a jq
  # variable rather than pasted into the program text.
  artwork_keys="$(jq --argjson depth "$DEPTH" \
    '.body.illusts | keys | sort_by(tonumber) | reverse | .[0:$depth]' -c <<< "$artist_json")" || {
    echo "ingestArtist: Could not read the illustration list of artist $1" 1>&2
    return 1
  }

  # Start from an empty entries file: leftovers of an aborted run would be
  # duplicated otherwise, and the file must exist even with zero artworks.
  : > "$entries_file"
  for artwork_index in $(arrayIndexList "$artwork_keys"); do
    artwork_id="$(jq ".[$((artwork_index-1))]" -r <<< "$artwork_keys")" || return 2

    # Validate before the id is used in any URL or file name. [[ =~ ]]
    # matches against the whole value, not line by line.
    [[ "$artwork_id" =~ ^[0-9]+$ ]] || \
      die "ingestArtist: Exception while handling artist $1, artwork with index of $artwork_index. Illustration ID is not numerical!"

    artwork_info_json="$(getArtworkInfo "$artwork_id" "$1")" || return 3
    artwork_pages_json="$(getArtworkData "$artwork_id" "$1")" || return 4
    [ -n "$SLOW_MODE" ] && sleep .3

    hoistArtworkPages "$artwork_pages_json" "$artwork_id" "$1" || echo "ingestArtist: Error while running hoistArtworkPages. Exit code $?" 1>&2

    if hoistArtworkEntry "$artwork_id" "$1" "$artwork_info_json" "$artwork_pages_json"; then
      cat "$CACHE_DIR/artwork/$artwork_id.entry.xml" >> "$entries_file"
    else
      echo "ingestArtist: Error while running hoistArtworkEntry. Exit code $?" 1>&2
    fi
  done

  artist_name="$(jq '.body.name | @html' -r <<< "$artist_info_json")"
  {
    cat <<EOF
<?xml version="1.0" encoding="UTF-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>${artist_name}</title>
<subtitle>${artist_name} on Pixiv</subtitle>
<link href="${feed_url}" rel="self" type="application/atom+xml"/>
<link href="${artist_url}" rel="alternate"/>
<id>${artist_url}</id>
<updated>$(date -Is)</updated>
<author><name>${artist_name}</name></author>

EOF
    cat "$entries_file"
    echo '
</feed>'
  } > "$feed_file~"
  rm -f -- "$entries_file"
  # Rename last, so readers never see a half-written feed.
  mv -- "$feed_file~" "$feed_file"
}
|
||||
|
||||
init () {
  # init:
  # Entry point. Runs ingestArtist for every id in the `artists` array of
  # $CONFIG, in order, then prints "init: End".
  # Dies when an id is not purely numerical or when an artist cannot be
  # processed. Returns 1 when jq cannot read an array element.
  local artist_id
  local artists_index

  for artists_index in $(arrayIndexList "$(jq '.artists' -c <<< "$CONFIG")"); do
    artist_id="$(jq ".artists[$((artists_index-1))]" -r <<< "$CONFIG")" || return 1
    # [[ =~ ]] tests the whole value. A line-based grep would accept a
    # multi-line value as soon as one of its lines is numeric.
    [[ "$artist_id" =~ ^[0-9]+$ ]] || die "init: Exception while handling artist $artist_id. Artist ID is not numerical!"
    ingestArtist "$artist_id" || die "init[$artist_id]: Unexpected exit code when processing artist. $?"
    if [ -n "$SLOW_MODE" ]; then sleep 1; fi
  done
  echo "init: End"
}
|
||||
|
||||
init
|
Loading…
Reference in New Issue
Block a user