ssd

.gitignore (vendored) — 3 lines changed
@@ -236,5 +236,4 @@ scripts/mm.ps1
scripts/mm
.style.yapf
.yapfignore

tmp_*
@@ -144,6 +144,11 @@ def _resolve_verify_value(verify_ssl: bool) -> Union[bool, str]:
    return True


def get_requests_verify_value(verify_ssl: bool = True) -> Union[bool, str]:
    """Expose the resolved verify value for reuse outside of HTTPClient (requests sessions)."""
    return _resolve_verify_value(verify_ssl)


# Default configuration
DEFAULT_TIMEOUT = 30.0
DEFAULT_RETRIES = 3
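For context, a minimal usage sketch (not part of the commit) of how a plain requests session could reuse the same TLS-verification setting; the session and URL are illustrative:

import requests
from API.HTTP import get_requests_verify_value

session = requests.Session()
# get_requests_verify_value() yields True/False or a CA-bundle path (Union[bool, str])
session.verify = get_requests_verify_value()
resp = session.get("https://archive.org/metadata/example")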
@@ -92,7 +92,7 @@
      "(hitfile\\.net/[a-z0-9A-Z]{4,9})"
    ],
    "regexp": "(hitf\\.(to|cc)/([a-z0-9A-Z]{4,9}))|(htfl\\.(net|to|cc)/([a-z0-9A-Z]{4,9}))|(hitfile\\.(net)/download/free/([a-z0-9A-Z]{4,9}))|((hitfile\\.net/[a-z0-9A-Z]{4,9}))",
    "status": true
    "status": false
  },
  "mega": {
    "name": "mega",
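For illustration only (not part of the commit): each entry's "regexp" value is a Python-compatible pattern, so a sketch of routing a URL through a table like this could look as follows (the file name and the flat name-to-entry layout are assumptions):

import json
import re

with open("providers.json", "r", encoding="utf-8") as fh:  # hypothetical path
    providers = json.load(fh)

def match_provider(url: str):
    # Return the first enabled provider whose combined pattern matches the URL.
    for name, entry in providers.items():
        if not entry.get("status", True):
            continue
        if re.search(entry["regexp"], url):
            return name
    return None

print(match_provider("https://gigapeta.com/dl/1234567890123"))  # -> "gigapeta"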
@@ -389,7 +389,7 @@
      "(filespace\\.com/[a-zA-Z0-9]{12})"
    ],
    "regexp": "(filespace\\.com/fd/([a-zA-Z0-9]{12}))|((filespace\\.com/[a-zA-Z0-9]{12}))",
    "status": true
    "status": false
  },
  "filezip": {
    "name": "filezip",
@@ -412,7 +412,7 @@
      "(gigapeta\\.com/dl/[0-9a-zA-Z]{13,15})"
    ],
    "regexp": "(gigapeta\\.com/dl/[0-9a-zA-Z]{13,15})",
    "status": false
    "status": true
  },
  "google": {
    "name": "google",
@@ -507,7 +507,7 @@
      "mediafire\\.com/(\\?|download/|file/|download\\.php\\?)([0-9a-z]{15})"
    ],
    "regexp": "mediafire\\.com/(\\?|download/|file/|download\\.php\\?)([0-9a-z]{15})",
    "status": false
    "status": true
  },
  "mexashare": {
    "name": "mexashare",
@@ -1650,18 +1650,6 @@
|
||||
],
|
||||
"regexp": "https?://music\\.apple\\.com/[\\w-]+/post/(\\d+)"
|
||||
},
|
||||
"appledaily": {
|
||||
"name": "appledaily",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"ent.appledaily.com.tw",
|
||||
"appledaily.com.tw"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(www|ent)\\.appledaily\\.com\\.tw/[^/]+/[^/]+/[^/]+/(\\d+)/(\\d+)(/.*)?"
|
||||
],
|
||||
"regexp": "https?://(www|ent)\\.appledaily\\.com\\.tw/[^/]+/[^/]+/[^/]+/(\\d+)/(\\d+)(/.*)?"
|
||||
},
|
||||
"applepodcasts": {
|
||||
"name": "applepodcasts",
|
||||
"type": "free",
|
||||
@@ -2058,9 +2046,9 @@
|
||||
"https?://(?:([^.]+)\\.)?bandcamp\\.com/album/([^/?#&]+)",
|
||||
"https?://([^/]+)\\.bandcamp\\.com/track/([^/?#&]+)",
|
||||
"https?://(?!www\\.)([^.]+)\\.bandcamp\\.com(?:/music)?/?(?:[#?]|$)",
|
||||
"https?://(?:www\\.)?bandcamp\\.com/?\\?(?:.*?&)?show=(\\d+)"
|
||||
"https?://(?:www\\.)?bandcamp\\.com/radio/?\\?(?:[^#]+&)?show=(\\d+)"
|
||||
],
|
||||
"regexp": "(https?://(?:([^.]+)\\.)?bandcamp\\.com/album/([^/?#&]+))|(https?://([^/]+)\\.bandcamp\\.com/track/([^/?#&]+))|(https?://(?!www\\.)([^.]+)\\.bandcamp\\.com(?:/music)?/?(?:[#?]|$))|(https?://(?:www\\.)?bandcamp\\.com/?\\?(?:.*?&)?show=(\\d+))"
|
||||
"regexp": "(https?://(?:([^.]+)\\.)?bandcamp\\.com/album/([^/?#&]+))|(https?://([^/]+)\\.bandcamp\\.com/track/([^/?#&]+))|(https?://(?!www\\.)([^.]+)\\.bandcamp\\.com(?:/music)?/?(?:[#?]|$))|(https?://(?:www\\.)?bandcamp\\.com/radio/?\\?(?:[^#]+&)?show=(\\d+))"
|
||||
},
|
||||
"bandlab": {
|
||||
"name": "bandlab",
|
||||
@@ -3296,23 +3284,25 @@
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"cda.pl",
|
||||
"ebd.cda.pl"
|
||||
"ebd.cda.pl",
|
||||
"m.cda.pl"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:(?:www\\.)?cda\\.pl/video|ebd\\.cda\\.pl/[0-9]+x[0-9]+)/([0-9a-z]+)"
|
||||
"https?://(?:(?:(?:www|m)\\.)?cda\\.pl/video|ebd\\.cda\\.pl/[0-9]+x[0-9]+)/([0-9a-z]+)"
|
||||
],
|
||||
"regexp": "https?://(?:(?:www\\.)?cda\\.pl/video|ebd\\.cda\\.pl/[0-9]+x[0-9]+)/([0-9a-z]+)"
|
||||
"regexp": "https?://(?:(?:(?:www|m)\\.)?cda\\.pl/video|ebd\\.cda\\.pl/[0-9]+x[0-9]+)/([0-9a-z]+)"
|
||||
},
|
||||
"cdafolder": {
|
||||
"name": "cdafolder",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"cda.pl"
|
||||
"cda.pl",
|
||||
"m.cda.pl"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.)?cda\\.pl/([\\w-]+)/folder/(\\d+)"
|
||||
"https?://(?:(?:www|m)\\.)?cda\\.pl/([\\w-]+)/folder/(\\d+)"
|
||||
],
|
||||
"regexp": "https?://(?:www\\.)?cda\\.pl/([\\w-]+)/folder/(\\d+)"
|
||||
"regexp": "https?://(?:(?:www|m)\\.)?cda\\.pl/([\\w-]+)/folder/(\\d+)"
|
||||
},
|
||||
"cellebrite": {
|
||||
"name": "cellebrite",
|
||||
@@ -3768,6 +3758,17 @@
|
||||
],
|
||||
"regexp": "https?://www\\.craftsy\\.com/class/([\\w-]+)"
|
||||
},
|
||||
"croatian.film": {
|
||||
"name": "croatian.film",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"croatian.film"
|
||||
],
|
||||
"regexps": [
|
||||
"https://?(?:www\\.)?croatian\\.film/[a-z]{2}/[^/?#]+/(\\d+)"
|
||||
],
|
||||
"regexp": "https://?(?:www\\.)?croatian\\.film/[a-z]{2}/[^/?#]+/(\\d+)"
|
||||
},
|
||||
"crooksandliars": {
|
||||
"name": "crooksandliars",
|
||||
"type": "free",
|
||||
@@ -4379,9 +4380,9 @@
|
||||
"dropbox.com"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.)?dropbox\\.com/(?:(?:e/)?scl/fi|sh?)/(\\w+)"
|
||||
"https?://(?:www\\.)?dropbox\\.com/(?:(?:e/)?scl/f[io]|sh?)/(\\w+)"
|
||||
],
|
||||
"regexp": "https?://(?:www\\.)?dropbox\\.com/(?:(?:e/)?scl/fi|sh?)/(\\w+)"
|
||||
"regexp": "https?://(?:www\\.)?dropbox\\.com/(?:(?:e/)?scl/f[io]|sh?)/(\\w+)"
|
||||
},
|
||||
"dropout": {
|
||||
"name": "dropout",
|
||||
@@ -5088,6 +5089,17 @@
|
||||
],
|
||||
"regexp": "https?://www\\.fifa\\.com/fifaplus/\\w{2}/watch/([^#?]+/)?(\\w+)"
|
||||
},
|
||||
"filmarchiv": {
|
||||
"name": "filmarchiv",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"filmarchiv.at"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.)?filmarchiv\\.at/de/filmarchiv-on/video/(f_[0-9a-zA-Z]{5,})"
|
||||
],
|
||||
"regexp": "https?://(?:www\\.)?filmarchiv\\.at/de/filmarchiv-on/video/(f_[0-9a-zA-Z]{5,})"
|
||||
},
|
||||
"filmon": {
|
||||
"name": "filmon",
|
||||
"type": "free",
|
||||
@@ -7954,37 +7966,6 @@
|
||||
],
|
||||
"regexp": "https?://(?:w(?:ww)?\\.)?mgtv\\.com/[bv]/(?:[^/]+/)*(\\d+)\\.html"
|
||||
},
|
||||
"manototv": {
|
||||
"name": "manototv",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"manototv.com"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.)?manototv\\.com/episode/([0-9]+)"
|
||||
],
|
||||
"regexp": "https?://(?:www\\.)?manototv\\.com/episode/([0-9]+)"
|
||||
},
|
||||
"manototvlive": {
|
||||
"name": "manototvlive",
|
||||
"type": "free",
|
||||
"domains": [],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.)?manototv\\.com/live/"
|
||||
],
|
||||
"regexp": "https?://(?:www\\.)?manototv\\.com/live/"
|
||||
},
|
||||
"manototvshow": {
|
||||
"name": "manototvshow",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"manototv.com"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.)?manototv\\.com/show/([0-9]+)"
|
||||
],
|
||||
"regexp": "https?://(?:www\\.)?manototv\\.com/show/([0-9]+)"
|
||||
},
|
||||
"manyvids": {
|
||||
"name": "manyvids",
|
||||
"type": "free",
|
||||
@@ -9321,9 +9302,10 @@
|
||||
"https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(?!myshows|library|videos)([\\w-]+)/?(?:$|[?#])",
|
||||
"https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(?!(?:myshows|library|videos)/)([\\w-]+)/([\\w-]+)/?(?:$|[?#])",
|
||||
"https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/videos/([\\w-]+)",
|
||||
"https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/([\\w-]+)/season/([\\w-]+)",
|
||||
"https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(myshows|library/latest-videos)/?(?:$|[?#])"
|
||||
],
|
||||
"regexp": "(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(?!myshows|library|videos)([\\w-]+)/?(?:$|[?#]))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(?!(?:myshows|library|videos)/)([\\w-]+)/([\\w-]+)/?(?:$|[?#]))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/videos/([\\w-]+))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(myshows|library/latest-videos)/?(?:$|[?#]))"
|
||||
"regexp": "(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(?!myshows|library|videos)([\\w-]+)/?(?:$|[?#]))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(?!(?:myshows|library|videos)/)([\\w-]+)/([\\w-]+)/?(?:$|[?#]))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/videos/([\\w-]+))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/([\\w-]+)/season/([\\w-]+))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(myshows|library/latest-videos)/?(?:$|[?#]))"
|
||||
},
|
||||
"nekohacker": {
|
||||
"name": "nekohacker",
|
||||
@@ -9402,10 +9384,10 @@
|
||||
"https?://(?:y\\.)?music\\.163\\.com/(?:[#m]/)?song\\?.*?\\bid=([0-9]+)",
|
||||
"https?://music\\.163\\.com/(?:#/)?(?:playlist|discover/toplist)\\?id=([0-9]+)",
|
||||
"https?://music\\.163\\.com/(?:#/)?mv\\?id=([0-9]+)",
|
||||
"https?://music\\.163\\.com/(?:#/)?program\\?id=([0-9]+)",
|
||||
"https?://music\\.163\\.com/(?:#/)?(?:dj|program)\\?id=([0-9]+)",
|
||||
"https?://music\\.163\\.com/(?:#/)?artist\\?id=([0-9]+)"
|
||||
],
|
||||
"regexp": "(https?://music\\.163\\.com/(?:#/)?album\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?djradio\\?id=([0-9]+))|(https?://(?:y\\.)?music\\.163\\.com/(?:[#m]/)?song\\?.*?\\bid=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?(?:playlist|discover/toplist)\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?mv\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?program\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?artist\\?id=([0-9]+))"
|
||||
"regexp": "(https?://music\\.163\\.com/(?:#/)?album\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?djradio\\?id=([0-9]+))|(https?://(?:y\\.)?music\\.163\\.com/(?:[#m]/)?song\\?.*?\\bid=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?(?:playlist|discover/toplist)\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?mv\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?(?:dj|program)\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?artist\\?id=([0-9]+))"
|
||||
},
|
||||
"netplustv": {
|
||||
"name": "netplustv",
|
||||
@@ -9478,9 +9460,9 @@
|
||||
"netzkino.de"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.)?netzkino\\.de/\\#!/[^/]+/([^/]+)"
|
||||
"https?://(?:www\\.)?netzkino\\.de/details/([^/?#]+)"
|
||||
],
|
||||
"regexp": "https?://(?:www\\.)?netzkino\\.de/\\#!/[^/]+/([^/]+)"
|
||||
"regexp": "https?://(?:www\\.)?netzkino\\.de/details/([^/?#]+)"
|
||||
},
|
||||
"newgrounds": {
|
||||
"name": "newgrounds",
|
||||
@@ -9519,37 +9501,6 @@
|
||||
],
|
||||
"regexp": "https?://(?:www\\.)?newsy\\.com/stories/([^/?#$&]+)"
|
||||
},
|
||||
"nextmedia": {
|
||||
"name": "nextmedia",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"hk.apple.nextmedia.com"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://hk\\.apple\\.nextmedia\\.com/[^/]+/[^/]+/(\\d+)/(\\d+)"
|
||||
],
|
||||
"regexp": "https?://hk\\.apple\\.nextmedia\\.com/[^/]+/[^/]+/(\\d+)/(\\d+)"
|
||||
},
|
||||
"nextmediaactionnews": {
|
||||
"name": "nextmediaactionnews",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"hk.dv.nextmedia.com"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://hk\\.dv\\.nextmedia\\.com/actionnews/[^/]+/(\\d+)/(\\d+)/\\d+"
|
||||
],
|
||||
"regexp": "https?://hk\\.dv\\.nextmedia\\.com/actionnews/[^/]+/(\\d+)/(\\d+)/\\d+"
|
||||
},
|
||||
"nexttv": {
|
||||
"name": "nexttv",
|
||||
"type": "free",
|
||||
"domains": [],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.)?nexttv\\.com\\.tw/(?:[^/]+/)+(\\d+)"
|
||||
],
|
||||
"regexp": "https?://(?:www\\.)?nexttv\\.com\\.tw/(?:[^/]+/)+(\\d+)"
|
||||
},
|
||||
"nexx": {
|
||||
"name": "nexx",
|
||||
"type": "free",
|
||||
@@ -9809,7 +9760,7 @@
|
||||
"name": "nitter",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"nitter.priv.pw"
|
||||
"nitter.projectsegfau.lt"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad\\.onion|nitter\\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\\.onion|nitter7bryz3jv7e3uekphigvmoyoem4al3fynerxkj22dmoxoq553qd\\.onion|npf37k3mtzwxreiw52ccs5ay4e6qt2fkcs2ndieurdyn2cuzzsfyfvid\\.onion|nitter\\.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd\\.onion|i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad\\.onion|26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid\\.onion|vfaomgh4jxphpbdfizkm5gbtjahmei234giqj4facbwhrfjtcldauqad\\.onion|iwgu3cv7ywf3gssed5iqtavmrlszgsxazkmwwnt4h2kdait75thdyrqd\\.onion|erpnncl5nhyji3c32dcfmztujtl3xaddqb457jsbkulq24zqq7ifdgad\\.onion|ckzuw5misyahmg7j5t5xwwuj3bwy62jfolxyux4brfflramzsvvd3syd\\.onion|jebqj47jgxleaiosfcxfibx2xdahjettuydlxbg64azd4khsxv6kawid\\.onion|nttr2iupbb6fazdpr2rgbooon2tzbbsvvkagkgkwohhodjzj43stxhad\\.onion|nitraeju2mipeziu2wtcrqsxg7h62v5y4eqgwi75uprynkj74gevvuqd\\.onion|nitter\\.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd\\.onion|ibsboeui2im5o7dxnik3s5yghufumgy5abevtij5nbizequfpu4qi4ad\\.onion|ec5nvbycpfa5k6ro77blxgkyrzbkv7uy6r5cngcbkadtjj2733nm3uyd\\.onion|nitter\\.i2p|u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa\\.b32\\.i2p|nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd\\.onion|nitter\\.lacontrevoie\\.fr|nitter\\.fdn\\.fr|nitter\\.1d4\\.us|nitter\\.kavin\\.rocks|nitter\\.unixfox\\.eu|nitter\\.domain\\.glass|nitter\\.namazso\\.eu|birdsite\\.xanny\\.family|nitter\\.moomoo\\.me|bird\\.trom\\.tf|nitter\\.it|twitter\\.censors\\.us|nitter\\.grimneko\\.de|twitter\\.076\\.ne\\.jp|nitter\\.fly\\.dev|notabird\\.site|nitter\\.weiler\\.rocks|nitter\\.sethforprivacy\\.com|nitter\\.cutelab\\.space|nitter\\.nl|nitter\\.mint\\.lgbt|nitter\\.bus\\-hit\\.me|nitter\\.esmailelbob\\.xyz|tw\\.artemislena\\.eu|nitter\\.winscloud\\.net|nitter\\.tiekoetter\\.com|nitter\\.spaceint\\.fr|nitter\\.privacy\\.com\\.de|nitter\\.poast\\.org|nitter\\.bird\\.froth\\.zone|nitter\\.dcs0\\.hu|twitter\\.dr460nf1r3\\.org|nitter\\.garudalinux\\.org|twitter\\.femboy\\.hu|nitter\\.cz|nitter\\.privacydev\\.net|nitter\\.evil\\.site|tweet\\.lambda\\.dance|nitter\\.kylrth\\.com|nitter\\.foss\\.wtf|nitter\\.priv\\.pw|nitter\\.tokhmi\\.xyz|nitter\\.catalyst\\.sx|unofficialbird\\.com|nitter\\.projectsegfau\\.lt|nitter\\.eu\\.projectsegfau\\.lt|singapore\\.unofficialbird\\.com|canada\\.unofficialbird\\.com|india\\.unofficialbird\\.com|nederland\\.unofficialbird\\.com|uk\\.unofficialbird\\.com|n\\.l5\\.ca|nitter\\.slipfox\\.xyz|nitter\\.soopy\\.moe|nitter\\.qwik\\.space|read\\.whatever\\.social|nitter\\.rawbit\\.ninja|nt\\.vern\\.cc|ntr\\.odyssey346\\.dev|nitter\\.ir|nitter\\.privacytools\\.io|nitter\\.sneed\\.network|n\\.sneed\\.network|nitter\\.manasiwibi\\.com|nitter\\.smnz\\.de|nitter\\.twei\\.space|nitter\\.inpt\\.fr|nitter\\.d420\\.de|nitter\\.caioalonso\\.com|nitter\\.at|nitter\\.drivet\\.xyz|nitter\\.pw|nitter\\.nicfab\\.eu|bird\\.habedieeh\\.re|nitter\\.hostux\\.net|nitter\\.adminforge\\.de|nitter\\.platypush\\.tech|nitter\\.mask\\.sh|nitter\\.pufe\\.org|nitter\\.us\\.projectsegfau\\.lt|nitter\\.arcticfoxes\\.net|t\\.com\\.sb|nitter\\.kling\\.gg|nitter\\.ktachibana\\.party|nitter\\.riverside\\.rocks|nitter\\.girlboss\\.ceo|nitter\\.lunar\\.icu|twitter\\.moe\\.ngo|nitter\\.freedit\\.eu|ntr\\.frail\\.duckdns\\.org|nitter\\.librenode\\.org|n\\.opnxng\\.com|nitter\\.plus\\.st|nitter\\.ethibox\\.fr|nitter\\.net|is\\-nitter\\.resolv\\.ee|lu\\-nitter\\.resolv\\.ee|nitter\\.13ad\\.de|nitter\\.40two\\.app|nitter\\.cattube\\.org|nitter\\.cc|nitter\\.dar
k\\.fail|nitter\\.himiko\\.cloud|nitter\\.koyu\\.space|nitter\\.mailstation\\.de|nitter\\.mastodont\\.cat|nitter\\.tedomum\\.net|nitter\\.tokhmi\\.xyz|nitter\\.weaponizedhumiliation\\.com|nitter\\.vxempire\\.xyz|tweet\\.lambda\\.dance|nitter\\.ca|nitter\\.42l\\.fr|nitter\\.pussthecat\\.org|nitter\\.nixnet\\.services|nitter\\.eu|nitter\\.actionsack\\.com|nitter\\.hu|twitr\\.gq|nittereu\\.moomoo\\.me|bird\\.from\\.tf|twitter\\.grimneko\\.de|nitter\\.alefvanoon\\.xyz|n\\.hyperborea\\.cloud|twitter\\.mstdn\\.social|nitter\\.silkky\\.cloud|nttr\\.stream|fuckthesacklers\\.network|nitter\\.govt\\.land|nitter\\.datatunnel\\.xyz|de\\.nttr\\.stream|twtr\\.bch\\.bar|nitter\\.exonip\\.de|nitter\\.mastodon\\.pro|nitter\\.notraxx\\.ch|nitter\\.skrep\\.in|nitter\\.snopyta\\.org)/(.+)/status/([0-9]+)(#.)?"
|
||||
@@ -10613,6 +10564,17 @@
|
||||
],
|
||||
"regexp": "(https?://(?:www\\.)?palcomp3\\.com(?:\\.br)?/([^/?&#]+))|(https?://(?:www\\.)?palcomp3\\.com(?:\\.br)?/([^/]+)/([^/?&#]+))|(https?://(?:www\\.)?palcomp3\\.com(?:\\.br)?/([^/]+)/([^/?&#]+)/?#clipe)"
|
||||
},
|
||||
"pandatv": {
|
||||
"name": "pandatv",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"pandalive.co.kr"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.|m\\.)?pandalive\\.co\\.kr/play/(\\w+)"
|
||||
],
|
||||
"regexp": "https?://(?:www\\.|m\\.)?pandalive\\.co\\.kr/play/(\\w+)"
|
||||
},
|
||||
"panopto": {
|
||||
"name": "panopto",
|
||||
"type": "free",
|
||||
@@ -10704,10 +10666,10 @@
|
||||
"parti.com"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.)?parti\\.com/creator/([\\w]+)/([\\w/-]+)",
|
||||
"https?://(?:www\\.)?parti\\.com/(?!video/)([\\w/-]+)",
|
||||
"https?://(?:www\\.)?parti\\.com/video/(\\d+)"
|
||||
],
|
||||
"regexp": "(https?://(?:www\\.)?parti\\.com/creator/([\\w]+)/([\\w/-]+))|(https?://(?:www\\.)?parti\\.com/video/(\\d+))"
|
||||
"regexp": "(https?://(?:www\\.)?parti\\.com/(?!video/)([\\w/-]+))|(https?://(?:www\\.)?parti\\.com/video/(\\d+))"
|
||||
},
|
||||
"patreon": {
|
||||
"name": "patreon",
|
||||
@@ -12963,15 +12925,6 @@
|
||||
],
|
||||
"regexp": "https?://(?:www\\.)?sciencechannel\\.com/video/([^/]+/[^/?#]+)"
|
||||
},
|
||||
"screen.yahoo": {
|
||||
"name": "screen.yahoo",
|
||||
"type": "free",
|
||||
"domains": [],
|
||||
"regexps": [
|
||||
"yvsearch(|[1-9][0-9]*|all):([\\s\\S]+)"
|
||||
],
|
||||
"regexp": "yvsearch(|[1-9][0-9]*|all):([\\s\\S]+)"
|
||||
},
|
||||
"screen9": {
|
||||
"name": "screen9",
|
||||
"type": "free",
|
||||
@@ -13060,28 +13013,6 @@
|
||||
],
|
||||
"regexp": "https?://(?:www\\.)?scrolller\\.com/([\\w-]+)"
|
||||
},
|
||||
"scte": {
|
||||
"name": "scte",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"learning.scte.org"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://learning\\.scte\\.org/mod/scorm/view\\.php?.*?\\bid=(\\d+)"
|
||||
],
|
||||
"regexp": "https?://learning\\.scte\\.org/mod/scorm/view\\.php?.*?\\bid=(\\d+)"
|
||||
},
|
||||
"sctecourse": {
|
||||
"name": "sctecourse",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"learning.scte.org"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://learning\\.scte\\.org/(?:mod/sub)?course/view\\.php?.*?\\bid=(\\d+)"
|
||||
],
|
||||
"regexp": "https?://learning\\.scte\\.org/(?:mod/sub)?course/view\\.php?.*?\\bid=(\\d+)"
|
||||
},
|
||||
"sejm": {
|
||||
"name": "sejm",
|
||||
"type": "free",
|
||||
@@ -14199,6 +14130,19 @@
|
||||
],
|
||||
"regexp": "https?://www\\.taptap\\.io/post/(\\d+)"
|
||||
},
|
||||
"tarangplus": {
|
||||
"name": "tarangplus",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"tarangplus.in"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.)?tarangplus\\.in/([^#?/]+)/([^#?/]+)/episodes/?(?:$|[?#])",
|
||||
"https?://(?:www\\.)?tarangplus\\.in/([^#?/]+)/all/?(?:$|[?#])",
|
||||
"https?://(?:www\\.)?tarangplus\\.in/(?:movies|[^#?/]+/[^#?/]+)/(?!episodes)([^#?/]+)"
|
||||
],
|
||||
"regexp": "(https?://(?:www\\.)?tarangplus\\.in/([^#?/]+)/([^#?/]+)/episodes/?(?:$|[?#]))|(https?://(?:www\\.)?tarangplus\\.in/([^#?/]+)/all/?(?:$|[?#]))|(https?://(?:www\\.)?tarangplus\\.in/(?:movies|[^#?/]+/[^#?/]+)/(?!episodes)([^#?/]+))"
|
||||
},
|
||||
"tass": {
|
||||
"name": "tass",
|
||||
"type": "free",
|
||||
@@ -15152,10 +15096,10 @@
|
||||
"tubitv.com"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.)?tubitv\\.com/(video|movies|tv-shows)/(\\d+)",
|
||||
"https?://(?:www\\.)?tubitv\\.com/(?:[a-z]{2}-[a-z]{2}/)?(video|movies|tv-shows)/(\\d+)",
|
||||
"https?://(?:www\\.)?tubitv\\.com/series/\\d+/([^/?#]+)(?:/season-(\\d+))?"
|
||||
],
|
||||
"regexp": "(https?://(?:www\\.)?tubitv\\.com/(video|movies|tv-shows)/(\\d+))|(https?://(?:www\\.)?tubitv\\.com/series/\\d+/([^/?#]+)(?:/season-(\\d+))?)"
|
||||
"regexp": "(https?://(?:www\\.)?tubitv\\.com/(?:[a-z]{2}-[a-z]{2}/)?(video|movies|tv-shows)/(\\d+))|(https?://(?:www\\.)?tubitv\\.com/series/\\d+/([^/?#]+)(?:/season-(\\d+))?)"
|
||||
},
|
||||
"tumblr": {
|
||||
"name": "tumblr",
|
||||
@@ -15304,10 +15248,10 @@
|
||||
"tv5unis.ca"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://(?:www\\.)?tv5unis\\.ca/videos/([^/]+)(?:/saisons/(\\d+)/episodes/(\\d+))?/?(?:[?#&]|$)",
|
||||
"https?://(?:www\\.)?tv5unis\\.ca/videos/[^/]+/(\\d+)"
|
||||
"https?://(?:www\\.)?tv5unis\\.ca/videos/([^/?#]+)(?:/saisons/(\\d+)/episodes/(\\d+))?/?(?:[?#&]|$)",
|
||||
"https?://(?:www\\.)?tv5unis\\.ca/videos/[^/?#]+/(\\d+)"
|
||||
],
|
||||
"regexp": "(https?://(?:www\\.)?tv5unis\\.ca/videos/([^/]+)(?:/saisons/(\\d+)/episodes/(\\d+))?/?(?:[?#&]|$))|(https?://(?:www\\.)?tv5unis\\.ca/videos/[^/]+/(\\d+))"
|
||||
"regexp": "(https?://(?:www\\.)?tv5unis\\.ca/videos/([^/?#]+)(?:/saisons/(\\d+)/episodes/(\\d+))?/?(?:[?#&]|$))|(https?://(?:www\\.)?tv5unis\\.ca/videos/[^/?#]+/(\\d+))"
|
||||
},
|
||||
"tv8.it": {
|
||||
"name": "tv8.it",
|
||||
@@ -17315,8 +17259,6 @@
|
||||
"name": "yahoo",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"screen.yahoo.com",
|
||||
"uk.screen.yahoo.com",
|
||||
"news.yahoo.com",
|
||||
"yahoo.com",
|
||||
"gma.yahoo.com",
|
||||
@@ -17329,9 +17271,10 @@
|
||||
],
|
||||
"regexps": [
|
||||
"(https?://(?:([a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\\.)?(?:[\\da-zA-Z_-]+\\.)?yahoo\\.com/(?:[^/]+/)*([^?&#]*-[0-9]+(?:-[a-z]+)?)\\.html)",
|
||||
"https?://news\\.yahoo\\.co\\.jp/(?:articles|feature)/([a-zA-Z0-9]+)"
|
||||
"https?://news\\.yahoo\\.co\\.jp/(?:articles|feature)/([a-zA-Z0-9]+)",
|
||||
"yvsearch(|[1-9][0-9]*|all):([\\s\\S]+)"
|
||||
],
|
||||
"regexp": "((https?://(?:([a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\\.)?(?:[\\da-zA-Z_-]+\\.)?yahoo\\.com/(?:[^/]+/)*([^?&#]*-[0-9]+(?:-[a-z]+)?)\\.html))|(https?://news\\.yahoo\\.co\\.jp/(?:articles|feature)/([a-zA-Z0-9]+))"
|
||||
"regexp": "((https?://(?:([a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\\.)?(?:[\\da-zA-Z_-]+\\.)?yahoo\\.com/(?:[^/]+/)*([^?&#]*-[0-9]+(?:-[a-z]+)?)\\.html))|(https?://news\\.yahoo\\.co\\.jp/(?:articles|feature)/([a-zA-Z0-9]+))|(yvsearch(|[1-9][0-9]*|all):([\\s\\S]+))"
|
||||
},
|
||||
"yandexdisk": {
|
||||
"name": "yandexdisk",
|
||||
@@ -17718,14 +17661,17 @@
|
||||
"name": "zdf",
|
||||
"type": "free",
|
||||
"domains": [
|
||||
"zdf.de"
|
||||
"zdf.de",
|
||||
"zdfheute.de",
|
||||
"logo.de"
|
||||
],
|
||||
"regexps": [
|
||||
"https?://www\\.zdf\\.de/(?:[^/?#]+/)*([^/?#]+)",
|
||||
"https?://(?:www\\.)?zdf\\.de/(?:video|play)/(?:[^/?#]+/)*([^/?#]+)",
|
||||
"https?://(?:www\\.)?zdf\\.de/(?:[^/?#]+/)*([^/?#]+)\\.html"
|
||||
"https?://(?:www\\.)?zdf\\.de/(?:[^/?#]+/)*([^/?#]+)\\.html",
|
||||
"https?://(?:www\\.)?(?:zdfheute|logo)\\.de/(?:[^/?#]+/)*([^/?#]+)\\.html"
|
||||
],
|
||||
"regexp": "(https?://www\\.zdf\\.de/(?:[^/?#]+/)*([^/?#]+))|(https?://(?:www\\.)?zdf\\.de/(?:video|play)/(?:[^/?#]+/)*([^/?#]+))|(https?://(?:www\\.)?zdf\\.de/(?:[^/?#]+/)*([^/?#]+)\\.html)"
|
||||
"regexp": "(https?://www\\.zdf\\.de/(?:[^/?#]+/)*([^/?#]+))|(https?://(?:www\\.)?zdf\\.de/(?:video|play)/(?:[^/?#]+/)*([^/?#]+))|(https?://(?:www\\.)?zdf\\.de/(?:[^/?#]+/)*([^/?#]+)\\.html)|(https?://(?:www\\.)?(?:zdfheute|logo)\\.de/(?:[^/?#]+/)*([^/?#]+)\\.html)"
|
||||
},
|
||||
"zee5": {
|
||||
"name": "zee5",
|
||||
|
||||
@@ -8,6 +8,9 @@ import requests
import sys
import json
import subprocess

from API.HTTP import HTTPClient
from ProviderCore.base import SearchResult
try:  # Optional dependency for IMDb scraping
    from imdbinfo.services import search_title  # type: ignore
except ImportError:  # pragma: no cover - optional
@@ -15,6 +18,7 @@ except ImportError:  # pragma: no cover - optional

from SYS.logger import log, debug
from SYS.metadata import imdb_tag
from SYS.json_table import normalize_record

try:  # Optional dependency
    import musicbrainzngs  # type: ignore
@@ -892,6 +896,524 @@ class YtdlpMetadataProvider(MetadataProvider):
        return out


def _coerce_archive_field_list(value: Any) -> List[str]:
    """Coerce an Archive.org metadata field to a list of strings."""

    if value is None:
        return []
    if isinstance(value, list):
        out: List[str] = []
        for v in value:
            try:
                s = str(v).strip()
            except Exception:
                continue
            if s:
                out.append(s)
        return out
    if isinstance(value, (tuple, set)):
        out = []
        for v in value:
            try:
                s = str(v).strip()
            except Exception:
                continue
            if s:
                out.append(s)
        return out
    try:
        s = str(value).strip()
    except Exception:
        return []
    return [s] if s else []
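A quick usage sketch of the coercion above (values are illustrative):

_coerce_archive_field_list(["Jane Doe", "", 42])  # -> ["Jane Doe", "42"]
_coerce_archive_field_list("Solo Author")         # -> ["Solo Author"]
_coerce_archive_field_list(None)                  # -> []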
|
||||
|
||||
|
||||
def archive_item_metadata_to_tags(archive_id: str,
|
||||
item_metadata: Dict[str, Any]) -> List[str]:
|
||||
"""Coerce Archive.org metadata into a stable set of bibliographic tags."""
|
||||
|
||||
archive_id_clean = str(archive_id or "").strip()
|
||||
meta = item_metadata if isinstance(item_metadata, dict) else {}
|
||||
|
||||
tags: List[str] = []
|
||||
seen: set[str] = set()
|
||||
|
||||
def _add(tag: str) -> None:
|
||||
try:
|
||||
t = str(tag).strip()
|
||||
except Exception:
|
||||
return
|
||||
if not t:
|
||||
return
|
||||
if t.lower() in seen:
|
||||
return
|
||||
seen.add(t.lower())
|
||||
tags.append(t)
|
||||
|
||||
if archive_id_clean:
|
||||
_add(f"internet_archive:{archive_id_clean}")
|
||||
|
||||
for title in _coerce_archive_field_list(meta.get("title"))[:1]:
|
||||
_add(f"title:{title}")
|
||||
|
||||
creators: List[str] = []
|
||||
creators.extend(_coerce_archive_field_list(meta.get("creator")))
|
||||
creators.extend(_coerce_archive_field_list(meta.get("author")))
|
||||
for creator in creators[:3]:
|
||||
_add(f"author:{creator}")
|
||||
|
||||
for publisher in _coerce_archive_field_list(meta.get("publisher"))[:3]:
|
||||
_add(f"publisher:{publisher}")
|
||||
|
||||
for date_val in _coerce_archive_field_list(meta.get("date"))[:1]:
|
||||
_add(f"publish_date:{date_val}")
|
||||
for year_val in _coerce_archive_field_list(meta.get("year"))[:1]:
|
||||
_add(f"publish_date:{year_val}")
|
||||
|
||||
for lang in _coerce_archive_field_list(meta.get("language"))[:3]:
|
||||
_add(f"language:{lang}")
|
||||
|
||||
for subj in _coerce_archive_field_list(meta.get("subject"))[:15]:
|
||||
if len(subj) > 200:
|
||||
subj = subj[:200]
|
||||
_add(subj)
|
||||
|
||||
def _clean_isbn(raw: str) -> str:
|
||||
return str(raw or "").replace("-", "").strip()
|
||||
|
||||
for isbn in _coerce_archive_field_list(meta.get("isbn"))[:10]:
|
||||
isbn_clean = _clean_isbn(isbn)
|
||||
if isbn_clean:
|
||||
_add(f"isbn:{isbn_clean}")
|
||||
|
||||
identifiers: List[str] = []
|
||||
identifiers.extend(_coerce_archive_field_list(meta.get("identifier")))
|
||||
identifiers.extend(_coerce_archive_field_list(meta.get("external-identifier")))
|
||||
added_other = 0
|
||||
for ident in identifiers:
|
||||
ident_s = str(ident or "").strip()
|
||||
if not ident_s:
|
||||
continue
|
||||
low = ident_s.lower()
|
||||
|
||||
if low.startswith("urn:isbn:"):
|
||||
val = _clean_isbn(ident_s.split(":", 2)[-1])
|
||||
if val:
|
||||
_add(f"isbn:{val}")
|
||||
continue
|
||||
if low.startswith("isbn:"):
|
||||
val = _clean_isbn(ident_s.split(":", 1)[-1])
|
||||
if val:
|
||||
_add(f"isbn:{val}")
|
||||
continue
|
||||
if low.startswith("urn:oclc:"):
|
||||
val = ident_s.split(":", 2)[-1].strip()
|
||||
if val:
|
||||
_add(f"oclc:{val}")
|
||||
continue
|
||||
if low.startswith("oclc:"):
|
||||
val = ident_s.split(":", 1)[-1].strip()
|
||||
if val:
|
||||
_add(f"oclc:{val}")
|
||||
continue
|
||||
if low.startswith("urn:lccn:"):
|
||||
val = ident_s.split(":", 2)[-1].strip()
|
||||
if val:
|
||||
_add(f"lccn:{val}")
|
||||
continue
|
||||
if low.startswith("lccn:"):
|
||||
val = ident_s.split(":", 1)[-1].strip()
|
||||
if val:
|
||||
_add(f"lccn:{val}")
|
||||
continue
|
||||
if low.startswith("doi:"):
|
||||
val = ident_s.split(":", 1)[-1].strip()
|
||||
if val:
|
||||
_add(f"doi:{val}")
|
||||
continue
|
||||
|
||||
if archive_id_clean and low == archive_id_clean.lower():
|
||||
continue
|
||||
if added_other >= 5:
|
||||
continue
|
||||
if len(ident_s) > 200:
|
||||
ident_s = ident_s[:200]
|
||||
_add(f"identifier:{ident_s}")
|
||||
added_other += 1
|
||||
|
||||
return tags
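A usage sketch of the tag mapping above (the identifier and metadata values are illustrative):

meta = {"title": "A Sample Book", "creator": ["Jane Doe"], "isbn": ["978-0-00-000000-2"]}
archive_item_metadata_to_tags("samplearchive123", meta)
# -> ["internet_archive:samplearchive123", "title:A Sample Book",
#     "author:Jane Doe", "isbn:9780000000002"]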
|
||||
|
||||
|
||||
def fetch_archive_item_metadata(archive_id: str,
                                *,
                                timeout: int = 8) -> Dict[str, Any]:
    ident = str(archive_id or "").strip()
    if not ident:
        return {}
    resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=int(timeout))
    resp.raise_for_status()
    data = resp.json() if resp is not None else {}
    if not isinstance(data, dict):
        return {}
    meta = data.get("metadata")
    return meta if isinstance(meta, dict) else {}
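Taken together, a sketch of the intended flow (the Archive.org identifier is illustrative):

item_meta = fetch_archive_item_metadata("samplearchive123", timeout=8)
tags = archive_item_metadata_to_tags("samplearchive123", item_meta)
for tag in tags:
    print(tag)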
|
||||
|
||||
|
||||
def scrape_isbn_metadata(isbn: str) -> List[str]:
|
||||
"""Scrape metadata tags for an ISBN using OpenLibrary's books API."""
|
||||
|
||||
new_tags: List[str] = []
|
||||
|
||||
isbn_clean = str(isbn or "").replace("isbn:", "").replace("-", "").strip()
|
||||
if not isbn_clean:
|
||||
return []
|
||||
|
||||
url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"
|
||||
try:
|
||||
with HTTPClient() as client:
|
||||
response = client.get(url)
|
||||
response.raise_for_status()
|
||||
data = json.loads(response.content.decode("utf-8"))
|
||||
except Exception as exc:
|
||||
log(f"Failed to fetch ISBN metadata: {exc}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
if not data:
|
||||
log(f"No ISBN metadata found for: {isbn}")
|
||||
return []
|
||||
|
||||
book_data = next(iter(data.values()), None)
|
||||
if not isinstance(book_data, dict):
|
||||
return []
|
||||
|
||||
if "title" in book_data:
|
||||
new_tags.append(f"title:{book_data['title']}")
|
||||
|
||||
authors = book_data.get("authors")
|
||||
if isinstance(authors, list):
|
||||
for author in authors[:3]:
|
||||
if isinstance(author, dict) and author.get("name"):
|
||||
new_tags.append(f"author:{author['name']}")
|
||||
|
||||
if book_data.get("publish_date"):
|
||||
new_tags.append(f"publish_date:{book_data['publish_date']}")
|
||||
|
||||
publishers = book_data.get("publishers")
|
||||
if isinstance(publishers, list) and publishers:
|
||||
pub = publishers[0]
|
||||
if isinstance(pub, dict) and pub.get("name"):
|
||||
new_tags.append(f"publisher:{pub['name']}")
|
||||
|
||||
if "description" in book_data:
|
||||
desc = book_data.get("description")
|
||||
if isinstance(desc, dict) and "value" in desc:
|
||||
desc = desc.get("value")
|
||||
if desc:
|
||||
desc_str = str(desc).strip()
|
||||
if desc_str:
|
||||
new_tags.append(f"description:{desc_str[:200]}")
|
||||
|
||||
page_count = book_data.get("number_of_pages")
|
||||
if isinstance(page_count, int) and page_count > 0:
|
||||
new_tags.append(f"pages:{page_count}")
|
||||
|
||||
identifiers = book_data.get("identifiers")
|
||||
if isinstance(identifiers, dict):
|
||||
|
||||
def _first(value: Any) -> Any:
|
||||
if isinstance(value, list) and value:
|
||||
return value[0]
|
||||
return value
|
||||
|
||||
for key, ns in (
|
||||
("openlibrary", "openlibrary"),
|
||||
("lccn", "lccn"),
|
||||
("oclc", "oclc"),
|
||||
("goodreads", "goodreads"),
|
||||
("librarything", "librarything"),
|
||||
("doi", "doi"),
|
||||
("internet_archive", "internet_archive"),
|
||||
):
|
||||
val = _first(identifiers.get(key))
|
||||
if val:
|
||||
new_tags.append(f"{ns}:{val}")
|
||||
|
||||
debug(f"Found {len(new_tags)} tag(s) from ISBN lookup")
|
||||
return new_tags
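A usage sketch (the ISBN is illustrative; the exact tags depend on what OpenLibrary returns for it):

tags = scrape_isbn_metadata("978-0-13-468599-1")
# e.g. ["title:...", "author:...", "publish_date:...", "publisher:...", "openlibrary:..."]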
|
||||
|
||||
|
||||
def scrape_openlibrary_metadata(olid: str) -> List[str]:
|
||||
"""Scrape metadata tags for an OpenLibrary ID using the edition JSON endpoint."""
|
||||
|
||||
new_tags: List[str] = []
|
||||
|
||||
olid_text = str(olid or "").strip()
|
||||
if not olid_text:
|
||||
return []
|
||||
|
||||
olid_norm = olid_text
|
||||
try:
|
||||
if not olid_norm.startswith("OL"):
|
||||
olid_norm = f"OL{olid_norm}"
|
||||
if not olid_norm.endswith("M"):
|
||||
olid_norm = f"{olid_norm}M"
|
||||
except Exception:
|
||||
olid_norm = olid_text
|
||||
|
||||
new_tags.append(f"openlibrary:{olid_norm}")
|
||||
|
||||
olid_clean = olid_text.replace("OL", "").replace("M", "")
|
||||
if not olid_clean.isdigit():
|
||||
olid_clean = olid_text
|
||||
|
||||
if not olid_text.startswith("OL"):
|
||||
url = f"https://openlibrary.org/books/OL{olid_clean}M.json"
|
||||
else:
|
||||
url = f"https://openlibrary.org/books/{olid_text}.json"
|
||||
|
||||
try:
|
||||
with HTTPClient() as client:
|
||||
response = client.get(url)
|
||||
response.raise_for_status()
|
||||
data = json.loads(response.content.decode("utf-8"))
|
||||
except Exception as exc:
|
||||
log(f"Failed to fetch OpenLibrary metadata: {exc}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
if not isinstance(data, dict) or not data:
|
||||
log(f"No OpenLibrary metadata found for: {olid_text}")
|
||||
return []
|
||||
|
||||
if "title" in data:
|
||||
new_tags.append(f"title:{data['title']}")
|
||||
|
||||
authors = data.get("authors")
|
||||
if isinstance(authors, list):
|
||||
for author in authors[:3]:
|
||||
if isinstance(author, dict) and author.get("name"):
|
||||
new_tags.append(f"author:{author['name']}")
|
||||
continue
|
||||
|
||||
author_key = None
|
||||
if isinstance(author, dict):
|
||||
if isinstance(author.get("author"), dict):
|
||||
author_key = author.get("author", {}).get("key")
|
||||
if not author_key:
|
||||
author_key = author.get("key")
|
||||
|
||||
if isinstance(author_key, str) and author_key.startswith("/"):
|
||||
try:
|
||||
author_url = f"https://openlibrary.org{author_key}.json"
|
||||
with HTTPClient(timeout=10) as client:
|
||||
author_resp = client.get(author_url)
|
||||
author_resp.raise_for_status()
|
||||
author_data = json.loads(author_resp.content.decode("utf-8"))
|
||||
if isinstance(author_data, dict) and author_data.get("name"):
|
||||
new_tags.append(f"author:{author_data['name']}")
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if isinstance(author, str) and author:
|
||||
new_tags.append(f"author:{author}")
|
||||
|
||||
if data.get("publish_date"):
|
||||
new_tags.append(f"publish_date:{data['publish_date']}")
|
||||
|
||||
publishers = data.get("publishers")
|
||||
if isinstance(publishers, list) and publishers:
|
||||
pub = publishers[0]
|
||||
if isinstance(pub, dict) and pub.get("name"):
|
||||
new_tags.append(f"publisher:{pub['name']}")
|
||||
elif isinstance(pub, str) and pub:
|
||||
new_tags.append(f"publisher:{pub}")
|
||||
|
||||
if "description" in data:
|
||||
desc = data.get("description")
|
||||
if isinstance(desc, dict) and "value" in desc:
|
||||
desc = desc.get("value")
|
||||
if desc:
|
||||
desc_str = str(desc).strip()
|
||||
if desc_str:
|
||||
new_tags.append(f"description:{desc_str[:200]}")
|
||||
|
||||
page_count = data.get("number_of_pages")
|
||||
if isinstance(page_count, int) and page_count > 0:
|
||||
new_tags.append(f"pages:{page_count}")
|
||||
|
||||
subjects = data.get("subjects")
|
||||
if isinstance(subjects, list):
|
||||
for subject in subjects[:10]:
|
||||
if isinstance(subject, str):
|
||||
subject_clean = subject.strip()
|
||||
if subject_clean and subject_clean not in new_tags:
|
||||
new_tags.append(subject_clean)
|
||||
|
||||
identifiers = data.get("identifiers")
|
||||
if isinstance(identifiers, dict):
|
||||
|
||||
def _first(value: Any) -> Any:
|
||||
if isinstance(value, list) and value:
|
||||
return value[0]
|
||||
return value
|
||||
|
||||
for key, ns in (
|
||||
("isbn_10", "isbn_10"),
|
||||
("isbn_13", "isbn_13"),
|
||||
("lccn", "lccn"),
|
||||
("oclc_numbers", "oclc"),
|
||||
("goodreads", "goodreads"),
|
||||
("internet_archive", "internet_archive"),
|
||||
):
|
||||
val = _first(identifiers.get(key))
|
||||
if val:
|
||||
new_tags.append(f"{ns}:{val}")
|
||||
|
||||
ocaid = data.get("ocaid")
|
||||
if isinstance(ocaid, str) and ocaid.strip():
|
||||
new_tags.append(f"internet_archive:{ocaid.strip()}")
|
||||
|
||||
debug(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup")
|
||||
return new_tags
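The OLID counterpart, as a sketch (the OLID is illustrative; bare digits are normalized to the OL...M edition form before the request):

scrape_openlibrary_metadata("OL9674499M")  # canonical edition ID
scrape_openlibrary_metadata("9674499")     # requested as OL9674499M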
|
||||
|
||||
|
||||
SAMPLE_ITEMS: List[Dict[str, Any]] = [
|
||||
{
|
||||
"title": "Sample OpenLibrary book",
|
||||
"path": "https://openlibrary.org/books/OL123M",
|
||||
"openlibrary_id": "OL123M",
|
||||
"archive_id": "samplearchive123",
|
||||
"availability": "borrow",
|
||||
"availability_reason": "sample",
|
||||
"direct_url": "https://archive.org/download/sample.pdf",
|
||||
"author_name": ["OpenLibrary Demo"],
|
||||
"first_publish_year": 2023,
|
||||
"ia": ["samplearchive123"],
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
try:
|
||||
from typing import Iterable
|
||||
|
||||
from SYS.result_table_api import ColumnSpec, ResultModel, metadata_column, title_column
|
||||
from SYS.result_table_adapters import register_provider
|
||||
|
||||
def _ensure_search_result(item: Any) -> SearchResult:
|
||||
if isinstance(item, SearchResult):
|
||||
return item
|
||||
if isinstance(item, dict):
|
||||
data = dict(item)
|
||||
title = str(data.get("title") or data.get("name") or "OpenLibrary")
|
||||
path = str(data.get("path") or data.get("url") or "")
|
||||
detail = str(data.get("detail") or "")
|
||||
annotations = list(data.get("annotations") or [])
|
||||
media_kind = str(data.get("media_kind") or "book")
|
||||
return SearchResult(
|
||||
table="openlibrary",
|
||||
title=title,
|
||||
path=path,
|
||||
detail=detail,
|
||||
annotations=annotations,
|
||||
media_kind=media_kind,
|
||||
columns=data.get("columns") or [],
|
||||
full_metadata={**data, "raw": dict(item)},
|
||||
)
|
||||
return SearchResult(
|
||||
table="openlibrary",
|
||||
title=str(item or "OpenLibrary"),
|
||||
path="",
|
||||
detail="",
|
||||
annotations=[],
|
||||
media_kind="book",
|
||||
full_metadata={"raw": {}},
|
||||
)
|
||||
|
||||
def _adapter(items: Iterable[Any]) -> Iterable[ResultModel]:
|
||||
for item in items:
|
||||
sr = _ensure_search_result(item)
|
||||
metadata = dict(getattr(sr, "full_metadata", {}) or {})
|
||||
raw = metadata.get("raw")
|
||||
if isinstance(raw, dict):
|
||||
normalized = normalize_record(raw)
|
||||
for key, val in normalized.items():
|
||||
metadata.setdefault(key, val)
|
||||
|
||||
def _make_url() -> str:
|
||||
candidate = (
|
||||
metadata.get("selection_url") or
|
||||
metadata.get("direct_url") or
|
||||
metadata.get("url") or
|
||||
metadata.get("path") or
|
||||
sr.path or
|
||||
""
|
||||
)
|
||||
return str(candidate or "").strip()
|
||||
|
||||
selection_url = _make_url()
|
||||
if selection_url:
|
||||
metadata["selection_url"] = selection_url
|
||||
authors_value = metadata.get("authors_display") or metadata.get("authors") or metadata.get("author_name") or ""
|
||||
if isinstance(authors_value, list):
|
||||
authors_value = ", ".join(str(v) for v in authors_value if v)
|
||||
authors_text = str(authors_value or "").strip()
|
||||
if authors_text:
|
||||
metadata["authors_display"] = authors_text
|
||||
year_value = metadata.get("year") or metadata.get("first_publish_year")
|
||||
if year_value and not isinstance(year_value, str):
|
||||
year_value = str(year_value)
|
||||
if year_value:
|
||||
metadata["year"] = str(year_value)
|
||||
metadata.setdefault("openlibrary_id", metadata.get("openlibrary_id") or metadata.get("olid"))
|
||||
metadata.setdefault("source", metadata.get("source") or "openlibrary")
|
||||
yield ResultModel(
|
||||
title=str(sr.title or metadata.get("title") or selection_url or "OpenLibrary"),
|
||||
path=selection_url or None,
|
||||
metadata=metadata,
|
||||
source="openlibrary",
|
||||
)
|
||||
|
||||
def _columns_factory(rows: List[ResultModel]) -> List[ColumnSpec]:
|
||||
cols: List[ColumnSpec] = [title_column()]
|
||||
def _has(key: str) -> bool:
|
||||
return any((row.metadata or {}).get(key) for row in rows)
|
||||
|
||||
if _has("authors_display"):
|
||||
cols.append(
|
||||
ColumnSpec(
|
||||
"authors_display",
|
||||
"Author",
|
||||
lambda r: (r.metadata or {}).get("authors_display") or "",
|
||||
)
|
||||
)
|
||||
if _has("year"):
|
||||
cols.append(metadata_column("year", "Year"))
|
||||
if _has("availability"):
|
||||
cols.append(metadata_column("availability", "Avail"))
|
||||
if _has("archive_id"):
|
||||
cols.append(metadata_column("archive_id", "Archive ID"))
|
||||
if _has("openlibrary_id"):
|
||||
cols.append(metadata_column("openlibrary_id", "OLID"))
|
||||
return cols
|
||||
|
||||
def _selection_fn(row: ResultModel) -> List[str]:
|
||||
metadata = row.metadata or {}
|
||||
url = str(metadata.get("selection_url") or row.path or "").strip()
|
||||
if url:
|
||||
return ["-url", url]
|
||||
return ["-title", row.title or ""]
|
||||
|
||||
register_provider(
|
||||
"openlibrary",
|
||||
_adapter,
|
||||
columns=_columns_factory,
|
||||
selection_fn=_selection_fn,
|
||||
metadata={"description": "OpenLibrary search provider (JSON result table template)"},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# Registry ---------------------------------------------------------------
|
||||
|
||||
_METADATA_PROVIDERS: Dict[str,
|
||||
|
||||
@@ -11,18 +11,29 @@ import sys
import tempfile
import time
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from urllib.parse import urlparse

import requests

from API.HTTP import HTTPClient
from API.HTTP import HTTPClient, get_requests_verify_value
from ProviderCore.base import Provider, SearchResult
from SYS.utils import sanitize_filename
from SYS.cli_syntax import get_field, get_free_text, parse_query
from SYS.logger import debug, log
from Provider.metadata_provider import (
    archive_item_metadata_to_tags,
    fetch_archive_item_metadata,
)
from SYS.utils import unique_path

_ARCHIVE_VERIFY_VALUE = get_requests_verify_value()

def _create_archive_session() -> requests.Session:
    session = requests.Session()
    session.verify = _ARCHIVE_VERIFY_VALUE
    return session
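As a sketch of the intent: archive.org requests in this module now carry the shared verify value, either through this session factory or through an explicit verify= argument on one-off calls (the URL below is illustrative):

session = _create_archive_session()
resp = session.get("https://archive.org/metadata/samplearchive123", timeout=8)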
|
||||
|
||||
try:
|
||||
from Crypto.Cipher import AES # type: ignore
|
||||
from Crypto.Util import Counter # type: ignore
|
||||
@@ -262,182 +273,6 @@ def title_hint_from_url_slug(u: str) -> str:
|
||||
return slug or "OpenLibrary"
|
||||
|
||||
|
||||
def _coerce_archive_field_list(value: Any) -> List[str]:
|
||||
"""Coerce an Archive.org metadata field to a list of strings."""
|
||||
if value is None:
|
||||
return []
|
||||
if isinstance(value, list):
|
||||
out: List[str] = []
|
||||
for v in value:
|
||||
try:
|
||||
s = str(v).strip()
|
||||
except Exception:
|
||||
continue
|
||||
if s:
|
||||
out.append(s)
|
||||
return out
|
||||
if isinstance(value, (tuple, set)):
|
||||
out = []
|
||||
for v in value:
|
||||
try:
|
||||
s = str(v).strip()
|
||||
except Exception:
|
||||
continue
|
||||
if s:
|
||||
out.append(s)
|
||||
return out
|
||||
try:
|
||||
s = str(value).strip()
|
||||
except Exception:
|
||||
return []
|
||||
return [s] if s else []
|
||||
|
||||
|
||||
def _archive_item_metadata_to_tags(archive_id: str,
|
||||
item_metadata: Dict[str,
|
||||
Any]) -> List[str]:
|
||||
"""Map Archive.org metadata JSON (the `metadata` object) to tag strings.
|
||||
|
||||
This is intentionally best-effort and conservative: it focuses on stable,
|
||||
useful bibliographic fields (title/author/publisher/ISBN/identifier/topics).
|
||||
"""
|
||||
archive_id_clean = str(archive_id or "").strip()
|
||||
meta = item_metadata if isinstance(item_metadata,
|
||||
dict) else {}
|
||||
|
||||
tags: List[str] = []
|
||||
seen: set[str] = set()
|
||||
|
||||
def _add(tag: str) -> None:
|
||||
try:
|
||||
t = str(tag).strip()
|
||||
except Exception:
|
||||
return
|
||||
if not t:
|
||||
return
|
||||
if t.lower() in seen:
|
||||
return
|
||||
seen.add(t.lower())
|
||||
tags.append(t)
|
||||
|
||||
if archive_id_clean:
|
||||
_add(f"internet_archive:{archive_id_clean}")
|
||||
|
||||
# Title
|
||||
for title in _coerce_archive_field_list(meta.get("title"))[:1]:
|
||||
_add(f"title:{title}")
|
||||
|
||||
# Authors/creators
|
||||
creators: List[str] = []
|
||||
creators.extend(_coerce_archive_field_list(meta.get("creator")))
|
||||
creators.extend(_coerce_archive_field_list(meta.get("author")))
|
||||
for creator in creators[:3]:
|
||||
_add(f"author:{creator}")
|
||||
|
||||
# Publisher
|
||||
for publisher in _coerce_archive_field_list(meta.get("publisher"))[:3]:
|
||||
_add(f"publisher:{publisher}")
|
||||
|
||||
# Publish date/year
|
||||
for date_val in _coerce_archive_field_list(meta.get("date"))[:1]:
|
||||
_add(f"publish_date:{date_val}")
|
||||
for year_val in _coerce_archive_field_list(meta.get("year"))[:1]:
|
||||
_add(f"publish_date:{year_val}")
|
||||
|
||||
# Language
|
||||
for lang in _coerce_archive_field_list(meta.get("language"))[:3]:
|
||||
_add(f"language:{lang}")
|
||||
|
||||
# Topics/subjects: follow existing OpenLibrary behavior (un-namespaced tags)
|
||||
for subj in _coerce_archive_field_list(meta.get("subject"))[:15]:
|
||||
if len(subj) > 200:
|
||||
subj = subj[:200]
|
||||
_add(subj)
|
||||
|
||||
# ISBNs and identifiers
|
||||
def _clean_isbn(raw: str) -> str:
|
||||
return str(raw or "").replace("-", "").strip()
|
||||
|
||||
for isbn in _coerce_archive_field_list(meta.get("isbn"))[:10]:
|
||||
isbn_clean = _clean_isbn(isbn)
|
||||
if isbn_clean:
|
||||
_add(f"isbn:{isbn_clean}")
|
||||
|
||||
identifiers: List[str] = []
|
||||
identifiers.extend(_coerce_archive_field_list(meta.get("identifier")))
|
||||
identifiers.extend(_coerce_archive_field_list(meta.get("external-identifier")))
|
||||
added_other = 0
|
||||
for ident in identifiers:
|
||||
ident_s = str(ident or "").strip()
|
||||
if not ident_s:
|
||||
continue
|
||||
low = ident_s.lower()
|
||||
|
||||
if low.startswith("urn:isbn:"):
|
||||
val = _clean_isbn(ident_s.split(":", 2)[-1])
|
||||
if val:
|
||||
_add(f"isbn:{val}")
|
||||
continue
|
||||
if low.startswith("isbn:"):
|
||||
val = _clean_isbn(ident_s.split(":", 1)[-1])
|
||||
if val:
|
||||
_add(f"isbn:{val}")
|
||||
continue
|
||||
if low.startswith("urn:oclc:"):
|
||||
val = ident_s.split(":", 2)[-1].strip()
|
||||
if val:
|
||||
_add(f"oclc:{val}")
|
||||
continue
|
||||
if low.startswith("oclc:"):
|
||||
val = ident_s.split(":", 1)[-1].strip()
|
||||
if val:
|
||||
_add(f"oclc:{val}")
|
||||
continue
|
||||
if low.startswith("urn:lccn:"):
|
||||
val = ident_s.split(":", 2)[-1].strip()
|
||||
if val:
|
||||
_add(f"lccn:{val}")
|
||||
continue
|
||||
if low.startswith("lccn:"):
|
||||
val = ident_s.split(":", 1)[-1].strip()
|
||||
if val:
|
||||
_add(f"lccn:{val}")
|
||||
continue
|
||||
if low.startswith("doi:"):
|
||||
val = ident_s.split(":", 1)[-1].strip()
|
||||
if val:
|
||||
_add(f"doi:{val}")
|
||||
continue
|
||||
|
||||
if archive_id_clean and low == archive_id_clean.lower():
|
||||
continue
|
||||
if added_other >= 5:
|
||||
continue
|
||||
if len(ident_s) > 200:
|
||||
ident_s = ident_s[:200]
|
||||
_add(f"identifier:{ident_s}")
|
||||
added_other += 1
|
||||
|
||||
return tags
|
||||
|
||||
|
||||
def _fetch_archive_item_metadata(archive_id: str,
|
||||
*,
|
||||
timeout: int = 8) -> Dict[str,
|
||||
Any]:
|
||||
ident = str(archive_id or "").strip()
|
||||
if not ident:
|
||||
return {}
|
||||
resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=int(timeout))
|
||||
resp.raise_for_status()
|
||||
data = resp.json() if resp is not None else {}
|
||||
if not isinstance(data, dict):
|
||||
return {}
|
||||
meta = data.get("metadata")
|
||||
return meta if isinstance(meta,
|
||||
dict) else {}
|
||||
|
||||
|
||||
class OpenLibrary(Provider):
|
||||
|
||||
TABLE_AUTO_STAGES = {
|
||||
@@ -466,7 +301,7 @@ class OpenLibrary(Provider):

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        super().__init__(config)
        self._session = requests.Session()
        self._session = _create_archive_session()

    class BookNotAvailableError(Exception):
        """Raised when a book is not available for borrowing (waitlisted/in use)."""
|
||||
@@ -612,7 +447,7 @@ class OpenLibrary(Provider):
    @classmethod
    def _archive_login(cls, email: str, password: str) -> requests.Session:
        """Login to archive.org using the token-based services endpoint (matches test-login.py)."""
        session = requests.Session()
        session = _create_archive_session()

        token_resp = session.get(
            "https://archive.org/services/account/login/",
|
||||
@@ -766,7 +601,11 @@ class OpenLibrary(Provider):
        if not ident:
            return False, "no-archive-id"
        try:
            resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=8)
            resp = requests.get(
                f"https://archive.org/metadata/{ident}",
                timeout=8,
                verify=_ARCHIVE_VERIFY_VALUE,
            )
            resp.raise_for_status()
            data = resp.json() if resp is not None else {}
            meta = data.get("metadata",
|
||||
@@ -976,7 +815,11 @@ class OpenLibrary(Provider):
        """Check for a directly downloadable original PDF in Archive.org metadata."""
        try:
            metadata_url = f"https://archive.org/metadata/{book_id}"
            response = requests.get(metadata_url, timeout=6)
            response = requests.get(
                metadata_url,
                timeout=6,
                verify=_ARCHIVE_VERIFY_VALUE,
            )
            response.raise_for_status()
            metadata = response.json()
            files = metadata.get("files") if isinstance(metadata, dict) else None
@@ -993,7 +836,8 @@ class OpenLibrary(Provider):
            check_response = requests.head(
                pdf_url,
                timeout=4,
                allow_redirects=True
                allow_redirects=True,
                verify=_ARCHIVE_VERIFY_VALUE,
            )
            if check_response.status_code == 200:
                return True, pdf_url
|
||||
@@ -1001,235 +845,6 @@ class OpenLibrary(Provider):
|
||||
except Exception:
|
||||
return False, ""
|
||||
|
||||
@staticmethod
|
||||
def scrape_isbn_metadata(isbn: str) -> List[str]:
|
||||
"""Scrape tags for an ISBN using Open Library API.
|
||||
|
||||
Returns tags such as:
|
||||
- title:<...>, author:<...>, publish_date:<...>, publisher:<...>, description:<...>, pages:<...>
|
||||
- identifiers: openlibrary:<...>, lccn:<...>, oclc:<...>, goodreads:<...>, librarything:<...>, doi:<...>, internet_archive:<...>
|
||||
"""
|
||||
new_tags: List[str] = []
|
||||
|
||||
isbn_clean = str(isbn or "").replace("isbn:", "").replace("-", "").strip()
|
||||
if not isbn_clean:
|
||||
return []
|
||||
|
||||
url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"
|
||||
try:
|
||||
with HTTPClient() as client:
|
||||
response = client.get(url)
|
||||
response.raise_for_status()
|
||||
data = json_module.loads(response.content.decode("utf-8"))
|
||||
except Exception as exc:
|
||||
log(f"Failed to fetch ISBN metadata: {exc}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
if not data:
|
||||
log(f"No ISBN metadata found for: {isbn}")
|
||||
return []
|
||||
|
||||
book_data = next(iter(data.values()), None)
|
||||
if not isinstance(book_data, dict):
|
||||
return []
|
||||
|
||||
if "title" in book_data:
|
||||
new_tags.append(f"title:{book_data['title']}")
|
||||
|
||||
authors = book_data.get("authors")
|
||||
if isinstance(authors, list):
|
||||
for author in authors[:3]:
|
||||
if isinstance(author, dict) and author.get("name"):
|
||||
new_tags.append(f"author:{author['name']}")
|
||||
|
||||
if book_data.get("publish_date"):
|
||||
new_tags.append(f"publish_date:{book_data['publish_date']}")
|
||||
|
||||
publishers = book_data.get("publishers")
|
||||
if isinstance(publishers, list) and publishers:
|
||||
pub = publishers[0]
|
||||
if isinstance(pub, dict) and pub.get("name"):
|
||||
new_tags.append(f"publisher:{pub['name']}")
|
||||
|
||||
if "description" in book_data:
|
||||
desc = book_data.get("description")
|
||||
if isinstance(desc, dict) and "value" in desc:
|
||||
desc = desc.get("value")
|
||||
if desc:
|
||||
desc_str = str(desc).strip()
|
||||
if desc_str:
|
||||
new_tags.append(f"description:{desc_str[:200]}")
|
||||
|
||||
page_count = book_data.get("number_of_pages")
|
||||
if isinstance(page_count, int) and page_count > 0:
|
||||
new_tags.append(f"pages:{page_count}")
|
||||
|
||||
identifiers = book_data.get("identifiers")
|
||||
if isinstance(identifiers, dict):
|
||||
|
||||
def _first(value: Any) -> Any:
|
||||
if isinstance(value, list) and value:
|
||||
return value[0]
|
||||
return value
|
||||
|
||||
for key, ns in (
|
||||
("openlibrary", "openlibrary"),
|
||||
("lccn", "lccn"),
|
||||
("oclc", "oclc"),
|
||||
("goodreads", "goodreads"),
|
||||
("librarything", "librarything"),
|
||||
("doi", "doi"),
|
||||
("internet_archive", "internet_archive"),
|
||||
):
|
||||
val = _first(identifiers.get(key))
|
||||
if val:
|
||||
new_tags.append(f"{ns}:{val}")
|
||||
|
||||
debug(f"Found {len(new_tags)} tag(s) from ISBN lookup")
|
||||
return new_tags
|
||||
|
||||
    @staticmethod
    def scrape_openlibrary_metadata(olid: str) -> List[str]:
        """Scrape tags for an OpenLibrary ID using the .json API endpoint."""
        new_tags: List[str] = []

        olid_text = str(olid or "").strip()
        if not olid_text:
            return []

        # Normalize OLID to the common "OL<digits>M" form when possible.
        olid_norm = olid_text
        try:
            if not olid_norm.startswith("OL"):
                olid_norm = f"OL{olid_norm}"
            if not olid_norm.endswith("M"):
                olid_norm = f"{olid_norm}M"
        except Exception:
            olid_norm = olid_text

        # Ensure we always include a scrapeable identifier tag.
        new_tags.append(f"openlibrary:{olid_norm}")

        # Accept OL9674499M, 9674499M, or just digits.
        olid_clean = olid_text.replace("OL", "").replace("M", "")
        if not olid_clean.isdigit():
            olid_clean = olid_text

        if not olid_text.startswith("OL"):
            url = f"https://openlibrary.org/books/OL{olid_clean}M.json"
        else:
            url = f"https://openlibrary.org/books/{olid_text}.json"

        try:
            with HTTPClient() as client:
                response = client.get(url)
                response.raise_for_status()
                data = json_module.loads(response.content.decode("utf-8"))
        except Exception as exc:
            log(f"Failed to fetch OpenLibrary metadata: {exc}", file=sys.stderr)
            return []

        if not isinstance(data, dict) or not data:
            log(f"No OpenLibrary metadata found for: {olid_text}")
            return []

        if "title" in data:
            new_tags.append(f"title:{data['title']}")

        authors = data.get("authors")
        if isinstance(authors, list):
            for author in authors[:3]:
                if isinstance(author, dict) and author.get("name"):
                    new_tags.append(f"author:{author['name']}")
                    continue

                # Common OL shape: {"key": "/authors/OL...A"} or {"author": {"key": ...}}
                author_key = None
                if isinstance(author, dict):
                    if isinstance(author.get("author"), dict):
                        author_key = author.get("author", {}).get("key")
                    if not author_key:
                        author_key = author.get("key")

                if isinstance(author_key, str) and author_key.startswith("/"):
                    try:
                        author_url = f"https://openlibrary.org{author_key}.json"
                        with HTTPClient(timeout=10) as client:
                            author_resp = client.get(author_url)
                            author_resp.raise_for_status()
                            author_data = json_module.loads(
                                author_resp.content.decode("utf-8")
                            )
                        if isinstance(author_data, dict) and author_data.get("name"):
                            new_tags.append(f"author:{author_data['name']}")
                            continue
                    except Exception:
                        pass

                if isinstance(author, str) and author:
                    new_tags.append(f"author:{author}")

        if data.get("publish_date"):
            new_tags.append(f"publish_date:{data['publish_date']}")

        publishers = data.get("publishers")
        if isinstance(publishers, list) and publishers:
            pub = publishers[0]
            if isinstance(pub, dict) and pub.get("name"):
                new_tags.append(f"publisher:{pub['name']}")
            elif isinstance(pub, str) and pub:
                new_tags.append(f"publisher:{pub}")

        if "description" in data:
            desc = data.get("description")
            if isinstance(desc, dict) and "value" in desc:
                desc = desc.get("value")
            if desc:
                desc_str = str(desc).strip()
                if desc_str:
                    new_tags.append(f"description:{desc_str[:200]}")

        page_count = data.get("number_of_pages")
        if isinstance(page_count, int) and page_count > 0:
            new_tags.append(f"pages:{page_count}")

        subjects = data.get("subjects")
        if isinstance(subjects, list):
            for subject in subjects[:10]:
                if isinstance(subject, str):
                    subject_clean = subject.strip()
                    if subject_clean and subject_clean not in new_tags:
                        new_tags.append(subject_clean)

        identifiers = data.get("identifiers")
        if isinstance(identifiers, dict):

            def _first(value: Any) -> Any:
                if isinstance(value, list) and value:
                    return value[0]
                return value

            for key, ns in (
                ("isbn_10", "isbn_10"),
                ("isbn_13", "isbn_13"),
                ("lccn", "lccn"),
                ("oclc_numbers", "oclc"),
                ("goodreads", "goodreads"),
                ("internet_archive", "internet_archive"),
            ):
                val = _first(identifiers.get(key))
                if val:
                    new_tags.append(f"{ns}:{val}")

        # Some editions expose a direct Archive.org identifier as "ocaid".
        ocaid = data.get("ocaid")
        if isinstance(ocaid, str) and ocaid.strip():
            new_tags.append(f"internet_archive:{ocaid.strip()}")

        debug(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup")
        return new_tags

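    # Illustrative sketch (values assumed, not actual API output): a call such as
    # scrape_openlibrary_metadata("OL9674499M") would return namespaced tags like
    #   ["openlibrary:OL9674499M", "title:...", "author:...", "publish_date:...",
    #    "isbn_13:...", "internet_archive:..."]
    # which downstream cmdlets can attach to a result row as-is.
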
    def search(
        self,
        query: str,
@@ -1293,7 +908,7 @@ class OpenLibrary(Provider):
                    ia_val_local = []
                ia_ids_local = [str(x) for x in ia_val_local if x]

                session_local = requests.Session()
                session_local = _create_archive_session()

                try:
                    archive_id_local = _resolve_archive_id(
@@ -1423,27 +1038,15 @@ class OpenLibrary(Provider):
                              "borrow"}:
                    annotations.append(availability)

                results.append(
                    SearchResult(
                        table="openlibrary",
                        title=book_title,
                        path=(
                book_path = (
                    f"https://openlibrary.org/books/{edition_id}" if edition_id else
                    (
                        f"https://openlibrary.org{work_key}"
                        if isinstance(work_key,
                                      str) and work_key.startswith("/") else
                        if isinstance(work_key, str) and work_key.startswith("/") else
                        "https://openlibrary.org"
                    )
                ),
                        detail=(
                            (f"By: {', '.join(authors_list)}" if authors_list else "") +
                            (f" ({year})" if year else "")
                        ).strip(),
                        annotations=annotations,
                        media_kind="book",
                        columns=columns,
                        full_metadata={
                )
                metadata = {
                    "openlibrary_id": edition_id,
                    "openlibrary_key": work_key,
                    "authors": authors_list,
@@ -1456,7 +1059,25 @@ class OpenLibrary(Provider):
                    "archive_id": archive_id,
                    "direct_url": direct_url,
                    "raw": doc,
                        },
                }
                if book_path:
                    metadata["selection_url"] = book_path
                    metadata["_selection_args"] = ["-url", book_path]
                    metadata["_selection_action"] = ["download-file", "-url", book_path]

                results.append(
                    SearchResult(
                        table="openlibrary",
                        title=book_title,
                        path=book_path,
                        detail=(
                            (f"By: {', '.join(authors_list)}" if authors_list else "") +
                            (f" ({year})" if year else "")
                        ).strip(),
                        annotations=annotations,
                        media_kind="book",
                        columns=columns,
                        full_metadata=metadata,
                    )
                )

@@ -1507,8 +1128,8 @@ class OpenLibrary(Provider):

        # Best-effort metadata scrape to attach bibliographic tags for downstream cmdlets.
        try:
            archive_meta = _fetch_archive_item_metadata(archive_id)
            tags = _archive_item_metadata_to_tags(archive_id, archive_meta)
            archive_meta = fetch_archive_item_metadata(archive_id)
            tags = archive_item_metadata_to_tags(archive_id, archive_meta)
            if tags:
                try:
                    result.tag.update(tags)

SYS/json_table.py (new file, 110 lines)
@@ -0,0 +1,110 @@
"""Helper utilities for normalizing JSON result tables.

This mirrors the intent of the existing `SYS.html_table` helper but operates on
JSON payloads (API responses, JSON APIs, etc.). It exposes:

- `extract_records` for locating and normalizing the first list of record dicts
  from a JSON document.
- `normalize_record` for coercing arbitrary values into printable strings.

These helpers make it easy for providers that consume JSON to populate
`ResultModel` metadata without hand-writing ad-hoc sanitizers.
"""
from __future__ import annotations

from typing import Any, Dict, List, Optional, Sequence, Tuple

_DEFAULT_LIST_KEYS: Tuple[str, ...] = ("results", "items", "docs", "records")


def _coerce_value(value: Any) -> str:
    """Convert a JSON value into a compact string representation."""
    if value is None:
        return ""
    if isinstance(value, bool):
        return "true" if value else "false"
    if isinstance(value, (list, tuple, set)):
        parts = [_coerce_value(v) for v in value]
        cleaned = [part for part in parts if part]
        return ", ".join(cleaned)
    if isinstance(value, dict):
        parts: List[str] = []
        for subkey, subvalue in value.items():
            part = _coerce_value(subvalue)
            if part:
                parts.append(f"{subkey}:{part}")
        return ", ".join(parts)
    try:
        return str(value).strip()
    except Exception:
        return ""


def normalize_record(record: Dict[str, Any]) -> Dict[str, str]:
    """Return a copy of ``record`` with keys lowered and values coerced to strings."""
    out: Dict[str, str] = {}
    if not isinstance(record, dict):
        return out
    for key, value in record.items():
        normalized_key = str(key or "").strip().lower()
        if not normalized_key:
            continue
        normalized_value = _coerce_value(value)
        if normalized_value:
            out[normalized_key] = normalized_value
    return out


def _traverse(data: Any, path: Sequence[str]) -> Optional[Any]:
    current = data
    for key in path:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


def extract_records(
    data: Any,
    *,
    path: Optional[Sequence[str]] = None,
    list_keys: Optional[Sequence[str]] = None,
) -> Tuple[List[Dict[str, str]], Optional[str]]:
    """Extract normalized record dicts from ``data``.

    Args:
        data: JSON document (dict/list) that may contain tabular records.
        path: optional key path to traverse before looking for a list.
        list_keys: candidate keys to inspect when ``path`` is not provided.

    Returns:
        (records, chosen_path) where ``records`` is the list of normalized dicts
        and ``chosen_path`` is either the traversed path or the key that matched.
    """
    list_keys = list_keys or _DEFAULT_LIST_KEYS
    chosen_path: Optional[str] = None
    candidates: List[Any] = []

    if path:
        found = _traverse(data, path)
        if isinstance(found, list):
            candidates = found
            chosen_path = ".".join(path)

    if not candidates and isinstance(data, dict):
        for key in list_keys:
            found = data.get(key)
            if isinstance(found, list):
                candidates = found
                chosen_path = key
                break

    if not candidates and isinstance(data, list):
        candidates = data
        chosen_path = ""

    records: List[Dict[str, str]] = []
    for entry in candidates:
        if isinstance(entry, dict):
            records.append(normalize_record(entry))
    return records, chosen_path
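# A minimal usage sketch for the helpers above (the payload shape is assumed
# purely for illustration; any dict containing a list of record dicts would do):
#
#     doc = {"results": [{"Title": "Dune", "Year": 1965, "Tags": ["sf", "classic"]}]}
#     records, key = extract_records(doc)
#     # records == [{"title": "Dune", "year": "1965", "tags": "sf, classic"}]
#     # key == "results"
#
# normalize_record() can also be called directly on a single dict when a
# provider already knows where its records live.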
@@ -783,13 +783,13 @@ class ResultTable:

    def _add_search_result(self, row: ResultRow, result: Any) -> None:
        """Extract and add SearchResult fields to row."""
        # If provider supplied explicit columns, render those and skip legacy defaults
        cols = getattr(result, "columns", None)
        used_explicit_columns = False
        if cols:
            used_explicit_columns = True
            for name, value in cols:
                row.add_column(name, value)
            return

        else:
            # Core fields (legacy fallback)
            title = getattr(result, "title", "")
            table = str(getattr(result, "table", "") or "").lower()

@@ -61,15 +61,22 @@ class Provider:

    def serialize_row(self, row: ResultModel) -> Dict[str, Any]:
        r = ensure_result_model(row)
        return {
        metadata = r.metadata or {}
        out: Dict[str, Any] = {
            "title": r.title,
            "path": r.path,
            "ext": r.ext,
            "size_bytes": r.size_bytes,
            "metadata": r.metadata or {},
            "metadata": metadata,
            "source": r.source or self.name,
            "_selection_args": self.selection_args(r),
        }
        selection_action = metadata.get("_selection_action") or metadata.get("selection_action")
        if selection_action:
            out["_selection_action"] = [
                str(x) for x in selection_action if x is not None
            ]
        return out

    def serialize_rows(self, rows: Iterable[ResultModel]) -> List[Dict[str, Any]]:
        return [self.serialize_row(r) for r in rows]

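# Sketch of the serialized shape after this change (field values are invented
# for illustration): a row whose metadata carries
#   {"_selection_action": ["download-file", "-url", "https://openlibrary.org/books/OL1M"]}
# now serializes to a dict that includes "_selection_args" plus
#   "_selection_action": ["download-file", "-url", "https://openlibrary.org/books/OL1M"]
# so a selection in the result table can replay the action verbatim.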
@@ -30,6 +30,7 @@ from SYS.result_table import ResultTable
from SYS.rich_display import stderr_console as get_stderr_console
from SYS import pipeline as pipeline_context
from SYS.utils import sha256_file
from SYS.metadata import normalize_urls as normalize_url_list
from rich.prompt import Confirm

from tool.ytdlp import (
@@ -125,243 +126,6 @@ class Download_File(Cmdlet):
        debug(f"[download-file] run invoked with args: {list(args)}")
        return self._run_impl(result, args, config)

    @staticmethod
    def _normalize_urls(parsed: Dict[str, Any]) -> List[str]:
        raw_url = parsed.get("url", [])
        if isinstance(raw_url, str):
            raw_url = [raw_url]

        expanded_urls: List[str] = []
        for u in raw_url or []:
            if u is None:
                continue
            s = str(u).strip()
            if not s:
                continue
            if "," in s:
                parts = [p.strip() for p in s.split(",")]
                expanded_urls.extend([p for p in parts if p])
            else:
                expanded_urls.append(s)

        return expanded_urls

    @staticmethod
    def _rewrite_archive_org_urls(raw_urls: Sequence[str]) -> List[str]:
        """Rewrite Archive.org URLs using metadata JSON to pick the right flow.

        - /metadata/<id>:
            - if lendable (collection contains inlibrary/printdisabled/lendinglibrary) -> /borrow/<id>
            - else -> /details/<id>
        - /details/<id>:
            - if lendable -> /borrow/<id>

        This makes `download-file` do the right thing for borrow-only items.
        """

        out: List[str] = []
        for u in list(raw_urls or []):
            s = str(u or "").strip()
            if not s:
                continue

            try:
                p = urlparse(s)
                host = (p.hostname or "").strip().lower()
                path = (p.path or "").strip()
            except Exception:
                out.append(s)
                continue

            if not host or (host != "archive.org" and not host.endswith(".archive.org")):
                out.append(s)
                continue

            low_path = path.lower().strip()
            if not (low_path.startswith("/metadata/") or low_path.startswith("/details/")):
                out.append(s)
                continue

            parts = [x for x in path.split("/") if x]
            if len(parts) < 2:
                out.append(s)
                continue
            head = str(parts[0] or "").strip().lower()
            archive_id = str(parts[1] or "").strip()
            if head not in {"metadata", "details"} or not archive_id:
                out.append(s)
                continue

            lendable = False
            try:
                meta_url = f"https://archive.org/metadata/{archive_id}"
                resp = requests.get(meta_url, timeout=8)
                resp.raise_for_status()
                data = resp.json() if resp is not None else {}
                meta = data.get("metadata", {}) if isinstance(data, dict) else {}
                collection = meta.get("collection") if isinstance(meta, dict) else None

                values: List[str] = []
                if isinstance(collection, list):
                    values = [str(x).strip().lower() for x in collection if str(x).strip()]
                elif isinstance(collection, str):
                    values = [collection.strip().lower()] if collection.strip() else []

                lendable = any(v in {"inlibrary", "lendinglibrary"} for v in values)
            except Exception:
                lendable = False

            if lendable:
                debug(f"[download-file] archive.org item '{archive_id}' looks lendable; using borrow flow")
                out.append(f"https://archive.org/borrow/{archive_id}")
                continue

            # Non-lendable: turn metadata URLs into details URLs so IA picker can show files.
            if head == "metadata":
                out.append(f"https://archive.org/details/{archive_id}")
                continue

            out.append(s)

        return out

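    # Worked example of the rewrite rules above (item identifiers are hypothetical):
    #   https://archive.org/metadata/someitem  -> https://archive.org/details/someitem  (not lendable)
    #   https://archive.org/metadata/somebook  -> https://archive.org/borrow/somebook   (collection has "inlibrary")
    #   https://archive.org/details/somebook   -> https://archive.org/borrow/somebook   (lendable details page)
    #   https://example.com/whatever           -> unchanged (non-archive.org hosts pass through)
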
    @staticmethod
    def _collect_piped_items_if_no_urls(result: Any,
                                        raw_urls: Sequence[str]) -> List[Any]:
        if raw_urls:
            return []
        if isinstance(result, list):
            return list(result)
        if result:
            return [result]
        return []

    @staticmethod
    def _safe_total_items(raw_urls: Sequence[str], piped_items: Sequence[Any]) -> int:
        try:
            return int(len(raw_urls or []) + len(piped_items or []))
        except Exception:
            return 1

    @staticmethod
    def _build_preview(
        raw_urls: Sequence[str],
        piped_items: Sequence[Any],
        total_items: int
    ) -> List[Any]:
        try:
            preview: List[Any] = []
            preview.extend(list(raw_urls or [])[:max(0, total_items)])
            if len(preview) < total_items:
                preview.extend(
                    list(piped_items or [])[:max(0, total_items - len(preview))]
                )
            return preview
        except Exception:
            return []

    @staticmethod
    def _load_provider_registry() -> Dict[str, Any]:
        try:
            from ProviderCore.registry import (
                get_search_provider as _get_search_provider,
                get_provider as _get_provider,
                match_provider_name_for_url as _match_provider_name_for_url,
                SearchResult as _SearchResult,
            )

            return {
                "get_search_provider": _get_search_provider,
                "get_provider": _get_provider,
                "match_provider_name_for_url": _match_provider_name_for_url,
                "SearchResult": _SearchResult,
            }

        except Exception:
            return {
                "get_search_provider": None,
                "get_provider": None,
                "match_provider_name_for_url": None,
                "SearchResult": None,
            }

    @staticmethod
    def _path_from_download_result(result_obj: Any) -> Path:
        file_path = None
        if hasattr(result_obj, "path"):
            file_path = getattr(result_obj, "path")
        elif isinstance(result_obj, dict):
            file_path = result_obj.get("path")
        if not file_path:
            file_path = str(result_obj)
        return Path(str(file_path))

    def _emit_local_file(
        self,
        *,
        downloaded_path: Path,
        source: Optional[str],
        title_hint: Optional[str],
        tags_hint: Optional[List[str]],
        media_kind_hint: Optional[str],
        full_metadata: Optional[Dict[str, Any]],
        progress: PipelineProgress,
        config: Dict[str, Any],
        provider_hint: Optional[str] = None,
    ) -> None:
        title_val = (title_hint or downloaded_path.stem
                     or "Unknown").strip() or downloaded_path.stem
        hash_value = self._compute_file_hash(downloaded_path)
        notes: Optional[Dict[str, str]] = None
        try:
            if isinstance(full_metadata, dict):
                subtitles = full_metadata.get("_tidal_lyrics_subtitles")
                if isinstance(subtitles, str) and subtitles.strip():
                    notes = {"lyric": subtitles}
        except Exception:
            notes = None
        tag: List[str] = []
        if tags_hint:
            tag.extend([str(t) for t in tags_hint if t])
        if not any(str(t).lower().startswith("title:") for t in tag):
            tag.insert(0, f"title:{title_val}")

        payload: Dict[str, Any] = {
            "path": str(downloaded_path),
            "hash": hash_value,
            "title": title_val,
            "action": "cmdlet:download-file",
            "download_mode": "file",
            "store": "local",
            "media_kind": media_kind_hint or "file",
            "tag": tag,
        }
        if provider_hint:
            payload["provider"] = str(provider_hint)
        if full_metadata:
            payload["full_metadata"] = full_metadata
        if notes:
            payload["notes"] = notes
        if source and str(source).startswith("http"):
            payload["url"] = source
        elif source:
            payload["source_url"] = source

        pipeline_context.emit(payload)

        # When running with a local progress UI (standalone cmdlet), ensure
        # the pipe advances on emit.
        progress.on_emit(payload)

        # Automatically register url with local library
        if payload.get("url"):
            pipe_obj = coerce_to_pipe_object(payload)
            register_url_with_local_library(pipe_obj, config)

    def _process_explicit_urls(
        self,
        *,
@@ -373,6 +137,7 @@ class Download_File(Cmdlet):
        registry: Dict[str, Any],
        progress: PipelineProgress,
        context_items: Sequence[Any] = (),
    ) -> tuple[int, Optional[int]]:
        downloaded_count = 0
@@ -381,6 +146,12 @@ class Download_File(Cmdlet):
        get_provider = registry.get("get_provider")
        match_provider_name_for_url = registry.get("match_provider_name_for_url")

        context_items_list: List[Any]
        try:
            context_items_list = list(context_items) if context_items else []
        except Exception:
            context_items_list = []

        for url in raw_urls:
            try:
                debug(f"Processing URL: {url}")
@@ -521,14 +292,15 @@ class Download_File(Cmdlet):
                if provider_name and get_provider is not None and SearchResult is not None:
                    # OpenLibrary URLs should be handled by the OpenLibrary provider.
                    if provider_name == "openlibrary":
                        url_str = str(url).strip()
                        provider = get_provider("openlibrary", config)
                        if provider is None:
                            raise DownloadError(
                                "OpenLibrary provider not configured or not available"
                            )

                        edition_id = ol_provider.edition_id_from_url(str(url))
                        title_hint = ol_provider.title_hint_from_url_slug(str(url))
                        edition_id = ol_provider.edition_id_from_url(url_str)
                        title_hint = ol_provider.title_hint_from_url_slug(url_str)

                        download_payload: Optional[Dict[str, Any]] = None
                        try:
@@ -596,9 +368,95 @@ class Download_File(Cmdlet):

                        progress_cb = _progress

                        if hasattr(provider, "download_url"):
                        # Prefer piped OpenLibrary context (selection row) when present so we keep
                        # resolved metadata like archive_id and availability.
                        ctx_item = None
                        ctx_md: Dict[str, Any] = {}
                        ctx_title: Optional[str] = None
                        ctx_tags: Optional[List[str]] = None
                        ctx_media_kind: Optional[str] = None
                        for candidate in context_items_list:
                            try:
                                table_val = get_field(candidate, "table")
                            except Exception:
                                table_val = None
                            if str(table_val or "").lower() != "openlibrary":
                                continue

                            md_val = get_field(candidate, "full_metadata")
                            md_dict = md_val if isinstance(md_val, dict) else {}
                            cand_olid = str(md_dict.get("openlibrary_id") or md_dict.get("olid") or "").strip()
                            cand_archive = str(md_dict.get("archive_id") or "").strip()
                            cand_url = str(
                                get_field(candidate, "path")
                                or get_field(candidate, "url")
                                or md_dict.get("selection_url")
                                or ""
                            ).strip()

                            matched = False
                            if edition_id and cand_olid and cand_olid == edition_id:
                                matched = True
                            elif cand_url and url_str and cand_url == url_str:
                                matched = True
                            elif (not edition_id) and cand_archive and cand_archive in url_str:
                                matched = True

                            if matched:
                                ctx_item = candidate
                                ctx_md = md_dict
                                ctx_title = get_field(candidate, "title")
                                ctx_media_kind = get_field(candidate, "media_kind")
                                tags_val = get_field(candidate, "tag")
                                if isinstance(tags_val, list):
                                    ctx_tags = [str(t) for t in tags_val if t]
                                break

                        if ctx_item is not None and SearchResult is not None:
                            sr_meta = dict(ctx_md) if isinstance(ctx_md, dict) else {}
                            if edition_id and not sr_meta.get("openlibrary_id"):
                                sr_meta["openlibrary_id"] = edition_id

                            sr_title = str(ctx_title or title_hint or "").strip() or title_hint
                            sr_media_kind = str(ctx_media_kind or "book")

                            sr_obj = (
                                ctx_item
                                if isinstance(ctx_item, SearchResult)
                                else SearchResult(
                                    table="openlibrary",
                                    title=sr_title,
                                    path=url_str,
                                    media_kind=sr_media_kind,
                                    full_metadata=sr_meta,
                                )
                            )

                            try:
                                sr_obj.path = url_str  # type: ignore[attr-defined]
                            except Exception:
                                pass
                            try:
                                if ctx_tags:
                                    sr_obj.tag = set(ctx_tags)  # type: ignore[attr-defined]
                            except Exception:
                                pass

                            downloaded_path = provider.download(
                                sr_obj,
                                final_output_dir,
                                progress_callback=progress_cb
                            )  # type: ignore[call-arg]

                            if downloaded_path:
                                download_payload = {
                                    "path": Path(downloaded_path),
                                    "search_result": sr_obj,
                                }

                        if download_payload is None and hasattr(provider, "download_url"):
                            download_payload = provider.download_url(  # type: ignore[attr-defined]
                                str(url),
                                url_str,
                                final_output_dir,
                                progress_cb,
                            )
@@ -606,12 +464,12 @@ class Download_File(Cmdlet):
                        if download_payload is None:
                            sr = None
                            if hasattr(provider, "search_result_from_url"):
                                sr = provider.search_result_from_url(str(url))  # type: ignore[attr-defined]
                                sr = provider.search_result_from_url(url_str)  # type: ignore[attr-defined]
                            if sr is None:
                                sr = SearchResult(
                                    table="openlibrary",
                                    title=title_hint,
                                    path=str(url),
                                    path=url_str,
                                    media_kind="book",
                                    full_metadata={
                                        "openlibrary_id": edition_id,
@@ -811,6 +669,97 @@ class Download_File(Cmdlet):
                    downloaded_count += 1
                    continue

                if provider_name and get_provider is not None and SearchResult is not None:
                    provider = get_provider(provider_name, config)

                    if provider is not None and hasattr(provider, "download_url"):
                        try:
                            downloaded_path = provider.download_url(
                                str(url),
                                final_output_dir
                            )  # type: ignore[attr-defined]
                        except Exception as exc:
                            raise DownloadError(str(exc))

                        if downloaded_path:
                            self._emit_local_file(
                                downloaded_path=Path(downloaded_path),
                                source=str(url),
                                title_hint=Path(str(downloaded_path)).stem,
                                tags_hint=None,
                                media_kind_hint="file",
                                full_metadata=None,
                                provider_hint=str(provider_name),
                                progress=progress,
                                config=config,
                            )
                            downloaded_count += 1
                            continue

                    if provider is not None:
                        sr_obj = None
                        try:
                            sr_obj = SearchResult(
                                table=str(provider_name),
                                title=str(url),
                                path=str(url),
                                full_metadata={},
                            )
                            downloaded_path = provider.download(
                                sr_obj,
                                final_output_dir
                            )  # type: ignore[call-arg]
                        except Exception:
                            downloaded_path = None

                        if (not downloaded_path) and str(provider_name).lower() == "libgen":
                            raise DownloadError(
                                "LibGen URL did not resolve to a downloadable file"
                            )

                        if downloaded_path:
                            emit_tags: Optional[List[str]] = None
                            full_md: Optional[Dict[str, Any]] = None
                            title_hint = Path(str(downloaded_path)).stem
                            media_kind_hint = "file"

                            if str(provider_name).lower() == "libgen" and sr_obj is not None:
                                media_kind_hint = "book"
                                try:
                                    sr_tags = getattr(sr_obj, "tag", None)
                                    if isinstance(sr_tags, set) and sr_tags:
                                        emit_tags = sorted(
                                            [str(t) for t in sr_tags if t]
                                        )
                                except Exception:
                                    emit_tags = None

                                try:
                                    sr_full_md = getattr(sr_obj, "full_metadata", None)
                                    if isinstance(sr_full_md, dict):
                                        full_md = sr_full_md
                                        t = str(sr_full_md.get("title") or "").strip()
                                        if t:
                                            title_hint = t
                                except Exception:
                                    full_md = None

                            self._emit_local_file(
                                downloaded_path=Path(downloaded_path),
                                source=str(url),
                                title_hint=title_hint,
                                tags_hint=emit_tags,
                                media_kind_hint=media_kind_hint,
                                full_metadata=full_md,
                                provider_hint=str(provider_name),
                                progress=progress,
                                config=config,
                            )
                            downloaded_count += 1
                            continue

                result_obj = _download_direct_file(
                    str(url),
                    final_output_dir,
@@ -1237,6 +1186,170 @@ class Download_File(Cmdlet):

        return downloaded_count, queued_magnet_submissions

    @staticmethod
    def _path_from_download_result(result_obj: Any) -> Path:
        file_path = None
        if hasattr(result_obj, "path"):
            file_path = getattr(result_obj, "path")
        elif isinstance(result_obj, dict):
            file_path = result_obj.get("path")
        if not file_path:
            file_path = str(result_obj)
        return Path(str(file_path))

    def _emit_local_file(
        self,
        *,
        downloaded_path: Path,
        source: Optional[str],
        title_hint: Optional[str],
        tags_hint: Optional[List[str]],
        media_kind_hint: Optional[str],
        full_metadata: Optional[Dict[str, Any]],
        progress: PipelineProgress,
        config: Dict[str, Any],
        provider_hint: Optional[str] = None,
    ) -> None:
        title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem
        hash_value = self._compute_file_hash(downloaded_path)
        notes: Optional[Dict[str, str]] = None
        try:
            if isinstance(full_metadata, dict):
                subtitles = full_metadata.get("_tidal_lyrics_subtitles")
                if isinstance(subtitles, str) and subtitles.strip():
                    notes = {"lyric": subtitles}
        except Exception:
            notes = None
        tag: List[str] = []
        if tags_hint:
            tag.extend([str(t) for t in tags_hint if t])
        if not any(str(t).lower().startswith("title:") for t in tag):
            tag.insert(0, f"title:{title_val}")

        payload: Dict[str, Any] = {
            "path": str(downloaded_path),
            "hash": hash_value,
            "title": title_val,
            "action": "cmdlet:download-file",
            "download_mode": "file",
            "store": "local",
            "media_kind": media_kind_hint or "file",
            "tag": tag,
        }
        if provider_hint:
            payload["provider"] = str(provider_hint)
        if full_metadata:
            payload["full_metadata"] = full_metadata
        if notes:
            payload["notes"] = notes
        if source and str(source).startswith("http"):
            payload["url"] = source
        elif source:
            payload["source_url"] = source

        pipeline_context.emit(payload)

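    # Sketch of the payload this emits for a hypothetical download (all values
    # are illustrative only, not taken from a real run):
    #   {
    #       "path": "/downloads/dune.epub",
    #       "hash": "<sha256 of the file>",
    #       "title": "Dune",
    #       "action": "cmdlet:download-file",
    #       "download_mode": "file",
    #       "store": "local",
    #       "media_kind": "book",
    #       "tag": ["title:Dune", "author:Frank Herbert"],
    #       "provider": "openlibrary",
    #       "url": "https://archive.org/details/dune0000herb",
    #   }
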
    @staticmethod
    def _normalize_urls(parsed: Dict[str, Any]) -> List[str]:
        urls: List[str] = []
        url_value: Any = None
        if isinstance(parsed, dict):
            url_value = parsed.get("url")

        try:
            urls = normalize_url_list(url_value)
        except Exception:
            urls = []

        if not urls and isinstance(parsed, dict):
            query_val = parsed.get("query")
            try:
                if isinstance(query_val, str) and query_val.strip().lower().startswith("url:"):
                    urls = normalize_url_list(query_val)
            except Exception:
                pass

        return urls

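    # Worked example (argument values are hypothetical): with
    #   parsed = {"url": "https://a.example/x, https://b.example/y"}
    # normalize_url_list() is expected to expand the value into
    #   ["https://a.example/x", "https://b.example/y"],
    # and when -url is absent but the free-form query is "url:https://c.example/z",
    # the fallback branch parses that query string instead.
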
    @staticmethod
    def _collect_piped_items_if_no_urls(result: Any, raw_url: Sequence[str]) -> List[Any]:
        if raw_url:
            return []
        if result is None:
            return []
        if isinstance(result, list):
            return list(result)
        return [result]

    @staticmethod
    def _load_provider_registry() -> Dict[str, Any]:
        """Lightweight accessor for provider helpers without hard dependencies."""
        try:
            from ProviderCore import registry as provider_registry  # type: ignore
            from ProviderCore.base import SearchResult  # type: ignore

            return {
                "get_provider": getattr(provider_registry, "get_provider", None),
                "get_search_provider": getattr(provider_registry, "get_search_provider", None),
                "match_provider_name_for_url": getattr(provider_registry, "match_provider_name_for_url", None),
                "SearchResult": SearchResult,
            }
        except Exception:
            return {
                "get_provider": None,
                "get_search_provider": None,
                "match_provider_name_for_url": None,
                "SearchResult": None,
            }

    @staticmethod
    def _safe_total_items(raw_url: Sequence[str], piped_items: Sequence[Any]) -> int:
        """Return a sane item count for progress display."""
        try:
            url_count = len(raw_url or [])
        except Exception:
            url_count = 0
        try:
            piped_count = len(piped_items or [])
        except Exception:
            piped_count = 0
        total = url_count + piped_count
        return total if total > 0 else 1

    @staticmethod
    def _build_preview(raw_url: Sequence[str], piped_items: Sequence[Any], total_items: int) -> List[str]:
        """Construct a short preview list for the local progress UI."""
        preview: List[str] = []

        try:
            for url in raw_url or []:
                if len(preview) >= 5:
                    break
                preview.append(str(url))
        except Exception:
            pass

        if len(preview) < 5:
            try:
                items = piped_items if isinstance(piped_items, list) else list(piped_items or [])
            except Exception:
                items = []
            for item in items:
                if len(preview) >= 5:
                    break
                try:
                    label = get_field(item, "title") or get_field(item, "path") or get_field(item, "url")
                except Exception:
                    label = None
                if label:
                    preview.append(str(label))

        # If we still have nothing, supply a generic placeholder to avoid empty previews.
        if not preview and total_items:
            preview.append(f"{total_items} item(s)")

        return preview

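    # Example of the preview this builds (inputs are invented): three explicit
    # URLs plus four piped rows yields at most five entries, e.g.
    #   ["https://a.example/1", "https://a.example/2", "https://a.example/3",
    #    "First piped title", "Second piped title"]
    # and with no inputs at all, given _safe_total_items' floor of 1, it falls
    # back to ["1 item(s)"].
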
    # === Streaming helpers (yt-dlp) ===

    @staticmethod
@@ -3531,7 +3644,6 @@ class Download_File(Cmdlet):
        parsed = parse_cmdlet_args(args, self)

        raw_url = self._normalize_urls(parsed)
        raw_url = self._rewrite_archive_org_urls(raw_url)
        piped_items = self._collect_piped_items_if_no_urls(result, raw_url)

        had_piped_input = False
@@ -3660,6 +3772,7 @@ class Download_File(Cmdlet):
                quiet_mode=quiet_mode,
                registry=registry,
                progress=progress,
                context_items=(result if isinstance(result, list) else ([result] if result else [])),
            )
            downloaded_count += int(urls_downloaded)
            if early_exit is not None:

@@ -14,15 +14,12 @@ import sys

from SYS.logger import log, debug

try:
    from Provider.openlibrary import OpenLibrary

    _ol_scrape_isbn_metadata = OpenLibrary.scrape_isbn_metadata
    _ol_scrape_openlibrary_metadata = OpenLibrary.scrape_openlibrary_metadata
except Exception:
    _ol_scrape_isbn_metadata = None  # type: ignore[assignment]
    _ol_scrape_openlibrary_metadata = None  # type: ignore[assignment]
from Provider.metadata_provider import get_metadata_provider, list_metadata_providers
from Provider.metadata_provider import (
    get_metadata_provider,
    list_metadata_providers,
    scrape_isbn_metadata,
    scrape_openlibrary_metadata,
)
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
@@ -270,9 +267,6 @@ def _pick_supported_ytdlp_url(urls: List[str]) -> Optional[str]:
    return candidates[0] if candidates else None


_scrape_isbn_metadata = _ol_scrape_isbn_metadata  # type: ignore[assignment]
_scrape_openlibrary_metadata = _ol_scrape_openlibrary_metadata  # type: ignore[assignment]

# Tag item for ResultTable display and piping
from dataclasses import dataclass

@@ -1039,22 +1033,16 @@ def _extract_url_formats(formats: list) -> List[Tuple[str, str]]:

def _scrape_isbn_metadata(isbn: str) -> List[str]:
    if _ol_scrape_isbn_metadata is None:
        log("OpenLibrary scraper unavailable", file=sys.stderr)
        return []
    try:
        return list(_ol_scrape_isbn_metadata(isbn))
        return list(scrape_isbn_metadata(isbn))
    except Exception as e:
        log(f"ISBN scraping error: {e}", file=sys.stderr)
        return []


def _scrape_openlibrary_metadata(olid: str) -> List[str]:
    if _ol_scrape_openlibrary_metadata is None:
        log("OpenLibrary scraper unavailable", file=sys.stderr)
        return []
    try:
        return list(_ol_scrape_openlibrary_metadata(olid))
        return list(scrape_openlibrary_metadata(olid))
    except Exception as e:
        log(f"OpenLibrary scraping error: {e}", file=sys.stderr)
        return []
Block a user