2026-01-06 16:19:29 -08:00
parent 41c11d39fd
commit edc33f4528
10 changed files with 1192 additions and 881 deletions

.gitignore vendored

@@ -236,5 +236,4 @@ scripts/mm.ps1
scripts/mm
.style.yapf
.yapfignore
tmp_*


@@ -144,6 +144,11 @@ def _resolve_verify_value(verify_ssl: bool) -> Union[bool, str]:
return True
def get_requests_verify_value(verify_ssl: bool = True) -> Union[bool, str]:
"""Expose the verified value for reuse outside of HTTPClient (requests sessions)."""
return _resolve_verify_value(verify_ssl)
# Default configuration
DEFAULT_TIMEOUT = 30.0
DEFAULT_RETRIES = 3
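
A minimal sketch of how a caller outside HTTPClient might reuse the new helper to keep SSL verification consistent on a plain requests session (the make_session name is illustrative and not part of this commit; the same pattern appears as _create_archive_session further down):

import requests

from API.HTTP import get_requests_verify_value

def make_session(verify_ssl: bool = True) -> requests.Session:
    # Reuse the resolved verify value (True/False or a certificate bundle path).
    session = requests.Session()
    session.verify = get_requests_verify_value(verify_ssl)
    return session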


@@ -92,7 +92,7 @@
"(hitfile\\.net/[a-z0-9A-Z]{4,9})"
],
"regexp": "(hitf\\.(to|cc)/([a-z0-9A-Z]{4,9}))|(htfl\\.(net|to|cc)/([a-z0-9A-Z]{4,9}))|(hitfile\\.(net)/download/free/([a-z0-9A-Z]{4,9}))|((hitfile\\.net/[a-z0-9A-Z]{4,9}))",
"status": true
"status": false
},
"mega": {
"name": "mega",
@@ -389,7 +389,7 @@
"(filespace\\.com/[a-zA-Z0-9]{12})"
],
"regexp": "(filespace\\.com/fd/([a-zA-Z0-9]{12}))|((filespace\\.com/[a-zA-Z0-9]{12}))",
"status": true
"status": false
},
"filezip": {
"name": "filezip",
@@ -412,7 +412,7 @@
"(gigapeta\\.com/dl/[0-9a-zA-Z]{13,15})"
],
"regexp": "(gigapeta\\.com/dl/[0-9a-zA-Z]{13,15})",
"status": false
"status": true
},
"google": {
"name": "google",
@@ -507,7 +507,7 @@
"mediafire\\.com/(\\?|download/|file/|download\\.php\\?)([0-9a-z]{15})"
],
"regexp": "mediafire\\.com/(\\?|download/|file/|download\\.php\\?)([0-9a-z]{15})",
"status": false
"status": true
},
"mexashare": {
"name": "mexashare",
@@ -1650,18 +1650,6 @@
],
"regexp": "https?://music\\.apple\\.com/[\\w-]+/post/(\\d+)"
},
"appledaily": {
"name": "appledaily",
"type": "free",
"domains": [
"ent.appledaily.com.tw",
"appledaily.com.tw"
],
"regexps": [
"https?://(www|ent)\\.appledaily\\.com\\.tw/[^/]+/[^/]+/[^/]+/(\\d+)/(\\d+)(/.*)?"
],
"regexp": "https?://(www|ent)\\.appledaily\\.com\\.tw/[^/]+/[^/]+/[^/]+/(\\d+)/(\\d+)(/.*)?"
},
"applepodcasts": {
"name": "applepodcasts",
"type": "free",
@@ -2058,9 +2046,9 @@
"https?://(?:([^.]+)\\.)?bandcamp\\.com/album/([^/?#&]+)",
"https?://([^/]+)\\.bandcamp\\.com/track/([^/?#&]+)",
"https?://(?!www\\.)([^.]+)\\.bandcamp\\.com(?:/music)?/?(?:[#?]|$)",
"https?://(?:www\\.)?bandcamp\\.com/?\\?(?:.*?&)?show=(\\d+)"
"https?://(?:www\\.)?bandcamp\\.com/radio/?\\?(?:[^#]+&)?show=(\\d+)"
],
"regexp": "(https?://(?:([^.]+)\\.)?bandcamp\\.com/album/([^/?#&]+))|(https?://([^/]+)\\.bandcamp\\.com/track/([^/?#&]+))|(https?://(?!www\\.)([^.]+)\\.bandcamp\\.com(?:/music)?/?(?:[#?]|$))|(https?://(?:www\\.)?bandcamp\\.com/?\\?(?:.*?&)?show=(\\d+))"
"regexp": "(https?://(?:([^.]+)\\.)?bandcamp\\.com/album/([^/?#&]+))|(https?://([^/]+)\\.bandcamp\\.com/track/([^/?#&]+))|(https?://(?!www\\.)([^.]+)\\.bandcamp\\.com(?:/music)?/?(?:[#?]|$))|(https?://(?:www\\.)?bandcamp\\.com/radio/?\\?(?:[^#]+&)?show=(\\d+))"
},
"bandlab": {
"name": "bandlab",
@@ -3296,23 +3284,25 @@
"type": "free",
"domains": [
"cda.pl",
"ebd.cda.pl"
"ebd.cda.pl",
"m.cda.pl"
],
"regexps": [
"https?://(?:(?:www\\.)?cda\\.pl/video|ebd\\.cda\\.pl/[0-9]+x[0-9]+)/([0-9a-z]+)"
"https?://(?:(?:(?:www|m)\\.)?cda\\.pl/video|ebd\\.cda\\.pl/[0-9]+x[0-9]+)/([0-9a-z]+)"
],
"regexp": "https?://(?:(?:www\\.)?cda\\.pl/video|ebd\\.cda\\.pl/[0-9]+x[0-9]+)/([0-9a-z]+)"
"regexp": "https?://(?:(?:(?:www|m)\\.)?cda\\.pl/video|ebd\\.cda\\.pl/[0-9]+x[0-9]+)/([0-9a-z]+)"
},
"cdafolder": {
"name": "cdafolder",
"type": "free",
"domains": [
"cda.pl"
"cda.pl",
"m.cda.pl"
],
"regexps": [
"https?://(?:www\\.)?cda\\.pl/([\\w-]+)/folder/(\\d+)"
"https?://(?:(?:www|m)\\.)?cda\\.pl/([\\w-]+)/folder/(\\d+)"
],
"regexp": "https?://(?:www\\.)?cda\\.pl/([\\w-]+)/folder/(\\d+)"
"regexp": "https?://(?:(?:www|m)\\.)?cda\\.pl/([\\w-]+)/folder/(\\d+)"
},
"cellebrite": {
"name": "cellebrite",
@@ -3768,6 +3758,17 @@
],
"regexp": "https?://www\\.craftsy\\.com/class/([\\w-]+)"
},
"croatian.film": {
"name": "croatian.film",
"type": "free",
"domains": [
"croatian.film"
],
"regexps": [
"https://?(?:www\\.)?croatian\\.film/[a-z]{2}/[^/?#]+/(\\d+)"
],
"regexp": "https://?(?:www\\.)?croatian\\.film/[a-z]{2}/[^/?#]+/(\\d+)"
},
"crooksandliars": {
"name": "crooksandliars",
"type": "free",
@@ -4379,9 +4380,9 @@
"dropbox.com"
],
"regexps": [
"https?://(?:www\\.)?dropbox\\.com/(?:(?:e/)?scl/fi|sh?)/(\\w+)"
"https?://(?:www\\.)?dropbox\\.com/(?:(?:e/)?scl/f[io]|sh?)/(\\w+)"
],
"regexp": "https?://(?:www\\.)?dropbox\\.com/(?:(?:e/)?scl/fi|sh?)/(\\w+)"
"regexp": "https?://(?:www\\.)?dropbox\\.com/(?:(?:e/)?scl/f[io]|sh?)/(\\w+)"
},
"dropout": {
"name": "dropout",
@@ -5088,6 +5089,17 @@
],
"regexp": "https?://www\\.fifa\\.com/fifaplus/\\w{2}/watch/([^#?]+/)?(\\w+)"
},
"filmarchiv": {
"name": "filmarchiv",
"type": "free",
"domains": [
"filmarchiv.at"
],
"regexps": [
"https?://(?:www\\.)?filmarchiv\\.at/de/filmarchiv-on/video/(f_[0-9a-zA-Z]{5,})"
],
"regexp": "https?://(?:www\\.)?filmarchiv\\.at/de/filmarchiv-on/video/(f_[0-9a-zA-Z]{5,})"
},
"filmon": {
"name": "filmon",
"type": "free",
@@ -7954,37 +7966,6 @@
],
"regexp": "https?://(?:w(?:ww)?\\.)?mgtv\\.com/[bv]/(?:[^/]+/)*(\\d+)\\.html"
},
"manototv": {
"name": "manototv",
"type": "free",
"domains": [
"manototv.com"
],
"regexps": [
"https?://(?:www\\.)?manototv\\.com/episode/([0-9]+)"
],
"regexp": "https?://(?:www\\.)?manototv\\.com/episode/([0-9]+)"
},
"manototvlive": {
"name": "manototvlive",
"type": "free",
"domains": [],
"regexps": [
"https?://(?:www\\.)?manototv\\.com/live/"
],
"regexp": "https?://(?:www\\.)?manototv\\.com/live/"
},
"manototvshow": {
"name": "manototvshow",
"type": "free",
"domains": [
"manototv.com"
],
"regexps": [
"https?://(?:www\\.)?manototv\\.com/show/([0-9]+)"
],
"regexp": "https?://(?:www\\.)?manototv\\.com/show/([0-9]+)"
},
"manyvids": {
"name": "manyvids",
"type": "free",
@@ -9321,9 +9302,10 @@
"https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(?!myshows|library|videos)([\\w-]+)/?(?:$|[?#])",
"https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(?!(?:myshows|library|videos)/)([\\w-]+)/([\\w-]+)/?(?:$|[?#])",
"https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/videos/([\\w-]+)",
"https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/([\\w-]+)/season/([\\w-]+)",
"https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(myshows|library/latest-videos)/?(?:$|[?#])"
],
"regexp": "(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(?!myshows|library|videos)([\\w-]+)/?(?:$|[?#]))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(?!(?:myshows|library|videos)/)([\\w-]+)/([\\w-]+)/?(?:$|[?#]))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/videos/([\\w-]+))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(myshows|library/latest-videos)/?(?:$|[?#]))"
"regexp": "(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(?!myshows|library|videos)([\\w-]+)/?(?:$|[?#]))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(?!(?:myshows|library|videos)/)([\\w-]+)/([\\w-]+)/?(?:$|[?#]))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/videos/([\\w-]+))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/([\\w-]+)/season/([\\w-]+))|(https?://(?:www\\.|beta\\.)?(?:watchnebula\\.com|nebula\\.app|nebula\\.tv)/(myshows|library/latest-videos)/?(?:$|[?#]))"
},
"nekohacker": {
"name": "nekohacker",
@@ -9402,10 +9384,10 @@
"https?://(?:y\\.)?music\\.163\\.com/(?:[#m]/)?song\\?.*?\\bid=([0-9]+)",
"https?://music\\.163\\.com/(?:#/)?(?:playlist|discover/toplist)\\?id=([0-9]+)",
"https?://music\\.163\\.com/(?:#/)?mv\\?id=([0-9]+)",
"https?://music\\.163\\.com/(?:#/)?program\\?id=([0-9]+)",
"https?://music\\.163\\.com/(?:#/)?(?:dj|program)\\?id=([0-9]+)",
"https?://music\\.163\\.com/(?:#/)?artist\\?id=([0-9]+)"
],
"regexp": "(https?://music\\.163\\.com/(?:#/)?album\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?djradio\\?id=([0-9]+))|(https?://(?:y\\.)?music\\.163\\.com/(?:[#m]/)?song\\?.*?\\bid=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?(?:playlist|discover/toplist)\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?mv\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?program\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?artist\\?id=([0-9]+))"
"regexp": "(https?://music\\.163\\.com/(?:#/)?album\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?djradio\\?id=([0-9]+))|(https?://(?:y\\.)?music\\.163\\.com/(?:[#m]/)?song\\?.*?\\bid=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?(?:playlist|discover/toplist)\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?mv\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?(?:dj|program)\\?id=([0-9]+))|(https?://music\\.163\\.com/(?:#/)?artist\\?id=([0-9]+))"
},
"netplustv": {
"name": "netplustv",
@@ -9478,9 +9460,9 @@
"netzkino.de"
],
"regexps": [
"https?://(?:www\\.)?netzkino\\.de/\\#!/[^/]+/([^/]+)"
"https?://(?:www\\.)?netzkino\\.de/details/([^/?#]+)"
],
"regexp": "https?://(?:www\\.)?netzkino\\.de/\\#!/[^/]+/([^/]+)"
"regexp": "https?://(?:www\\.)?netzkino\\.de/details/([^/?#]+)"
},
"newgrounds": {
"name": "newgrounds",
@@ -9519,37 +9501,6 @@
],
"regexp": "https?://(?:www\\.)?newsy\\.com/stories/([^/?#$&]+)"
},
"nextmedia": {
"name": "nextmedia",
"type": "free",
"domains": [
"hk.apple.nextmedia.com"
],
"regexps": [
"https?://hk\\.apple\\.nextmedia\\.com/[^/]+/[^/]+/(\\d+)/(\\d+)"
],
"regexp": "https?://hk\\.apple\\.nextmedia\\.com/[^/]+/[^/]+/(\\d+)/(\\d+)"
},
"nextmediaactionnews": {
"name": "nextmediaactionnews",
"type": "free",
"domains": [
"hk.dv.nextmedia.com"
],
"regexps": [
"https?://hk\\.dv\\.nextmedia\\.com/actionnews/[^/]+/(\\d+)/(\\d+)/\\d+"
],
"regexp": "https?://hk\\.dv\\.nextmedia\\.com/actionnews/[^/]+/(\\d+)/(\\d+)/\\d+"
},
"nexttv": {
"name": "nexttv",
"type": "free",
"domains": [],
"regexps": [
"https?://(?:www\\.)?nexttv\\.com\\.tw/(?:[^/]+/)+(\\d+)"
],
"regexp": "https?://(?:www\\.)?nexttv\\.com\\.tw/(?:[^/]+/)+(\\d+)"
},
"nexx": {
"name": "nexx",
"type": "free",
@@ -9809,7 +9760,7 @@
"name": "nitter",
"type": "free",
"domains": [
"nitter.priv.pw"
"nitter.projectsegfau.lt"
],
"regexps": [
"https?://(?:3nzoldnxplag42gqjs23xvghtzf6t6yzssrtytnntc6ppc7xxuoneoad\\.onion|nitter\\.l4qlywnpwqsluw65ts7md3khrivpirse744un3x7mlskqauz5pyuzgqd\\.onion|nitter7bryz3jv7e3uekphigvmoyoem4al3fynerxkj22dmoxoq553qd\\.onion|npf37k3mtzwxreiw52ccs5ay4e6qt2fkcs2ndieurdyn2cuzzsfyfvid\\.onion|nitter\\.v6vgyqpa7yefkorazmg5d5fimstmvm2vtbirt6676mt7qmllrcnwycqd\\.onion|i23nv6w3juvzlw32xzoxcqzktegd4i4fu3nmnc2ewv4ggiu4ledwklad\\.onion|26oq3gioiwcmfojub37nz5gzbkdiqp7fue5kvye7d4txv4ny6fb4wwid\\.onion|vfaomgh4jxphpbdfizkm5gbtjahmei234giqj4facbwhrfjtcldauqad\\.onion|iwgu3cv7ywf3gssed5iqtavmrlszgsxazkmwwnt4h2kdait75thdyrqd\\.onion|erpnncl5nhyji3c32dcfmztujtl3xaddqb457jsbkulq24zqq7ifdgad\\.onion|ckzuw5misyahmg7j5t5xwwuj3bwy62jfolxyux4brfflramzsvvd3syd\\.onion|jebqj47jgxleaiosfcxfibx2xdahjettuydlxbg64azd4khsxv6kawid\\.onion|nttr2iupbb6fazdpr2rgbooon2tzbbsvvkagkgkwohhodjzj43stxhad\\.onion|nitraeju2mipeziu2wtcrqsxg7h62v5y4eqgwi75uprynkj74gevvuqd\\.onion|nitter\\.lqs5fjmajyp7rvp4qvyubwofzi6d4imua7vs237rkc4m5qogitqwrgyd\\.onion|ibsboeui2im5o7dxnik3s5yghufumgy5abevtij5nbizequfpu4qi4ad\\.onion|ec5nvbycpfa5k6ro77blxgkyrzbkv7uy6r5cngcbkadtjj2733nm3uyd\\.onion|nitter\\.i2p|u6ikd6zndl3c4dsdq4mmujpntgeevdk5qzkfb57r4tnfeccrn2qa\\.b32\\.i2p|nitterlgj3n5fgwesu3vxc5h67ruku33nqaoeoocae2mvlzhsu6k7fqd\\.onion|nitter\\.lacontrevoie\\.fr|nitter\\.fdn\\.fr|nitter\\.1d4\\.us|nitter\\.kavin\\.rocks|nitter\\.unixfox\\.eu|nitter\\.domain\\.glass|nitter\\.namazso\\.eu|birdsite\\.xanny\\.family|nitter\\.moomoo\\.me|bird\\.trom\\.tf|nitter\\.it|twitter\\.censors\\.us|nitter\\.grimneko\\.de|twitter\\.076\\.ne\\.jp|nitter\\.fly\\.dev|notabird\\.site|nitter\\.weiler\\.rocks|nitter\\.sethforprivacy\\.com|nitter\\.cutelab\\.space|nitter\\.nl|nitter\\.mint\\.lgbt|nitter\\.bus\\-hit\\.me|nitter\\.esmailelbob\\.xyz|tw\\.artemislena\\.eu|nitter\\.winscloud\\.net|nitter\\.tiekoetter\\.com|nitter\\.spaceint\\.fr|nitter\\.privacy\\.com\\.de|nitter\\.poast\\.org|nitter\\.bird\\.froth\\.zone|nitter\\.dcs0\\.hu|twitter\\.dr460nf1r3\\.org|nitter\\.garudalinux\\.org|twitter\\.femboy\\.hu|nitter\\.cz|nitter\\.privacydev\\.net|nitter\\.evil\\.site|tweet\\.lambda\\.dance|nitter\\.kylrth\\.com|nitter\\.foss\\.wtf|nitter\\.priv\\.pw|nitter\\.tokhmi\\.xyz|nitter\\.catalyst\\.sx|unofficialbird\\.com|nitter\\.projectsegfau\\.lt|nitter\\.eu\\.projectsegfau\\.lt|singapore\\.unofficialbird\\.com|canada\\.unofficialbird\\.com|india\\.unofficialbird\\.com|nederland\\.unofficialbird\\.com|uk\\.unofficialbird\\.com|n\\.l5\\.ca|nitter\\.slipfox\\.xyz|nitter\\.soopy\\.moe|nitter\\.qwik\\.space|read\\.whatever\\.social|nitter\\.rawbit\\.ninja|nt\\.vern\\.cc|ntr\\.odyssey346\\.dev|nitter\\.ir|nitter\\.privacytools\\.io|nitter\\.sneed\\.network|n\\.sneed\\.network|nitter\\.manasiwibi\\.com|nitter\\.smnz\\.de|nitter\\.twei\\.space|nitter\\.inpt\\.fr|nitter\\.d420\\.de|nitter\\.caioalonso\\.com|nitter\\.at|nitter\\.drivet\\.xyz|nitter\\.pw|nitter\\.nicfab\\.eu|bird\\.habedieeh\\.re|nitter\\.hostux\\.net|nitter\\.adminforge\\.de|nitter\\.platypush\\.tech|nitter\\.mask\\.sh|nitter\\.pufe\\.org|nitter\\.us\\.projectsegfau\\.lt|nitter\\.arcticfoxes\\.net|t\\.com\\.sb|nitter\\.kling\\.gg|nitter\\.ktachibana\\.party|nitter\\.riverside\\.rocks|nitter\\.girlboss\\.ceo|nitter\\.lunar\\.icu|twitter\\.moe\\.ngo|nitter\\.freedit\\.eu|ntr\\.frail\\.duckdns\\.org|nitter\\.librenode\\.org|n\\.opnxng\\.com|nitter\\.plus\\.st|nitter\\.ethibox\\.fr|nitter\\.net|is\\-nitter\\.resolv\\.ee|lu\\-nitter\\.resolv\\.ee|nitter\\.13ad\\.de|nitter\\.40two\\.app|nitter\\.cattube\\.org|nitter\\.cc|nitter\\.dar
k\\.fail|nitter\\.himiko\\.cloud|nitter\\.koyu\\.space|nitter\\.mailstation\\.de|nitter\\.mastodont\\.cat|nitter\\.tedomum\\.net|nitter\\.tokhmi\\.xyz|nitter\\.weaponizedhumiliation\\.com|nitter\\.vxempire\\.xyz|tweet\\.lambda\\.dance|nitter\\.ca|nitter\\.42l\\.fr|nitter\\.pussthecat\\.org|nitter\\.nixnet\\.services|nitter\\.eu|nitter\\.actionsack\\.com|nitter\\.hu|twitr\\.gq|nittereu\\.moomoo\\.me|bird\\.from\\.tf|twitter\\.grimneko\\.de|nitter\\.alefvanoon\\.xyz|n\\.hyperborea\\.cloud|twitter\\.mstdn\\.social|nitter\\.silkky\\.cloud|nttr\\.stream|fuckthesacklers\\.network|nitter\\.govt\\.land|nitter\\.datatunnel\\.xyz|de\\.nttr\\.stream|twtr\\.bch\\.bar|nitter\\.exonip\\.de|nitter\\.mastodon\\.pro|nitter\\.notraxx\\.ch|nitter\\.skrep\\.in|nitter\\.snopyta\\.org)/(.+)/status/([0-9]+)(#.)?"
@@ -10613,6 +10564,17 @@
],
"regexp": "(https?://(?:www\\.)?palcomp3\\.com(?:\\.br)?/([^/?&#]+))|(https?://(?:www\\.)?palcomp3\\.com(?:\\.br)?/([^/]+)/([^/?&#]+))|(https?://(?:www\\.)?palcomp3\\.com(?:\\.br)?/([^/]+)/([^/?&#]+)/?#clipe)"
},
"pandatv": {
"name": "pandatv",
"type": "free",
"domains": [
"pandalive.co.kr"
],
"regexps": [
"https?://(?:www\\.|m\\.)?pandalive\\.co\\.kr/play/(\\w+)"
],
"regexp": "https?://(?:www\\.|m\\.)?pandalive\\.co\\.kr/play/(\\w+)"
},
"panopto": {
"name": "panopto",
"type": "free",
@@ -10704,10 +10666,10 @@
"parti.com"
],
"regexps": [
"https?://(?:www\\.)?parti\\.com/creator/([\\w]+)/([\\w/-]+)",
"https?://(?:www\\.)?parti\\.com/(?!video/)([\\w/-]+)",
"https?://(?:www\\.)?parti\\.com/video/(\\d+)"
],
"regexp": "(https?://(?:www\\.)?parti\\.com/creator/([\\w]+)/([\\w/-]+))|(https?://(?:www\\.)?parti\\.com/video/(\\d+))"
"regexp": "(https?://(?:www\\.)?parti\\.com/(?!video/)([\\w/-]+))|(https?://(?:www\\.)?parti\\.com/video/(\\d+))"
},
"patreon": {
"name": "patreon",
@@ -12963,15 +12925,6 @@
],
"regexp": "https?://(?:www\\.)?sciencechannel\\.com/video/([^/]+/[^/?#]+)"
},
"screen.yahoo": {
"name": "screen.yahoo",
"type": "free",
"domains": [],
"regexps": [
"yvsearch(|[1-9][0-9]*|all):([\\s\\S]+)"
],
"regexp": "yvsearch(|[1-9][0-9]*|all):([\\s\\S]+)"
},
"screen9": {
"name": "screen9",
"type": "free",
@@ -13060,28 +13013,6 @@
],
"regexp": "https?://(?:www\\.)?scrolller\\.com/([\\w-]+)"
},
"scte": {
"name": "scte",
"type": "free",
"domains": [
"learning.scte.org"
],
"regexps": [
"https?://learning\\.scte\\.org/mod/scorm/view\\.php?.*?\\bid=(\\d+)"
],
"regexp": "https?://learning\\.scte\\.org/mod/scorm/view\\.php?.*?\\bid=(\\d+)"
},
"sctecourse": {
"name": "sctecourse",
"type": "free",
"domains": [
"learning.scte.org"
],
"regexps": [
"https?://learning\\.scte\\.org/(?:mod/sub)?course/view\\.php?.*?\\bid=(\\d+)"
],
"regexp": "https?://learning\\.scte\\.org/(?:mod/sub)?course/view\\.php?.*?\\bid=(\\d+)"
},
"sejm": {
"name": "sejm",
"type": "free",
@@ -14199,6 +14130,19 @@
],
"regexp": "https?://www\\.taptap\\.io/post/(\\d+)"
},
"tarangplus": {
"name": "tarangplus",
"type": "free",
"domains": [
"tarangplus.in"
],
"regexps": [
"https?://(?:www\\.)?tarangplus\\.in/([^#?/]+)/([^#?/]+)/episodes/?(?:$|[?#])",
"https?://(?:www\\.)?tarangplus\\.in/([^#?/]+)/all/?(?:$|[?#])",
"https?://(?:www\\.)?tarangplus\\.in/(?:movies|[^#?/]+/[^#?/]+)/(?!episodes)([^#?/]+)"
],
"regexp": "(https?://(?:www\\.)?tarangplus\\.in/([^#?/]+)/([^#?/]+)/episodes/?(?:$|[?#]))|(https?://(?:www\\.)?tarangplus\\.in/([^#?/]+)/all/?(?:$|[?#]))|(https?://(?:www\\.)?tarangplus\\.in/(?:movies|[^#?/]+/[^#?/]+)/(?!episodes)([^#?/]+))"
},
"tass": {
"name": "tass",
"type": "free",
@@ -15152,10 +15096,10 @@
"tubitv.com"
],
"regexps": [
"https?://(?:www\\.)?tubitv\\.com/(video|movies|tv-shows)/(\\d+)",
"https?://(?:www\\.)?tubitv\\.com/(?:[a-z]{2}-[a-z]{2}/)?(video|movies|tv-shows)/(\\d+)",
"https?://(?:www\\.)?tubitv\\.com/series/\\d+/([^/?#]+)(?:/season-(\\d+))?"
],
"regexp": "(https?://(?:www\\.)?tubitv\\.com/(video|movies|tv-shows)/(\\d+))|(https?://(?:www\\.)?tubitv\\.com/series/\\d+/([^/?#]+)(?:/season-(\\d+))?)"
"regexp": "(https?://(?:www\\.)?tubitv\\.com/(?:[a-z]{2}-[a-z]{2}/)?(video|movies|tv-shows)/(\\d+))|(https?://(?:www\\.)?tubitv\\.com/series/\\d+/([^/?#]+)(?:/season-(\\d+))?)"
},
"tumblr": {
"name": "tumblr",
@@ -15304,10 +15248,10 @@
"tv5unis.ca"
],
"regexps": [
"https?://(?:www\\.)?tv5unis\\.ca/videos/([^/]+)(?:/saisons/(\\d+)/episodes/(\\d+))?/?(?:[?#&]|$)",
"https?://(?:www\\.)?tv5unis\\.ca/videos/[^/]+/(\\d+)"
"https?://(?:www\\.)?tv5unis\\.ca/videos/([^/?#]+)(?:/saisons/(\\d+)/episodes/(\\d+))?/?(?:[?#&]|$)",
"https?://(?:www\\.)?tv5unis\\.ca/videos/[^/?#]+/(\\d+)"
],
"regexp": "(https?://(?:www\\.)?tv5unis\\.ca/videos/([^/]+)(?:/saisons/(\\d+)/episodes/(\\d+))?/?(?:[?#&]|$))|(https?://(?:www\\.)?tv5unis\\.ca/videos/[^/]+/(\\d+))"
"regexp": "(https?://(?:www\\.)?tv5unis\\.ca/videos/([^/?#]+)(?:/saisons/(\\d+)/episodes/(\\d+))?/?(?:[?#&]|$))|(https?://(?:www\\.)?tv5unis\\.ca/videos/[^/?#]+/(\\d+))"
},
"tv8.it": {
"name": "tv8.it",
@@ -17315,8 +17259,6 @@
"name": "yahoo",
"type": "free",
"domains": [
"screen.yahoo.com",
"uk.screen.yahoo.com",
"news.yahoo.com",
"yahoo.com",
"gma.yahoo.com",
@@ -17329,9 +17271,10 @@
],
"regexps": [
"(https?://(?:([a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\\.)?(?:[\\da-zA-Z_-]+\\.)?yahoo\\.com/(?:[^/]+/)*([^?&#]*-[0-9]+(?:-[a-z]+)?)\\.html)",
"https?://news\\.yahoo\\.co\\.jp/(?:articles|feature)/([a-zA-Z0-9]+)"
"https?://news\\.yahoo\\.co\\.jp/(?:articles|feature)/([a-zA-Z0-9]+)",
"yvsearch(|[1-9][0-9]*|all):([\\s\\S]+)"
],
"regexp": "((https?://(?:([a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\\.)?(?:[\\da-zA-Z_-]+\\.)?yahoo\\.com/(?:[^/]+/)*([^?&#]*-[0-9]+(?:-[a-z]+)?)\\.html))|(https?://news\\.yahoo\\.co\\.jp/(?:articles|feature)/([a-zA-Z0-9]+))"
"regexp": "((https?://(?:([a-zA-Z]{2}(?:-[a-zA-Z]{2})?|malaysia)\\.)?(?:[\\da-zA-Z_-]+\\.)?yahoo\\.com/(?:[^/]+/)*([^?&#]*-[0-9]+(?:-[a-z]+)?)\\.html))|(https?://news\\.yahoo\\.co\\.jp/(?:articles|feature)/([a-zA-Z0-9]+))|(yvsearch(|[1-9][0-9]*|all):([\\s\\S]+))"
},
"yandexdisk": {
"name": "yandexdisk",
@@ -17718,14 +17661,17 @@
"name": "zdf",
"type": "free",
"domains": [
"zdf.de"
"zdf.de",
"zdfheute.de",
"logo.de"
],
"regexps": [
"https?://www\\.zdf\\.de/(?:[^/?#]+/)*([^/?#]+)",
"https?://(?:www\\.)?zdf\\.de/(?:video|play)/(?:[^/?#]+/)*([^/?#]+)",
"https?://(?:www\\.)?zdf\\.de/(?:[^/?#]+/)*([^/?#]+)\\.html"
"https?://(?:www\\.)?zdf\\.de/(?:[^/?#]+/)*([^/?#]+)\\.html",
"https?://(?:www\\.)?(?:zdfheute|logo)\\.de/(?:[^/?#]+/)*([^/?#]+)\\.html"
],
"regexp": "(https?://www\\.zdf\\.de/(?:[^/?#]+/)*([^/?#]+))|(https?://(?:www\\.)?zdf\\.de/(?:video|play)/(?:[^/?#]+/)*([^/?#]+))|(https?://(?:www\\.)?zdf\\.de/(?:[^/?#]+/)*([^/?#]+)\\.html)"
"regexp": "(https?://www\\.zdf\\.de/(?:[^/?#]+/)*([^/?#]+))|(https?://(?:www\\.)?zdf\\.de/(?:video|play)/(?:[^/?#]+/)*([^/?#]+))|(https?://(?:www\\.)?zdf\\.de/(?:[^/?#]+/)*([^/?#]+)\\.html)|(https?://(?:www\\.)?(?:zdfheute|logo)\\.de/(?:[^/?#]+/)*([^/?#]+)\\.html)"
},
"zee5": {
"name": "zee5",


@@ -8,6 +8,9 @@ import requests
import sys
import json
import subprocess
from API.HTTP import HTTPClient
from ProviderCore.base import SearchResult
try: # Optional dependency for IMDb scraping
from imdbinfo.services import search_title # type: ignore
except ImportError: # pragma: no cover - optional
@@ -15,6 +18,7 @@ except ImportError: # pragma: no cover - optional
from SYS.logger import log, debug
from SYS.metadata import imdb_tag
from SYS.json_table import normalize_record
try: # Optional dependency
import musicbrainzngs # type: ignore
@@ -892,6 +896,524 @@ class YtdlpMetadataProvider(MetadataProvider):
return out
def _coerce_archive_field_list(value: Any) -> List[str]:
"""Coerce an Archive.org metadata field to a list of strings."""
if value is None:
return []
if isinstance(value, list):
out: List[str] = []
for v in value:
try:
s = str(v).strip()
except Exception:
continue
if s:
out.append(s)
return out
if isinstance(value, (tuple, set)):
out = []
for v in value:
try:
s = str(v).strip()
except Exception:
continue
if s:
out.append(s)
return out
try:
s = str(value).strip()
except Exception:
return []
return [s] if s else []
def archive_item_metadata_to_tags(archive_id: str,
item_metadata: Dict[str, Any]) -> List[str]:
"""Coerce Archive.org metadata into a stable set of bibliographic tags."""
archive_id_clean = str(archive_id or "").strip()
meta = item_metadata if isinstance(item_metadata, dict) else {}
tags: List[str] = []
seen: set[str] = set()
def _add(tag: str) -> None:
try:
t = str(tag).strip()
except Exception:
return
if not t:
return
if t.lower() in seen:
return
seen.add(t.lower())
tags.append(t)
if archive_id_clean:
_add(f"internet_archive:{archive_id_clean}")
for title in _coerce_archive_field_list(meta.get("title"))[:1]:
_add(f"title:{title}")
creators: List[str] = []
creators.extend(_coerce_archive_field_list(meta.get("creator")))
creators.extend(_coerce_archive_field_list(meta.get("author")))
for creator in creators[:3]:
_add(f"author:{creator}")
for publisher in _coerce_archive_field_list(meta.get("publisher"))[:3]:
_add(f"publisher:{publisher}")
for date_val in _coerce_archive_field_list(meta.get("date"))[:1]:
_add(f"publish_date:{date_val}")
for year_val in _coerce_archive_field_list(meta.get("year"))[:1]:
_add(f"publish_date:{year_val}")
for lang in _coerce_archive_field_list(meta.get("language"))[:3]:
_add(f"language:{lang}")
for subj in _coerce_archive_field_list(meta.get("subject"))[:15]:
if len(subj) > 200:
subj = subj[:200]
_add(subj)
def _clean_isbn(raw: str) -> str:
return str(raw or "").replace("-", "").strip()
for isbn in _coerce_archive_field_list(meta.get("isbn"))[:10]:
isbn_clean = _clean_isbn(isbn)
if isbn_clean:
_add(f"isbn:{isbn_clean}")
identifiers: List[str] = []
identifiers.extend(_coerce_archive_field_list(meta.get("identifier")))
identifiers.extend(_coerce_archive_field_list(meta.get("external-identifier")))
added_other = 0
for ident in identifiers:
ident_s = str(ident or "").strip()
if not ident_s:
continue
low = ident_s.lower()
if low.startswith("urn:isbn:"):
val = _clean_isbn(ident_s.split(":", 2)[-1])
if val:
_add(f"isbn:{val}")
continue
if low.startswith("isbn:"):
val = _clean_isbn(ident_s.split(":", 1)[-1])
if val:
_add(f"isbn:{val}")
continue
if low.startswith("urn:oclc:"):
val = ident_s.split(":", 2)[-1].strip()
if val:
_add(f"oclc:{val}")
continue
if low.startswith("oclc:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"oclc:{val}")
continue
if low.startswith("urn:lccn:"):
val = ident_s.split(":", 2)[-1].strip()
if val:
_add(f"lccn:{val}")
continue
if low.startswith("lccn:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"lccn:{val}")
continue
if low.startswith("doi:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"doi:{val}")
continue
if archive_id_clean and low == archive_id_clean.lower():
continue
if added_other >= 5:
continue
if len(ident_s) > 200:
ident_s = ident_s[:200]
_add(f"identifier:{ident_s}")
added_other += 1
return tags
def fetch_archive_item_metadata(archive_id: str,
*,
timeout: int = 8) -> Dict[str, Any]:
ident = str(archive_id or "").strip()
if not ident:
return {}
resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=int(timeout))
resp.raise_for_status()
data = resp.json() if resp is not None else {}
if not isinstance(data, dict):
return {}
meta = data.get("metadata")
return meta if isinstance(meta, dict) else {}
def scrape_isbn_metadata(isbn: str) -> List[str]:
"""Scrape metadata tags for an ISBN using OpenLibrary's books API."""
new_tags: List[str] = []
isbn_clean = str(isbn or "").replace("isbn:", "").replace("-", "").strip()
if not isbn_clean:
return []
url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"
try:
with HTTPClient() as client:
response = client.get(url)
response.raise_for_status()
data = json.loads(response.content.decode("utf-8"))
except Exception as exc:
log(f"Failed to fetch ISBN metadata: {exc}", file=sys.stderr)
return []
if not data:
log(f"No ISBN metadata found for: {isbn}")
return []
book_data = next(iter(data.values()), None)
if not isinstance(book_data, dict):
return []
if "title" in book_data:
new_tags.append(f"title:{book_data['title']}")
authors = book_data.get("authors")
if isinstance(authors, list):
for author in authors[:3]:
if isinstance(author, dict) and author.get("name"):
new_tags.append(f"author:{author['name']}")
if book_data.get("publish_date"):
new_tags.append(f"publish_date:{book_data['publish_date']}")
publishers = book_data.get("publishers")
if isinstance(publishers, list) and publishers:
pub = publishers[0]
if isinstance(pub, dict) and pub.get("name"):
new_tags.append(f"publisher:{pub['name']}")
if "description" in book_data:
desc = book_data.get("description")
if isinstance(desc, dict) and "value" in desc:
desc = desc.get("value")
if desc:
desc_str = str(desc).strip()
if desc_str:
new_tags.append(f"description:{desc_str[:200]}")
page_count = book_data.get("number_of_pages")
if isinstance(page_count, int) and page_count > 0:
new_tags.append(f"pages:{page_count}")
identifiers = book_data.get("identifiers")
if isinstance(identifiers, dict):
def _first(value: Any) -> Any:
if isinstance(value, list) and value:
return value[0]
return value
for key, ns in (
("openlibrary", "openlibrary"),
("lccn", "lccn"),
("oclc", "oclc"),
("goodreads", "goodreads"),
("librarything", "librarything"),
("doi", "doi"),
("internet_archive", "internet_archive"),
):
val = _first(identifiers.get(key))
if val:
new_tags.append(f"{ns}:{val}")
debug(f"Found {len(new_tags)} tag(s) from ISBN lookup")
return new_tags
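# Illustrative usage sketch (not part of this commit); the ISBN below is a placeholder.
# A leading "isbn:" prefix and any hyphens are stripped before the OpenLibrary lookup,
# so "isbn:978-0-00-000000-0" and "9780000000000" resolve identically.
_example_isbn_tags = scrape_isbn_metadata("isbn:978-0-00-000000-0")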
def scrape_openlibrary_metadata(olid: str) -> List[str]:
"""Scrape metadata tags for an OpenLibrary ID using the edition JSON endpoint."""
new_tags: List[str] = []
olid_text = str(olid or "").strip()
if not olid_text:
return []
olid_norm = olid_text
try:
if not olid_norm.startswith("OL"):
olid_norm = f"OL{olid_norm}"
if not olid_norm.endswith("M"):
olid_norm = f"{olid_norm}M"
except Exception:
olid_norm = olid_text
new_tags.append(f"openlibrary:{olid_norm}")
olid_clean = olid_text.replace("OL", "").replace("M", "")
if not olid_clean.isdigit():
olid_clean = olid_text
if not olid_text.startswith("OL"):
url = f"https://openlibrary.org/books/OL{olid_clean}M.json"
else:
url = f"https://openlibrary.org/books/{olid_text}.json"
try:
with HTTPClient() as client:
response = client.get(url)
response.raise_for_status()
data = json.loads(response.content.decode("utf-8"))
except Exception as exc:
log(f"Failed to fetch OpenLibrary metadata: {exc}", file=sys.stderr)
return []
if not isinstance(data, dict) or not data:
log(f"No OpenLibrary metadata found for: {olid_text}")
return []
if "title" in data:
new_tags.append(f"title:{data['title']}")
authors = data.get("authors")
if isinstance(authors, list):
for author in authors[:3]:
if isinstance(author, dict) and author.get("name"):
new_tags.append(f"author:{author['name']}")
continue
author_key = None
if isinstance(author, dict):
if isinstance(author.get("author"), dict):
author_key = author.get("author", {}).get("key")
if not author_key:
author_key = author.get("key")
if isinstance(author_key, str) and author_key.startswith("/"):
try:
author_url = f"https://openlibrary.org{author_key}.json"
with HTTPClient(timeout=10) as client:
author_resp = client.get(author_url)
author_resp.raise_for_status()
author_data = json.loads(author_resp.content.decode("utf-8"))
if isinstance(author_data, dict) and author_data.get("name"):
new_tags.append(f"author:{author_data['name']}")
continue
except Exception:
pass
if isinstance(author, str) and author:
new_tags.append(f"author:{author}")
if data.get("publish_date"):
new_tags.append(f"publish_date:{data['publish_date']}")
publishers = data.get("publishers")
if isinstance(publishers, list) and publishers:
pub = publishers[0]
if isinstance(pub, dict) and pub.get("name"):
new_tags.append(f"publisher:{pub['name']}")
elif isinstance(pub, str) and pub:
new_tags.append(f"publisher:{pub}")
if "description" in data:
desc = data.get("description")
if isinstance(desc, dict) and "value" in desc:
desc = desc.get("value")
if desc:
desc_str = str(desc).strip()
if desc_str:
new_tags.append(f"description:{desc_str[:200]}")
page_count = data.get("number_of_pages")
if isinstance(page_count, int) and page_count > 0:
new_tags.append(f"pages:{page_count}")
subjects = data.get("subjects")
if isinstance(subjects, list):
for subject in subjects[:10]:
if isinstance(subject, str):
subject_clean = subject.strip()
if subject_clean and subject_clean not in new_tags:
new_tags.append(subject_clean)
identifiers = data.get("identifiers")
if isinstance(identifiers, dict):
def _first(value: Any) -> Any:
if isinstance(value, list) and value:
return value[0]
return value
for key, ns in (
("isbn_10", "isbn_10"),
("isbn_13", "isbn_13"),
("lccn", "lccn"),
("oclc_numbers", "oclc"),
("goodreads", "goodreads"),
("internet_archive", "internet_archive"),
):
val = _first(identifiers.get(key))
if val:
new_tags.append(f"{ns}:{val}")
ocaid = data.get("ocaid")
if isinstance(ocaid, str) and ocaid.strip():
new_tags.append(f"internet_archive:{ocaid.strip()}")
debug(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup")
return new_tags
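# Illustrative usage sketch (not part of this commit); OL123M matches the sample
# edition id used in SAMPLE_ITEMS below. Bare digits are accepted too: "123" is
# normalized to "OL123M" before the https://openlibrary.org/books/OL123M.json lookup.
_example_olid_tags = scrape_openlibrary_metadata("OL123M")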
SAMPLE_ITEMS: List[Dict[str, Any]] = [
{
"title": "Sample OpenLibrary book",
"path": "https://openlibrary.org/books/OL123M",
"openlibrary_id": "OL123M",
"archive_id": "samplearchive123",
"availability": "borrow",
"availability_reason": "sample",
"direct_url": "https://archive.org/download/sample.pdf",
"author_name": ["OpenLibrary Demo"],
"first_publish_year": 2023,
"ia": ["samplearchive123"],
},
]
try:
from typing import Iterable
from SYS.result_table_api import ColumnSpec, ResultModel, metadata_column, title_column
from SYS.result_table_adapters import register_provider
def _ensure_search_result(item: Any) -> SearchResult:
if isinstance(item, SearchResult):
return item
if isinstance(item, dict):
data = dict(item)
title = str(data.get("title") or data.get("name") or "OpenLibrary")
path = str(data.get("path") or data.get("url") or "")
detail = str(data.get("detail") or "")
annotations = list(data.get("annotations") or [])
media_kind = str(data.get("media_kind") or "book")
return SearchResult(
table="openlibrary",
title=title,
path=path,
detail=detail,
annotations=annotations,
media_kind=media_kind,
columns=data.get("columns") or [],
full_metadata={**data, "raw": dict(item)},
)
return SearchResult(
table="openlibrary",
title=str(item or "OpenLibrary"),
path="",
detail="",
annotations=[],
media_kind="book",
full_metadata={"raw": {}},
)
def _adapter(items: Iterable[Any]) -> Iterable[ResultModel]:
for item in items:
sr = _ensure_search_result(item)
metadata = dict(getattr(sr, "full_metadata", {}) or {})
raw = metadata.get("raw")
if isinstance(raw, dict):
normalized = normalize_record(raw)
for key, val in normalized.items():
metadata.setdefault(key, val)
def _make_url() -> str:
candidate = (
metadata.get("selection_url") or
metadata.get("direct_url") or
metadata.get("url") or
metadata.get("path") or
sr.path or
""
)
return str(candidate or "").strip()
selection_url = _make_url()
if selection_url:
metadata["selection_url"] = selection_url
authors_value = metadata.get("authors_display") or metadata.get("authors") or metadata.get("author_name") or ""
if isinstance(authors_value, list):
authors_value = ", ".join(str(v) for v in authors_value if v)
authors_text = str(authors_value or "").strip()
if authors_text:
metadata["authors_display"] = authors_text
year_value = metadata.get("year") or metadata.get("first_publish_year")
if year_value and not isinstance(year_value, str):
year_value = str(year_value)
if year_value:
metadata["year"] = str(year_value)
metadata.setdefault("openlibrary_id", metadata.get("openlibrary_id") or metadata.get("olid"))
metadata.setdefault("source", metadata.get("source") or "openlibrary")
yield ResultModel(
title=str(sr.title or metadata.get("title") or selection_url or "OpenLibrary"),
path=selection_url or None,
metadata=metadata,
source="openlibrary",
)
def _columns_factory(rows: List[ResultModel]) -> List[ColumnSpec]:
cols: List[ColumnSpec] = [title_column()]
def _has(key: str) -> bool:
return any((row.metadata or {}).get(key) for row in rows)
if _has("authors_display"):
cols.append(
ColumnSpec(
"authors_display",
"Author",
lambda r: (r.metadata or {}).get("authors_display") or "",
)
)
if _has("year"):
cols.append(metadata_column("year", "Year"))
if _has("availability"):
cols.append(metadata_column("availability", "Avail"))
if _has("archive_id"):
cols.append(metadata_column("archive_id", "Archive ID"))
if _has("openlibrary_id"):
cols.append(metadata_column("openlibrary_id", "OLID"))
return cols
def _selection_fn(row: ResultModel) -> List[str]:
metadata = row.metadata or {}
url = str(metadata.get("selection_url") or row.path or "").strip()
if url:
return ["-url", url]
return ["-title", row.title or ""]
register_provider(
"openlibrary",
_adapter,
columns=_columns_factory,
selection_fn=_selection_fn,
metadata={"description": "OpenLibrary search provider (JSON result table template)"},
)
except Exception:
pass
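# Illustrative sketch (not part of this commit): feeding SAMPLE_ITEMS through the
# registered adapter yields ResultModel rows ready for the JSON result table.
if "_adapter" in globals():  # only when the optional result-table imports above succeeded
    _example_rows = list(_adapter(SAMPLE_ITEMS))
    assert _example_rows[0].source == "openlibrary"
    assert _example_rows[0].metadata["authors_display"] == "OpenLibrary Demo"
    assert _example_rows[0].metadata["year"] == "2023"
    # The row path prefers direct_url, so it points at the Archive.org download URL.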
# Registry ---------------------------------------------------------------
_METADATA_PROVIDERS: Dict[str,


@@ -11,18 +11,29 @@ import sys
import tempfile
import time
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from urllib.parse import urlparse
import requests
from API.HTTP import HTTPClient
from API.HTTP import HTTPClient, get_requests_verify_value
from ProviderCore.base import Provider, SearchResult
from SYS.utils import sanitize_filename
from SYS.cli_syntax import get_field, get_free_text, parse_query
from SYS.logger import debug, log
from Provider.metadata_provider import (
archive_item_metadata_to_tags,
fetch_archive_item_metadata,
)
from SYS.utils import unique_path
_ARCHIVE_VERIFY_VALUE = get_requests_verify_value()
def _create_archive_session() -> requests.Session:
session = requests.Session()
session.verify = _ARCHIVE_VERIFY_VALUE
return session
try:
from Crypto.Cipher import AES # type: ignore
from Crypto.Util import Counter # type: ignore
@@ -262,182 +273,6 @@ def title_hint_from_url_slug(u: str) -> str:
return slug or "OpenLibrary"
def _coerce_archive_field_list(value: Any) -> List[str]:
"""Coerce an Archive.org metadata field to a list of strings."""
if value is None:
return []
if isinstance(value, list):
out: List[str] = []
for v in value:
try:
s = str(v).strip()
except Exception:
continue
if s:
out.append(s)
return out
if isinstance(value, (tuple, set)):
out = []
for v in value:
try:
s = str(v).strip()
except Exception:
continue
if s:
out.append(s)
return out
try:
s = str(value).strip()
except Exception:
return []
return [s] if s else []
def _archive_item_metadata_to_tags(archive_id: str,
item_metadata: Dict[str,
Any]) -> List[str]:
"""Map Archive.org metadata JSON (the `metadata` object) to tag strings.
This is intentionally best-effort and conservative: it focuses on stable,
useful bibliographic fields (title/author/publisher/ISBN/identifier/topics).
"""
archive_id_clean = str(archive_id or "").strip()
meta = item_metadata if isinstance(item_metadata,
dict) else {}
tags: List[str] = []
seen: set[str] = set()
def _add(tag: str) -> None:
try:
t = str(tag).strip()
except Exception:
return
if not t:
return
if t.lower() in seen:
return
seen.add(t.lower())
tags.append(t)
if archive_id_clean:
_add(f"internet_archive:{archive_id_clean}")
# Title
for title in _coerce_archive_field_list(meta.get("title"))[:1]:
_add(f"title:{title}")
# Authors/creators
creators: List[str] = []
creators.extend(_coerce_archive_field_list(meta.get("creator")))
creators.extend(_coerce_archive_field_list(meta.get("author")))
for creator in creators[:3]:
_add(f"author:{creator}")
# Publisher
for publisher in _coerce_archive_field_list(meta.get("publisher"))[:3]:
_add(f"publisher:{publisher}")
# Publish date/year
for date_val in _coerce_archive_field_list(meta.get("date"))[:1]:
_add(f"publish_date:{date_val}")
for year_val in _coerce_archive_field_list(meta.get("year"))[:1]:
_add(f"publish_date:{year_val}")
# Language
for lang in _coerce_archive_field_list(meta.get("language"))[:3]:
_add(f"language:{lang}")
# Topics/subjects: follow existing OpenLibrary behavior (un-namespaced tags)
for subj in _coerce_archive_field_list(meta.get("subject"))[:15]:
if len(subj) > 200:
subj = subj[:200]
_add(subj)
# ISBNs and identifiers
def _clean_isbn(raw: str) -> str:
return str(raw or "").replace("-", "").strip()
for isbn in _coerce_archive_field_list(meta.get("isbn"))[:10]:
isbn_clean = _clean_isbn(isbn)
if isbn_clean:
_add(f"isbn:{isbn_clean}")
identifiers: List[str] = []
identifiers.extend(_coerce_archive_field_list(meta.get("identifier")))
identifiers.extend(_coerce_archive_field_list(meta.get("external-identifier")))
added_other = 0
for ident in identifiers:
ident_s = str(ident or "").strip()
if not ident_s:
continue
low = ident_s.lower()
if low.startswith("urn:isbn:"):
val = _clean_isbn(ident_s.split(":", 2)[-1])
if val:
_add(f"isbn:{val}")
continue
if low.startswith("isbn:"):
val = _clean_isbn(ident_s.split(":", 1)[-1])
if val:
_add(f"isbn:{val}")
continue
if low.startswith("urn:oclc:"):
val = ident_s.split(":", 2)[-1].strip()
if val:
_add(f"oclc:{val}")
continue
if low.startswith("oclc:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"oclc:{val}")
continue
if low.startswith("urn:lccn:"):
val = ident_s.split(":", 2)[-1].strip()
if val:
_add(f"lccn:{val}")
continue
if low.startswith("lccn:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"lccn:{val}")
continue
if low.startswith("doi:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"doi:{val}")
continue
if archive_id_clean and low == archive_id_clean.lower():
continue
if added_other >= 5:
continue
if len(ident_s) > 200:
ident_s = ident_s[:200]
_add(f"identifier:{ident_s}")
added_other += 1
return tags
def _fetch_archive_item_metadata(archive_id: str,
*,
timeout: int = 8) -> Dict[str,
Any]:
ident = str(archive_id or "").strip()
if not ident:
return {}
resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=int(timeout))
resp.raise_for_status()
data = resp.json() if resp is not None else {}
if not isinstance(data, dict):
return {}
meta = data.get("metadata")
return meta if isinstance(meta,
dict) else {}
class OpenLibrary(Provider):
TABLE_AUTO_STAGES = {
@@ -466,7 +301,7 @@ class OpenLibrary(Provider):
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
self._session = requests.Session()
self._session = _create_archive_session()
class BookNotAvailableError(Exception):
"""Raised when a book is not available for borrowing (waitlisted/in use)."""
@@ -612,7 +447,7 @@ class OpenLibrary(Provider):
@classmethod
def _archive_login(cls, email: str, password: str) -> requests.Session:
"""Login to archive.org using the token-based services endpoint (matches test-login.py)."""
session = requests.Session()
session = _create_archive_session()
token_resp = session.get(
"https://archive.org/services/account/login/",
@@ -766,7 +601,11 @@ class OpenLibrary(Provider):
if not ident:
return False, "no-archive-id"
try:
resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=8)
resp = requests.get(
f"https://archive.org/metadata/{ident}",
timeout=8,
verify=_ARCHIVE_VERIFY_VALUE,
)
resp.raise_for_status()
data = resp.json() if resp is not None else {}
meta = data.get("metadata",
@@ -976,7 +815,11 @@ class OpenLibrary(Provider):
"""Check for a directly downloadable original PDF in Archive.org metadata."""
try:
metadata_url = f"https://archive.org/metadata/{book_id}"
response = requests.get(metadata_url, timeout=6)
response = requests.get(
metadata_url,
timeout=6,
verify=_ARCHIVE_VERIFY_VALUE,
)
response.raise_for_status()
metadata = response.json()
files = metadata.get("files") if isinstance(metadata, dict) else None
@@ -993,7 +836,8 @@ class OpenLibrary(Provider):
check_response = requests.head(
pdf_url,
timeout=4,
allow_redirects=True
allow_redirects=True,
verify=_ARCHIVE_VERIFY_VALUE,
)
if check_response.status_code == 200:
return True, pdf_url
@@ -1001,235 +845,6 @@ class OpenLibrary(Provider):
except Exception:
return False, ""
@staticmethod
def scrape_isbn_metadata(isbn: str) -> List[str]:
"""Scrape tags for an ISBN using Open Library API.
Returns tags such as:
- title:<...>, author:<...>, publish_date:<...>, publisher:<...>, description:<...>, pages:<...>
- identifiers: openlibrary:<...>, lccn:<...>, oclc:<...>, goodreads:<...>, librarything:<...>, doi:<...>, internet_archive:<...>
"""
new_tags: List[str] = []
isbn_clean = str(isbn or "").replace("isbn:", "").replace("-", "").strip()
if not isbn_clean:
return []
url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"
try:
with HTTPClient() as client:
response = client.get(url)
response.raise_for_status()
data = json_module.loads(response.content.decode("utf-8"))
except Exception as exc:
log(f"Failed to fetch ISBN metadata: {exc}", file=sys.stderr)
return []
if not data:
log(f"No ISBN metadata found for: {isbn}")
return []
book_data = next(iter(data.values()), None)
if not isinstance(book_data, dict):
return []
if "title" in book_data:
new_tags.append(f"title:{book_data['title']}")
authors = book_data.get("authors")
if isinstance(authors, list):
for author in authors[:3]:
if isinstance(author, dict) and author.get("name"):
new_tags.append(f"author:{author['name']}")
if book_data.get("publish_date"):
new_tags.append(f"publish_date:{book_data['publish_date']}")
publishers = book_data.get("publishers")
if isinstance(publishers, list) and publishers:
pub = publishers[0]
if isinstance(pub, dict) and pub.get("name"):
new_tags.append(f"publisher:{pub['name']}")
if "description" in book_data:
desc = book_data.get("description")
if isinstance(desc, dict) and "value" in desc:
desc = desc.get("value")
if desc:
desc_str = str(desc).strip()
if desc_str:
new_tags.append(f"description:{desc_str[:200]}")
page_count = book_data.get("number_of_pages")
if isinstance(page_count, int) and page_count > 0:
new_tags.append(f"pages:{page_count}")
identifiers = book_data.get("identifiers")
if isinstance(identifiers, dict):
def _first(value: Any) -> Any:
if isinstance(value, list) and value:
return value[0]
return value
for key, ns in (
("openlibrary", "openlibrary"),
("lccn", "lccn"),
("oclc", "oclc"),
("goodreads", "goodreads"),
("librarything", "librarything"),
("doi", "doi"),
("internet_archive", "internet_archive"),
):
val = _first(identifiers.get(key))
if val:
new_tags.append(f"{ns}:{val}")
debug(f"Found {len(new_tags)} tag(s) from ISBN lookup")
return new_tags
@staticmethod
def scrape_openlibrary_metadata(olid: str) -> List[str]:
"""Scrape tags for an OpenLibrary ID using the .json API endpoint."""
new_tags: List[str] = []
olid_text = str(olid or "").strip()
if not olid_text:
return []
# Normalize OLID to the common "OL<digits>M" form when possible.
olid_norm = olid_text
try:
if not olid_norm.startswith("OL"):
olid_norm = f"OL{olid_norm}"
if not olid_norm.endswith("M"):
olid_norm = f"{olid_norm}M"
except Exception:
olid_norm = olid_text
# Ensure we always include a scrapeable identifier tag.
new_tags.append(f"openlibrary:{olid_norm}")
# Accept OL9674499M, 9674499M, or just digits.
olid_clean = olid_text.replace("OL", "").replace("M", "")
if not olid_clean.isdigit():
olid_clean = olid_text
if not olid_text.startswith("OL"):
url = f"https://openlibrary.org/books/OL{olid_clean}M.json"
else:
url = f"https://openlibrary.org/books/{olid_text}.json"
try:
with HTTPClient() as client:
response = client.get(url)
response.raise_for_status()
data = json_module.loads(response.content.decode("utf-8"))
except Exception as exc:
log(f"Failed to fetch OpenLibrary metadata: {exc}", file=sys.stderr)
return []
if not isinstance(data, dict) or not data:
log(f"No OpenLibrary metadata found for: {olid_text}")
return []
if "title" in data:
new_tags.append(f"title:{data['title']}")
authors = data.get("authors")
if isinstance(authors, list):
for author in authors[:3]:
if isinstance(author, dict) and author.get("name"):
new_tags.append(f"author:{author['name']}")
continue
# Common OL shape: {"key": "/authors/OL...A"} or {"author": {"key": ...}}
author_key = None
if isinstance(author, dict):
if isinstance(author.get("author"), dict):
author_key = author.get("author",
{}).get("key")
if not author_key:
author_key = author.get("key")
if isinstance(author_key, str) and author_key.startswith("/"):
try:
author_url = f"https://openlibrary.org{author_key}.json"
with HTTPClient(timeout=10) as client:
author_resp = client.get(author_url)
author_resp.raise_for_status()
author_data = json_module.loads(
author_resp.content.decode("utf-8")
)
if isinstance(author_data, dict) and author_data.get("name"):
new_tags.append(f"author:{author_data['name']}")
continue
except Exception:
pass
if isinstance(author, str) and author:
new_tags.append(f"author:{author}")
if data.get("publish_date"):
new_tags.append(f"publish_date:{data['publish_date']}")
publishers = data.get("publishers")
if isinstance(publishers, list) and publishers:
pub = publishers[0]
if isinstance(pub, dict) and pub.get("name"):
new_tags.append(f"publisher:{pub['name']}")
elif isinstance(pub, str) and pub:
new_tags.append(f"publisher:{pub}")
if "description" in data:
desc = data.get("description")
if isinstance(desc, dict) and "value" in desc:
desc = desc.get("value")
if desc:
desc_str = str(desc).strip()
if desc_str:
new_tags.append(f"description:{desc_str[:200]}")
page_count = data.get("number_of_pages")
if isinstance(page_count, int) and page_count > 0:
new_tags.append(f"pages:{page_count}")
subjects = data.get("subjects")
if isinstance(subjects, list):
for subject in subjects[:10]:
if isinstance(subject, str):
subject_clean = subject.strip()
if subject_clean and subject_clean not in new_tags:
new_tags.append(subject_clean)
identifiers = data.get("identifiers")
if isinstance(identifiers, dict):
def _first(value: Any) -> Any:
if isinstance(value, list) and value:
return value[0]
return value
for key, ns in (
("isbn_10", "isbn_10"),
("isbn_13", "isbn_13"),
("lccn", "lccn"),
("oclc_numbers", "oclc"),
("goodreads", "goodreads"),
("internet_archive", "internet_archive"),
):
val = _first(identifiers.get(key))
if val:
new_tags.append(f"{ns}:{val}")
# Some editions expose a direct Archive.org identifier as "ocaid".
ocaid = data.get("ocaid")
if isinstance(ocaid, str) and ocaid.strip():
new_tags.append(f"internet_archive:{ocaid.strip()}")
debug(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup")
return new_tags
def search(
self,
query: str,
@@ -1293,7 +908,7 @@ class OpenLibrary(Provider):
ia_val_local = []
ia_ids_local = [str(x) for x in ia_val_local if x]
session_local = requests.Session()
session_local = _create_archive_session()
try:
archive_id_local = _resolve_archive_id(
@@ -1423,19 +1038,38 @@ class OpenLibrary(Provider):
"borrow"}:
annotations.append(availability)
book_path = (
f"https://openlibrary.org/books/{edition_id}" if edition_id else
(
f"https://openlibrary.org{work_key}"
if isinstance(work_key, str) and work_key.startswith("/") else
"https://openlibrary.org"
)
)
metadata = {
"openlibrary_id": edition_id,
"openlibrary_key": work_key,
"authors": authors_list,
"year": year,
"isbn_10": isbn_10,
"isbn_13": isbn_13,
"ia": ia_ids,
"availability": availability,
"availability_reason": availability_reason,
"archive_id": archive_id,
"direct_url": direct_url,
"raw": doc,
}
if book_path:
metadata["selection_url"] = book_path
metadata["_selection_args"] = ["-url", book_path]
metadata["_selection_action"] = ["download-file", "-url", book_path]
results.append(
SearchResult(
table="openlibrary",
title=book_title,
path=(
f"https://openlibrary.org/books/{edition_id}" if edition_id else
(
f"https://openlibrary.org{work_key}"
if isinstance(work_key,
str) and work_key.startswith("/") else
"https://openlibrary.org"
)
),
path=book_path,
detail=(
(f"By: {', '.join(authors_list)}" if authors_list else "") +
(f" ({year})" if year else "")
@@ -1443,20 +1077,7 @@ class OpenLibrary(Provider):
annotations=annotations,
media_kind="book",
columns=columns,
full_metadata={
"openlibrary_id": edition_id,
"openlibrary_key": work_key,
"authors": authors_list,
"year": year,
"isbn_10": isbn_10,
"isbn_13": isbn_13,
"ia": ia_ids,
"availability": availability,
"availability_reason": availability_reason,
"archive_id": archive_id,
"direct_url": direct_url,
"raw": doc,
},
full_metadata=metadata,
)
)
@@ -1507,8 +1128,8 @@ class OpenLibrary(Provider):
# Best-effort metadata scrape to attach bibliographic tags for downstream cmdlets.
try:
archive_meta = _fetch_archive_item_metadata(archive_id)
tags = _archive_item_metadata_to_tags(archive_id, archive_meta)
archive_meta = fetch_archive_item_metadata(archive_id)
tags = archive_item_metadata_to_tags(archive_id, archive_meta)
if tags:
try:
result.tag.update(tags)

SYS/json_table.py Normal file

@@ -0,0 +1,110 @@
"""Helper utilities for normalizing JSON result tables.
This mirrors the intent of the existing `SYS.html_table` helper but operates on
JSON payloads (API responses, JSON documents, etc.). It exposes:
- `extract_records` for locating and normalizing the first list of record dicts
from a JSON document.
- `normalize_record` for coercing arbitrary values into printable strings.
These helpers make it easy for providers that consume JSON to populate
`ResultModel` metadata without hand-writing ad-hoc sanitizers.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional, Sequence, Tuple
_DEFAULT_LIST_KEYS: Tuple[str, ...] = ("results", "items", "docs", "records")
def _coerce_value(value: Any) -> str:
"""Convert a JSON value into a compact string representation."""
if value is None:
return ""
if isinstance(value, bool):
return "true" if value else "false"
if isinstance(value, (list, tuple, set)):
parts = [_coerce_value(v) for v in value]
cleaned = [part for part in parts if part]
return ", ".join(cleaned)
if isinstance(value, dict):
parts: List[str] = []
for subkey, subvalue in value.items():
part = _coerce_value(subvalue)
if part:
parts.append(f"{subkey}:{part}")
return ", ".join(parts)
try:
return str(value).strip()
except Exception:
return ""
def normalize_record(record: Dict[str, Any]) -> Dict[str, str]:
"""Return a copy of ``record`` with keys lowered and values coerced to strings."""
out: Dict[str, str] = {}
if not isinstance(record, dict):
return out
for key, value in record.items():
normalized_key = str(key or "").strip().lower()
if not normalized_key:
continue
normalized_value = _coerce_value(value)
if normalized_value:
out[normalized_key] = normalized_value
return out
def _traverse(data: Any, path: Sequence[str]) -> Optional[Any]:
current = data
for key in path:
if not isinstance(current, dict):
return None
current = current.get(key)
return current
def extract_records(
data: Any,
*,
path: Optional[Sequence[str]] = None,
list_keys: Optional[Sequence[str]] = None,
) -> Tuple[List[Dict[str, str]], Optional[str]]:
"""Extract normalized record dicts from ``data``.
Args:
data: JSON document (dict/list) that may contain tabular records.
path: optional key path to traverse before looking for a list.
list_keys: candidate keys to inspect when ``path`` is not provided.
Returns:
(records, chosen_path) where ``records`` is the list of normalized dicts
and ``chosen_path`` is either the traversed path or the key that matched.
"""
list_keys = list_keys or _DEFAULT_LIST_KEYS
chosen_path: Optional[str] = None
candidates: List[Any] = []
if path:
found = _traverse(data, path)
if isinstance(found, list):
candidates = found
chosen_path = ".".join(path)
if not candidates and isinstance(data, dict):
for key in list_keys:
found = data.get(key)
if isinstance(found, list):
candidates = found
chosen_path = key
break
if not candidates and isinstance(data, list):
candidates = data
chosen_path = ""
records: List[Dict[str, str]] = []
for entry in candidates:
if isinstance(entry, dict):
records.append(normalize_record(entry))
return records, chosen_path
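
A short usage sketch for the new helpers, using a made-up search-style payload (not part of this commit):

from SYS.json_table import extract_records

payload = {
    "docs": [
        {"Title": "Sample Book", "year": 2023, "authors": ["A. Writer"]},
        {"Title": "Another", "year": None},
    ]
}

records, chosen = extract_records(payload)
# chosen == "docs"
# records[0] == {"title": "Sample Book", "year": "2023", "authors": "A. Writer"}
# records[1] == {"title": "Another"}  (empty/None values are dropped)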


@@ -783,56 +783,56 @@ class ResultTable:
def _add_search_result(self, row: ResultRow, result: Any) -> None:
"""Extract and add SearchResult fields to row."""
# If provider supplied explicit columns, render those and skip legacy defaults
cols = getattr(result, "columns", None)
used_explicit_columns = False
if cols:
used_explicit_columns = True
for name, value in cols:
row.add_column(name, value)
return
else:
# Core fields (legacy fallback)
title = getattr(result, "title", "")
table = str(getattr(result, "table", "") or "").lower()
# Core fields (legacy fallback)
title = getattr(result, "title", "")
table = str(getattr(result, "table", "") or "").lower()
# Handle extension separation for local files
extension = ""
if title and table == "local":
path_obj = Path(title)
if path_obj.suffix:
extension = path_obj.suffix.lstrip(".")
title = path_obj.stem
# Handle extension separation for local files
extension = ""
if title and table == "local":
path_obj = Path(title)
if path_obj.suffix:
extension = path_obj.suffix.lstrip(".")
title = path_obj.stem
if title:
row.add_column("Title", title)
if title:
row.add_column("Title", title)
# Extension column
row.add_column("Ext", extension)
# Extension column
row.add_column("Ext", extension)
if hasattr(result, "table") and getattr(result, "table", None):
row.add_column("Source", str(getattr(result, "table")))
if hasattr(result, "table") and getattr(result, "table", None):
row.add_column("Source", str(getattr(result, "table")))
if hasattr(result, "detail") and result.detail:
row.add_column("Detail", result.detail)
if hasattr(result, "detail") and result.detail:
row.add_column("Detail", result.detail)
if hasattr(result, "media_kind") and result.media_kind:
row.add_column("Type", result.media_kind)
if hasattr(result, "media_kind") and result.media_kind:
row.add_column("Type", result.media_kind)
# Tag summary
if hasattr(result, "tag_summary") and result.tag_summary:
row.add_column("Tag", str(result.tag_summary))
# Tag summary
if hasattr(result, "tag_summary") and result.tag_summary:
row.add_column("Tag", str(result.tag_summary))
# Duration (for media)
if hasattr(result, "duration_seconds") and result.duration_seconds:
dur = _format_duration_hms(result.duration_seconds)
row.add_column("Duration", dur or str(result.duration_seconds))
# Duration (for media)
if hasattr(result, "duration_seconds") and result.duration_seconds:
dur = _format_duration_hms(result.duration_seconds)
row.add_column("Duration", dur or str(result.duration_seconds))
# Size (for files)
if hasattr(result, "size_bytes") and result.size_bytes:
row.add_column("Size", _format_size(result.size_bytes, integer_only=False))
# Size (for files)
if hasattr(result, "size_bytes") and result.size_bytes:
row.add_column("Size", _format_size(result.size_bytes, integer_only=False))
# Annotations
if hasattr(result, "annotations") and result.annotations:
row.add_column("Annotations", ", ".join(str(a) for a in result.annotations))
# Annotations
if hasattr(result, "annotations") and result.annotations:
row.add_column("Annotations", ", ".join(str(a) for a in result.annotations))
try:
md = getattr(result, "full_metadata", None)

View File

@@ -61,15 +61,22 @@ class Provider:
def serialize_row(self, row: ResultModel) -> Dict[str, Any]:
r = ensure_result_model(row)
metadata = r.metadata or {}
out: Dict[str, Any] = {
"title": r.title,
"path": r.path,
"ext": r.ext,
"size_bytes": r.size_bytes,
"metadata": metadata,
"source": r.source or self.name,
"_selection_args": self.selection_args(r),
}
selection_action = metadata.get("_selection_action") or metadata.get("selection_action")
if selection_action:
out["_selection_action"] = [
str(x) for x in selection_action if x is not None
]
return out
def serialize_rows(self, rows: Iterable[ResultModel]) -> List[Dict[str, Any]]:
return [self.serialize_row(r) for r in rows]

View File

@@ -30,6 +30,7 @@ from SYS.result_table import ResultTable
from SYS.rich_display import stderr_console as get_stderr_console
from SYS import pipeline as pipeline_context
from SYS.utils import sha256_file
from SYS.metadata import normalize_urls as normalize_url_list
from rich.prompt import Confirm
from tool.ytdlp import (
@@ -125,243 +126,6 @@ class Download_File(Cmdlet):
debug(f"[download-file] run invoked with args: {list(args)}")
return self._run_impl(result, args, config)
@staticmethod
def _normalize_urls(parsed: Dict[str, Any]) -> List[str]:
raw_url = parsed.get("url", [])
if isinstance(raw_url, str):
raw_url = [raw_url]
expanded_urls: List[str] = []
for u in raw_url or []:
if u is None:
continue
s = str(u).strip()
if not s:
continue
if "," in s:
parts = [p.strip() for p in s.split(",")]
expanded_urls.extend([p for p in parts if p])
else:
expanded_urls.append(s)
return expanded_urls
@staticmethod
def _rewrite_archive_org_urls(raw_urls: Sequence[str]) -> List[str]:
"""Rewrite Archive.org URLs using metadata JSON to pick the right flow.
- /metadata/<id>:
- if lendable (collection contains inlibrary or lendinglibrary) -> /borrow/<id>
- else -> /details/<id>
- /details/<id>:
- if lendable -> /borrow/<id>
This makes `download-file` do the right thing for borrow-only items.
"""
out: List[str] = []
for u in list(raw_urls or []):
s = str(u or "").strip()
if not s:
continue
try:
p = urlparse(s)
host = (p.hostname or "").strip().lower()
path = (p.path or "").strip()
except Exception:
out.append(s)
continue
if not host or (host != "archive.org" and not host.endswith(".archive.org")):
out.append(s)
continue
low_path = path.lower().strip()
if not (low_path.startswith("/metadata/") or low_path.startswith("/details/")):
out.append(s)
continue
parts = [x for x in path.split("/") if x]
if len(parts) < 2:
out.append(s)
continue
head = str(parts[0] or "").strip().lower()
archive_id = str(parts[1] or "").strip()
if head not in {"metadata", "details"} or not archive_id:
out.append(s)
continue
lendable = False
try:
meta_url = f"https://archive.org/metadata/{archive_id}"
resp = requests.get(meta_url, timeout=8)
resp.raise_for_status()
data = resp.json() if resp is not None else {}
meta = data.get("metadata", {}) if isinstance(data, dict) else {}
collection = meta.get("collection") if isinstance(meta, dict) else None
values: List[str] = []
if isinstance(collection, list):
values = [str(x).strip().lower() for x in collection if str(x).strip()]
elif isinstance(collection, str):
values = [collection.strip().lower()] if collection.strip() else []
lendable = any(v in {"inlibrary", "lendinglibrary"} for v in values)
except Exception:
lendable = False
if lendable:
debug(f"[download-file] archive.org item '{archive_id}' looks lendable; using borrow flow")
out.append(f"https://archive.org/borrow/{archive_id}")
continue
# Non-lendable: turn metadata URLs into details URLs so IA picker can show files.
if head == "metadata":
out.append(f"https://archive.org/details/{archive_id}")
continue
out.append(s)
return out
@staticmethod
def _collect_piped_items_if_no_urls(result: Any,
raw_urls: Sequence[str]) -> List[Any]:
if raw_urls:
return []
if isinstance(result, list):
return list(result)
if result:
return [result]
return []
@staticmethod
def _safe_total_items(raw_urls: Sequence[str], piped_items: Sequence[Any]) -> int:
try:
return int(len(raw_urls or []) + len(piped_items or []))
except Exception:
return 1
@staticmethod
def _build_preview(
raw_urls: Sequence[str],
piped_items: Sequence[Any],
total_items: int
) -> List[Any]:
try:
preview: List[Any] = []
preview.extend(list(raw_urls or [])[:max(0, total_items)])
if len(preview) < total_items:
preview.extend(
list(piped_items or [])[:max(0,
total_items - len(preview))]
)
return preview
except Exception:
return []
@staticmethod
def _load_provider_registry() -> Dict[str, Any]:
try:
from ProviderCore.registry import (
get_search_provider as _get_search_provider,
get_provider as _get_provider,
match_provider_name_for_url as _match_provider_name_for_url,
SearchResult as _SearchResult,
)
return {
"get_search_provider": _get_search_provider,
"get_provider": _get_provider,
"match_provider_name_for_url": _match_provider_name_for_url,
"SearchResult": _SearchResult,
}
except Exception:
return {
"get_search_provider": None,
"get_provider": None,
"match_provider_name_for_url": None,
"SearchResult": None,
}
@staticmethod
def _path_from_download_result(result_obj: Any) -> Path:
file_path = None
if hasattr(result_obj, "path"):
file_path = getattr(result_obj, "path")
elif isinstance(result_obj, dict):
file_path = result_obj.get("path")
if not file_path:
file_path = str(result_obj)
return Path(str(file_path))
def _emit_local_file(
self,
*,
downloaded_path: Path,
source: Optional[str],
title_hint: Optional[str],
tags_hint: Optional[List[str]],
media_kind_hint: Optional[str],
full_metadata: Optional[Dict[str,
Any]],
progress: PipelineProgress,
config: Dict[str,
Any],
provider_hint: Optional[str] = None,
) -> None:
title_val = (title_hint or downloaded_path.stem
or "Unknown").strip() or downloaded_path.stem
hash_value = self._compute_file_hash(downloaded_path)
notes: Optional[Dict[str, str]] = None
try:
if isinstance(full_metadata, dict):
subtitles = full_metadata.get("_tidal_lyrics_subtitles")
if isinstance(subtitles, str) and subtitles.strip():
notes = {"lyric": subtitles}
except Exception:
notes = None
tag: List[str] = []
if tags_hint:
tag.extend([str(t) for t in tags_hint if t])
if not any(str(t).lower().startswith("title:") for t in tag):
tag.insert(0, f"title:{title_val}")
payload: Dict[str,
Any] = {
"path": str(downloaded_path),
"hash": hash_value,
"title": title_val,
"action": "cmdlet:download-file",
"download_mode": "file",
"store": "local",
"media_kind": media_kind_hint or "file",
"tag": tag,
}
if provider_hint:
payload["provider"] = str(provider_hint)
if full_metadata:
payload["full_metadata"] = full_metadata
if notes:
payload["notes"] = notes
if source and str(source).startswith("http"):
payload["url"] = source
elif source:
payload["source_url"] = source
pipeline_context.emit(payload)
# When running with a local progress UI (standalone cmdlet), ensure
# the pipe advances on emit.
progress.on_emit(payload)
# Automatically register url with local library
if payload.get("url"):
pipe_obj = coerce_to_pipe_object(payload)
register_url_with_local_library(pipe_obj, config)
def _process_explicit_urls(
self,
*,
@@ -373,6 +137,7 @@ class Download_File(Cmdlet):
registry: Dict[str,
Any],
progress: PipelineProgress,
context_items: Sequence[Any] = (),
) -> tuple[int,
Optional[int]]:
downloaded_count = 0
@@ -381,6 +146,12 @@ class Download_File(Cmdlet):
get_provider = registry.get("get_provider")
match_provider_name_for_url = registry.get("match_provider_name_for_url")
context_items_list: List[Any]
try:
context_items_list = list(context_items) if context_items else []
except Exception:
context_items_list = []
for url in raw_urls:
try:
debug(f"Processing URL: {url}")
@@ -521,14 +292,15 @@ class Download_File(Cmdlet):
if provider_name and get_provider is not None and SearchResult is not None:
# OpenLibrary URLs should be handled by the OpenLibrary provider.
if provider_name == "openlibrary":
url_str = str(url).strip()
provider = get_provider("openlibrary", config)
if provider is None:
raise DownloadError(
"OpenLibrary provider not configured or not available"
)
edition_id = ol_provider.edition_id_from_url(url_str)
title_hint = ol_provider.title_hint_from_url_slug(url_str)
download_payload: Optional[Dict[str, Any]] = None
try:
@@ -596,9 +368,95 @@ class Download_File(Cmdlet):
progress_cb = _progress
if hasattr(provider, "download_url"):
# Prefer piped OpenLibrary context (selection row) when present so we keep
# resolved metadata like archive_id and availability.
ctx_item = None
ctx_md: Dict[str, Any] = {}
ctx_title: Optional[str] = None
ctx_tags: Optional[List[str]] = None
ctx_media_kind: Optional[str] = None
for candidate in context_items_list:
try:
table_val = get_field(candidate, "table")
except Exception:
table_val = None
if str(table_val or "").lower() != "openlibrary":
continue
md_val = get_field(candidate, "full_metadata")
md_dict = md_val if isinstance(md_val, dict) else {}
cand_olid = str(md_dict.get("openlibrary_id") or md_dict.get("olid") or "").strip()
cand_archive = str(md_dict.get("archive_id") or "").strip()
cand_url = str(
get_field(candidate, "path")
or get_field(candidate, "url")
or md_dict.get("selection_url")
or ""
).strip()
matched = False
if edition_id and cand_olid and cand_olid == edition_id:
matched = True
elif cand_url and url_str and cand_url == url_str:
matched = True
elif (not edition_id) and cand_archive and cand_archive in url_str:
matched = True
if matched:
ctx_item = candidate
ctx_md = md_dict
ctx_title = get_field(candidate, "title")
ctx_media_kind = get_field(candidate, "media_kind")
tags_val = get_field(candidate, "tag")
if isinstance(tags_val, list):
ctx_tags = [str(t) for t in tags_val if t]
break
if ctx_item is not None and SearchResult is not None:
sr_meta = dict(ctx_md) if isinstance(ctx_md, dict) else {}
if edition_id and not sr_meta.get("openlibrary_id"):
sr_meta["openlibrary_id"] = edition_id
sr_title = str(ctx_title or title_hint or "").strip() or title_hint
sr_media_kind = str(ctx_media_kind or "book")
sr_obj = (
ctx_item
if isinstance(ctx_item, SearchResult)
else SearchResult(
table="openlibrary",
title=sr_title,
path=url_str,
media_kind=sr_media_kind,
full_metadata=sr_meta,
)
)
try:
sr_obj.path = url_str # type: ignore[attr-defined]
except Exception:
pass
try:
if ctx_tags:
sr_obj.tag = set(ctx_tags) # type: ignore[attr-defined]
except Exception:
pass
downloaded_path = provider.download(
sr_obj,
final_output_dir,
progress_callback=progress_cb
) # type: ignore[call-arg]
if downloaded_path:
download_payload = {
"path": Path(downloaded_path),
"search_result": sr_obj,
}
if download_payload is None and hasattr(provider, "download_url"):
download_payload = provider.download_url( # type: ignore[attr-defined]
url_str,
final_output_dir,
progress_cb,
)
@@ -606,12 +464,12 @@ class Download_File(Cmdlet):
if download_payload is None:
sr = None
if hasattr(provider, "search_result_from_url"):
sr = provider.search_result_from_url(url_str) # type: ignore[attr-defined]
if sr is None:
sr = SearchResult(
table="openlibrary",
title=title_hint,
path=url_str,
media_kind="book",
full_metadata={
"openlibrary_id": edition_id,
@@ -811,6 +669,97 @@ class Download_File(Cmdlet):
downloaded_count += 1
continue
if provider_name and get_provider is not None and SearchResult is not None:
provider = get_provider(provider_name, config)
if provider is not None and hasattr(provider, "download_url"):
try:
downloaded_path = provider.download_url(
str(url),
final_output_dir
) # type: ignore[attr-defined]
except Exception as exc:
raise DownloadError(str(exc))
if downloaded_path:
self._emit_local_file(
downloaded_path=Path(downloaded_path),
source=str(url),
title_hint=Path(str(downloaded_path)).stem,
tags_hint=None,
media_kind_hint="file",
full_metadata=None,
provider_hint=str(provider_name),
progress=progress,
config=config,
)
downloaded_count += 1
continue
if provider is not None:
sr_obj = None
try:
sr_obj = SearchResult(
table=str(provider_name),
title=str(url),
path=str(url),
full_metadata={},
)
downloaded_path = provider.download(
sr_obj,
final_output_dir
) # type: ignore[call-arg]
except Exception:
downloaded_path = None
if (not downloaded_path
) and str(provider_name).lower() == "libgen":
raise DownloadError(
"LibGen URL did not resolve to a downloadable file"
)
if downloaded_path:
emit_tags: Optional[List[str]] = None
full_md: Optional[Dict[str, Any]] = None
title_hint = Path(str(downloaded_path)).stem
media_kind_hint = "file"
if str(provider_name
).lower() == "libgen" and sr_obj is not None:
media_kind_hint = "book"
try:
sr_tags = getattr(sr_obj, "tag", None)
if isinstance(sr_tags, set) and sr_tags:
emit_tags = sorted(
[str(t) for t in sr_tags if t]
)
except Exception:
emit_tags = None
try:
sr_full_md = getattr(sr_obj, "full_metadata", None)
if isinstance(sr_full_md, dict):
full_md = sr_full_md
t = str(sr_full_md.get("title") or "").strip()
if t:
title_hint = t
except Exception:
full_md = None
self._emit_local_file(
downloaded_path=Path(downloaded_path),
source=str(url),
title_hint=title_hint,
tags_hint=emit_tags,
media_kind_hint=media_kind_hint,
full_metadata=full_md,
provider_hint=str(provider_name),
progress=progress,
config=config,
)
downloaded_count += 1
continue
result_obj = _download_direct_file(
str(url),
final_output_dir,
@@ -1237,6 +1186,170 @@ class Download_File(Cmdlet):
return downloaded_count, queued_magnet_submissions
@staticmethod
def _path_from_download_result(result_obj: Any) -> Path:
file_path = None
if hasattr(result_obj, "path"):
file_path = getattr(result_obj, "path")
elif isinstance(result_obj, dict):
file_path = result_obj.get("path")
if not file_path:
file_path = str(result_obj)
return Path(str(file_path))
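As an illustration of the fallbacks above (hypothetical inputs, not from this commit): an object with a path attribute, a mapping with a "path" key, or any other value, which is stringified as a last resort.
# Illustrative only; assumes Download_File and pathlib are importable in this module.
from pathlib import Path

class _FakeDownload:
    path = "/tmp/example.bin"

assert Download_File._path_from_download_result(_FakeDownload()) == Path("/tmp/example.bin")
assert Download_File._path_from_download_result({"path": "/tmp/other.bin"}) == Path("/tmp/other.bin")
assert Download_File._path_from_download_result("/tmp/plain.bin") == Path("/tmp/plain.bin")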
def _emit_local_file(
self,
*,
downloaded_path: Path,
source: Optional[str],
title_hint: Optional[str],
tags_hint: Optional[List[str]],
media_kind_hint: Optional[str],
full_metadata: Optional[Dict[str, Any]],
progress: PipelineProgress,
config: Dict[str, Any],
provider_hint: Optional[str] = None,
) -> None:
title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem
hash_value = self._compute_file_hash(downloaded_path)
notes: Optional[Dict[str, str]] = None
try:
if isinstance(full_metadata, dict):
subtitles = full_metadata.get("_tidal_lyrics_subtitles")
if isinstance(subtitles, str) and subtitles.strip():
notes = {"lyric": subtitles}
except Exception:
notes = None
tag: List[str] = []
if tags_hint:
tag.extend([str(t) for t in tags_hint if t])
if not any(str(t).lower().startswith("title:") for t in tag):
tag.insert(0, f"title:{title_val}")
payload: Dict[str, Any] = {
"path": str(downloaded_path),
"hash": hash_value,
"title": title_val,
"action": "cmdlet:download-file",
"download_mode": "file",
"store": "local",
"media_kind": media_kind_hint or "file",
"tag": tag,
}
if provider_hint:
payload["provider"] = str(provider_hint)
if full_metadata:
payload["full_metadata"] = full_metadata
if notes:
payload["notes"] = notes
if source and str(source).startswith("http"):
payload["url"] = source
elif source:
payload["source_url"] = source
pipeline_context.emit(payload)
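For orientation, a sketch of the payload shape _emit_local_file passes to pipeline_context.emit; the values are hypothetical, but the keys mirror the construction above.
# Illustrative payload only (values are made up, keys come from the code above).
example_payload = {
    "path": "/downloads/Some Book.pdf",
    "hash": "<sha256 hex digest of the file>",
    "title": "Some Book",
    "action": "cmdlet:download-file",
    "download_mode": "file",
    "store": "local",
    "media_kind": "book",
    "tag": ["title:Some Book"],
    "provider": "openlibrary",         # only set when provider_hint is given
    "url": "https://example.org/item", # "source_url" instead when source is not http(s)
}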
@staticmethod
def _normalize_urls(parsed: Dict[str, Any]) -> List[str]:
urls: List[str] = []
url_value: Any = None
if isinstance(parsed, dict):
url_value = parsed.get("url")
try:
urls = normalize_url_list(url_value)
except Exception:
urls = []
if not urls and isinstance(parsed, dict):
query_val = parsed.get("query")
try:
if isinstance(query_val, str) and query_val.strip().lower().startswith("url:"):
urls = normalize_url_list(query_val)
except Exception:
pass
return urls
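A quick illustration of the fallback order (hypothetical parsed dicts; the exact splitting and cleaning is delegated to normalize_url_list, so its pass-through behaviour here is an assumption):
# -url values win; a "url:..." query string is only consulted when none were given.
Download_File._normalize_urls({"url": ["https://example.org/a"]})
# -> ["https://example.org/a"]
Download_File._normalize_urls({"query": "url:https://example.org/b"})
# -> ["https://example.org/b"]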
@staticmethod
def _collect_piped_items_if_no_urls(result: Any, raw_url: Sequence[str]) -> List[Any]:
if raw_url:
return []
if result is None:
return []
if isinstance(result, list):
return list(result)
return [result]
@staticmethod
def _load_provider_registry() -> Dict[str, Any]:
"""Lightweight accessor for provider helpers without hard dependencies."""
try:
from ProviderCore import registry as provider_registry # type: ignore
from ProviderCore.base import SearchResult # type: ignore
return {
"get_provider": getattr(provider_registry, "get_provider", None),
"get_search_provider": getattr(provider_registry, "get_search_provider", None),
"match_provider_name_for_url": getattr(provider_registry, "match_provider_name_for_url", None),
"SearchResult": SearchResult,
}
except Exception:
return {
"get_provider": None,
"get_search_provider": None,
"match_provider_name_for_url": None,
"SearchResult": None,
}
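Since every entry in the returned dict may be None when ProviderCore is not importable, callers guard each lookup; a minimal consumption sketch (the URL and empty config are illustrative assumptions):
registry = Download_File._load_provider_registry()
match_provider_name_for_url = registry.get("match_provider_name_for_url")
get_provider = registry.get("get_provider")

provider = None
if match_provider_name_for_url is not None:
    provider_name = match_provider_name_for_url("https://openlibrary.org/books/OL1M")
    if provider_name and get_provider is not None:
        provider = get_provider(provider_name, {})  # real callers pass the cmdlet config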
@staticmethod
def _safe_total_items(raw_url: Sequence[str], piped_items: Sequence[Any]) -> int:
"""Return a sane item count for progress display."""
try:
url_count = len(raw_url or [])
except Exception:
url_count = 0
try:
piped_count = len(piped_items or [])
except Exception:
piped_count = 0
total = url_count + piped_count
return total if total > 0 else 1
@staticmethod
def _build_preview(raw_url: Sequence[str], piped_items: Sequence[Any], total_items: int) -> List[str]:
"""Construct a short preview list for the local progress UI."""
preview: List[str] = []
try:
for url in raw_url or []:
if len(preview) >= 5:
break
preview.append(str(url))
except Exception:
pass
if len(preview) < 5:
try:
items = piped_items if isinstance(piped_items, list) else list(piped_items or [])
except Exception:
items = []
for item in items:
if len(preview) >= 5:
break
try:
label = get_field(item, "title") or get_field(item, "path") or get_field(item, "url")
except Exception:
label = None
if label:
preview.append(str(label))
# If we still have nothing, supply a generic placeholder to avoid empty previews.
if not preview and total_items:
preview.append(f"{total_items} item(s)")
return preview
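For example (hypothetical inputs), the preview caps at five labels and falls back to a count placeholder when nothing usable is found:
Download_File._build_preview(["https://example.org/a", "https://example.org/b"], [], 2)
# -> ["https://example.org/a", "https://example.org/b"]
Download_File._build_preview([], [], 3)
# -> ["3 item(s)"]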
# === Streaming helpers (yt-dlp) ===
@staticmethod
@@ -3531,7 +3644,6 @@ class Download_File(Cmdlet):
parsed = parse_cmdlet_args(args, self)
raw_url = self._normalize_urls(parsed)
raw_url = self._rewrite_archive_org_urls(raw_url)
piped_items = self._collect_piped_items_if_no_urls(result, raw_url)
had_piped_input = False
@@ -3660,6 +3772,7 @@ class Download_File(Cmdlet):
quiet_mode=quiet_mode,
registry=registry,
progress=progress,
context_items=(result if isinstance(result, list) else ([result] if result else [])),
)
downloaded_count += int(urls_downloaded)
if early_exit is not None:

View File

@@ -14,15 +14,12 @@ import sys
from SYS.logger import log, debug
try:
from Provider.openlibrary import OpenLibrary
_ol_scrape_isbn_metadata = OpenLibrary.scrape_isbn_metadata
_ol_scrape_openlibrary_metadata = OpenLibrary.scrape_openlibrary_metadata
except Exception:
_ol_scrape_isbn_metadata = None # type: ignore[assignment]
_ol_scrape_openlibrary_metadata = None # type: ignore[assignment]
from Provider.metadata_provider import (
get_metadata_provider,
list_metadata_providers,
scrape_isbn_metadata,
scrape_openlibrary_metadata,
)
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
@@ -270,9 +267,6 @@ def _pick_supported_ytdlp_url(urls: List[str]) -> Optional[str]:
return candidates[0] if candidates else None
_scrape_isbn_metadata = _ol_scrape_isbn_metadata # type: ignore[assignment]
_scrape_openlibrary_metadata = _ol_scrape_openlibrary_metadata # type: ignore[assignment]
# Tag item for ResultTable display and piping
from dataclasses import dataclass
@@ -1039,22 +1033,16 @@ def _extract_url_formats(formats: list) -> List[Tuple[str, str]]:
def _scrape_isbn_metadata(isbn: str) -> List[str]:
if _ol_scrape_isbn_metadata is None:
log("OpenLibrary scraper unavailable", file=sys.stderr)
return []
try:
return list(scrape_isbn_metadata(isbn))
except Exception as e:
log(f"ISBN scraping error: {e}", file=sys.stderr)
return []
def _scrape_openlibrary_metadata(olid: str) -> List[str]:
if _ol_scrape_openlibrary_metadata is None:
log("OpenLibrary scraper unavailable", file=sys.stderr)
return []
try:
return list(scrape_openlibrary_metadata(olid))
except Exception as e:
log(f"OpenLibrary scraping error: {e}", file=sys.stderr)
return []