This commit is contained in:
nose
2025-12-14 00:53:52 -08:00
parent 52a79b0086
commit a03eb0d1be
24 changed files with 2785 additions and 1868 deletions

View File

@@ -12,7 +12,13 @@ from __future__ import annotations
import sys
from SYS.logger import log, debug
try:
from Provider.openlibrary import OpenLibrary
_ol_scrape_isbn_metadata = OpenLibrary.scrape_isbn_metadata
_ol_scrape_openlibrary_metadata = OpenLibrary.scrape_openlibrary_metadata
except Exception:
_ol_scrape_isbn_metadata = None # type: ignore[assignment]
_ol_scrape_openlibrary_metadata = None # type: ignore[assignment]
from Provider.metadata_provider import get_metadata_provider, list_metadata_providers
import subprocess
from pathlib import Path
@@ -31,6 +37,10 @@ except ImportError:
extract_title = None
_scrape_isbn_metadata = _ol_scrape_isbn_metadata # type: ignore[assignment]
_scrape_openlibrary_metadata = _ol_scrape_openlibrary_metadata # type: ignore[assignment]
@@ -691,249 +701,22 @@ def _extract_url_formats(formats: list) -> List[Tuple[str, str]]:
def _scrape_isbn_metadata(isbn: str) -> List[str]:
    """Scrape metadata tags for an ISBN via the OpenLibrary provider.

    Thin delegation wrapper: the actual HTTP lookup and tag extraction
    (title, authors, publish date, identifiers, ...) live in
    ``Provider.openlibrary.OpenLibrary.scrape_isbn_metadata``.

    Args:
        isbn: ISBN string; normalization (e.g. stripping hyphens) is the
            provider's responsibility.

    Returns:
        A list of tag strings (e.g. ``"title:..."``, ``"author:..."``), or
        ``[]`` when the provider is unavailable or the lookup fails.
    """
    # The provider import may have failed at module load time (see the
    # try/except around the Provider.openlibrary import); degrade
    # gracefully instead of raising NameError/TypeError here.
    if _ol_scrape_isbn_metadata is None:
        log("OpenLibrary scraper unavailable", file=sys.stderr)
        return []
    try:
        # Coerce to a concrete list so callers always get a plain list,
        # regardless of what iterable the provider returns.
        return list(_ol_scrape_isbn_metadata(isbn))
    except Exception as e:
        # Best-effort lookup: log the failure and return no tags rather
        # than propagating network/parse errors to the caller.
        log(f"ISBN scraping error: {e}", file=sys.stderr)
        return []
def _scrape_openlibrary_metadata(olid: str) -> List[str]:
    """Scrape metadata tags for an OpenLibrary ID via the OpenLibrary provider.

    Thin delegation wrapper: fetching the ``/books/{OLID}.json`` endpoint and
    extracting title/authors/subjects/identifiers is handled by
    ``Provider.openlibrary.OpenLibrary.scrape_openlibrary_metadata``.

    Args:
        olid: OpenLibrary edition ID (e.g. ``"OL9674499M"``); format
            normalization is the provider's responsibility.

    Returns:
        A list of tag strings, or ``[]`` when the provider is unavailable or
        the lookup fails.
    """
    # The provider import may have failed at module load time; degrade
    # gracefully instead of raising here.
    if _ol_scrape_openlibrary_metadata is None:
        log("OpenLibrary scraper unavailable", file=sys.stderr)
        return []
    try:
        # Coerce to a concrete list so callers always get a plain list.
        return list(_ol_scrape_openlibrary_metadata(olid))
    except Exception as e:
        # Best-effort lookup: log and return no tags rather than propagating.
        log(f"OpenLibrary scraping error: {e}", file=sys.stderr)
        return []