This commit is contained in:
2026-01-19 06:24:09 -08:00
parent a961ac3ce7
commit 7ddf0065d1
45 changed files with 627 additions and 411 deletions

View File

@@ -220,11 +220,11 @@ def extract_records(doc_or_html: Any, base_url: Optional[str] = None, xpaths: Op
records: List[Dict[str, str]] = []
for row in rows:
nr: Dict[str, str] = {}
row_norm: Dict[str, str] = {}
for k, v in (row or {}).items():
nk = normalize_header(str(k or ""))
nr[nk] = (str(v).strip() if v is not None else "")
records.append(nr)
row_norm[nk] = (str(v).strip() if v is not None else "")
records.append(row_norm)
# Attempt to recover hrefs by matching anchor text -> href
try:
@@ -265,11 +265,11 @@ def extract_records(doc_or_html: Any, base_url: Optional[str] = None, xpaths: Op
# Normalize keys (map platform->system etc)
normed: List[Dict[str, str]] = []
for r in records:
nr: Dict[str, str] = {}
norm_row: Dict[str, str] = {}
for k, v in (r or {}).items():
nk = normalize_header(k)
nr[nk] = v
normed.append(nr)
norm_row[nk] = v
normed.append(norm_row)
return normed, chosen