This commit is contained in:
2026-01-31 19:00:04 -08:00
parent dcf16e0cc4
commit 6513a3ad04
25 changed files with 617 additions and 397 deletions

View File

@@ -428,7 +428,8 @@ class FlorenceVisionTool:
if self._model is not None and not hasattr(self._model, "_supports_sdpa"):
setattr(self._model, "_supports_sdpa", False)
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to set model compatibility flag _supports_sdpa")
try:
self._model.to(device) # type: ignore[union-attr]
@@ -439,7 +440,8 @@ class FlorenceVisionTool:
try:
self._model.eval() # type: ignore[union-attr]
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to set Florence model to eval mode")
try:
md = getattr(self._model, "device", None)
@@ -450,7 +452,8 @@ class FlorenceVisionTool:
dt = None
debug(f"[florencevision] Model loaded: device={md} param_dtype={dt}")
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to inspect Florence model device/dtype")
def tags_for_image(self, media_path: Path) -> List[str]:
"""Return Florence-derived tags for an image.
@@ -472,7 +475,8 @@ class FlorenceVisionTool:
try:
debug(f"[florencevision] Task prompt: {prompt}")
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to emit debug Task prompt for FlorenceVision")
max_tags = max(0, int(self.defaults.max_tags or 0))
@@ -487,7 +491,8 @@ class FlorenceVisionTool:
try:
debug(f"[florencevision] Image loaded: mode={image.mode} size={image.width}x{image.height}")
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to emit debug for image load")
processor = self._processor
model = self._model
@@ -544,19 +549,22 @@ class FlorenceVisionTool:
)
continue
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to debug tensor shape for processor key '%s'", k)
if isinstance(v, (list, tuple)):
has_none = any(x is None for x in v)
debug(f"[florencevision] {k}: {type(v).__name__} len={len(v)} has_none={has_none}")
continue
debug(f"[florencevision] {k}: type={type(v).__name__}")
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed while inspecting processor output keys")
try:
inputs = inputs.to(model.device) # type: ignore[attr-defined]
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to move processor inputs to device %s", getattr(model, 'device', None))
# Align floating-point input tensors with the model's parameter dtype.
try:
@@ -575,7 +583,8 @@ class FlorenceVisionTool:
except Exception:
continue
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to inspect/align model dtype for Florence inputs")
try:
gen_inputs_all = {k: v for k, v in dict(inputs).items() if v is not None}
@@ -602,7 +611,8 @@ class FlorenceVisionTool:
):
gen_inputs["attention_mask"] = attention_mask
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to reconcile attention mask shape with input_ids for Florence processor")
try:
debug(
@@ -612,18 +622,21 @@ class FlorenceVisionTool:
f"pixel_attention_mask={'pixel_attention_mask' in forward_params}"
)
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to debug model forward supports")
try:
gen_inputs.setdefault("use_cache", False)
gen_inputs.setdefault("num_beams", 1)
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to set default gen_inputs values")
try:
debug(f"[florencevision] generate kwargs: {sorted(list(gen_inputs.keys()))}")
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to debug generate kwargs")
pv = gen_inputs.get("pixel_values")
if pv is None:
@@ -654,7 +667,8 @@ class FlorenceVisionTool:
if not hasattr(model, "_supports_sdpa"):
setattr(model, "_supports_sdpa", False)
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to patch model _supports_sdpa flag in retry handler")
generated_ids = _do_generate(gen_inputs)
elif "NoneType" in msg and "shape" in msg:
retry_inputs = dict(gen_inputs)
@@ -676,7 +690,8 @@ class FlorenceVisionTool:
):
retry_inputs["attention_mask"] = am
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed while filling retry_inputs attention_mask in AttributeError handler")
try:
import torch
@@ -692,14 +707,16 @@ class FlorenceVisionTool:
elif "pixel_attention_mask" in forward_params and "pixel_attention_mask" not in retry_inputs:
retry_inputs["pixel_attention_mask"] = mask
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to build mask or adjust retry_inputs in AttributeError handler")
try:
debug(
f"[florencevision] generate retry kwargs: {sorted(list(retry_inputs.keys()))}"
)
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to debug generate retry kwargs")
generated_ids = _do_generate(retry_inputs)
else:
@@ -708,7 +725,8 @@ class FlorenceVisionTool:
try:
debug(f"[florencevision] generated_ids type={type(generated_ids).__name__}")
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to debug generated_ids type")
seq = getattr(generated_ids, "sequences", generated_ids)
generated_text = processor.batch_decode(seq, skip_special_tokens=False)[0]
@@ -719,7 +737,8 @@ class FlorenceVisionTool:
debug(f"[florencevision] prompt run failed: {type(exc).__name__}: {exc}")
debug("[florencevision] traceback:\n" + traceback.format_exc())
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to emit debug for prompt run failure: %s", exc)
raise
parsed = None
@@ -766,12 +785,14 @@ class FlorenceVisionTool:
debug(f"[florencevision] post_process[{k!r}] type={type(parsed.get(k)).__name__}")
debug("[florencevision] post_process[key] repr:\n" + _debug_repr(parsed.get(k)))
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed while debugging parsed post_process output for prompt %s", task_prompt)
else:
debug(f"[florencevision] post_process_generation: type={type(parsed).__name__}")
debug("[florencevision] post_process repr:\n" + _debug_repr(parsed))
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to post-process generated output for prompt %s", task_prompt)
return generated_text, parsed, seq
@@ -800,7 +821,8 @@ class FlorenceVisionTool:
try:
debug(f"[florencevision] candidate label strings ({len(labels)}): {labels!r}")
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to emit candidate label strings debug")
out: List[str] = []
seen: set[str] = set()
@@ -848,7 +870,8 @@ class FlorenceVisionTool:
for raw_lab, cleaned, reason in dropped:
debug(f"[florencevision] drop reason={reason} raw={raw_lab!r} cleaned={cleaned!r}")
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to emit cleaned/dropped tags debug info")
return labels, caption_candidates, out, dropped
@@ -871,7 +894,12 @@ class FlorenceVisionTool:
try:
return max(cleaned, key=lambda s: len(str(s)), default=None)
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to choose best caption from cleaned candidates")
try:
return max(raw, key=lambda s: len(str(s)), default=None)
except Exception:
return None
try:
return max(raw, key=lambda s: len(str(s)), default=None)
except Exception:
@@ -936,7 +964,8 @@ class FlorenceVisionTool:
try:
debug(f"[florencevision] grounding prompt: {grounding_prompt}")
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to emit grounding prompt debug")
grd_text, grd_parsed, _grd_seq = _run_prompt(grounding_prompt)
_grd_labels, grd_captions, grd_cleaned, _grd_dropped = _extract_labels_and_captions(grounding_prompt, grd_text, grd_parsed)
@@ -962,6 +991,8 @@ class FlorenceVisionTool:
is_combo = "<|detailed_caption|>" in prompt and "<|grounding|>" in prompt
only_task_tokens = not final_tags or all(t in {"object_detection", "grounding", "tag"} for t in final_tags)
except Exception:
from SYS.logger import logger
logger.exception("Failed to compute is_combo/only_task_tokens for prompt '%s'", prompt)
is_combo = False
only_task_tokens = False
@@ -973,13 +1004,15 @@ class FlorenceVisionTool:
try:
self.defaults.task = "<|detailed_caption|>"
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to set self.defaults.task to '<|detailed_caption|>' during od retry")
final_tags = self.tags_for_image(media_path)
finally:
try:
self.defaults.task = original_task
except Exception:
pass
from SYS.logger import logger
logger.exception("Failed to restore self.defaults.task after od retry")
self._od_tag_retrying = False
self._last_caption = caption_text if caption_text else None