h
This commit is contained in:
@@ -428,7 +428,8 @@ class FlorenceVisionTool:
|
||||
if self._model is not None and not hasattr(self._model, "_supports_sdpa"):
|
||||
setattr(self._model, "_supports_sdpa", False)
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to set model compatibility flag _supports_sdpa")
|
||||
|
||||
try:
|
||||
self._model.to(device) # type: ignore[union-attr]
|
||||
@@ -439,7 +440,8 @@ class FlorenceVisionTool:
|
||||
try:
|
||||
self._model.eval() # type: ignore[union-attr]
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to set Florence model to eval mode")
|
||||
|
||||
try:
|
||||
md = getattr(self._model, "device", None)
|
||||
@@ -450,7 +452,8 @@ class FlorenceVisionTool:
|
||||
dt = None
|
||||
debug(f"[florencevision] Model loaded: device={md} param_dtype={dt}")
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to inspect Florence model device/dtype")
|
||||
|
||||
def tags_for_image(self, media_path: Path) -> List[str]:
|
||||
"""Return Florence-derived tags for an image.
|
||||
@@ -472,7 +475,8 @@ class FlorenceVisionTool:
|
||||
try:
|
||||
debug(f"[florencevision] Task prompt: {prompt}")
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to emit debug Task prompt for FlorenceVision")
|
||||
|
||||
max_tags = max(0, int(self.defaults.max_tags or 0))
|
||||
|
||||
@@ -487,7 +491,8 @@ class FlorenceVisionTool:
|
||||
try:
|
||||
debug(f"[florencevision] Image loaded: mode={image.mode} size={image.width}x{image.height}")
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to emit debug for image load")
|
||||
|
||||
processor = self._processor
|
||||
model = self._model
|
||||
@@ -544,19 +549,22 @@ class FlorenceVisionTool:
|
||||
)
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to debug tensor shape for processor key '%s'", k)
|
||||
if isinstance(v, (list, tuple)):
|
||||
has_none = any(x is None for x in v)
|
||||
debug(f"[florencevision] {k}: {type(v).__name__} len={len(v)} has_none={has_none}")
|
||||
continue
|
||||
debug(f"[florencevision] {k}: type={type(v).__name__}")
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed while inspecting processor output keys")
|
||||
|
||||
try:
|
||||
inputs = inputs.to(model.device) # type: ignore[attr-defined]
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to move processor inputs to device %s", getattr(model, 'device', None))
|
||||
|
||||
# Align floating-point input tensors with the model's parameter dtype.
|
||||
try:
|
||||
@@ -575,7 +583,8 @@ class FlorenceVisionTool:
|
||||
except Exception:
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to inspect/align model dtype for Florence inputs")
|
||||
|
||||
try:
|
||||
gen_inputs_all = {k: v for k, v in dict(inputs).items() if v is not None}
|
||||
@@ -602,7 +611,8 @@ class FlorenceVisionTool:
|
||||
):
|
||||
gen_inputs["attention_mask"] = attention_mask
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to reconcile attention mask shape with input_ids for Florence processor")
|
||||
|
||||
try:
|
||||
debug(
|
||||
@@ -612,18 +622,21 @@ class FlorenceVisionTool:
|
||||
f"pixel_attention_mask={'pixel_attention_mask' in forward_params}"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to debug model forward supports")
|
||||
|
||||
try:
|
||||
gen_inputs.setdefault("use_cache", False)
|
||||
gen_inputs.setdefault("num_beams", 1)
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to set default gen_inputs values")
|
||||
|
||||
try:
|
||||
debug(f"[florencevision] generate kwargs: {sorted(list(gen_inputs.keys()))}")
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to debug generate kwargs")
|
||||
|
||||
pv = gen_inputs.get("pixel_values")
|
||||
if pv is None:
|
||||
@@ -654,7 +667,8 @@ class FlorenceVisionTool:
|
||||
if not hasattr(model, "_supports_sdpa"):
|
||||
setattr(model, "_supports_sdpa", False)
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to patch model _supports_sdpa flag in retry handler")
|
||||
generated_ids = _do_generate(gen_inputs)
|
||||
elif "NoneType" in msg and "shape" in msg:
|
||||
retry_inputs = dict(gen_inputs)
|
||||
@@ -676,7 +690,8 @@ class FlorenceVisionTool:
|
||||
):
|
||||
retry_inputs["attention_mask"] = am
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed while filling retry_inputs attention_mask in AttributeError handler")
|
||||
|
||||
try:
|
||||
import torch
|
||||
@@ -692,14 +707,16 @@ class FlorenceVisionTool:
|
||||
elif "pixel_attention_mask" in forward_params and "pixel_attention_mask" not in retry_inputs:
|
||||
retry_inputs["pixel_attention_mask"] = mask
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to build mask or adjust retry_inputs in AttributeError handler")
|
||||
|
||||
try:
|
||||
debug(
|
||||
f"[florencevision] generate retry kwargs: {sorted(list(retry_inputs.keys()))}"
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to debug generate retry kwargs")
|
||||
|
||||
generated_ids = _do_generate(retry_inputs)
|
||||
else:
|
||||
@@ -708,7 +725,8 @@ class FlorenceVisionTool:
|
||||
try:
|
||||
debug(f"[florencevision] generated_ids type={type(generated_ids).__name__}")
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to debug generated_ids type")
|
||||
|
||||
seq = getattr(generated_ids, "sequences", generated_ids)
|
||||
generated_text = processor.batch_decode(seq, skip_special_tokens=False)[0]
|
||||
@@ -719,7 +737,8 @@ class FlorenceVisionTool:
|
||||
debug(f"[florencevision] prompt run failed: {type(exc).__name__}: {exc}")
|
||||
debug("[florencevision] traceback:\n" + traceback.format_exc())
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to emit debug for prompt run failure: %s", exc)
|
||||
raise
|
||||
|
||||
parsed = None
|
||||
@@ -766,12 +785,14 @@ class FlorenceVisionTool:
|
||||
debug(f"[florencevision] post_process[{k!r}] type={type(parsed.get(k)).__name__}")
|
||||
debug("[florencevision] post_process[key] repr:\n" + _debug_repr(parsed.get(k)))
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed while debugging parsed post_process output for prompt %s", task_prompt)
|
||||
else:
|
||||
debug(f"[florencevision] post_process_generation: type={type(parsed).__name__}")
|
||||
debug("[florencevision] post_process repr:\n" + _debug_repr(parsed))
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to post-process generated output for prompt %s", task_prompt)
|
||||
|
||||
return generated_text, parsed, seq
|
||||
|
||||
@@ -800,7 +821,8 @@ class FlorenceVisionTool:
|
||||
try:
|
||||
debug(f"[florencevision] candidate label strings ({len(labels)}): {labels!r}")
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to emit candidate label strings debug")
|
||||
|
||||
out: List[str] = []
|
||||
seen: set[str] = set()
|
||||
@@ -848,7 +870,8 @@ class FlorenceVisionTool:
|
||||
for raw_lab, cleaned, reason in dropped:
|
||||
debug(f"[florencevision] drop reason={reason} raw={raw_lab!r} cleaned={cleaned!r}")
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to emit cleaned/dropped tags debug info")
|
||||
|
||||
return labels, caption_candidates, out, dropped
|
||||
|
||||
@@ -871,7 +894,12 @@ class FlorenceVisionTool:
|
||||
try:
|
||||
return max(cleaned, key=lambda s: len(str(s)), default=None)
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to choose best caption from cleaned candidates")
|
||||
try:
|
||||
return max(raw, key=lambda s: len(str(s)), default=None)
|
||||
except Exception:
|
||||
return None
|
||||
try:
|
||||
return max(raw, key=lambda s: len(str(s)), default=None)
|
||||
except Exception:
|
||||
@@ -936,7 +964,8 @@ class FlorenceVisionTool:
|
||||
try:
|
||||
debug(f"[florencevision] grounding prompt: {grounding_prompt}")
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to emit grounding prompt debug")
|
||||
|
||||
grd_text, grd_parsed, _grd_seq = _run_prompt(grounding_prompt)
|
||||
_grd_labels, grd_captions, grd_cleaned, _grd_dropped = _extract_labels_and_captions(grounding_prompt, grd_text, grd_parsed)
|
||||
@@ -962,6 +991,8 @@ class FlorenceVisionTool:
|
||||
is_combo = "<|detailed_caption|>" in prompt and "<|grounding|>" in prompt
|
||||
only_task_tokens = not final_tags or all(t in {"object_detection", "grounding", "tag"} for t in final_tags)
|
||||
except Exception:
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to compute is_combo/only_task_tokens for prompt '%s'", prompt)
|
||||
is_combo = False
|
||||
only_task_tokens = False
|
||||
|
||||
@@ -973,13 +1004,15 @@ class FlorenceVisionTool:
|
||||
try:
|
||||
self.defaults.task = "<|detailed_caption|>"
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to set self.defaults.task to '<|detailed_caption|>' during od retry")
|
||||
final_tags = self.tags_for_image(media_path)
|
||||
finally:
|
||||
try:
|
||||
self.defaults.task = original_task
|
||||
except Exception:
|
||||
pass
|
||||
from SYS.logger import logger
|
||||
logger.exception("Failed to restore self.defaults.task after od retry")
|
||||
self._od_tag_retrying = False
|
||||
|
||||
self._last_caption = caption_text if caption_text else None
|
||||
|
||||
Reference in New Issue
Block a user