diff --git a/src/handlers/message_handlers.py b/src/handlers/message_handlers.py index e0f26f0..8d99e7f 100644 --- a/src/handlers/message_handlers.py +++ b/src/handlers/message_handlers.py @@ -12,7 +12,7 @@ from telegram.error import NetworkError, TelegramError from telegram.ext import ContextTypes from decorators import authorized -from core.extractor import scrape_article +from core.extractor import scrape_article, ArticleContent from core.summarizer import summarize_article, answer_question from core.history_manager import add_to_history from keyboards import get_retry_keyboard @@ -116,21 +116,43 @@ async def process_url( ) ) + # Determine model early to check for fallback eligibility + default_model = ( + load_available_models()[0] + if load_available_models() + else "gemini-2.5-flash" + ) + model_name = context.user_data.get("short_summary_model", default_model) + if not article_content: - if animation_task: - stop_animation_event.set() - await animation_task - error_message = ( - f"😥 Unable to extract content from the URL.\n" - f"Here are the technical details:\n
{error_details}"
- )
- await context.bot.edit_message_text(
- chat_id=chat_id,
- message_id=processing_message.message_id,
- text=error_message,
- parse_mode="HTML",
- )
- return
+ # Fallback: If Gemini, try to use URL context directly
+ if "gemini" in model_name.lower():
+ print(f"Scraping failed for {url}. Attempting Gemini URL context fallback.")
+ article_content = ArticleContent(
+ title="URL Content (Fallback)",
+ text="Content not extracted. Please utilize the available tools (Google Search/URL Context) to read and summarize the content directly from the provided URL.",
+ url=url,
+ tags=[],
+ images=[]
+ )
+ use_web_search = True
+ use_url_context = True
+ fallback_used = True
+ else:
+ if animation_task:
+ stop_animation_event.set()
+ await animation_task
+ error_message = (
+ f"😥 Unable to extract content from the URL.\n"
+ f"Here are the technical details:\n{error_details}"
+ )
+ await context.bot.edit_message_text(
+ chat_id=chat_id,
+ message_id=processing_message.message_id,
+ text=error_message,
+ parse_mode="HTML",
+ )
+ return
article_id = hashlib.sha256(url.encode()).hexdigest()[:32]
if "articles" not in context.user_data:
@@ -139,13 +161,6 @@ async def process_url(
"article_content": article_content
}
- default_model = (
- load_available_models()[0]
- if load_available_models()
- else "gemini-2.5-flash"
- )
- model_name = context.user_data.get("short_summary_model", default_model)
-
summary_data = await summarize_article(
article_content,
summary_type,