Skip to content

Commit df57b9c

Browse files
committed
fix: improve extensionless file detection & debug logs
Signed-off-by: Mouad BANI <mouad-mb@outlook.com>
1 parent 615898e commit df57b9c

1 file changed

Lines changed: 33 additions & 8 deletions

File tree

services/apps/git_integration/src/crowdgit/services/maintainer/maintainer_service.py

Lines changed: 33 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ class MaintainerService(BaseService):
6666
".github/maintainers.md",
6767
".github/contributors.md",
6868
".github/codeowners",
69-
"SECURITY-INSIGHTS.md",
69+
"security-insights.md",
7070
}
7171

7272
# Governance stems (basename without extension, lowercased) for filename search
@@ -445,15 +445,16 @@ async def _list_repo_files(self, repo_path: str) -> list[str]:
445445
"""List non-code files in the repo recursively, filtered by VALID_EXTENSIONS."""
446446
glob_args = ["--glob", "!.git/"]
447447
for ext in self.VALID_EXTENSIONS:
448-
glob_args.extend(["--iglob", f"*{ext}"])
448+
if ext:
449+
glob_args.extend(["--iglob", f"*{ext}"])
449450

450451
output = await run_shell_command(
451452
["rg", "--files", "--hidden", *glob_args, "."], cwd=repo_path
452453
)
453454
return [
454455
line[2:] if line.startswith("./") else line
455456
for line in output.strip().split("\n")
456-
if line.strip()
457+
if line.strip() and os.path.splitext(line)[1] in self.VALID_EXTENSIONS
457458
]
458459

459460
async def _ripgrep_search(self, repo_path: str, max_depth: int | None = None) -> list[str]:
@@ -475,6 +476,16 @@ async def _ripgrep_search(self, repo_path: str, max_depth: int | None = None) ->
475476
except CommandExecutionError:
476477
self.logger.info("Ripgrep found no governance files by filename")
477478
return []
479+
except FileNotFoundError as e:
480+
if not os.path.isdir(repo_path):
481+
self.logger.warning(
482+
f"Ripgrep search failed: repo_path does not exist: '{repo_path}'"
483+
)
484+
else:
485+
self.logger.warning(
486+
f"Ripgrep search failed: 'rg' binary not found in PATH. Install ripgrep. ({repr(e)})"
487+
)
488+
return []
478489
except Exception as e:
479490
self.logger.warning(f"Ripgrep search failed: {repr(e)}")
480491
return []
@@ -488,9 +499,11 @@ async def _ripgrep_search(self, repo_path: str, max_depth: int | None = None) ->
488499
line = line[2:]
489500
basename = os.path.basename(line).lower()
490501
if basename in self.EXCLUDED_FILENAMES:
502+
self.logger.debug(f"Excluding '{line}': basename in EXCLUDED_FILENAMES")
491503
continue
492504
ext = os.path.splitext(basename)[1]
493505
if ext not in self.VALID_EXTENSIONS:
506+
self.logger.debug(f"Excluding '{line}': extension '{ext}' not in VALID_EXTENSIONS")
494507
continue
495508
results.append(line)
496509

@@ -547,9 +560,9 @@ async def find_candidate_files(
547560
else:
548561
subdir_scored.append(entry)
549562

550-
self.logger.info(
563+
self.logger.debug(
551564
f"Candidate: {candidate_path} "
552-
f"(filename: {filename_score}, content: {content_score}, total: {total})"
565+
f"(filename_score={filename_score}, content_score={content_score}, total={total})"
553566
)
554567

555568
root_scored.sort(key=lambda c: c[2], reverse=True)
@@ -591,13 +604,15 @@ async def try_saved_maintainer_file(
591604
"""
592605
cost = 0.0
593606
file_path = os.path.join(repo_path, saved_maintainer_file)
607+
self.logger.debug(f"Checking saved maintainer file on disk: '{file_path}'")
594608

595609
if not await aiofiles.os.path.isfile(file_path):
596610
self.logger.warning(
597611
f"Saved maintainer file '{saved_maintainer_file}' no longer exists on disk"
598612
)
599613
return None, cost
600614

615+
self.logger.debug(f"Saved maintainer file exists, reading content: '{saved_maintainer_file}'")
601616
try:
602617
async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
603618
content = await f.read()
@@ -645,6 +660,12 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
645660
root_candidates, subdir_candidates = await self.find_candidate_files(repo_path)
646661
all_candidates = root_candidates + subdir_candidates
647662
candidate_files = [(path, score) for path, _, score in all_candidates][:100]
663+
self.logger.debug(
664+
f"Detection step 2: {len(root_candidates)} root candidate(s), "
665+
f"{len(subdir_candidates)} subdir candidate(s); "
666+
f"root={[p for p, _, _ in root_candidates]}, "
667+
f"subdir_top={[p for p, _, _ in subdir_candidates[:3]]}"
668+
)
648669

649670
# Step 3: Try root-level files first (in score order), then top subdirectory file
650671
failed_candidates: set[str] = set()
@@ -656,7 +677,8 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
656677
best_file: str | None = None
657678
best_file_count: int = 0
658679

659-
for filename, content, _ in root_candidates:
680+
for filename, content, score in root_candidates:
681+
self.logger.debug(f"Detection step 3: trying root candidate '{filename}' (score={score})")
660682
try:
661683
result = await self.analyze_and_build_result(filename, content)
662684
total_cost += result.total_cost
@@ -690,7 +712,8 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
690712
self.logger.warning("All root candidates failed, trying AI file detection")
691713

692714
if subdir_candidates:
693-
filename, content, _ = subdir_candidates[0]
715+
filename, content, score = subdir_candidates[0]
716+
self.logger.debug(f"Detection step 3b: trying top subdir candidate '{filename}' (score={score})")
694717
try:
695718
result = await self.analyze_and_build_result(filename, content)
696719
total_cost += result.total_cost
@@ -726,10 +749,12 @@ def _attach_metadata(result: MaintainerResult) -> MaintainerResult:
726749
f"Passing {len(ai_input_files)} files to AI for maintainer file detection "
727750
f"(total repo files: {len(file_names)})"
728751
)
752+
self.logger.debug(f"AI input files: {[f for f, _ in ai_input_files]}")
729753
ai_file_name, ai_cost = await self.find_maintainer_file_with_ai(ai_input_files)
730754
ai_suggested_file = ai_file_name
731755
total_cost += ai_cost
732756

757+
self.logger.debug(f"AI suggested file: '{ai_file_name}' (cost={ai_cost:.4f})")
733758
if ai_file_name:
734759
file_path = os.path.join(repo_path, ai_file_name)
735760
if not await aiofiles.os.path.isfile(file_path):
@@ -826,7 +851,7 @@ async def process_maintainers(
826851
ai_cost = 0.0
827852
maintainers_found = 0
828853
maintainers_skipped = 0
829-
candidate_files: list[str] = []
854+
candidate_files: list[tuple[str, int]] = []
830855
ai_suggested_file: str | None = None
831856

832857
try:

0 commit comments

Comments
 (0)