Skip to content

Commit 30a01a9

Browse files
committed
Merge branch 'clamv-rules' into collect-sigma-rules-api
# Conflicts: # vulnerabilities/improvers/__init__.py # vulnerabilities/models.py
2 parents d4ed496 + 89a8812 commit 30a01a9

8 files changed

Lines changed: 462 additions & 0 deletions

File tree

vulnerabilities/improvers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from vulnerabilities.pipelines import flag_ghost_packages
2020
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
2121
from vulnerabilities.pipelines import remove_duplicate_advisories
22+
from vulnerabilities.pipelines.v2_improvers import clamav_rules
2223
from vulnerabilities.pipelines.v2_improvers import collect_ssvc_trees
2324
from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2
2425
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
@@ -112,5 +113,6 @@
112113
yara_rules.TenableYaraRules,
113114
yara_rules.Dr4k0niaYaraRules,
114115
yara_rules.Umair9747YaraRules,
116+
clamav_rules.ClamVRulesImproverPipeline,
115117
]
116118
)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Generated by Django 4.2.25 on 2025-12-01 20:13
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
6+
7+
class Migration(migrations.Migration):
    """Create the ``AdvisoryDetectionRule`` model with a foreign key to ``vulnerabilities.advisoryv2``."""

    dependencies = [
        ("vulnerabilities", "0103_codecommit_impactedpackage_affecting_commits_and_more"),
    ]

    operations = [
        migrations.CreateModel(
            name="AdvisoryDetectionRule",
            fields=[
                # Surrogate primary key.
                (
                    "id",
                    models.AutoField(
                        verbose_name="ID",
                        serialize=False,
                        primary_key=True,
                        auto_created=True,
                    ),
                ),
                # Raw rule body.
                (
                    "rule_text",
                    models.TextField(
                        help_text="Full text of the detection rule, script, or signature."
                    ),
                ),
                # Rule format, restricted to the choices listed here.
                (
                    "rule_type",
                    models.CharField(
                        max_length=100,
                        blank=True,
                        choices=[
                            ("yara", "YARA"),
                            ("sigma", "Sigma Detection Rule"),
                            ("clamav", "ClamAV Signature"),
                        ],
                    ),
                ),
                # Optional provenance URL.
                (
                    "source_url",
                    models.URLField(
                        null=True,
                        blank=True,
                        help_text="URL or reference to the source of the rule (vendor feed, GitHub repo, etc.).",
                    ),
                ),
                # Owning advisory; rules are deleted together with their advisory.
                (
                    "advisory",
                    models.ForeignKey(
                        to="vulnerabilities.advisoryv2",
                        related_name="detection_rules",
                        on_delete=models.CASCADE,
                    ),
                ),
            ],
        ),
    ]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Generated by Django 4.2.25 on 2025-12-01 21:52
2+
3+
from django.db import migrations, models
4+
import django.db.models.deletion
5+
6+
7+
class Migration(migrations.Migration):
    """Make ``AdvisoryDetectionRule.advisory`` optional and null it out when the advisory is deleted."""

    dependencies = [
        ("vulnerabilities", "0104_advisorydetectionrule"),
    ]

    operations = [
        migrations.AlterField(
            model_name="advisorydetectionrule",
            name="advisory",
            # Nullable link: a rule may exist without an advisory, and survives
            # the deletion of the advisory it pointed to.
            field=models.ForeignKey(
                to="vulnerabilities.advisoryv2",
                related_name="detection_rules",
                on_delete=models.SET_NULL,
                null=True,
                blank=True,
            ),
        ),
    ]

vulnerabilities/models.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3689,3 +3689,13 @@ class DetectionRule(models.Model):
36893689
related_name="detection_rules",
36903690
help_text="Advisories associated with this DetectionRule.",
36913691
)
3692+
3693+
3694+
class DetectionRuleTypes(models.TextChoices):
    """Supported security detection rule formats, as (stored value, display label) pairs."""

    YARA = ("yara", "Yara")
    YARA_X = ("yara-x", "Yara-X")
    SIGMA = ("sigma", "Sigma")
    CLAMAV = ("clamav", "CLAMAV")
    SURICATA = ("suricata", "Suricata")
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import gzip
11+
import io
12+
import os
13+
import shutil
14+
import tarfile
15+
import tempfile
16+
from pathlib import Path
17+
from typing import List
18+
19+
import requests
20+
21+
from vulnerabilities.models import AdvisoryAlias
22+
from vulnerabilities.models import DetectionRule
23+
from vulnerabilities.models import DetectionRuleTypes
24+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
25+
from vulnerabilities.utils import find_all_cve
26+
27+
28+
def extract_cvd(cvd_path, output_dir):
    """
    Extract the CVD file at ``cvd_path`` into ``output_dir`` and return the
    output directory as a ``Path``.

    A CVD file is a 512-byte header followed by a gzipped tar archive.
    ``output_dir`` is created if it does not exist. Every extracted regular
    file is made ``rw-r--r--``.

    The archive comes from the network, so members are validated before
    extraction: a member that would land outside ``output_dir`` (or, on Python
    versions without tarfile extraction filters, any member that is not a
    regular file or directory) raises a ``tarfile.TarError`` subclass instead
    of being written.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    with open(cvd_path, "rb") as f:
        f.seek(512)  # Skip header
        compressed_data = f.read()

    tar_buffer = io.BytesIO(gzip.decompress(compressed_data))

    with tarfile.open(fileobj=tar_buffer, mode="r:") as tar:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters are available: let tarfile reject path
            # traversal, links escaping the destination and special files.
            tar.extractall(path=output_path, filter="data")
        else:
            # Older Python: validate every member by hand before extracting.
            root = output_path.resolve()
            safe_members = []
            for member in tar.getmembers():
                if not (member.isfile() or member.isdir()):
                    raise tarfile.ExtractError(
                        f"Refusing to extract non-regular member: {member.name!r}"
                    )
                target = (root / member.name).resolve()
                if target != root and root not in target.parents:
                    raise tarfile.ExtractError(
                        f"Refusing to extract outside destination: {member.name!r}"
                    )
                safe_members.append(member)
            tar.extractall(path=output_path, members=safe_members)

    for file in output_path.rglob("*"):
        if file.is_file():
            file.chmod(0o644)  # rw-r--r--
    return output_path
49+
50+
51+
def parse_ndb_file(ndb_path: Path) -> List[dict]:
    """
    Return the extended signatures found in the .ndb file at ``ndb_path``.

    Each non-blank, non-comment line is split on ":"; lines with at least four
    fields yield a dict with the keys "name", "target_type", "offset",
    "hex_signature" and "line_num" (1-based line number). Shorter lines are
    skipped. Undecodable bytes are ignored.
    """
    field_names = ("name", "target_type", "offset", "hex_signature")
    records = []
    with ndb_path.open("r", encoding="utf-8", errors="ignore") as handle:
        for number, raw in enumerate(handle, start=1):
            entry = raw.strip()
            if entry == "" or entry.startswith("#"):
                continue

            fields = entry.split(":")
            if len(fields) < len(field_names):
                continue

            # zip() stops after the four named fields; extra fields are dropped.
            record = dict(zip(field_names, fields))
            record["line_num"] = number
            records.append(record)
    return records
72+
73+
74+
def parse_hdb_file(hdb_path: Path) -> List[dict]:
    """
    Return the MD5 hash signatures found in the .hdb file at ``hdb_path``.

    Each non-blank, non-comment line is split on ":"; lines with at least three
    fields yield a dict with the keys "hash", "file_size", "name" and
    "line_num" (1-based line number). Shorter lines are skipped. Undecodable
    bytes are ignored.
    """
    field_names = ("hash", "file_size", "name")
    records = []
    with hdb_path.open("r", encoding="utf-8", errors="ignore") as handle:
        for number, raw in enumerate(handle, start=1):
            entry = raw.strip()
            if entry == "" or entry.startswith("#"):
                continue

            fields = entry.split(":")
            if len(fields) < len(field_names):
                continue

            # zip() stops after the three named fields; extra fields are dropped.
            record = dict(zip(field_names, fields))
            record["line_num"] = number
            records.append(record)
    return records
94+
95+
96+
def extract_cve_id(name: str):
    """
    Return the first CVE ID found in ``name`` in upper case, or None.

    Underscores are replaced with dashes before searching, so a signature
    name written as ``CVE_2020_1234`` is matched as ``CVE-2020-1234``.
    """
    dashed = name.replace("_", "-")
    for match in find_all_cve(dashed):
        # Only the first match is of interest.
        return match.upper()
    return None
101+
102+
103+
class ClamVRulesImproverPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Pipeline that downloads the ClamAV database (main.cvd), extracts it,
    parses the .ndb and .hdb signature files and saves them as detection rules.
    """

    pipeline_id = "clamv_rules"
    MAIN_DATABASE_URL = "https://database.clamav.net/main.cvd"
    license_url = "https://github.com/Cisco-Talos/clamav/blob/c73755d3fc130b0c60ccf4e8f8d28c62fc58c95b/README.md#licensing"
    # NOTE(review): this is a free-text license name, not an SPDX expression;
    # confirm which form the base pipeline expects.
    license_expression = "GNU GENERAL PUBLIC LICENSE"

    @classmethod
    def steps(cls):
        """Return the pipeline steps in execution order."""
        return (
            cls.download_database,
            cls.extract_database,
            cls.collect_and_store_advisories,
            cls.clean_downloads,
        )

    def download_database(self):
        """
        Download main.cvd into a fresh temporary directory stored as ``self.db_dir``.

        Raises ``requests.HTTPError`` on a non-success HTTP status.
        """
        self.log("Downloading ClamAV database…")
        # The mkdtemp() directory itself is db_dir, so clean_downloads() removes
        # everything that was created and no empty parent directory is left behind.
        self.db_dir = Path(tempfile.mkdtemp(prefix="clamav_db_"))

        database_url = f"{self.MAIN_DATABASE_URL}?api-version=1"
        # NOTE(review): the User-Agent still carries a placeholder project URL.
        headers = {
            "User-Agent": "ClamAV-Client/1.0 (https://github.com/yourproject)",
            "Accept": "*/*",
        }

        filename = self.db_dir / "main.cvd"
        self.log(f"Downloading {database_url} to {filename}")

        # Context manager releases the streamed connection even if writing fails.
        with requests.get(database_url, headers=headers, stream=True, timeout=30) as resp:
            resp.raise_for_status()

            with filename.open("wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        self.log("ClamAV DB file downloaded successfully.")

    def extract_database(self):
        """Extract the downloaded CVD into ``self.db_dir / "extracted"``."""
        out_dir = self.db_dir / "extracted"
        self.extract_cvd_dir = extract_cvd(self.db_dir / "main.cvd", out_dir)
        self.log(f"Extracted CVD to {self.extract_cvd_dir}")

    def collect_and_store_advisories(self):
        """
        Parse main.hdb and main.ndb and store every signature as a DetectionRule.

        When the signature name contains a CVE ID with a known AdvisoryAlias,
        one rule is stored per advisory of that alias; otherwise a single rule
        with no advisory is stored.
        """
        rule_entries = parse_hdb_file(self.extract_cvd_dir / "main.hdb") + parse_ndb_file(
            self.extract_cvd_dir / "main.ndb"
        )

        for rule_entry in rule_entries:
            cve_id = extract_cve_id(rule_entry.get("name", ""))
            found_advisories = set()

            if cve_id:
                try:
                    alias = AdvisoryAlias.objects.get(alias=cve_id)
                except AdvisoryAlias.DoesNotExist:
                    self.log(f"Advisory {cve_id} not found.")
                else:
                    found_advisories.update(alias.advisories.all())

            # Fall back to a single advisory-less rule when nothing matched.
            # NOTE(review): confirm DetectionRule exposes an ``advisory`` field
            # that accepts None; its definition is not visible from here.
            for adv in found_advisories or (None,):
                DetectionRule.objects.update_or_create(
                    rule_text=str(rule_entry),
                    rule_type=DetectionRuleTypes.CLAMAV,
                    advisory=adv,
                    defaults={
                        "source_url": self.MAIN_DATABASE_URL,
                    },
                )

    def clean_downloads(self):
        """Remove the download directory, if one was created."""
        db_dir = getattr(self, "db_dir", None)
        if db_dir and os.path.exists(db_dir):
            shutil.rmtree(db_dir, ignore_errors=True)
            self.log("Cleaned up downloaded files.")

    def on_failure(self):
        """Ensure cleanup on failure."""
        self.clean_downloads()

0 commit comments

Comments
 (0)