diff --git a/src/post_processing/dataclass/data_aplose.py b/src/post_processing/dataclass/data_aplose.py index c4736d1..c93b784 100644 --- a/src/post_processing/dataclass/data_aplose.py +++ b/src/post_processing/dataclass/data_aplose.py @@ -123,8 +123,10 @@ def __init__( ).reset_index(drop=True) self.annotators = sorted(set(self.df["annotator"])) if df is not None else None self.labels = sorted(set(self.df["annotation"])) if df is not None else None - self.begin = min(self.df["start_datetime"]) if begin is None else begin - self.end = max(self.df["end_datetime"]) if end is None else end + self.begin = ( + min(self.df["start_datetime"], default=None) if begin is None else begin + ) + self.end = max(self.df["end_datetime"], default=None) if end is None else end self.dataset = sorted(set(self.df["dataset"])) if df is not None else None self.lat = None self.lon = None @@ -595,8 +597,7 @@ def reshape(self, begin: Timestamp = None, end: Timestamp = None) -> DataAplose: ] if self.df.empty: - msg = "DataFrame is empty after reshaping." - raise ValueError(msg) + return self self.dataset = get_dataset(self.df) self.labels = get_labels(self.df) diff --git a/src/post_processing/dataclass/detection_filter.py b/src/post_processing/dataclass/detection_filter.py index d39e186..70f8120 100644 --- a/src/post_processing/dataclass/detection_filter.py +++ b/src/post_processing/dataclass/detection_filter.py @@ -11,6 +11,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Literal +import numpy as np import yaml from pandas import Timedelta, Timestamp @@ -38,6 +39,7 @@ class DetectionFilter: score: float | None = None box: bool = False filename_format: str = None + confidence: float = None def __getitem__(self, key: str): """Return the value of the given key.""" @@ -88,10 +90,13 @@ def from_dict( filters = [] for detection_file, filters_dict in parameters.items(): df_preview = read_dataframe(Path(detection_file), rows=5) - filters_dict["timebin_origin"] = Timedelta( - max(df_preview["end_time"]), - "s", - ) + if df_preview.empty: + filters_dict["timebin_origin"] = np.nan + else: + filters_dict["timebin_origin"] = Timedelta( + max(df_preview["end_time"]), + "s", + ) filters_dict["detection_file"] = Path(detection_file) if filters_dict.get("timebin_new"): filters_dict["timebin_new"] = Timedelta( diff --git a/src/post_processing/utils/filtering_utils.py b/src/post_processing/utils/filtering_utils.py index 895f4bb..94d2353 100644 --- a/src/post_processing/utils/filtering_utils.py +++ b/src/post_processing/utils/filtering_utils.py @@ -79,8 +79,6 @@ def filter_strong_detection( """ if "type" in df.columns: df = df[df["type"] == "WEAK"] - elif "is_box" in df.columns: - df = df[df["is_box"] == 0] else: msg = "Could not determine annotation type." raise ValueError(msg) @@ -114,15 +112,9 @@ def filter_by_time( """ if begin is not None: df = df[df["start_datetime"] >= begin] - if df.empty: - msg = f"No detection found after '{begin}'." - raise ValueError(msg) if end is not None: df = df[df["end_datetime"] <= end] - if df.empty: - msg = f"No detection found before '{end}'." - raise ValueError(msg) return df @@ -218,47 +210,43 @@ def filter_by_freq( """ if f_min is not None: - df = df[df["start_frequency"] >= f_min] + df = df[df["min_frequency"] >= f_min] if df.empty: msg = f"No detection found above {f_min}Hz." raise ValueError(msg) if f_max is not None: - df = df[df["end_frequency"] <= f_max] + df = df[df["max_frequency"] <= f_max] if df.empty: msg = f"No detection found below {f_max}Hz." raise ValueError(msg) return df -def filter_by_score(df: DataFrame, score: float) -> DataFrame: - """Filter detections by confidence score. +def filter_by_confidence(df: DataFrame, confidence: float) -> DataFrame: + """Filter detections by confidence. Parameters ---------- df : DataFrame - APLOSE-formatted DataFrame containing a 'score' column. - score : float - The minimum confidence score threshold (inclusive). + APLOSE-formatted DataFrame containing a 'confidence' column. + confidence : float + The minimum confidence threshold (inclusive). Returns ------- DataFrame - Filtered DataFrame containing only detections with score >= min_score. + Filtered DataFrame containing only detections with confidence >= min_confidence. """ - if not score: + if not confidence: return df - if "score" not in df.columns: - msg = "'score' column not present if DataFrame." + if "confidence" not in df.columns: + msg = "'confidence' column not present if DataFrame." raise ValueError(msg) - df = df[df["score"] >= score] - if df.empty: - msg = f"No detection found with score above {score}." - raise ValueError(msg) - return df + return df[df["confidence"] >= confidence] def read_dataframe(file: Path, rows: int | None = None) -> DataFrame: @@ -278,36 +266,40 @@ def read_dataframe(file: Path, rows: int | None = None) -> DataFrame: ) -def get_annotators(df: DataFrame) -> list[str]: +def get_annotators(df: DataFrame) -> str | list[str]: """Return the annotator list of APLOSE DataFrame.""" - if len(df) == 1: - return df["annotator"][0] + if df.empty: + return [] annotators = sorted(set(df["annotator"])) return annotators if len(annotators) > 1 else annotators[0] def get_labels(df: DataFrame) -> str | list[str]: """Return the label list of APLOSE DataFrame.""" - if len(df) == 1: - return df["annotation"][0] + if df.empty: + return [] labels = sorted(set(df["annotation"])) return labels if len(labels) > 1 else labels[0] def get_max_freq(df: DataFrame) -> float: """Return the maximum frequency of APLOSE DataFrame.""" - return df["end_frequency"].max() + if df.empty: + return [] + return df["max_frequency"].max() def get_max_time(df: DataFrame) -> float: """Return the maximum time of APLOSE DataFrame.""" + if df.empty: + return [] return df["end_time"].max() def get_dataset(df: DataFrame) -> str | list[str]: """Return dataset list of APLOSE DataFrame.""" - if len(df) == 1: - return df["dataset"][0] + if df.empty: + return [] datasets = sorted(set(df["dataset"])) return datasets if len(datasets) > 1 else datasets[0] @@ -443,8 +435,8 @@ def _create_result_dataframe( "filename": file_vector, "start_time": [0] * len(file_vector), "end_time": [timebin_new.total_seconds()] * len(file_vector), - "start_frequency": [0] * len(file_vector), - "end_frequency": [max_freq] * len(file_vector), + "min_frequency": [0] * len(file_vector), + "max_frequency": [max_freq] * len(file_vector), "annotation": [label] * len(file_vector), "annotator": [annotator] * len(file_vector), "start_datetime": start_datetime, @@ -545,8 +537,7 @@ def reshape_timebin( """ if df.empty: - msg = "DataFrame is empty" - raise ValueError(msg) + return df if not timebin_new: return df @@ -648,13 +639,17 @@ def load_detections(filters: DetectionFilter) -> DataFrame: """ df = read_dataframe(filters.detection_file) + + if df.empty: + return df + if filters.box: df = filter_strong_detection(df) df = filter_by_time(df, filters.begin, filters.end) df = filter_by_annotator(df, annotator=filters.annotator) df = filter_by_label(df, label=filters.annotation) df = filter_by_freq(df, filters.f_min, filters.f_max) - df = filter_by_score(df, filters.score) + df = filter_by_confidence(df, filters.confidence) filename_ts = get_filename_timestamps(df, filters.filename_format) df = reshape_timebin( df, @@ -733,14 +728,13 @@ def add_weak_detection( "start_time": 0, "end_time": max_time.total_seconds(), "min_frequency": 0, - "start_frequency": 0, "max_frequency": max_freq, - "end_frequency": max_freq, "annotation": lbl, "annotator": ant, "start_datetime": strftime_osmose_format(start_datetime), "end_datetime": strftime_osmose_format(end_datetime), "type": "WEAK", + "confidence": None, }) new_row_df = DataFrame([new_row]) df = concat([df, new_row_df], ignore_index=True) diff --git a/src/post_processing/utils/fpod_utils.py b/src/post_processing/utils/fpod_utils.py index 20fd904..315b134 100644 --- a/src/post_processing/utils/fpod_utils.py +++ b/src/post_processing/utils/fpod_utils.py @@ -25,7 +25,6 @@ from post_processing.utils.core_utils import get_coordinates, get_sun_times if TYPE_CHECKING: - import pytz @@ -73,13 +72,13 @@ def fpod2aplose( "filename": [""] * len(df), "start_time": [0] * len(df), "end_time": [bin_size] * len(df), - "start_frequency": [0] * len(df), - "end_frequency": [0] * len(df), + "min_frequency": [0] * len(df), + "max_frequency": [0] * len(df), "annotation": [annotation] * len(df), "annotator": ["FPOD"] * len(df), "start_datetime": [strftime_osmose_format(entry) for entry in fpod_start_dt], "end_datetime": [strftime_osmose_format(entry) for entry in fpod_end_dt], - "is_box": [0] * len(df), + "type": [0] * len(df), } return DataFrame(data) @@ -420,7 +419,9 @@ def generate_hourly_detections(meta: DataFrame, site: str) -> DataFrame: {"name": row["name"], "start_datetime": date} for _, row in df_meta.iterrows() for date in date_range( - start=row["deployment_date"], end=row["recovery_date"], freq="h", + start=row["deployment_date"], + end=row["recovery_date"], + freq="h", ) ] @@ -491,20 +492,30 @@ def feeding_buzz(df: DataFrame, species: str) -> DataFrame: col2 = "HEURE MINUTE" if col in df.columns: df[["DATE", "HEURE", "MINUTE"]] = df[col].str.split(" ", expand=True) - df["Time"] = (df["DATE"].astype(str) + " " + - df["HEURE"].astype(str) + ":" + - df["MINUTE"].astype(str) + ":" + - df["MICROSEC"].astype(str)) + df["Time"] = ( + df["DATE"].astype(str) + + " " + + df["HEURE"].astype(str) + + ":" + + df["MINUTE"].astype(str) + + ":" + + df["MICROSEC"].astype(str) + ) df["Time"] = to_datetime(df["Time"], dayfirst=True) elif col2 in df.columns: df[["HEURE", "MINUTE"]] = df[col2].str.split(" ", expand=True) - df["Time"] = (df["DATE"].astype(str) + " " + - df["HEURE"].astype(str) + ":" + - df["MINUTE"].astype(str) + ":" + - df["MICROSEC"].astype(str)) + df["Time"] = ( + df["DATE"].astype(str) + + " " + + df["HEURE"].astype(str) + + ":" + + df["MINUTE"].astype(str) + + ":" + + df["MICROSEC"].astype(str) + ) df["Time"] = to_datetime(df["Time"], dayfirst=True) else: - df["Time"] = (df["MINUTE"].astype(str) + ":" + df["MICROSEC"].astype(str)) + df["Time"] = df["MINUTE"].astype(str) + ":" + df["MICROSEC"].astype(str) df["Time"] = to_datetime(df["Time"], dayfirst=True) df = df.sort_values(by="Time").reset_index(drop=True) @@ -530,7 +541,7 @@ def feeding_buzz(df: DataFrame, species: str) -> DataFrame: def assign_daytime( - df: DataFrame, + df: DataFrame, ) -> DataFrame: """Assign datetime categories to events. @@ -561,10 +572,10 @@ def assign_daytime( dpm_i = row["Time"] if notna(dpm_i): # Check if time is not NaN jour_i = jour[ - (jour["dusk"].dt.year == dpm_i.year) & - (jour["dusk"].dt.month == dpm_i.month) & - (jour["dusk"].dt.day == dpm_i.day) - ] + (jour["dusk"].dt.year == dpm_i.year) + & (jour["dusk"].dt.month == dpm_i.month) + & (jour["dusk"].dt.day == dpm_i.day) + ] if not jour_i.empty: # Ensure there"s a matching row jour_i = jour_i.iloc[0] # Extract first match if dpm_i <= jour_i["day"]: @@ -665,12 +676,16 @@ def percent_calc(data: DataFrame, time_unit: str | None = None) -> DataFrame: group_cols.insert(0, time_unit) # Aggregate and compute metrics - df = data.groupby(group_cols).agg({ - "DPH": "sum", - "DPM": "sum", - "Day": "size", - "Foraging": "sum", - }).reset_index() + df = ( + data.groupby(group_cols) + .agg({ + "DPH": "sum", + "DPM": "sum", + "Day": "size", + "Foraging": "sum", + }) + .reset_index() + ) df["%click"] = df["DPM"] * 100 / (df["Day"] * 60) df["%DPH"] = df["DPH"] * 100 / df["Day"] @@ -690,12 +705,14 @@ def site_percent(df: DataFrame, metric: str) -> None: Type of percentage you want to show on the graph """ - ax = sns.barplot(data=df, x="site.name", - y=metric, - hue="site.name", - dodge=False, - palette=colors, - ) + ax = sns.barplot( + data=df, + x="site.name", + y=metric, + hue="site.name", + dodge=False, + palette=colors, + ) ax.set_title(f"{metric} per site") ax.set_ylabel(f"{metric}") if metric == "%buzzes": @@ -723,11 +740,12 @@ def year_percent(df: DataFrame, metric: str) -> None: for i, site in enumerate(sorted(sites)): site_data = df[df["site.name"] == site] ax = axs[i] - ax.bar(site_data["Year"], - site_data[metric], - label=f"Site {site}", - color=colors.get(site, "gray"), - ) + ax.bar( + site_data["Year"], + site_data[metric], + label=f"Site {site}", + color=colors.get(site, "gray"), + ) ax.set_title(f"Site {site}") ax.set_ylim(0, max(df[metric]) + 0.2) ax.set_ylabel(metric) @@ -761,19 +779,32 @@ def month_percent(df: DataFrame, metric: str) -> None: for i, site in enumerate(sorted(sites)): site_data = df[df["site.name"] == site] ax = axs[i] - ax.bar(site_data["Month"], - site_data[metric], - label=f"Site {site}", - color=colors.get(site, "gray"), - ) + ax.bar( + site_data["Month"], + site_data[metric], + label=f"Site {site}", + color=colors.get(site, "gray"), + ) ax.set_title(f"{site} - Percentage of postitive to detection minutes per month") ax.set_ylim(0, max(df[metric]) + 0.2) ax.set_ylabel(metric) - ax.set_xticks([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], - ["Jan", "Feb", "Mar", "Apr", "May", "Jun", - "Jul", "Agu", "Sep", "Oct", "Nov", "Dec", - ], - ) + ax.set_xticks( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], + [ + "Jan", + "Feb", + "Mar", + "Apr", + "May", + "Jun", + "Jul", + "Agu", + "Sep", + "Oct", + "Nov", + "Dec", + ], + ) if i != 3: ax.set_xlabel("") else: @@ -804,11 +835,12 @@ def hour_percent(df: DataFrame, metric: str) -> None: for i, site in enumerate(sorted(sites)): site_data = df[df["site.name"] == site] ax = axs[i] - ax.bar(site_data["hour"], - site_data[metric], - label=f"Site {site}", - color=colors.get(site, "gray"), - ) + ax.bar( + site_data["hour"], + site_data[metric], + label=f"Site {site}", + color=colors.get(site, "gray"), + ) ax.set_title(f"Site {site} - Percentage of positive to detection per hour") ax.set_ylim(0, max(df[metric]) + 0.2) ax.set_ylabel(metric) diff --git a/src/post_processing/utils/glider_utils.py b/src/post_processing/utils/glider_utils.py index 626371c..528f7ca 100644 --- a/src/post_processing/utils/glider_utils.py +++ b/src/post_processing/utils/glider_utils.py @@ -104,7 +104,7 @@ def plot_detections_with_nav_data( labels = df["annotation"].unique() for annotation in labels: - df_1label = df[(df["annotation"] == annotation) & (df["is_box"] == 0)] + df_1label = df[(df["annotation"] == annotation) & (df["type"] == "BOX")] glider_timestamps_numeric = [int(ts.timestamp()) for ts in nav["Timestamp"]] detections_timestamps_numeric = [ diff --git a/src/post_processing/utils/pamguard_utils.py b/src/post_processing/utils/pamguard_utils.py index d595835..c448691 100644 --- a/src/post_processing/utils/pamguard_utils.py +++ b/src/post_processing/utils/pamguard_utils.py @@ -116,11 +116,11 @@ def process_binary( "filename": filenames, "start_time": start_times, "end_time": end_times, - "start_frequency": freq_min, - "end_frequency": freq_max, + "min_frequency": freq_min, + "max_frequency": freq_max, "annotation": annotation, "annotator": annotator, "start_datetime": [strftime_osmose_format(beg) for beg in start_datetimes], "end_datetime": [strftime_osmose_format(end) for end in end_datetimes], - "is_box": True, + "type": "BOX", }).sort_values("start_datetime") diff --git a/tests/conftest.py b/tests/conftest.py index adfa6a3..7c1f38d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,7 +12,7 @@ from post_processing.dataclass.data_aplose import DataAplose -SAMPLE = """dataset,filename,start_time,end_time,start_frequency,end_frequency,annotation,annotator,start_datetime,end_datetime,type,score +SAMPLE = """dataset,filename,start_time,end_time,min_frequency,max_frequency,annotation,annotator,start_datetime,end_datetime,type,confidence sample_dataset,2025_01_25_06_20_00,0.0,10.0,0.0,72000.0,lbl2,ann2,2025-01-25T06:20:00.000+00:00,2025-01-25T06:20:10.000+00:00,WEAK,0.11 sample_dataset,2025_01_25_06_20_00,3.46662989520132,4.02371759514617,7523.0,15257.0,lbl2,ann2,2025-01-25T06:20:03.466+00:00,2025-01-25T06:20:04.023+00:00,BOX,0.23 sample_dataset,2025_01_25_06_20_00,0.0,10.0,0.0,72000.0,lbl1,ann2,2025-01-25T06:20:00.000+00:00,2025-01-25T06:20:10.000+00:00,WEAK,0.26 @@ -201,7 +201,7 @@ def sample_yaml( "user_sel": "all", "f_min": None, "f_max": None, - "score": None, + "confidence": None, }, } diff --git a/tests/test_DataAplose.py b/tests/test_DataAplose.py index 025b8c6..87f8877 100644 --- a/tests/test_DataAplose.py +++ b/tests/test_DataAplose.py @@ -1,3 +1,4 @@ +from contextlib import nullcontext from copy import copy from pathlib import Path from typing import ContextManager @@ -248,13 +249,13 @@ def test_concat(sample_yaml: Path, sample_df: DataFrame) -> None: pytest.param( Timestamp("2025-01-26T06:20:09.999+00:00"), None, - pytest.raises(ValueError, match=r"DataFrame is empty after reshaping."), + nullcontext(), id="new_begin_after_original_end", ), pytest.param( None, Timestamp("2025-01-25T06:20:00.001+00:00"), - pytest.raises(ValueError, match=r"DataFrame is empty after reshaping."), + nullcontext(), id="new_end_before_original_begin", ), pytest.param( @@ -268,13 +269,13 @@ def test_concat(sample_yaml: Path, sample_df: DataFrame) -> None: pytest.param( Timestamp("2050-01-01", tz="UTC"), Timestamp("2050-12-31", tz="UTC"), - pytest.raises(ValueError, match=r"DataFrame is empty after reshaping."), + nullcontext(), id="tz_aware_future_range_no_data", ), pytest.param( Timestamp("1990-01-01", tz="America/New_York"), Timestamp("1990-12-31", tz="America/New_York"), - pytest.raises(ValueError, match=r"DataFrame is empty after reshaping."), + nullcontext(), id="tz_aware_past_range_no_data", ), ], diff --git a/tests/test_filtering_utils.py b/tests/test_filtering_utils.py index ceb0940..faed6f0 100644 --- a/tests/test_filtering_utils.py +++ b/tests/test_filtering_utils.py @@ -13,7 +13,7 @@ filter_by_annotator, filter_by_freq, filter_by_label, - filter_by_score, + filter_by_confidence, filter_by_time, filter_strong_detection, find_delimiter, @@ -124,25 +124,27 @@ def test_filter_by_time_valid(sample_df: DataFrame, begin, end): @pytest.mark.parametrize( - "begin, end, expected_msg", + "begin, end", [ pytest.param( Timestamp("2050-01-01", tz="utc"), None, - "No detection found after '2050", id="out_of_range_begin", ), pytest.param( None, Timestamp("1900-01-01", tz="utc"), - "No detection found before '1900", id="out_of_range_end", ), ], ) -def test_filter_by_time_out_of_range(sample_df: DataFrame, begin, end, expected_msg): - with pytest.raises(ValueError, match=expected_msg): - filter_by_time(sample_df, begin=begin, end=end) +def test_filter_by_time_out_of_range( + sample_df: DataFrame, + begin: Timestamp, + end: Timestamp, +) -> None: + df = filter_by_time(sample_df, begin=begin, end=end) + assert df.empty # filter_by_annotator @@ -203,9 +205,9 @@ def test_filter_by_freq_valid(sample_df: DataFrame, f_min, f_max): assert not result.empty if f_min is not None: - assert (result["start_frequency"] >= f_min).all() + assert (result["min_frequency"] >= f_min).all() if f_max is not None: - assert (result["end_frequency"] <= f_max).all() + assert (result["max_frequency"] <= f_max).all() @pytest.mark.parametrize( @@ -230,21 +232,21 @@ def test_filter_by_freq_out_of_range(sample_df: DataFrame, f_min, f_max, expecte filter_by_freq(sample_df, f_min=f_min, f_max=f_max) -# filter_by_score -def test_filter_by_score_valid(sample_df: DataFrame) -> None: - df = filter_by_score(sample_df, 0.5) - assert (df["score"] >= 0.5).all() +# filter_by_confidence +def test_filter_by_confidence_valid(sample_df: DataFrame) -> None: + df = filter_by_confidence(sample_df, 0.5) + assert (df["confidence"] >= 0.5).all() -def test_filter_by_score_no_results(sample_df: DataFrame) -> None: - with pytest.raises(ValueError, match="No detection found with score above 1.0"): - filter_by_score(sample_df, 1.0) +def test_filter_by_confidence_no_results(sample_df: DataFrame) -> None: + df = filter_by_confidence(sample_df, 1) + assert df.empty -def test_filter_by_score_missing_column(sample_df: DataFrame) -> None: - df = sample_df.drop(columns=["score"]) - with pytest.raises(ValueError, match="'score' column not present"): - filter_by_score(df, 0.5) +def test_filter_by_confidence_missing_column(sample_df: DataFrame) -> None: + df = sample_df.drop(columns=["confidence"]) + with pytest.raises(ValueError, match="'confidence' column not present"): + filter_by_confidence(df, 0.5) # filter_weak_strong_detection @@ -282,7 +284,7 @@ def test_get_labels(sample_df: DataFrame) -> None: def test_get_max_freq(sample_df: DataFrame) -> None: - assert get_max_freq(sample_df) == sample_df["end_frequency"].max() + assert get_max_freq(sample_df) == sample_df["max_frequency"].max() def test_get_max_time(sample_df: DataFrame) -> None: @@ -327,14 +329,14 @@ def test_get_timezone_several(sample_df: DataFrame) -> None: "filename": "2025_01_26_06_20_00", "start_time": 0, "end_time": 2, - "start_frequency": 100, - "end_frequency": 200, + "min_frequency": 100, + "max_frequency": 200, "annotation": "annotation", "annotator": "annotator", "start_datetime": Timestamp("2025-01-27 06:00:00.000000+07:00"), "end_datetime": Timestamp("2025-01-27 06:00:00.000000+07:00"), - "is_box": 1, - "score": None, + "type": "WEAK", + "confidence": None, } sample_df = concat( [sample_df, DataFrame([new_row])], @@ -416,14 +418,14 @@ def test_no_timebin_several_tz(sample_df: DataFrame) -> None: "filename": "2025_01_26_06_20_00", "start_time": 0, "end_time": 2, - "start_frequency": 100, - "end_frequency": 200, + "min_frequency": 100, + "max_frequency": 200, "annotation": "annotation", "annotator": "annotator", "start_datetime": Timestamp("2025-01-27 06:00:00.000000+07:00"), "end_datetime": Timestamp("2025-01-27 06:00:00.000000+07:00"), - "is_box": 1, - "score": None, + "type": "WEAK", + "confidence": None, } sample_df = concat( [sample_df, DataFrame([new_row])], @@ -472,8 +474,8 @@ def test_no_timebin_original_timebin(sample_df: DataFrame) -> None: ], "start_time": [0] * 18, "end_time": [60.0] * 18, - "start_frequency": [0] * 18, - "end_frequency": [72_000.0] * 18, + "min_frequency": [0] * 18, + "max_frequency": [72_000.0] * 18, "annotation": [ "lbl1", "lbl2", @@ -538,7 +540,7 @@ def test_simple_reshape_hourly(sample_df: DataFrame) -> None: ) assert not df_out.empty assert all(df_out["end_time"] == 3600.0) - assert df_out["end_frequency"].max() == sample_df["end_frequency"].max() + assert df_out["max_frequency"].max() == sample_df["max_frequency"].max() assert set(df_out["annotation"]) <= set(sample_df["annotation"]) assert set(df_out["annotator"]) <= set(sample_df["annotator"]) @@ -581,10 +583,12 @@ def test_empty_result_when_no_matching(sample_df: DataFrame) -> None: timestamp_wav = to_datetime( sample_df["filename"], format="%Y_%m_%d_%H_%M_%S" ).dt.tz_localize(tz) - with pytest.raises(ValueError, match="DataFrame is empty"): - reshape_timebin( - DataFrame(), timestamp_audio=timestamp_wav, timebin_new=Timedelta(hours=1) - ) + df = reshape_timebin( + DataFrame(), + timestamp_audio=timestamp_wav, + timebin_new=Timedelta(hours=1), + ) + assert df.empty # %% ensure_no_invalid diff --git a/tests/test_glider_utils.py b/tests/test_glider_utils.py index d0247c5..0886fee 100644 --- a/tests/test_glider_utils.py +++ b/tests/test_glider_utils.py @@ -35,7 +35,7 @@ def nav_df() -> DataFrame: def df_detections(nav_df: DataFrame) -> DataFrame: return DataFrame({ "annotation": ["whale", "whale", "dolphin"], - "is_box": [0, 0, 0], + "type": [0, 0, 0], "start_datetime": nav_df["Timestamp"][:3], }) @@ -55,8 +55,8 @@ def test_get_position_from_timestamp(nav_df: DataFrame) -> None: def test_plot_detections_with_nav_data( - df_detections: DataFrame, - nav_df: DataFrame, + df_detections: DataFrame, + nav_df: DataFrame, ) -> None: plot_detections_with_nav_data( df=df_detections, @@ -67,7 +67,9 @@ def test_plot_detections_with_nav_data( def test_load_glider_nav() -> None: - input_dir = Path(__file__).parent.parent / "user_case" / "resource" / "OHAGEODAMS_nav" + input_dir = ( + Path(__file__).parent.parent / "user_case" / "resource" / "OHAGEODAMS_nav" + ) df = load_glider_nav(input_dir) assert isinstance(df, DataFrame) assert "Lat" in df.columns @@ -85,8 +87,9 @@ def test_load_glider_nav_no_files(tmp_path: Path) -> None: load_glider_nav(tmp_path) -def test_compute_acoustic_diversity(df_detections: DataFrame, - nav_df: DataFrame) -> None: +def test_compute_acoustic_diversity( + df_detections: DataFrame, nav_df: DataFrame +) -> None: time_vector = list(nav_df["Timestamp"]) result = compute_acoustic_diversity(df_detections, nav_df, time_vector) assert isinstance(result, DataFrame) diff --git a/tests/test_pamguard_utils.py b/tests/test_pamguard_utils.py index db7811f..6931543 100644 --- a/tests/test_pamguard_utils.py +++ b/tests/test_pamguard_utils.py @@ -33,41 +33,58 @@ def fake_detection() -> GenericModule: return det -def test_process_binary_basic(fake_audio: AudioData, fake_detection: GenericModule) -> None: - with patch("post_processing.utils.pamguard_utils.load_pamguard_binary_folder") as mock_loader: +def test_process_binary_basic( + fake_audio: AudioData, fake_detection: GenericModule +) -> None: + with patch( + "post_processing.utils.pamguard_utils.load_pamguard_binary_folder" + ) as mock_loader: mock_loader.return_value = ([fake_detection], None, None) df = process_binary(fake_audio, Path("/fake/binary"), "Dataset", "Label") assert isinstance(df, DataFrame) expected_cols = { - "dataset", "filename", "start_time", "end_time", - "start_frequency", "end_frequency", - "annotation", "annotator", - "start_datetime", "end_datetime", "is_box", + "dataset", + "filename", + "start_time", + "end_time", + "min_frequency", + "max_frequency", + "annotation", + "annotator", + "start_datetime", + "end_datetime", + "type", } assert set(df.columns) == expected_cols row = df.iloc[0] assert row["dataset"] == "Dataset" assert row["filename"] == "fake.wav" - assert row["start_frequency"] == 1000 - assert row["end_frequency"] == 5000 + assert row["min_frequency"] == 1000 + assert row["max_frequency"] == 5000 assert row["annotation"] == "Label" - assert row["is_box"] + assert row["type"] def test_process_binary_no_detections(fake_audio: AudioData) -> None: - with patch("post_processing.utils.pamguard_utils.load_pamguard_binary_folder") as mock_loader: + with patch( + "post_processing.utils.pamguard_utils.load_pamguard_binary_folder" + ) as mock_loader: mock_loader.return_value = ([], None, None) df = process_binary(fake_audio, Path("/fake/binary"), "Dataset", "Label") assert df.empty -def test_process_binary_detection_outside_audio(fake_audio: AudioData, fake_detection: GenericModule) -> None: +def test_process_binary_detection_outside_audio( + fake_audio: AudioData, fake_detection: GenericModule +) -> None: fake_detection.date = "2025-05-28T23:59:00+0000" - with patch("post_processing.utils.pamguard_utils.load_pamguard_binary_folder") as mock_loader: + with patch( + "post_processing.utils.pamguard_utils.load_pamguard_binary_folder" + ) as mock_loader: mock_loader.return_value = ([fake_detection], None, None) with pytest.raises(AttributeError): process_binary(fake_audio, Path("/fake/binary"), "Dataset", "Label")