From e47d269448d7515bb96d7877deb2d66cb925662d Mon Sep 17 00:00:00 2001
From: Bernardo Braga <bernbraga@gmail.com>
Date: Tue, 6 Jan 2026 10:48:03 +0000
Subject: [PATCH 1/2] ensembling predictions made easy

---
 numerai_tools/scoring.py | 51 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

diff --git a/numerai_tools/scoring.py b/numerai_tools/scoring.py
index 1de6d9b..dd75041 100644
--- a/numerai_tools/scoring.py
+++ b/numerai_tools/scoring.py
@@ -445,6 +445,57 @@ def tie_kept_rank__gaussianize__pow_1_5(df: pd.DataFrame) -> pd.DataFrame:
     return power(gaussian(tie_kept_rank(df)), 1.5)
 
 
+def ensemble_predictions(predictions: pd.DataFrame) -> pd.Series:
+    """Ensemble multiple prediction columns into a single prediction using equal weighting.
+    
+    This function ensures all predictors contribute equally by normalizing each prediction
+    through tie-kept ranking and gaussianization before taking the mean. The ensemble result 
+    is then normalized using the same transformation pipeline (rank + gaussianize).
+    
+    The output is ready for further processing such as neutralization or scoring with 
+    numerai_corr (which applies the power 1.5 transformation).
+    
+    This approach guarantees that:
+    1. All predictions are on the same scale before ensembling
+    2. Each predictor has equal weight regardless of its original distribution
+    3. The output is properly normalized (mean≈0, std≈1) and ready for downstream operations
+    
+    Note: This function operates on a single time period (e.g., single era). When working
+    with multiple eras, apply this function separately to each era using groupby.
+    
+    Arguments:
+        predictions: pd.DataFrame - DataFrame where each column is a prediction to ensemble.
+                                   All columns should have the same index.
+    
+    Returns:
+        pd.Series - The ensembled prediction as a Series with the same index as input,
+                   normalized through tie-kept ranking and gaussianization.
+    
+    Example:
+        >>> # Ensemble within each era
+        >>> ensemble = df.groupby('era', group_keys=False).apply(
+        ...     lambda era_df: ensemble_predictions(era_df[pred_cols])
+        ... )
+        
+        >>> # Simple ensemble without era grouping
+        >>> ensemble = ensemble_predictions(predictions_df)
+    """
+    assert isinstance(predictions, pd.DataFrame), "predictions must be a DataFrame"
+    assert len(predictions.columns) > 0, "predictions must have at least one column"
+    assert not predictions.isna().any().any(), "predictions contain NaNs"
+    
+    # Normalize each prediction to the same scale (mean≈0, std≈1)
+    normalized_preds = gaussian(tie_kept_rank(predictions))
+    
+    # Take the mean of normalized predictions
+    ensemble = normalized_preds.mean(axis=1)
+    
+    # Normalize the ensemble result (rank + gaussianize produces mean≈0, std≈1)
+    ensemble_normalized = gaussian(tie_kept_rank(ensemble.to_frame()))
+    
+    return ensemble_normalized.iloc[:, 0]
+
+
 def tie_kept_rank__gaussianize__neutralize__variance_normalize(
     df: pd.DataFrame, neutralizers: pd.DataFrame
 ) -> pd.DataFrame:

From f9becadf1fe5388b3c26724c92f0e55576f7ead7 Mon Sep 17 00:00:00 2001
From: Bernardo Braga <bernbraga@gmail.com>
Date: Tue, 6 Jan 2026 11:00:08 +0000
Subject: [PATCH 2/2] ensembling predictions with .mean() or .dot(weights)

---
 numerai_tools/scoring.py | 46 +++++++++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/numerai_tools/scoring.py b/numerai_tools/scoring.py
index dd75041..49de5fe 100644
--- a/numerai_tools/scoring.py
+++ b/numerai_tools/scoring.py
@@ -445,19 +445,22 @@ def tie_kept_rank__gaussianize__pow_1_5(df: pd.DataFrame) -> pd.DataFrame:
     return power(gaussian(tie_kept_rank(df)), 1.5)
 
 
-def ensemble_predictions(predictions: pd.DataFrame) -> pd.Series:
-    """Ensemble multiple prediction columns into a single prediction using equal weighting.
+def ensemble_predictions(
+    predictions: pd.DataFrame, weights: Optional[List[float]] = None
+) -> pd.Series:
+    """Ensemble multiple prediction columns into a single prediction.
     
-    This function ensures all predictors contribute equally by normalizing each prediction
-    through tie-kept ranking and gaussianization before taking the mean. The ensemble result 
-    is then normalized using the same transformation pipeline (rank + gaussianize).
+    Each prediction column is normalized through tie-kept ranking and
+    gaussianization before aggregation. Supports both equal weighting (default) and
+    custom weights. The aggregated result is then normalized again with the same
+    transformation pipeline (rank + gaussianize).
     
     The output is ready for further processing such as neutralization or scoring with 
-    numerai_corr (which applies the power 1.5 transformation).
+    numerai_corr.
     
     This approach guarantees that:
     1. All predictions are on the same scale before ensembling
-    2. Each predictor has equal weight regardless of its original distribution
+    2. Each predictor's contribution is proportional to its weight
     3. The output is properly normalized (mean≈0, std≈1) and ready for downstream operations
     
     Note: This function operates on a single time period (e.g., single era). When working
@@ -466,19 +469,26 @@ def ensemble_predictions(predictions: pd.DataFrame) -> pd.Series:
     Arguments:
         predictions: pd.DataFrame - DataFrame where each column is a prediction to ensemble.
                                    All columns should have the same index.
+        weights: Optional[List[float]] - Optional weights for each prediction column.
+                                        Must have same length as number of columns.
+                                        If None, uses equal weights (simple mean).
+                                        Rescaled to sum to 1, so the weights must not sum to zero.
     
     Returns:
         pd.Series - The ensembled prediction as a Series with the same index as input,
                    normalized through tie-kept ranking and gaussianization.
     
     Example:
-        >>> # Ensemble within each era
+        >>> # Equal weighting (simple mean)
         >>> ensemble = df.groupby('era', group_keys=False).apply(
         ...     lambda era_df: ensemble_predictions(era_df[pred_cols])
         ... )
         
-        >>> # Simple ensemble without era grouping
-        >>> ensemble = ensemble_predictions(predictions_df)
+        >>> # Custom weights
+        >>> weights = [0.5, 0.3, 0.2]  # One per column in pred_cols; rescaled to sum to 1
+        >>> ensemble = df.groupby('era', group_keys=False).apply(
+        ...     lambda era_df: ensemble_predictions(era_df[pred_cols], weights)
+        ... )
     """
     assert isinstance(predictions, pd.DataFrame), "predictions must be a DataFrame"
     assert len(predictions.columns) > 0, "predictions must have at least one column"
@@ -487,8 +497,20 @@ def ensemble_predictions(predictions: pd.DataFrame) -> pd.Series:
     # Normalize each prediction to the same scale (mean≈0, std≈1)
     normalized_preds = gaussian(tie_kept_rank(predictions))
     
-    # Take the mean of normalized predictions
-    ensemble = normalized_preds.mean(axis=1)
+    # Aggregate predictions
+    if weights is None:
+        # Equal weighting - simple mean
+        ensemble = normalized_preds.mean(axis=1)
+    else:
+        # Custom weights
+        assert len(weights) == len(
+            predictions.columns
+        ), f"weights length ({len(weights)}) must match number of columns ({len(predictions.columns)})"
+        # Normalize weights to sum to 1
+        weights_array = np.array(weights)
+        weights_normalized = weights_array / weights_array.sum()
+        # Weighted average using dot product
+        ensemble = normalized_preds.dot(weights_normalized)
     
     # Normalize the ensemble result (rank + gaussianize produces mean≈0, std≈1)
     ensemble_normalized = gaussian(tie_kept_rank(ensemble.to_frame()))