Merge branch 'master' into val_date_issue

jbogaardt · web-flow · commit b8eb1caddb40 · 2025-11-04T15:46:55.000-07:00
diff --git a/chainladder/core/base.py b/chainladder/core/base.py
@@ -290,24 +290,24 @@ def _get_date_axes(
             end=origin_date.max(),
             freq=origin_grain
         ).to_timestamp(how="s")
-
+        
         development_range: DatetimeIndex = pd.period_range(
             start=development_date.min(),
             end=development_date.max(),
             freq=development_grain,
         ).to_timestamp(how="e")
-
+        
         # If the development is semi-annual, we need to adjust further because of "2Q-DEC".
-        if development_grain == "2Q-DEC":
+        if development_grain[:2] == "2Q":
             from pandas.tseries.offsets import DateOffset
 
             development_range += DateOffset(months=-3)
-
+        
         c = pd.DataFrame(
             TriangleBase._cartesian_product(origin_range, development_range),
             columns=["__origin__", "__development__"],
         )
-
+        
         return c[c["__development__"] > c["__origin__"]]
 
     @property
@@ -407,25 +407,17 @@ def _get_grain(
             Set to False if you want to treat December as period end. Set
             to True if you want it inferred from the data.
         """
-        months: np.ndarray = dates.dt.month.unique()
+        months: np.ndarray = (dates.dt.year * 12 + dates.dt.month).unique()
         diffs: np.ndarray = np.diff(np.sort(months))
-        if len(dates.unique()) == 1:
+        if np.all(np.mod(diffs,12) == 0):
             grain = (
                 "Y"
                 if version.Version(pd.__version__) >= version.Version("2.2.0")
                 else "A"
             )
-
-        elif len(months) == 1:
-            grain = (
-                "Y"
-                if version.Version(pd.__version__) >= version.Version("2.2.0")
-                else "A"
-            )
-
-        elif np.all(diffs == 6):
+        elif np.all(np.mod(diffs,6) == 0):
             grain = "2Q"
-        elif np.all(diffs == 3):
+        elif np.all(np.mod(diffs,3) == 0):
             grain = "Q"
         else:
             grain = "M"
diff --git a/chainladder/core/tests/test_correlation.py b/chainladder/core/tests/test_correlation.py
@@ -1,9 +1,20 @@
 import chainladder as cl
+import pytest
 
 raa = cl.load_sample("RAA")
 
-def test_val_corr():
+def test_val_corr_total_true():
     assert raa.valuation_correlation(p_critical=0.5, total=True)
 
+def test_val_corr_total_false():
+    assert raa.valuation_correlation(p_critical=0.5, total=False)
+
 def test_dev_corr():
     assert raa.development_correlation(p_critical=0.5)
+
+def test_dev_corr_sparse():
+    assert raa.set_backend('sparse').development_correlation(p_critical=0.5)
+
+def test_validate_critical():
+    with pytest.raises(ValueError):
+        raa.valuation_correlation(p_critical=1.5, total=True)
diff --git a/chainladder/core/tests/test_triangle.py b/chainladder/core/tests/test_triangle.py
@@ -866,3 +866,36 @@ def test_single_valuation_date_preserves_exact_date():
     assert triangle.valuation_date == pd.Timestamp('2025-10-31 23:59:59.999999999')
     assert triangle.development_grain == 'M'
     assert int(triangle.valuation_date.strftime('%Y%m')) == 202510
+def test_OXDX_triangle():
+    """Check shape, total and development ages for each origin/development spacing pair and month offset."""
+    for x in [12,6,3,1]:  # x: origin spacing in months
+        for y in [i for i in [12,6,3,1] if i <= x]:  # y: development spacing in months, never coarser than x
+            first_orig = '2020-01-01'
+            width = int(x / y) + 1  # expected number of development columns (see the shape assert)
+            dev_series = (pd.date_range(start=first_orig,periods = width, freq = str(y) + 'ME') + pd.DateOffset(months=y-1)).to_series()
+            tri_df = pd.DataFrame({
+                'origin_date': pd.concat([pd.to_datetime([first_orig] * (width)).to_series(), (pd.to_datetime([first_orig]) + pd.DateOffset(months=x)).to_series()]).to_list(),
+                'development_date': pd.concat([dev_series,dev_series.iloc[[0]] + pd.DateOffset(months=x)]).to_list(),
+                'value': list(range(1,width + 2))
+            })
+            for i in range(12):  # slide the first origin month through a full year
+                for j in range(y):  # pull the development dates j months earlier relative to origin
+                    test_data = tri_df.copy()
+                    test_data['origin_date'] += pd.DateOffset(months=i)        
+                    test_data['development_date'] += pd.DateOffset(months=i-j)
+                    tri = cl.Triangle(
+                        test_data, 
+                        origin='origin_date', 
+                        development='development_date', 
+                        columns='value', 
+                        cumulative=True
+                    )
+                    assert tri.shape == (1,1,2,width)
+                    assert tri.sum().sum() == tri_df['value'].sum()
+                    assert np.all(tri.development == [y-j + x * y for x in range(width)])
+                    # Known bug: origin displays an incorrect year when origin doesn't start on 1/1, so the origin checks below stay disabled.
+                    #if x == 12:
+                        #assert np.all(tri.origin == ['2020','2021'])
+                    #elif x in [6,3]:
+                        #assert np.all(tri.origin.strftime('%Y') == pd.to_datetime(tri.odims).strftime('%Y'))
+                        #assert np.all(tri.origin.strftime('%q').values.astype(float) == np.ceil((pd.to_datetime(tri.odims).strftime('%m').values.astype(int) - 0.5) / 3))
diff --git a/chainladder/core/triangle.py b/chainladder/core/triangle.py
@@ -11,7 +11,7 @@
 from chainladder.utils.sparse import sp
 from chainladder.core.slice import VirtualColumns
 from chainladder.core.correlation import DevelopmentCorrelation, ValuationCorrelation
-from chainladder.utils.utility_functions import concat, num_to_nan, num_to_value
+from chainladder.utils.utility_functions import concat, num_to_nan, num_to_value, to_period
 from chainladder import options
 
 try:
@@ -194,16 +194,12 @@ def __init__(
 
         # Ensure that origin_date values represent the beginning of the period.
         # i.e., 1990 means the start of 1990.
-        origin_date: Series = origin_date.dt.to_period(
-            self.origin_grain
-        ).dt.to_timestamp(how="s")
-
+        origin_date: Series = to_period(origin_date,self.origin_grain).dt.to_timestamp(how="s")
+        
         # Ensure that development_date values represent the end of the period.
         # i.e., 1990 means the end of 1990 assuming annual development periods.
-        development_date: Series = development_date.dt.to_period(
-            self.development_grain
-        ).dt.to_timestamp(how="e")
-
+        development_date: Series = to_period(development_date,self.development_grain).dt.to_timestamp(how="e")
+        
         # Aggregate dates to the origin/development grains.
         data_agg: DataFrame = self._aggregate_data(
             data=data,
@@ -239,7 +235,7 @@ def __init__(
         self.vdims = np.array(columns)
         self.odims, orig_idx = self._set_odims(data_agg, date_axes)
         self.ddims, dev_idx = self._set_ddims(data_agg, date_axes)
-        
+
         # Set remaining triangle properties.
         val_date: Timestamp = data_agg["__development__"].max()
         val_date = val_date.compute() if hasattr(val_date, "compute") else val_date
diff --git a/chainladder/utils/utility_functions.py b/chainladder/utils/utility_functions.py
@@ -637,6 +637,11 @@ def minimum(x1, x2):
 def maximum(x1, x2):
     return x1.maximum(x2)
 
+def to_period(dateseries: pd.Series, freq: str) -> pd.Series:
+    """Return ``dateseries`` as periods of ``freq``; for '2Q' grains, dates in quarters 2 and 4 are first moved back three months so each half-year period starts in quarter 1 or 3."""
+    if freq[:2] == '2Q':
+        dateseries = dateseries.where(dateseries.dt.to_period(freq).dt.strftime('%q').isin(['1', '3']), dateseries + pd.DateOffset(months=-3))
+    return dateseries.dt.to_period(freq)
 
 class PatsyFormula(BaseEstimator, TransformerMixin):
     """A sklearn-style Transformer for patsy formulas.