Skip to content

Commit b8eb1ca

Browse files
authored
Merge branch 'master' into val_date_issue
2 parents 9c73801 + 4eb1671 commit b8eb1ca

File tree

5 files changed

+65
-28
lines changed

5 files changed

+65
-28
lines changed

chainladder/core/base.py

Lines changed: 9 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -290,24 +290,24 @@ def _get_date_axes(
290290
end=origin_date.max(),
291291
freq=origin_grain
292292
).to_timestamp(how="s")
293-
293+
294294
development_range: DatetimeIndex = pd.period_range(
295295
start=development_date.min(),
296296
end=development_date.max(),
297297
freq=development_grain,
298298
).to_timestamp(how="e")
299-
299+
300300
# If the development grain is semi-annual (any "2Q" frequency, e.g. "2Q-DEC"), the range needs a further adjustment.
301-
if development_grain == "2Q-DEC":
301+
if development_grain[:2] == "2Q":
302302
from pandas.tseries.offsets import DateOffset
303303

304304
development_range += DateOffset(months=-3)
305-
305+
306306
c = pd.DataFrame(
307307
TriangleBase._cartesian_product(origin_range, development_range),
308308
columns=["__origin__", "__development__"],
309309
)
310-
310+
311311
return c[c["__development__"] > c["__origin__"]]
312312

313313
@property
@@ -407,25 +407,17 @@ def _get_grain(
407407
Set to False if you want to treat December as period end. Set
408408
to True if you want it inferred from the data.
409409
"""
410-
months: np.ndarray = dates.dt.month.unique()
410+
months: np.ndarray = (dates.dt.year * 12 + dates.dt.month).unique()
411411
diffs: np.ndarray = np.diff(np.sort(months))
412-
if len(dates.unique()) == 1:
412+
if np.all(np.mod(diffs,12) == 0):
413413
grain = (
414414
"Y"
415415
if version.Version(pd.__version__) >= version.Version("2.2.0")
416416
else "A"
417417
)
418-
419-
elif len(months) == 1:
420-
grain = (
421-
"Y"
422-
if version.Version(pd.__version__) >= version.Version("2.2.0")
423-
else "A"
424-
)
425-
426-
elif np.all(diffs == 6):
418+
elif np.all(np.mod(diffs,6) == 0):
427419
grain = "2Q"
428-
elif np.all(diffs == 3):
420+
elif np.all(np.mod(diffs,3) == 0):
429421
grain = "Q"
430422
else:
431423
grain = "M"
Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,20 @@
11
import chainladder as cl
2+
import pytest
23

34
raa = cl.load_sample("RAA")
45

5-
def test_val_corr():
6+
def test_val_corr_total_true():
    """Valuation correlation on the RAA sample with ``total=True`` is truthy."""
    result = raa.valuation_correlation(total=True, p_critical=0.5)
    assert result
78

9+
def test_val_corr_total_false():
    """Valuation correlation on the RAA sample with ``total=False`` is truthy."""
    result = raa.valuation_correlation(total=False, p_critical=0.5)
    assert result
11+
812
def test_dev_corr():
    """Development correlation on the RAA sample is truthy at ``p_critical=0.5``."""
    result = raa.development_correlation(p_critical=0.5)
    assert result
14+
15+
def test_dev_corr_sparse():
    """Development correlation is truthy after switching RAA to the sparse backend."""
    sparse_raa = raa.set_backend('sparse')
    assert sparse_raa.development_correlation(p_critical=0.5)
17+
18+
def test_validate_critical():
    """A ``p_critical`` of 1.5 makes ``valuation_correlation`` raise ``ValueError``."""
    invalid_p_critical = 1.5
    with pytest.raises(ValueError):
        raa.valuation_correlation(total=True, p_critical=invalid_p_critical)

chainladder/core/tests/test_triangle.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -866,3 +866,36 @@ def test_single_valuation_date_preserves_exact_date():
866866
assert triangle.valuation_date == pd.Timestamp('2025-10-31 23:59:59.999999999')
867867
assert triangle.development_grain == 'M'
868868
assert int(triangle.valuation_date.strftime('%Y%m')) == 202510
869+
def test_OXDX_triangle():
    """Build two-origin triangles for every origin/development grain pairing.

    Origin grains of 12, 6, 3 and 1 months are paired with every development
    grain that is no longer than the origin grain. Each pairing is re-run
    with both date columns shifted forward by 0-11 months and the development
    dates additionally pulled back by 0 to ``dev_months - 1`` months; the
    resulting triangle's shape, grand total and development axis are checked.
    """
    grains = [12, 6, 3, 1]
    first_orig = '2020-01-01'

    for origin_months in grains:
        for dev_months in grains:
            if dev_months > origin_months:
                continue

            # Number of development periods spanned by the first origin.
            width = int(origin_months / dev_months) + 1

            period_ends = pd.date_range(
                start=first_orig, periods=width, freq=str(dev_months) + 'ME'
            )
            dev_series = (
                period_ends + pd.DateOffset(months=dev_months - 1)
            ).to_series()

            # The first origin carries `width` observations, the second exactly one.
            first_origin_dates = pd.to_datetime([first_orig] * width).to_series()
            second_origin_date = (
                pd.to_datetime([first_orig]) + pd.DateOffset(months=origin_months)
            ).to_series()
            second_dev_date = dev_series.iloc[[0]] + pd.DateOffset(months=origin_months)

            tri_df = pd.DataFrame({
                'origin_date': pd.concat(
                    [first_origin_dates, second_origin_date]
                ).to_list(),
                'development_date': pd.concat(
                    [dev_series, second_dev_date]
                ).to_list(),
                'value': list(range(1, width + 2)),
            })
            expected_total = tri_df['value'].sum()

            for origin_shift in range(12):
                for dev_lag in range(dev_months):
                    test_data = tri_df.copy()
                    test_data['origin_date'] += pd.DateOffset(months=origin_shift)
                    test_data['development_date'] += pd.DateOffset(
                        months=origin_shift - dev_lag
                    )
                    tri = cl.Triangle(
                        test_data,
                        origin='origin_date',
                        development='development_date',
                        columns='value',
                        cumulative=True,
                    )
                    expected_development = [
                        dev_months - dev_lag + k * dev_months for k in range(width)
                    ]
                    assert tri.shape == (1, 1, 2, width)
                    assert tri.sum().sum() == expected_total
                    assert np.all(tri.development == expected_development)
                    # NOTE: origin-label checks stay disabled. There's a known bug
                    # with origin that displays an incorrect year when origin
                    # doesn't start on 1/1. Intended checks, for reference:
                    #   if origin_months == 12:
                    #       assert np.all(tri.origin == ['2020','2021'])
                    #   elif origin_months in [6,3]:
                    #       assert np.all(tri.origin.strftime('%Y') == pd.to_datetime(tri.odims).strftime('%Y'))
                    #       assert np.all(tri.origin.strftime('%q').values.astype(float) == np.ceil((pd.to_datetime(tri.odims).strftime('%m').values.astype(int) - 0.5) / 3))

chainladder/core/triangle.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from chainladder.utils.sparse import sp
1212
from chainladder.core.slice import VirtualColumns
1313
from chainladder.core.correlation import DevelopmentCorrelation, ValuationCorrelation
14-
from chainladder.utils.utility_functions import concat, num_to_nan, num_to_value
14+
from chainladder.utils.utility_functions import concat, num_to_nan, num_to_value, to_period
1515
from chainladder import options
1616

1717
try:
@@ -194,16 +194,12 @@ def __init__(
194194

195195
# Ensure that origin_date values represent the beginning of the period.
196196
# i.e., 1990 means the start of 1990.
197-
origin_date: Series = origin_date.dt.to_period(
198-
self.origin_grain
199-
).dt.to_timestamp(how="s")
200-
197+
origin_date: Series = to_period(origin_date,self.origin_grain).dt.to_timestamp(how="s")
198+
201199
# Ensure that development_date values represent the end of the period.
202200
# i.e., 1990 means the end of 1990 assuming annual development periods.
203-
development_date: Series = development_date.dt.to_period(
204-
self.development_grain
205-
).dt.to_timestamp(how="e")
206-
201+
development_date: Series = to_period(development_date,self.development_grain).dt.to_timestamp(how="e")
202+
207203
# Aggregate dates to the origin/development grains.
208204
data_agg: DataFrame = self._aggregate_data(
209205
data=data,
@@ -239,7 +235,7 @@ def __init__(
239235
self.vdims = np.array(columns)
240236
self.odims, orig_idx = self._set_odims(data_agg, date_axes)
241237
self.ddims, dev_idx = self._set_ddims(data_agg, date_axes)
242-
238+
243239
# Set remaining triangle properties.
244240
val_date: Timestamp = data_agg["__development__"].max()
245241
val_date = val_date.compute() if hasattr(val_date, "compute") else val_date

chainladder/utils/utility_functions.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,11 @@ def minimum(x1, x2):
637637
def maximum(x1, x2):
    """Return ``x1.maximum(x2)``, delegating the computation to ``x1``."""
    result = x1.maximum(x2)
    return result
639639

640+
def to_period(dateseries: pd.Series, freq: str) -> pd.Series:
    """Convert a datetime Series to periods, aligning semi-annual grains.

    For any frequency that does not start with ``"2Q"`` this is simply
    ``dateseries.dt.to_period(freq)``. For ``"2Q"`` frequencies, a date whose
    two-quarter period would start in quarter 2 or 4 is first moved back three
    months, so every resulting period starts in quarter 1 or 3.

    Parameters
    ----------
    dateseries: pd.Series
        Datetime Series; it must support the ``.dt`` accessor.
    freq: str
        Period frequency string, e.g. ``"M"``, ``"Q-DEC"`` or ``"2Q-DEC"``.

    Returns
    -------
    pd.Series
        Series of periods at ``freq``.
    """
    if not freq.startswith("2Q"):
        return dateseries.dt.to_period(freq)

    # Quarter in which each unadjusted two-quarter period starts.
    start_quarter = dateseries.dt.to_period(freq).dt.strftime("%q")

    # Subtract the offset from the datetime Series itself rather than going
    # through ``.dt.date``: this stays vectorised and keeps the datetime dtype,
    # so ``where`` does not have to re-infer it from Python ``date`` objects.
    shifted = dateseries - pd.DateOffset(months=3)
    aligned = dateseries.where(start_quarter.isin(["1", "3"]), shifted)
    return aligned.dt.to_period(freq)
640645

641646
class PatsyFormula(BaseEstimator, TransformerMixin):
642647
"""A sklearn-style Transformer for patsy formulas.

0 commit comments

Comments
 (0)