Skip to content

Commit 1ac64e5

Browse files
authored
Merge pull request #4162 from CliMA/zs/restart
add an option for reproducible restart
2 parents b595daf + 11519ea commit 1ac64e5

File tree

7 files changed

+30
-10
lines changed

7 files changed

+30
-10
lines changed

NEWS.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ ClimaAtmos.jl Release Notes
44
main
55
-------
66

7+
PR [#4162](https://github.com/CliMA/ClimaAtmos.jl/pull/4162) adds an option for
8+
reproducible restart. It is set to false by default. This shouldn't affect restart
9+
in the coupler, as the coupler saves the cache for restarting.
10+
711
PR [#4021](https://github.com/CliMA/ClimaAtmos.jl/pull/4021) uses ClimaCore
812
convenience constructors to create spaces without an AtmosConfig.
913

config/default_configs/default_config.yml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,11 +335,14 @@ deep_atmosphere:
335335
help: "If true, use deep atmosphere equations and metric terms, otherwise assume columns are cylindrical (shallow atmosphere) [`true` (default), `false`]"
336336
value: true
337337
restart_file:
338-
help: "Path to HDF5 file to use as simulation starting point"
338+
help: "Path to HDF5 file to use as simulation starting point. Note that the simulation can only be restarted in a reproducible way when `reproducible_restart` is true."
339339
value: ~
340340
detect_restart_file:
341341
help: "When true, try finding a restart file and use it to restart the simulation. Only works with ActiveLink."
342342
value: false
343+
reproducible_restart:
344+
help: "If true, the simulation is reproducible when restarting from a restart file. Disable this option when running production runs."
345+
value: false
343346
prescribed_aerosols:
344347
help: "Which aerosols to add. List of keys from the data file (e.g., CB1, CB2)."
345348
value: []

docs/src/restarts.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,12 @@ particularly useful for
2626

2727
!!! note
2828

29-
The simulation can only be restarted in a reproducible way when using `GridScaleCloud`
30-
for `cloud_model`.
29+
By default, the simulation cannot be restarted in a reproducible way. To
30+
enable reproducible restarts, you need to set `reproducible_restart` to `true`.
31+
When `reproducible_restart` is true, `ClimaAtmos` recalculates the grid-scale
32+
cloud fraction and uses it in the buoyancy gradient calculation to ensure deterministic
33+
behavior across restarts. We recommend disabling this option for production runs.
34+
3135

3236
### How Restarts Work
3337

src/cache/precomputed_quantities.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -533,9 +533,9 @@ NVTX.@annotate function set_explicit_precomputed_quantities!(Y, p, t)
533533
# depends on the mixing length, which depends on the buoyancy gradient.
534534
# We break this circular dependency by using cloud fraction from the previous time step in the
535535
# buoyancy gradient calculation. This breaks reproducible restart in general,
536-
# but we support reproducible restart with grid-scale cloud by recalculating the cloud fraction here.
537-
if cloud_model isa GridScaleCloud
538-
set_cloud_fraction!(Y, p, moisture_model, cloud_model)
536+
# but we support reproducible restart by recalculating the cloud fraction with GridScaleCloud here.
537+
if p.atmos.numerics.reproducible_restart isa ReproducibleRestart
538+
set_cloud_fraction!(Y, p, moisture_model, GridScaleCloud())
539539
end
540540

541541
if turbconv_model isa PrognosticEDMFX

src/solver/type_getters.jl

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,12 @@ function get_scale_blending_method(parsed_args)
188188
end
189189

190190
function get_numerics(parsed_args, FT)
191-
test_dycore =
191+
test_dycore_consistency =
192192
parsed_args["test_dycore_consistency"] ? TestDycoreConsistency() :
193193
nothing
194+
reproducible_restart =
195+
parsed_args["reproducible_restart"] ? ReproducibleRestart() :
196+
nothing
194197

195198
energy_q_tot_upwinding = Val(Symbol(parsed_args["energy_q_tot_upwinding"]))
196199
tracer_upwinding = Val(Symbol(parsed_args["tracer_upwinding"]))
@@ -227,7 +230,8 @@ function get_numerics(parsed_args, FT)
227230
edmfx_sgsflux_upwinding,
228231
edmfx_tracer_upwinding,
229232
limiter,
230-
test_dycore_consistency = test_dycore,
233+
test_dycore_consistency,
234+
reproducible_restart,
231235
diff_mode,
232236
hyperdiff,
233237
)

src/solver/types.jl

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,7 @@ function get_ρu₃qₜ_surface(flow::ShipwayHill2012VelocityProfile, thermo_par
466466
end
467467

468468
struct TestDycoreConsistency end
469+
struct ReproducibleRestart end
469470

470471
abstract type AbstractTimesteppingMode end
471472
struct Explicit <: AbstractTimesteppingMode end
@@ -478,7 +479,7 @@ struct SmoothMinimumBlending <: AbstractScaleBlendingMethod end
478479
struct HardMinimumBlending <: AbstractScaleBlendingMethod end
479480
Base.broadcastable(x::AbstractScaleBlendingMethod) = tuple(x)
480481

481-
Base.@kwdef struct AtmosNumerics{EN_UP, TR_UP, ED_UP, SG_UP, ED_TR_UP, TDC, LIM, DM, HD}
482+
Base.@kwdef struct AtmosNumerics{EN_UP, TR_UP, ED_UP, SG_UP, ED_TR_UP, TDC, RR, LIM, DM, HD}
482483

483484
"""Enable specific upwinding schemes for specific equations"""
484485
energy_q_tot_upwinding::EN_UP
@@ -489,6 +490,8 @@ Base.@kwdef struct AtmosNumerics{EN_UP, TR_UP, ED_UP, SG_UP, ED_TR_UP, TDC, LIM,
489490

490491
"""Add NaNs to certain equations to track down problems"""
491492
test_dycore_consistency::TDC
493+
"""Whether the simulation is reproducible when restarting from a restart file"""
494+
reproducible_restart::RR
492495

493496
limiter::LIM
494497

@@ -941,6 +944,7 @@ const _DEFAULT_ATMOS_MODEL_KWARGS = (
941944
edmfx_sgsflux_upwinding = Val(:none),
942945
edmfx_tracer_upwinding = Val(:first_order),
943946
test_dycore_consistency = nothing,
947+
reproducible_restart = nothing,
944948
limiter = nothing,
945949
diff_mode = Explicit(),
946950
hyperdiff = nothing,

test/restart.jl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -389,6 +389,7 @@ if MANYTESTS
389389
job_id = "$(configuration)_$(moisture)_$(precip)_$(topography)_$(radiation)_$(turbconv_mode)"
390390
test_dict = Dict(
391391
"test_dycore_consistency" => true, # We will add NaNs to the cache, just to make sure
392+
"reproducible_restart" => true,
392393
"check_nan_every" => 3,
393394
"log_progress" => false,
394395
"moist" => moisture,
@@ -445,6 +446,7 @@ else
445446
"h_elem" => 4,
446447
"z_elem" => 15,
447448
"test_dycore_consistency" => true, # We will add NaNs to the cache, just to make sure
449+
"reproducible_restart" => true,
448450
"check_nan_every" => 3,
449451
"log_progress" => false,
450452
"dt" => "1secs",
@@ -455,7 +457,6 @@ else
455457
"output_dir" => joinpath(amip_output_loc, amip_job_id),
456458
"dt_cloud_fraction" => "1secs",
457459
"rad" => "allskywithclear",
458-
"cloud_model" => "grid_scale",
459460
"toml" => [
460461
joinpath(@__DIR__, "../toml/longrun_aquaplanet_diagedmf.toml"),
461462
],

0 commit comments

Comments
 (0)