diff --git a/src/diagnostics/core_diagnostics.jl b/src/diagnostics/core_diagnostics.jl index 3942071af0c..60ea0eccb5e 100644 --- a/src/diagnostics/core_diagnostics.jl +++ b/src/diagnostics/core_diagnostics.jl @@ -1647,24 +1647,32 @@ add_diagnostic_variable!( function compute_mslp!(out, state, cache, time) thermo_params = CAP.thermodynamics_params(cache.params) g = TD.Parameters.grav(thermo_params) - R_m_surf = Fields.level( - lazy.(TD.gas_constant_air.(thermo_params, cache.precomputed.ᶜts)), - 1, - ) + ts_level = Fields.level(cache.precomputed.ᶜts, 1) + R_m_surf = @. lazy(TD.gas_constant_air(thermo_params, ts_level)) - # get pressure, temperature, and height at the lowest atmospheric level p_level = Fields.level(cache.precomputed.ᶜp, 1) - t_level = Fields.level( - lazy.(TD.air_temperature.(thermo_params, cache.precomputed.ᶜts)), - 1, - ) + t_level = @. lazy(TD.air_temperature(thermo_params, ts_level)) z_level = Fields.level(Fields.coordinate_field(state.c.ρ).z, 1) - # compute sea level pressure using the hypsometric equation + # Reduce to mean sea level using hypsometric formulation with lapse rate adjustment + # Using constant lapse rate Γ = 6.5 K/km, with virtual temperature + # represented via R_m_surf. This reduces biases over + # very cold or very warm high-topography regions. + FT = Spaces.undertype(Fields.axes(state.c.ρ)) + Γ = FT(6.5e-3) # K m^-1 + + # p_msl = p_z0 * [1 + Γ * z / T_z0]^( g / (R_m Γ)) + # where: + # - p_z0 pressure at the lowest model level + # - T_z0 air temperature at the lowest model level + # - R_m moist-air gas constant at the surface (R_m_surf), which + # accounts for virtual-temperature effects in the exponent + # - Γ constant lapse rate (6.5 K/km here) + if isnothing(out) - return @. p_level * exp(g * z_level / (R_m_surf * t_level)) + return p_level .* (1 .+ Γ .* z_level ./ t_level) .^ (g / Γ ./ R_m_surf) else - @. out = p_level * exp(g * z_level / (R_m_surf * t_level)) + out .= p_level .* (1 .+ Γ .* z_level ./ t_level) .^ (g / Γ ./ R_m_surf) end end @@ -1673,7 +1681,7 @@ add_diagnostic_variable!( long_name = "Mean Sea Level Pressure", standard_name = "mean_sea_level_pressure", units = "Pa", - comments = "Mean sea level pressure computed from the hypsometric equation", + comments = "Mean sea level pressure computed using a lapse-rate-dependent hypsometric reduction (ERA-style; Γ=6.5 K/km with virtual temperature via moist gas constant).", compute! = compute_mslp!, ) diff --git a/src/initial_conditions/initial_conditions.jl b/src/initial_conditions/initial_conditions.jl index b903b2de5e2..658acaffc5f 100644 --- a/src/initial_conditions/initial_conditions.jl +++ b/src/initial_conditions/initial_conditions.jl @@ -447,73 +447,233 @@ function overwrite_initial_conditions!( ) end +""" + correct_surface_pressure_for_topography!( + p_sfc, + file_path, + face_space, + Y, + ᶜT, + ᶜq_tot, + thermo_params, + regridder_kwargs; + surface_altitude_var = "z_sfc", + ) + +Adjusts the surface pressure field `p_sfc` to account for mismatches between +ERA5 (file) surface altitude and the model orography when specifying pressure. + + Δz = z_model_surface - z_sfc + +and applies a hydrostatic correction at the surface using the local moist gas +constant and temperature at the surface: + + p_sfc .= p_sfc .* exp.(-Δz * g ./ (R_m_sfc .* T_sfc)) + +where: +- `g` is gravitational acceleration from `thermo_params` +- `R_m_sfc` is the moist-air gas constant evaluated from `ᶜq_tot` at the surface +- `T_sfc` is the air temperature from `ᶜT` at the surface + +Returns `true` if the correction is applied; returns `false` if the surface +altitude field cannot be loaded. + +Arguments +- `p_sfc`: face field of surface pressure to be corrected (modified in-place) +- `file_path`: path to the ERA5-derived initialization NetCDF file +- `face_space`: face space of the model grid (for reading/regridding) +- `Y`: prognostic state, used to obtain model surface height +- `ᶜT`: center field of temperature +- `ᶜq_tot`: center field of total specific humidity +- `thermo_params`: thermodynamics parameter set +- `regridder_kwargs`: keyword arguments forwarded to the regridder +- `surface_altitude_var`: variable name for surface altitude (default `"z_sfc"`) +""" +function correct_surface_pressure_for_topography!( + p_sfc, + file_path, + face_space, + Y, + ᶜT, + ᶜq_tot, + thermo_params, + regridder_kwargs; + surface_altitude_var = "z_sfc", +) + regridder_type = :InterpolationsRegridder + ᶠz_surface = Fields.level( + SpaceVaryingInputs.SpaceVaryingInput( + file_path, + surface_altitude_var, + face_space; + regridder_type, + regridder_kwargs = regridder_kwargs, + ), + Fields.half, + ) + + if ᶠz_surface === nothing + return false + end + + FT = eltype(thermo_params) + grav = thermo_params.grav + + ᶠz_model_surface = Fields.level(Fields.coordinate_field(Y.f).z, Fields.half) + ᶠΔz = zeros(face_space) + @. ᶠΔz = ᶠz_model_surface - ᶠz_surface + + ᶠR_m = ᶠinterp.(TD.gas_constant_air.(thermo_params, TD.PhasePartition.(ᶜq_tot))) + ᶠR_m_sfc = Fields.level(ᶠR_m, Fields.half) + + ᶠT = ᶠinterp.(ᶜT) + ᶠT_sfc = Fields.level(ᶠT, Fields.half) + + @. p_sfc = p_sfc * exp(FT(-1) * ᶠΔz * grav / (ᶠR_m_sfc * ᶠT_sfc)) + + @info "Adjusted surface pressure to account for ERA5/model surface-height differences." + return true +end + # WeatherModel function using the shared implementation +""" + overwrite_initial_conditions!(initial_condition::WeatherModel, Y, thermo_params; use_full_pressure=false) + +Overwrite the prognostic state `Y` with ERA5-derived initial conditions on the model grid. + + +- Derives the model's target vertical levels from `Y.c` (on CPU). +- Obtains the ERA5-derived IC NetCDF path via `weather_model_data_path` (with + any caller-provided kwargs forwarded there), then constructs `SpaceVaryingInput` + fields to regrid ERA5 variables onto the model's center and face spaces. +- Populates the thermodynamic state, density, velocity components, total energy, + and moisture. If EDMF subdomains exist, initializes those as well. + +Pressure initialization (`use_full_pressure`): +- If `use_full_pressure == true` and the IC file contains a 3D pressure field + `p_3d(lon,lat,z)`, then pressure is taken directly from `p_3d` and regridded. +- Otherwise, pressure is obtained by hydrostatic integration starting from the + surface pressure `p(lon,lat)` (broadcast in `z` in the IC file), using the + regridded temperature `t` and specific humidity `q`. If the dataset provides + surface altitude `z_sfc` (derived from ERA5 surface geopotential), the surface + pressure is first corrected for model-versus-ERA5 topographic differences + in order to adjust the reported ERA5 surface pressure. + +Expected variables in the IC file: +- 3D: `u`, `v`, `w`, `t`, `q` (and optionally `p_3d`, cloud water variables) +- 2D broadcast in `z`: `p` (surface pressure), `skt` (skin temperature), and + optionally `z_sfc` (surface altitude) + +Notes: +- When generating 3D ICs (via `to_z_levels(...; interp3d=true)` in + `weather_model_data_path`), the file can include `p_3d` and a `z_physical` + field on the target grid, enabling the full-pressure path described above. +""" function overwrite_initial_conditions!( initial_condition::WeatherModel, Y, - thermo_params, + thermo_params; + use_full_pressure::Bool = false, ) + regridder_type = :InterpolationsRegridder + interpolation_method = Intp.Linear() extrapolation_bc = (Intp.Periodic(), Intp.Flat(), Intp.Flat()) - # Extract face coordinates and compute center midpoints - # Compute target levels on CPU to avoid GPU reductions - z_arr_cpu = Array(Fields.field2array(Fields.coordinate_field(Y.c).z)) - icol = argmin(z_arr_cpu[1, :]) - target_levels = z_arr_cpu[:, icol] + # target_levels defines vertical z for 1D interp, + # if upstream processed ERA5 init file is missing, it is generated with to_z_levels_1d. + z_arr = Array(Fields.field2array(Fields.coordinate_field(Y.c).z)) + z_top = round(maximum(z_arr)) + target_levels = collect(0.0:300.0:z_top) + + @info "Calling weather_model_data_path" ( + start_date = initial_condition.start_date, + era5_dir = initial_condition.era5_initial_condition_dir, + ) file_path = weather_model_data_path( initial_condition.start_date, target_levels, - initial_condition.era5_initial_condition_dir, + initial_condition.era5_initial_condition_dir; ) - regridder_kwargs = (; extrapolation_bc) + regridder_kwargs = (; extrapolation_bc, interpolation_method) + isfile(file_path) || error("$(file_path) is not a file") @info "Overwriting initial conditions with data from file $(file_path)" center_space = Fields.axes(Y.c) face_space = Fields.axes(Y.f) - # Using surface pressure, air temperature and specific humidity - # from the dataset, compute air pressure. - p_sfc = Fields.level( - SpaceVaryingInputs.SpaceVaryingInput( - file_path, - "p", - face_space, - regridder_kwargs = regridder_kwargs, - ), - Fields.half, - ) ᶜT = SpaceVaryingInputs.SpaceVaryingInput( file_path, "t", - center_space, + center_space; + regridder_type, regridder_kwargs = regridder_kwargs, ) ᶜq_tot = SpaceVaryingInputs.SpaceVaryingInput( file_path, "q", - center_space, + center_space; + regridder_type, regridder_kwargs = regridder_kwargs, ) - # With the known temperature (ᶜT) and moisture (ᶜq_tot) profile, - # recompute the pressure levels assuming hydrostatic balance is maintained. - # Uses the ClimaCore `column_integral_indefinite!` function to solve - # ∂(ln𝑝)/∂z = -g/(Rₘ(q)T), where - # p is the local pressure - # g is the gravitational constant - # q is the specific humidity - # Rₘ is the gas constant for moist air - # T is the air temperature - # p is then updated with the integral result, given p_sfc, - # following which the thermodynamic state is constructed. - ᶜ∂lnp∂z = @. -thermo_params.grav / - (TD.gas_constant_air(thermo_params, TD.PhasePartition(ᶜq_tot)) * ᶜT) - ᶠlnp_over_psfc = zeros(face_space) - Operators.column_integral_indefinite!(ᶠlnp_over_psfc, ᶜ∂lnp∂z) - ᶠp = p_sfc .* exp.(ᶠlnp_over_psfc) + use_p3d = use_full_pressure && NC.NCDataset(file_path) do ds + haskey(ds, "p_3d") + end + ᶠp = if use_p3d + SpaceVaryingInputs.SpaceVaryingInput( + file_path, + "p_3d", + face_space; + regridder_type, + regridder_kwargs = regridder_kwargs, + ) + else + if use_full_pressure + @warn "Requested full pressure initialization, but variable `p_3d` is missing in $(file_path). Falling back to hydrostatic integration from surface pressure." + end + # Using surface pressure, air temperature and specific humidity + # from the dataset, compute air pressure by hydrostatic integration. + p_sfc = Fields.level( + SpaceVaryingInputs.SpaceVaryingInput( + file_path, + "p", + face_space; + regridder_type, + regridder_kwargs = regridder_kwargs, + ), + Fields.half, + ) + # Apply hydrostatic surface-pressure correction only if surface altitude is available + surface_altitude_var = "z_sfc" + has_surface_altitude = NC.NCDataset(file_path) do ds + haskey(ds, surface_altitude_var) + end + if has_surface_altitude + correct_surface_pressure_for_topography!( + p_sfc, + file_path, + face_space, + Y, + ᶜT, + ᶜq_tot, + thermo_params, + regridder_kwargs; + surface_altitude_var = surface_altitude_var, + ) + else + @warn "Skipping topographic correction because variable `$surface_altitude_var` is missing from $(file_path)." + end + # With the known temperature (ᶜT) and moisture (ᶜq_tot) profile, + # recompute the pressure levels assuming hydrostatic balance is maintained. + ᶜ∂lnp∂z = @. -thermo_params.grav / + (TD.gas_constant_air(thermo_params, TD.PhasePartition(ᶜq_tot)) * ᶜT) + ᶠlnp_over_psfc = zeros(face_space) + Operators.column_integral_indefinite!(ᶠlnp_over_psfc, ᶜ∂lnp∂z) + p_sfc .* exp.(ᶠlnp_over_psfc) + end ᶜts = TD.PhaseEquil_pTq.(thermo_params, ᶜinterp.(ᶠp), ᶜT, ᶜq_tot) # Assign prognostic variables from equilibrium moisture models @@ -525,19 +685,22 @@ function overwrite_initial_conditions!( SpaceVaryingInputs.SpaceVaryingInput( file_path, "u", - center_space, + center_space; + regridder_type, regridder_kwargs = regridder_kwargs, ), SpaceVaryingInputs.SpaceVaryingInput( file_path, "v", - center_space, + center_space; + regridder_type, regridder_kwargs = regridder_kwargs, ), SpaceVaryingInputs.SpaceVaryingInput( file_path, "w", - center_space, + center_space; + regridder_type, regridder_kwargs = regridder_kwargs, ), ) @@ -569,14 +732,16 @@ function overwrite_initial_conditions!( SpaceVaryingInputs.SpaceVaryingInput( file_path, "cswc", - center_space, + center_space; + regridder_type, regridder_kwargs = regridder_kwargs, ) .* Y.c.ρ Y.c.ρq_rai .= SpaceVaryingInputs.SpaceVaryingInput( file_path, "crwc", - center_space, + center_space; + regridder_type, regridder_kwargs = regridder_kwargs, ) .* Y.c.ρ end diff --git a/src/utils/weather_model.jl b/src/utils/weather_model.jl index b783fa6e916..9f30726d15b 100644 --- a/src/utils/weather_model.jl +++ b/src/utils/weather_model.jl @@ -5,25 +5,31 @@ import ..parse_date """ - weather_model_data_path(start_date, target_levels, era5_initial_condition_dir]) + weather_model_data_path( + start_date, + target_levels, + era5_initial_condition_dir=nothing; + kwargs... + ) Get the path to the weather model data for a given start date and time. -If the data is not found, will attempt to generate it from raw data. If -the raw data is not found, throw an error +If the data is not found, will attempt to generate it from raw data. If +the raw data is not found, throw an error. -# Arguments +Args: - `start_date`: Start date as string yyyymmdd or yyyymmdd-HHMM -- `target_levels`: Vector of target altitude levels (in meters) to interpolate to +- `target_levels`: Vector of target altitude levels (in meters) - `era5_initial_condition_dir`: Optional directory containing preprocessed ERA5 -- If `era5_initial_condition_dir` is provided, use - `era5_init_processed_internal_YYYYMMDD_0000.nc` from that directory. -- Otherwise, use the `weather_model_ic` artifact. +Keywords: +- `interp_w::Bool=false`: If false, write w=0; if true, interpolate w (1D path) """ + function weather_model_data_path( start_date, target_levels, - era5_initial_condition_dir = nothing, + era5_initial_condition_dir = nothing; + interp_w::Bool = false, ) # Parse the date using the existing parse_date function dt = parse_date(start_date) @@ -32,59 +38,93 @@ function weather_model_data_path( start_date_str = Dates.format(dt, "yyyymmdd") start_time = Dates.format(dt, "HHMM") # Note: this is not the same as `start_time` in the coupler! - # If user provided a directory with preprocessed initial conditions, use it + # Determine source/destination and whether generation is needed + local raw_data_path::String + local ic_data_path::String + local generate_needed::Bool + if !isnothing(era5_initial_condition_dir) + # User-provided directory ic_data_path = joinpath( era5_initial_condition_dir, "era5_init_processed_internal_$(start_date_str)_0000.nc", # TODO: generalize for all times once Coupler supports HHMM specification ) + if isfile(ic_data_path) + @info "Using existing interpolated IC file: $ic_data_path" + return ic_data_path + end raw_data_path = joinpath( era5_initial_condition_dir, "era5_raw_$(start_date_str)_0000.nc", ) - if !isfile(ic_data_path) - if !isfile(raw_data_path) - error( - "Neither preprocessed nor raw initial condition file exist in $(era5_initial_condition_dir). Please run `python get_initial_conditions.py` in the WeatherQuest repository to download the data.", - ) - end - @info "Interpolating raw weather model data onto z-levels from user-provided directory" - to_z_levels(raw_data_path, ic_data_path, target_levels, Float32) - else - @info "Using existing interpolated IC file: $ic_data_path" + if !isfile(raw_data_path) + error( + "Neither preprocessed nor raw initial condition file exist in $(era5_initial_condition_dir). Please run `python get_initial_conditions.py` in the WeatherQuest repository to download the data.", + ) end + generate_needed = true + else + # Artifact-based paths + ic_data_path = joinpath( + @clima_artifact("weather_model_ic"), + "init", + "era5_init_$(start_date_str)_$(start_time).nc", + ) return ic_data_path end - # Otherwise, use artifact-based paths and generate if needed - ic_data_path = joinpath( - @clima_artifact("weather_model_ic"), - "init", - "era5_init_$(start_date_str)_$(start_time).nc", + # Fallback: generate a 1D-interpolated IC file when processed_internal file absent + ic_data_path_1d = joinpath( + era5_initial_condition_dir, + "era5_init_$(start_date_str)_0000.nc", ) - raw_data_path = joinpath( - @clima_artifact("weather_model_ic"), - "raw", - "era5_raw_$(start_date_str)_$(start_time).nc", + @info "Processed 3D IC not found; falling back to 1D interpolation" ( + raw = raw_data_path, + dest = ic_data_path_1d, + n_target_levels = length(target_levels), ) - - return ic_data_path + to_z_levels_1d( + raw_data_path, + ic_data_path_1d, + target_levels, + Float32; + interp_w = interp_w, + ) + return ic_data_path_1d end + """ - to_z_levels(source_file, target_file, target_levels, FT) - -Interpolate ERA5 data from native model levels to specified z-levels. Note that -to use _overwrite_initial_conditions_from_file! we rename surface pressure, sp, -to p. This allows us to share functionality with the Dyamond setup. We assert the -variables are present in the source file, which may be modified if additional -variables are needed, e.g. for land or ocean models. `target_levels` is a vector -of target altitude levels (in meters) to interpolate to. + to_z_levels_1d( + source_file, + target_file, + target_levels, + FT; + interp_w=false, + ) + +Interpolate ERA5 data from native model levels to specified 1D z-levels, column-wise in z. + +Args: +- source_file::String: Input ERA5 NetCDF path +- target_file::String: Output NetCDF path +- target_levels::AbstractVector: Target 1D z-levels +- FT::Type{<:AbstractFloat}: Floating-point element type + +Keywords: +- interp_w::Bool=false: If false, write w=0 everywhere; if true, interpolate w """ -function to_z_levels(source_file, target_file, target_levels, FT) +function to_z_levels_1d( + source_file, + target_file, + target_levels, + FT; + interp_w::Bool = false, +) param_set = TD.Parameters.ThermodynamicsParameters(FT) grav = TD.Parameters.grav(param_set) + target_levels = FT.(target_levels) ncin = Dataset(source_file) @@ -146,7 +186,7 @@ function to_z_levels(source_file, target_file, target_levels, FT) for var_name in req3d var_obj = defVar(ncout, var_name, FT, ("lon", "lat", "z"), attrib = ncin[var_name].attrib) - if var_name == "w" + if var_name == "w" && !interp_w var_obj[:, :, :] = zeros(FT, length(lon), length(lat), length(target_levels)) else data = interpz_3d(target_levels, source_z, FT.(ncin[var_name][:, :, :, 1])) @@ -157,14 +197,53 @@ function to_z_levels(source_file, target_file, target_levels, FT) end end + # Compute 3D pressure on target z-levels (p_3d) by log-pressure interpolation + # Assume ERA5 pressure levels are in hPa and convert to Pa + plevs_pa = FT.(ncin["pressure_level"][:]) .* FT(100) + # Prepare output var and per-column interpolation in log(p) + p3d_var_attrib = Dict( + "standard_name" => "air_pressure", + "long_name" => "air pressure on model z-levels", + "units" => "Pa", + "source" => "ERA5 pressure levels interpolated in log(p) vs z", + ) + p3d_var = defVar(ncout, "p_3d", FT, ("lon", "lat", "z"), attrib = p3d_var_attrib) + nx, ny, _ = size(source_z) + p3d = similar(source_z, FT, nx, ny, length(target_levels)) + logp_src = FT.(log.(plevs_pa)) + @inbounds for j in 1:ny, i in 1:nx + zcol = view(source_z, i, j, :) + dest = view(p3d, i, j, :) + # Interpolate log(p) along z, then exponentiate + interpolate1d!(dest, zcol, target_levels, logp_src, Linear(), Flat()) + dest .= exp.(dest) + end + p3d_var[:, :, :] = p3d + # Write 2D surface variables - extend to all levels (TODO: accept 2D variables in atmos) - # Simply repeat the surface values for all levels - surf_map = Dict("skt" => "skt", "sp" => "p") + # Duplicate 2D surface field across all target vertical levels + surf_map = Dict("skt" => "skt", "sp" => "p", "surface_geopotential" => "z_sfc") for (src_name, dst_name) in surf_map - var_obj = - defVar(ncout, dst_name, FT, ("lon", "lat", "z"), attrib = ncin[src_name].attrib) + # Choose attributes; for z_sfc, set clean altitude attributes + var_attrib = if dst_name == "z_sfc" + Dict( + "standard_name" => "surface_altitude", + "long_name" => "surface altitude derived from ERA5", + "units" => "m", + "source_variable" => src_name, + ) + else + ncin[src_name].attrib + end + var_obj = defVar(ncout, dst_name, FT, ("lon", "lat", "z"), attrib = var_attrib) + # Read first time slice and coalesce; follow same convention as sp (use [:, :, 1]) + data2d = FT.(coalesce.(ncin[src_name][:, :, 1], NaN)) + # Convert geopotential to meters if necessary + if dst_name == "z_sfc" + data2d .= data2d ./ grav + end for k in 1:length(target_levels) - var_obj[:, :, k] = FT.(ncin[src_name][:, :, 1]) + var_obj[:, :, k] = data2d end end