# LSTM-like gating layer used by the Deep Galerkin architecture.
# Holds the two activation functions, the layer dimensions, and the
# initializers for its weight matrices and bias vectors.
struct dgm_lstm_layer{F1, F2} <: Lux.AbstractExplicitLayer
    activation1::Function   # σ₁: activation for the Z, G, R gates
    activation2::Function   # σ₂: activation for the candidate state H
    in_dims::Int            # dimension of the external input x
    out_dims::Int           # dimension of the hidden state S
    init_weight::F1         # rng-based initializer for the U*/W* matrices
    init_bias::F2           # rng-based initializer for the b* vectors
end
9+
"""
    dgm_lstm_layer(in_dims::Int, out_dims::Int, activation1, activation2;
                   init_weight = Lux.glorot_uniform, init_bias = Lux.zeros32)

Construct a `dgm_lstm_layer` mapping an `in_dims`-dimensional input and an
`out_dims`-dimensional hidden state to a new `out_dims`-dimensional state.
"""
function dgm_lstm_layer(in_dims::Int, out_dims::Int, activation1, activation2;
        init_weight = Lux.glorot_uniform, init_bias = Lux.zeros32)
    return dgm_lstm_layer{typeof(init_weight), typeof(init_bias)}(
        activation1, activation2, in_dims, out_dims, init_weight, init_bias)
end
14+
15+ import Lux: initialparameters, initialstates, parameterlength, statelength
16+
function Lux.initialparameters(rng::AbstractRNG, l::dgm_lstm_layer)
    # Per gate (z, g, r, h): an input matrix U (out × in), a recurrent matrix
    # W (out × out) and a bias vector b (out). NamedTuple fields are evaluated
    # left-to-right, so the RNG draw order (all U, all W, all b) matches the
    # original layout and seeded runs stay reproducible.
    input_mat() = l.init_weight(rng, l.out_dims, l.in_dims)
    recur_mat() = l.init_weight(rng, l.out_dims, l.out_dims)
    bias_vec() = l.init_bias(rng, l.out_dims)
    return (
        Uz = input_mat(), Ug = input_mat(), Ur = input_mat(), Uh = input_mat(),
        Wz = recur_mat(), Wg = recur_mat(), Wr = recur_mat(), Wh = recur_mat(),
        bz = bias_vec(), bg = bias_vec(), br = bias_vec(), bh = bias_vec()
    )
end
33+
# The layer is stateless: everything it needs lives in the parameters.
Lux.initialstates(::AbstractRNG, ::dgm_lstm_layer) = NamedTuple()

# 4 gates × (U: out×in, W: out×out, b: out) entries in total.
function Lux.parameterlength(l::dgm_lstm_layer)
    return 4 * (l.out_dims * l.in_dims + l.out_dims * l.out_dims + l.out_dims)
end

Lux.statelength(l::dgm_lstm_layer) = 0
37+
"""
Apply one DGM LSTM-type layer: given the current hidden state `S` and the
original network input `x`, compute the gates `Z`, `G`, `R` and the candidate
state `H`, and return the updated state together with the (empty) state `st`.
"""
function (layer::dgm_lstm_layer)(S::AbstractVecOrMat{T}, x::AbstractVecOrMat{T},
        ps, st::NamedTuple) where {T}
    @unpack Uz, Ug, Ur, Uh, Wz, Wg, Wr, Wh, bz, bg, br, bh = ps
    Z = layer.activation1.(Uz * x + Wz * S .+ bz)
    G = layer.activation1.(Ug * x + Wg * S .+ bg)
    R = layer.activation1.(Ur * x + Wr * S .+ br)
    # The reset gate R acts elementwise on S before the recurrent product.
    H = layer.activation2.(Uh * x + Wh * (S .* R) .+ bh)
    # FIX: use the integer literal 1 (was the Float64 literal `1.`), so that
    # Float32 states/parameters are not silently promoted to Float64.
    S_new = (1 .- G) .* H .+ Z .* S
    return S_new, st
end
47+
# Container for a sequence of `dgm_lstm_layer`s, stored as a NamedTuple so
# Lux can address per-layer parameters and states by field name.
struct dgm_lstm_block{L <: NamedTuple} <: Lux.AbstractExplicitContainerLayer{(:layers,)}
    layers::L
end
51+
"""
    dgm_lstm_block(layers...)

Bundle the given LSTM-type layers into a `dgm_lstm_block`, naming them
`dgm_lstm_1`, `dgm_lstm_2`, … in order.
"""
function dgm_lstm_block(l...)
    # FIX: the field-name strings must not contain stray whitespace (was
    # `Symbol(" dgm_lstm_$i ")`): these symbols become the keys under which
    # Lux stores each layer's parameters and states.
    names = ntuple(i -> Symbol("dgm_lstm_$i"), length(l))
    layers = NamedTuple{names}(l)
    return dgm_lstm_block(layers)
end

# Convenience method: accept a vector of layers by splatting it.
dgm_lstm_block(xs::AbstractVector) = dgm_lstm_block(xs...)
59+
# Compile-time-unrolled application of every layer in the block. For N layers
# this generates (with gensym'd names for the intermediate S/st values):
#   (S1, st1) = layers.dgm_lstm_1(S,  x, ps.dgm_lstm_1, st.dgm_lstm_1)
#   (S2, st2) = layers.dgm_lstm_2(S1, x, ps.dgm_lstm_2, st.dgm_lstm_2)
#   ...
#   st = NamedTuple{fields}((st1, ..., stN))
#   return SN, st
# Note that the ORIGINAL input x is fed to every layer, per the DGM paper.
@generated function apply_dgm_lstm_block(layers::NamedTuple{fields},
        S::AbstractVecOrMat, x::AbstractVecOrMat, ps, st::NamedTuple) where {fields}
    N = length(fields)
    S_symbols = vcat([:S], [gensym() for _ in 1:N])
    x_symbol = :x
    st_symbols = [gensym() for _ in 1:N]
    calls = [:(($(S_symbols[i + 1]), $(st_symbols[i])) = layers.$(fields[i])(
                 $(S_symbols[i]), $(x_symbol), ps.$(fields[i]), st.$(fields[i])))
             for i in 1:N]
    push!(calls, :(st = NamedTuple{$fields}((($(Tuple(st_symbols)...),)))))
    push!(calls, :(return $(S_symbols[N + 1]), st))
    return Expr(:block, calls...)
end
71+
# Run the hidden state `S` and the network input `x` through every layer.
function (L::dgm_lstm_block)(S::AbstractVecOrMat{T}, x::AbstractVecOrMat{T},
        ps, st::NamedTuple) where {T}
    return apply_dgm_lstm_block(L.layers, S, x, ps, st)
end
75+
# Full Deep Galerkin network: an input Dense layer (`d_start`), a stack of
# LSTM-type layers (`lstm`), and an output Dense layer (`d_end`).
struct dgm{S, L, E} <: Lux.AbstractExplicitContainerLayer{(:d_start, :lstm, :d_end)}
    d_start::S
    lstm::L
    d_end::E
end
81+
# Forward pass: lift x into the hidden space, run the LSTM-type block (which
# re-reads x at every layer), then project to the output dimension.
function (l::dgm)(x::AbstractVecOrMat{T}, ps, st::NamedTuple) where {T}
    S, st_start = l.d_start(x, ps.d_start, st.d_start)
    S, st_lstm = l.lstm(S, x, ps.lstm, st.lstm)
    y, st_end = l.d_end(S, ps.d_end, st.d_end)
    st_new = (d_start = st_start, lstm = st_lstm, d_end = st_end)
    return y, st_new
end
96+
"""
    dgm(in_dims::Int, out_dims::Int, modes::Int, layers::Int,
        activation1, activation2, out_activation = Lux.identity)

Return the network architecture defined for the Deep Galerkin method:

```math
\\begin{align*}
S^1 &= \\sigma_1(W^1 x + b^1); \\\\
Z^l &= \\sigma_1(U^{z,l} x + W^{z,l} S^l + b^{z,l}); \\quad l = 1, \\ldots, L; \\\\
G^l &= \\sigma_1(U^{g,l} x + W^{g,l} S^l + b^{g,l}); \\quad l = 1, \\ldots, L; \\\\
R^l &= \\sigma_1(U^{r,l} x + W^{r,l} S^l + b^{r,l}); \\quad l = 1, \\ldots, L; \\\\
H^l &= \\sigma_2(U^{h,l} x + W^{h,l}(S^l \\cdot R^l) + b^{h,l}); \\quad l = 1, \\ldots, L; \\\\
S^{l+1} &= (1 - G^l) \\cdot H^l + Z^l \\cdot S^l; \\quad l = 1, \\ldots, L; \\\\
f(t, x, \\theta) &= \\sigma_{out}(W S^{L+1} + b).
\\end{align*}
```

## Positional Arguments:
- `in_dims`: number of input dimensions (spatial dimension + 1)
- `out_dims`: number of output dimensions
- `modes`: width of the LSTM-type layer (output of the first Dense layer)
- `layers`: number of LSTM-type layers
- `activation1`: activation function used in the LSTM-type layers
- `activation2`: activation function used for the output of the LSTM-type layers
- `out_activation`: activation function used for the output of the network
  (defaults to `Lux.identity`, as advertised in the docstring signature)
"""
function dgm(in_dims::Int, out_dims::Int, modes::Int, layers::Int,
        activation1, activation2, out_activation = Lux.identity)
    return dgm(
        Lux.Dense(in_dims, modes, activation1),
        dgm_lstm_block([dgm_lstm_layer(in_dims, modes, activation1, activation2)
                        for _ in 1:layers]),
        Lux.Dense(modes, out_dims, out_activation)
    )
end
136+
"""
    DeepGalerkin(in_dims::Int, out_dims::Int, modes::Int, L::Int,
        activation1::Function, activation2::Function, out_activation::Function,
        strategy::NeuralPDE.AbstractTrainingStrategy; kwargs...)

Return a `discretize` algorithm for the ModelingToolkit `PDESystem` interface,
which transforms a `PDESystem` into an `OptimizationProblem` using the Deep
Galerkin method.

## Arguments:
- `in_dims`: number of input dimensions (spatial dimension + 1)
- `out_dims`: number of output dimensions
- `modes`: width of the LSTM-type layer
- `L`: number of LSTM-type layers
- `activation1`: activation function used in the LSTM-type layers
- `activation2`: activation function used for the output of the LSTM-type layers
- `out_activation`: activation function used for the output of the network
- `strategy`: training strategy used to sample the PDE domain
- `kwargs`: additional arguments to be splatted into `PhysicsInformedNN`

## Examples
```julia
discretization = DeepGalerkin(2, 1, 30, 3, tanh, tanh, identity, QuasiRandomTraining(4_000))
```

## References
Sirignano, Justin and Spiliopoulos, Konstantinos, "DGM: A deep learning algorithm
for solving partial differential equations", Journal of Computational Physics,
Volume 375, 2018, Pages 1339-1364, doi: https://doi.org/10.1016/j.jcp.2018.08.029
"""
function DeepGalerkin(in_dims::Int, out_dims::Int, modes::Int, L::Int,
        activation1::Function, activation2::Function, out_activation::Function,
        strategy::NeuralPDE.AbstractTrainingStrategy; kwargs...)
    return PhysicsInformedNN(
        dgm(in_dims, out_dims, modes, L, activation1, activation2, out_activation),
        strategy; kwargs...
    )
end