When I execute the code below, I get

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

on the line

dy_dx = torch.autograd.grad(y_pred_physics, xCP, torch.ones_like(y_pred_physics), create_graph=True)[0]
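As I understand it, torch.autograd.grad raises this error whenever the output tensor carries no grad_fn, i.e. it is not connected to any input that requires grad. A minimal sketch (unrelated to Pyro) that reproduces the same message:

import torch

x = torch.linspace(0., 1., 5).requires_grad_(True)
with torch.no_grad():  # graph recording disabled
    y = x ** 2         # y has no grad_fn
torch.autograd.grad(y, x, torch.ones_like(y))
# RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

My full script: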
import torch
import torch.nn as nn
import numpy as np
import numpy.random as npr
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pyro
import pyro.distributions as dist
from pyro.nn import PyroModule, PyroSample
from pyro.infer import MCMC, NUTS, Predictive
# algebra and data handling
from scipy.stats import norm
from PIL import Image # pillow
from tqdm.auto import tqdm, trange
from copy import deepcopy
from pprint import pprint
import pickle, glob, json, time
# 0a. General BNN
class BFCN(PyroModule):
"""a. Defines the architecture of a fully connected network: N_INPUT, N_OUTPUT, N_HIDDEN, N_LAYERS
b. Defines the activation function involved in each node_l_i to node_l_i+1
"""
def __init__(self, N_INPUT, N_OUTPUT, N_HIDDEN, N_LAYERS, PRIOR_SCALE=5.):
super().__init__()
self.activation = nn.Tanh()
assert N_INPUT > 0 and N_OUTPUT > 0 and N_HIDDEN > 0 and N_LAYERS > 0 # make sure the dimensions are valid
# Define the layer sizes and the PyroModule layer list
self.layer_sizes = [N_INPUT] + N_LAYERS * [N_HIDDEN] + [N_OUTPUT]
layer_list = [PyroModule[nn.Linear](self.layer_sizes[idx - 1], self.layer_sizes[idx]) for idx in
range(1, len(self.layer_sizes))]
self.layers = PyroModule[torch.nn.ModuleList](layer_list)
for layer_idx, layer in enumerate(self.layers):
layer.weight = PyroSample(dist.Normal(0., PRIOR_SCALE * np.sqrt(2 / self.layer_sizes[layer_idx])).expand(
[self.layer_sizes[layer_idx + 1], self.layer_sizes[layer_idx]]).to_event(2))
layer.bias = PyroSample(dist.Normal(0., PRIOR_SCALE).expand([self.layer_sizes[layer_idx + 1]]).to_event(1))
        # Prior for the observation-noise scale sigma (note: not used in forward below)
self.sigma = PyroSample(dist.Gamma(0.5, 1))
def _predict(self, x):
""" Compute predictions with the model """
x = x.reshape(-1, 1)
x = self.activation(self.layers[0](x)) # input --> hidden
for layer in self.layers[1:-1]:
x = self.activation(layer(x)) # hidden --> hidden
mu_pred = self.layers[-1](x).squeeze() # hidden --> output
return mu_pred.view(-1,1)
    def forward(self, x, yD, xCP, d, w0):
        # Predictions at the data points
        mu_pred = self._predict(x)
        # Data loss (MSE), if observations are provided
        data_loss = torch.tensor(0.0)
        if yD is not None:
            data_loss = torch.mean((mu_pred - yD) ** 2)
        # Physics-informed loss, if collocation points and ODE parameters are provided
        physics_loss = torch.tensor(0.0, requires_grad=True)
        if xCP is not None and d is not None and w0 is not None:
            xCP = xCP.view(-1, 1).requires_grad_(True).to('cpu')
            y_pred_physics = self._predict(xCP)
            # Residual of the damped oscillator ODE: y'' + 2*d*y' + w0**2 * y = 0
            dy_dx = torch.autograd.grad(y_pred_physics, xCP, torch.ones_like(y_pred_physics), create_graph=True)[0]
            d2y_dx2 = torch.autograd.grad(dy_dx, xCP, torch.ones_like(dy_dx), create_graph=True)[0]
            physics_residual = d2y_dx2 + 2 * d * dy_dx + w0**2 * y_pred_physics
            # Physics-informed loss (MSE of the residuals)
            physics_loss = (1e-4) * torch.mean(physics_residual**2)
        # Combined loss
        total_loss = data_loss + physics_loss
        return total_loss, mu_pred
def oscillator(d, w0, t):
    """Analytical solution of the 1D underdamped harmonic oscillator:
    y(t) = 2*A*exp(-d*t)*cos(phi + w*t), with w = sqrt(w0**2 - d**2).
    Equations taken from: https://beltoforion.de/en/harmonic_oscillator/"""
    assert d < w0  # underdamped regime
    w = np.sqrt(w0**2 - d**2)
    phi = np.arctan(-d/w)
    A = 1/(2*np.cos(phi))
    cos = torch.cos(phi + w*t)
    exp = torch.exp(-d*t)
    y = exp*2*A*cos
    return y
def f_SOL(t,d,w0):
return oscillator(d, w0, t)
torch.set_default_dtype(torch.float64)
print(f'Is CUDA available?: {torch.cuda.is_available()}')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = 'cpu'  # force CPU for this run
n_input, n_output, n_hidden, n_layers, prior_scale = 1,1,3,32,2
model = BFCN(n_input, n_output, n_hidden, n_layers, prior_scale)
# Define Hamiltonian Monte Carlo (HMC) kernel
# NUTS = "No-U-Turn Sampler" (https://arxiv.org/abs/1111.4246), gives HMC an adaptive step size
nuts_kernel = NUTS(model, jit_compile=False) # jit_compile=True is faster but requires PyTorch 1.6+
# Define MCMC sampler, get 50 posterior samples
mcmc = MCMC(nuts_kernel, num_samples=50)
# Oscillator parameters (mu = 2*d, k = w0**2)
d, w0 = 2, 20
mu, k = 2*d, w0**2
t = torch.linspace(0,1,1000).view(-1,1)
u = f_SOL(t,d,w0)
## DATA
M = 32
e = 0.1
lb, ub = 0, 0.60
tD = lb + (ub - lb) * npr.rand(M)
tD = torch.from_numpy(tD)
tD = tD.to(device).view(-1,1)
tD, _ = tD.sort(dim=0, descending=False)
yD = f_SOL(tD,d,w0) + norm(0, e).rvs(M).reshape(-1, 1)
yD = yD.to(device).view(-1,1)
## CONTROL POINTs
N = 128
lb, ub = 0., 1.
tCP = lb + (ub - lb) * torch.linspace(0.,1.,N)
tCP = tCP.to(device).view(-1,1)
tCP, _ = tCP.sort(dim=0, descending=False)
yCP = f_SOL(tCP,d,w0)
yCP = yCP.to(device).view(-1,1)
# Convert data to PyTorch tensors
x_train = tD
y_train = yD
x_physics = tCP  # physics (collocation) points; forcing x_physics.requires_grad_(True) here was also tried
# Run MCMC with physics-informed loss
mcmc.run(x_train, y_train, x_physics, d=d, w0=w0)
I tried forcing requires_grad_(True) on xCP and checked that it is indeed set (it is), but that does not resolve the issue. I was expecting the differentiation to go through smoothly, as it does with a plain (non-Bayesian) PINN.
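For comparison, this is the behaviour I expected; a minimal sketch with an ordinary nn.Module (no Pyro), where the same two autograd.grad calls go through without error:

import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(1, 32), nn.Tanh(), nn.Linear(32, 1))
xCP = torch.linspace(0., 1., 128).view(-1, 1).requires_grad_(True)
y = net(xCP)  # y has a grad_fn (the weights and xCP both require grad)
dy_dx = torch.autograd.grad(y, xCP, torch.ones_like(y), create_graph=True)[0]
d2y_dx2 = torch.autograd.grad(dy_dx, xCP, torch.ones_like(dy_dx), create_graph=True)[0]
print(d2y_dx2.shape)  # torch.Size([128, 1])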