Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Example] Add VAE example #573

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/zh/api/data/dataset.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
- LorenzDataset
- RosslerDataset
- VtuDataset
- VAECustomDataset
- MeshAirfoilDataset
- MeshCylinderDataset
show_root_heading: false
152 changes: 152 additions & 0 deletions examples/RegAE/train_new.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from typing import TYPE_CHECKING
from typing import Dict
from typing import List

from paddle import nn
from paddle.nn import functional as F
import sys; sys.path.append(r"C:\Users\zihao\Desktop\lbwnb\PaddleScience")
DrRyanHuang marked this conversation as resolved.
Show resolved Hide resolved
import ppsci
from ppsci.loss import KLLoss01
from ppsci.utils import config
from ppsci.utils import logger

if TYPE_CHECKING:
import paddle
import pgl


criterion = nn.MSELoss()
kl_loss = KLLoss01()

# def train_mse_func(
# output_dict: Dict[str, "paddle.Tensor"], label_dict: Dict[str, "pgl.Graph"], *args
# ) -> paddle.Tensor:
# return F.mse_loss(output_dict["pred"], label_dict["label"].y)
Comment on lines +41 to +44
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

一些无用的注释可以删除


def train_mse_func(mu, log_sigma, decoder_z, data_item) -> paddle.Tensor:
    """Return the training loss: KL term plus reconstruction term.

    Args:
        mu: First argument forwarded to the module-level ``kl_loss``.
        log_sigma: Second argument forwarded to the module-level ``kl_loss``.
        decoder_z: Reconstruction that is compared against ``data_item``.
        data_item: Target forwarded to the module-level ``criterion``
            (an ``nn.MSELoss`` instance).

    Returns:
        ``kl_loss(mu, log_sigma) + criterion(decoder_z, data_item)``.
    """
    # NOTE(review): this callable is wrapped in ppsci.loss.FunctionalLoss in
    # the script below; confirm FunctionalLoss really invokes it with these
    # four positional arguments.
    kl_term = kl_loss(mu, log_sigma)
    reconstruction_term = criterion(decoder_z, data_item)
    return kl_term + reconstruction_term


def eval_rmse_func(
    output_dict: Dict[str, List["paddle.Tensor"]],
    label_dict: Dict[str, List["pgl.Graph"]],
    *args,
) -> Dict[str, float]:
    """Compute the RMSE over paired predictions and labels.

    Args:
        output_dict: Mapping whose ``"pred"`` entry is iterated in lockstep
            with ``label_dict["label"]``.
        label_dict: Mapping whose ``"label"`` entry yields objects exposing
            the target as attribute ``y``.
        *args: Ignored.

    Returns:
        ``{"RMSE": value}`` where ``value`` is the square root of the mean of
        the per-pair ``F.mse_loss`` results.

    Raises:
        ZeroDivisionError: If there are no (pred, label) pairs.
    """
    total = 0
    pair_count = 0
    for pred, label in zip(output_dict["pred"], label_dict["label"]):
        total = total + F.mse_loss(pred, label.y)
        pair_count += 1
    mean_mse = total / pair_count
    return {"RMSE": mean_mse ** 0.5}


# Script entry point: parse options, seed, set up logging and build the model.
if __name__ == "__main__":
# parsed command-line options; args.output_dir is read here and args.epochs
# further down in this script
args = config.parse_args()
# set random seed for reproducibility
ppsci.utils.misc.set_random_seed(42)
# set output directory ("./output_RegAE" is used when args.output_dir is falsy)
OUTPUT_DIR = "./output_RegAE" if not args.output_dir else args.output_dir
# initialize logger; the log path is f"{OUTPUT_DIR}/train.log"
logger.init_logger("ppsci", f"{OUTPUT_DIR}/train.log", "info")

# sizes handed to the model as latent_dim / hidden_dim
latent_dim, hidden_dim = 100, 100
# set model
# NOTE(review): input_dim=10000 is hard-coded — presumably the flattened size
# of one sample produced by the dataset; confirm against the data file.
model = ppsci.arch.AutoEncoder(
input_dim=10000,
latent_dim=latent_dim,
hidden_dim=hidden_dim,
)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AutoEncoder的初始化超参数移动至RegAE.yaml配置文件里


# set dataloader config
# NOTE(review): ITERS_PER_EPOCH is a fixed constant passed to the solver below;
# presumably it should match ceil(num_train_samples / batch_size) — confirm.
ITERS_PER_EPOCH = 42
train_dataloader_cfg = {
"dataset": {
"name": "VAECustomDataset",
"file_path": "data/gaussian_train.npz",
"data_type": "train",
},
"batch_size": 128,
"sampler": {
"name": "BatchSampler",
"drop_last": False,
"shuffle": True,
},
"num_workers": 1,
}

# set constraint
sup_constraint = ppsci.constraint.SupervisedConstraint(
train_dataloader_cfg,
# output_expr={"pred": lambda out: out["pred"]},
loss=ppsci.loss.FunctionalLoss(train_mse_func),
name="Sup",
)
# wrap constraints together
constraint = {sup_constraint.name: sup_constraint}

# set training hyper-parameters (200 epochs unless args.epochs is truthy)
EPOCHS = 200 if not args.epochs else args.epochs

# set optimizer (Adam constructed with 1e-4, then bound to the model)
optimizer = ppsci.optimizer.Adam(1e-4)(model)

# set validator
# NOTE(review): this config uses the same file and the same "train" split as
# train_dataloader_cfg, and it is currently unused because the validator
# below is commented out.
eval_dataloader_cfg = {
"dataset": {
"name": "VAECustomDataset",
"file_path": "data/gaussian_train.npz",
"data_type": "train",
},
"batch_size": 1,
"sampler": {
"name": "BatchSampler",
"drop_last": False,
"shuffle": False,
},
}
# rmse_validator = ppsci.validate.SupervisedValidator(
# eval_dataloader_cfg,
# loss=ppsci.loss.FunctionalLoss(train_mse_func),
# output_expr={"pred": lambda out: out["pred"]},
# metric={"RMSE": ppsci.metric.FunctionalMetric(eval_rmse_func)},
# name="RMSE_validator",
# )
# validator = {rmse_validator.name: rmse_validator}

# initialize solver
# NOTE(review): eval_during_train=True is passed while no validator is
# supplied (the keyword below is commented out); confirm the solver tolerates
# that or re-enable the validator.
# NOTE(review): the positional None presumably fills the lr_scheduler slot —
# TODO confirm against the ppsci.solver.Solver signature.
solver = ppsci.solver.Solver(
model,
constraint,
OUTPUT_DIR,
optimizer,
None,
EPOCHS,
ITERS_PER_EPOCH,
save_freq=50,
eval_during_train=True,
eval_freq=50,
# validator=validator,
eval_with_no_grad=True,
# NOTE(review): the disabled path below names "output_AMGNet", not this
# example's output directory — looks like a leftover; verify before enabling.
# pretrained_model_path="./output_AMGNet/checkpoints/latest"
)
# train model
solver.train()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

文件末尾换行

2 changes: 2 additions & 0 deletions ppsci/arch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from ppsci.arch.amgnet import AMGNet # isort:skip
from ppsci.arch.mlp import MLP # isort:skip
from ppsci.arch.deeponet import DeepONet # isort:skip
from ppsci.arch.vae import AutoEncoder # isort:skip
from ppsci.arch.embedding_koopman import LorenzEmbedding # isort:skip
from ppsci.arch.embedding_koopman import RosslerEmbedding # isort:skip
from ppsci.arch.embedding_koopman import CylinderEmbedding # isort:skip
Expand Down Expand Up @@ -51,6 +52,7 @@
"PrecipNet",
"UNetEx",
"build_model",
"AutoEncoder",
]


Expand Down
67 changes: 67 additions & 0 deletions ppsci/arch/vae.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from typing import Dict
from typing import List
from typing import Tuple

import paddle
import paddle.nn as nn

from ppsci.arch import activation as act_mod
from ppsci.arch import base



# copy from AISTUDIO
class AutoEncoder(base.Arch):
def __init__(self, input_dim, latent_dim, hidden_dim):
    """Build the encoder trunk, the two latent heads and the decoder.

    Args:
        input_dim: Width of the encoder input and of the decoder output.
        latent_dim: Width of the outputs of the mu and log-sigma heads, and
            of the decoder input.
        hidden_dim: Width of the single hidden layer in encoder and decoder.
    """
    super().__init__()

    # Encoder trunk: Linear followed by Tanh, shared by both heads.
    encoder_trunk = nn.Sequential(
        nn.Linear(input_dim, hidden_dim),
        nn.Tanh(),
    )
    self._encoder_linear = encoder_trunk
    # Two parallel linear heads on top of the trunk.
    self._encoder_mu = nn.Linear(hidden_dim, latent_dim)
    self._encoder_log_sigma = nn.Linear(hidden_dim, latent_dim)

    # Decoder: Linear -> Tanh -> Linear, mapping latent_dim back to input_dim.
    decoder_layers = [
        nn.Linear(latent_dim, hidden_dim),
        nn.Tanh(),
        nn.Linear(hidden_dim, input_dim),
    ]
    self._decoder = nn.Sequential(*decoder_layers)
Comment on lines +30 to +46
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

base.Arch的子类需要做成符号化的形式,即需要具有 input_keys、output_keys两个属性


def encoder(self, x):
    """Encode ``x`` and return the pair ``(mu, log_sigma)``.

    ``x`` is passed through ``self._encoder_linear``; the result feeds both
    ``self._encoder_mu`` and ``self._encoder_log_sigma``.
    """
    hidden = self._encoder_linear(x)
    return self._encoder_mu(hidden), self._encoder_log_sigma(hidden)

def decoder(self, x):
    """Return the result of applying ``self._decoder`` to ``x``."""
    reconstruction = self._decoder(x)
    return reconstruction

# @staticmethod
# def kl_loss(mu, log_sigma):
# # measure how far the distribution given by mu, log_sigma is from N(0, 1)
# base = paddle.exp(2. * log_sigma) + paddle.pow(mu, 2) - 1. - 2. * log_sigma
# loss = 0.5 * paddle.sum(base) / mu.shape[0]
# return loss

def forward(self, x, noise):
Copy link
Collaborator

@HydrogenSulfate HydrogenSulfate Oct 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

forward中的张量计算放到forward_tensor中,然后forward内主要负责从输入的字典中找到并组装出真正的输入张量,然后调用forward_tensor方法计算结果,再将结果以dict的形式返回

mu, log_sigma = self.encoder(x)
z = mu + noise * paddle.exp(log_sigma)
return mu, log_sigma, self.decoder(z)
1 change: 1 addition & 0 deletions ppsci/data/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from ppsci.data.dataset.mat_dataset import MatDataset
from ppsci.data.dataset.npz_dataset import IterableNPZDataset
from ppsci.data.dataset.npz_dataset import NPZDataset
from ppsci.data.dataset.npz_dataset import VAECustomDataset
from ppsci.data.dataset.trphysx_dataset import CylinderDataset
from ppsci.data.dataset.trphysx_dataset import LorenzDataset
from ppsci.data.dataset.trphysx_dataset import RosslerDataset
Expand Down
77 changes: 77 additions & 0 deletions ppsci/data/dataset/npz_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -265,3 +265,80 @@ def __iter__(self):

def __len__(self):
return 1





class ScalerStd(object):
DrRyanHuang marked this conversation as resolved.
Show resolved Hide resolved
"""
Desc: Normalization utilities with std mean
"""

def __init__(self):
self.mean = 0.
self.std = 1.

def fit(self, data):
DrRyanHuang marked this conversation as resolved.
Show resolved Hide resolved
self.mean = np.mean(data)
self.std = np.std(data)

def transform(self, data):
    """Standardize ``data`` as ``(data - self.mean) / self.std``.

    Args:
        data: Values to standardize. When ``paddle.is_tensor(data)`` is true
            the stored statistics are first converted to tensors matching
            ``data``; otherwise they are used as stored.

    Returns:
        The standardized values, produced by the arithmetic of ``data``'s
        own type.
    """
    mean, std = self.mean, self.std
    if paddle.is_tensor(data):
        # Create the statistics directly with the input's dtype and place.
        # The earlier `.type_as(data).to(data.device)` chain is PyTorch-style;
        # paddle.to_tensor accepts dtype/place itself.
        mean = paddle.to_tensor(mean, dtype=data.dtype, place=data.place)
        std = paddle.to_tensor(std, dtype=data.dtype, place=data.place)
    return (data - mean) / std

def inverse_transform(self, data):
DrRyanHuang marked this conversation as resolved.
Show resolved Hide resolved
mean = paddle.to_tensor(self.mean) if paddle.is_tensor(data) else self.mean
std = paddle.to_tensor(self.std) if paddle.is_tensor(data) else self.std
return (data * std) + mean


class VAECustomDataset(io.Dataset):
def __init__(self, file_path, data_type="train"):
    """Load an ``.npz`` archive and prepare standardized train/test splits.

    Args:
        file_path: Path handed to ``np.load``. The archive must provide the
            keys ``data``, ``neighbors``, ``areasoverlengths``,
            ``dirichletnodes``, ``dirichletheads``, ``coords`` and
            ``test_data``.
        data_type: Which split this instance serves, "train" or "test".

    Raises:
        KeyError: If one of the keys above is missing from the archive.
        ValueError: If ``data`` is not 3-dimensional.
    """
    super().__init__()
    # The object returned by np.load for an .npz archive holds the file open;
    # read every array inside a with-block so the handle is always released.
    with np.load(file_path) as all_data:
        data = all_data["data"]
        # ``data`` must be 3-D; every sample is flattened into one row.
        num, _, _ = data.shape
        data = data.reshape(num, -1)

        self.neighbors = all_data["neighbors"]
        self.areasoverlengths = all_data["areasoverlengths"]
        self.dirichletnodes = all_data["dirichletnodes"]
        self.dirichleths = all_data["dirichletheads"]
        # Zero vector whose length is the last dimension of ``coords``.
        self.Qs = np.zeros([all_data["coords"].shape[-1]])
        self.val_data = all_data["test_data"]

    self.data_type = data_type

    # First 80% of the samples form the train split, the rest the test split.
    self.train_len = int(num * 0.8)
    self.test_len = num - self.train_len

    self.train_data = data[: self.train_len]
    self.test_data = data[self.train_len :]

    # Statistics come from the train split only and are applied to both.
    self.scaler = ScalerStd()
    self.scaler.fit(self.train_data)

    self.train_data = self.scaler.transform(self.train_data)
    self.test_data = self.scaler.transform(self.test_data)

    # NOTE(review): both key attributes are left as empty strings; other
    # ppsci datasets presumably expose tuples of key names here — confirm.
    self.input_keys = ""
    self.label_keys = ""

def __getitem__(self, idx):
    """Return item ``idx`` of the active split.

    The train split is used when ``self.data_type == "train"``; any other
    value selects the test split.
    """
    active_split = self.train_data if self.data_type == "train" else self.test_data
    return active_split[idx]
Comment on lines +345 to +349
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dataset需要返回input、label、weight三个数据字典


def __len__(self):
    """Return the size of the active split.

    ``self.train_len`` when ``self.data_type == "train"``, otherwise
    ``self.test_len``.
    """
    if self.data_type != "train":
        return self.test_len
    return self.train_len
2 changes: 2 additions & 0 deletions ppsci/loss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from ppsci.loss.mse import MSELoss
from ppsci.loss.mse import MSELossWithL2Decay
from ppsci.loss.mse import PeriodicMSELoss
from ppsci.loss.kl import KLLoss01

__all__ = [
"Loss",
Expand All @@ -38,6 +39,7 @@
"MSELoss",
"MSELossWithL2Decay",
"PeriodicMSELoss",
"KLLoss01"
]


Expand Down
Loading