催化剂。使用 PyTorch 加速深度学习研发。
项目描述
Catalyst 是用于深度学习研究和开发的 PyTorch 框架。它专注于可重复性、快速实验和代码库重用,因此您可以创建新的东西而不是编写另一个训练循环。
打破循环——使用催化剂!
- 项目清单
- 框架架构
- AI Landscape的催化剂
- PyTorch生态系统的一部分
入门
pip install -U catalyst
import os
from torch import nn, optim
from torch.utils.data import DataLoader
from catalyst import dl, utils
from catalyst.contrib.datasets import MNIST
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.02)
loaders = {
"train": DataLoader(MNIST(os.getcwd(), train=True), batch_size=32),
"valid": DataLoader(MNIST(os.getcwd(), train=False), batch_size=32),
}
runner = dl.SupervisedRunner(
input_key="features", output_key="logits", target_key="targets", loss_key="loss"
)
# model training
runner.train(
model=model,
criterion=criterion,
optimizer=optimizer,
loaders=loaders,
num_epochs=1,
callbacks=[
dl.AccuracyCallback(input_key="logits", target_key="targets", topk=(1, 3, 5)),
dl.PrecisionRecallF1SupportCallback(input_key="logits", target_key="targets"),
],
logdir="./logs",
valid_loader="valid",
valid_metric="loss",
minimize_valid_metric=True,
verbose=True,
)
# model evaluation
metrics = runner.evaluate_loader(
loader=loaders["valid"],
callbacks=[dl.AccuracyCallback(input_key="logits", target_key="targets", topk=(1, 3, 5))],
)
# model inference
for prediction in runner.predict_loader(loader=loaders["valid"]):
assert prediction["logits"].detach().cpu().numpy().shape[-1] == 10
# model post-processing
model = runner.model.cpu()
batch = next(iter(loaders["valid"]))[0]
utils.trace_model(model=model, batch=batch)
utils.quantize_model(model=model)
utils.prune_model(model=model, pruning_fn="l1_unstructured", amount=0.8)
utils.onnx_export(model=model, batch=batch, file="./logs/mnist.onnx", verbose=True)
分步指南
- Start with Catalyst — 用于加速深度学习研发的 PyTorch 框架。
- 尝试笔记本教程或检查最小示例以进行第一次深入研究。
- 阅读包含用例和指南的博客文章。
- 通过我们的“使用 Catalyst 进行深度学习”课程学习机器学习。
- 最后,如果您想与团队和贡献者聊天,请加入我们的 slack 。
目录
概述
Catalyst 只需几行代码即可帮助您实现紧凑但功能齐全的深度学习管道。您可以在没有样板的情况下获得包含指标、提前停止、模型检查点和其他功能的训练循环。
安装
通用安装:
pip install -U catalyst
专用版本,可能适用额外要求
pip install catalyst[ml] # installs ML-based Catalyst
pip install catalyst[cv] # installs CV-based Catalyst
# master version installation
pip install git+https://github.com/catalyst-team/catalyst@master --upgrade
# all available extensions are listed here:
# https://github.com/catalyst-team/catalyst/blob/master/setup.py
Catalyst 兼容:Python 3.7+。PyTorch 1.4+。
在 Ubuntu 16.04/18.04/20.04、macOS 10.15、Windows 10 和适用于 Linux 的 Windows 子系统上测试。
文档
最小的例子
介绍教程“自定义发生的事情
train”带有自定义示例的演示
使用 Catalyst 进行强化学习
- 和更多
CustomRunner – PyTorch for 循环分解
import os
from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import DataLoader
from catalyst import dl, metrics
from catalyst.contrib.datasets import MNIST
model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
optimizer = optim.Adam(model.parameters(), lr=0.02)
train_data = MNIST(os.getcwd(), train=True)
valid_data = MNIST(os.getcwd(), train=False)
loaders = {
"train": DataLoader(train_data, batch_size=32),
"valid": DataLoader(valid_data, batch_size=32),
}
class CustomRunner(dl.Runner):
def predict_batch(self, batch):
# model inference step
return self.model(batch[0].to(self.engine.device))
def on_loader_start(self, runner):
super().on_loader_start(runner)
self.meters = {
key: metrics.AdditiveMetric(compute_on_call=False)
for key in ["loss", "accuracy01", "accuracy03"]
}
def handle_batch(self, batch):
# model train/valid step
# unpack the batch
x, y = batch
# run model forward pass
logits = self.model(x)
# compute the loss
loss = F.cross_entropy(logits, y)
# compute the metrics
accuracy01, accuracy03 = metrics.accuracy(logits, y, topk=(1, 3))
# log metrics
self.batch_metrics.update(
{"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
)
for key in ["loss", "accuracy01", "accuracy03"]:
self.meters[key].update(self.batch_metrics[key].item(), self.batch_size)
# run model backward pass
if self.is_train_loader:
self.engine.backward(loss)
self.optimizer.step()
self.optimizer.zero_grad()
def on_loader_end(self, runner):
for key in ["loss", "accuracy01", "accuracy03"]:
self.loader_metrics[key] = self.meters[key].compute()[0]
super().on_loader_end(runner)
runner = CustomRunner()
# model training
runner.train(
model=model,
optimizer=optimizer,
loaders=loaders,
logdir="./logs",
num_epochs=5,
verbose=True,
valid_loader="valid",
valid_metric="loss",
minimize_valid_metric=True,
)
# model inference
for logits in runner.predict_loader(loader=loaders["valid"]):
assert logits.detach().cpu().numpy().shape[-1] == 10
ML - 线性回归
import torch
from torch.utils.data import DataLoader, TensorDataset
from catalyst import dl
# data
num_samples, num_features = int(1e4), int(1e1)
X, y = torch.rand(num_samples, num_features), torch.rand(num_samples)
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=32, num_workers=1)
loaders = {"train": loader, "valid": loader}
# model, criterion, optimizer, scheduler
model = torch.nn.Linear(num_features, 1)
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [3, 6])
# model training
runner = dl.SupervisedRunner()
runner.train(
model=model,
criterion=criterion,
optimizer=optimizer,
scheduler=scheduler,
loaders=loaders,
logdir="./logdir",
valid_loader="valid",
valid_metric="loss",
minimize_valid_metric=True,
num_epochs=8,
verbose=True,
)
ML - 多类分类
import torch
from torch.utils.data import DataLoader, TensorDataset
from catalyst import dl
# sample data
num_samples, num_features, num_classes = int(1e4), int(1e1), 4
X = torch.rand(num_samples, num_features)
y = (torch.rand(num_samples,) * num_classes).to(torch.int64)
# pytorch loaders
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=32, num_workers=1)
loaders = {"train": loader, "valid": loader}
# model, criterion, optimizer, scheduler
model = torch.nn.Linear(num_features, num_classes)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])
# model training
runner = dl.SupervisedRunner(
input_key="features", output_key="logits", target_key="targets", loss_key="loss"
)
runner.train(
model=model,
criterion=criterion,
optimizer=optimizer,
scheduler=scheduler,
loaders=loaders,
logdir="./logdir",
num_epochs=3,
valid_loader="valid",
valid_metric="accuracy03",
minimize_valid_metric=False,
verbose=True,
callbacks=[
dl.AccuracyCallback(input_key="logits", target_key="targets", num_classes=num_classes),
# uncomment for extra metrics:
# dl.PrecisionRecallF1SupportCallback(
# input_key="logits", target_key="targets", num_classes=num_classes
# ),
# dl.AUCCallback(input_key="logits", target_key="targets"),
# catalyst[ml] required ``pip install catalyst[ml]``
# dl.ConfusionMatrixCallback(
# input_key="logits", target_key="targets", num_classes=num_classes
# ),
],
)
ML - 多标签分类
import torch
from torch.utils.data import DataLoader, TensorDataset
from catalyst import dl
# sample data
num_samples, num_features, num_classes = int(1e4), int(1e1), 4
X = torch.rand(num_samples, num_features)
y = (torch.rand(num_samples, num_classes) > 0.5).to(torch.float32)
# pytorch loaders
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=32, num_workers=1)
loaders = {"train": loader, "valid": loader}
# model, criterion, optimizer, scheduler
model = torch.nn.Linear(num_features, num_classes)
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters())
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, [2])
# model training
runner = dl.SupervisedRunner(
input_key="features", output_key="logits", target_key="targets", loss_key="loss"
)
runner.train(
model=model,
criterion=criterion,
optimizer=optimizer,
scheduler=scheduler,
loaders=loaders,
logdir="./logdir",
num_epochs=3,
valid_loader="valid",
valid_metric="accuracy01",
minimize_valid_metric=False,
verbose=True,
callbacks=[
dl.BatchTransformCallback(
transform=torch.sigmoid,
scope="on_batch_end",
input_key="logits",
output_key="scores"
),
dl.AUCCallback(input_key="scores", target_key="targets"),
# uncomment for extra metrics:
# dl.MultilabelAccuracyCallback(input_key="scores", target_key="targets", threshold=0.5),
# dl.MultilabelPrecisionRecallF1SupportCallback(
# input_key="scores", target_key="targets", threshold=0.5
# ),
]
)
ML - 多头分类
import torch
from torch import nn