1. Link: https://github.com/jfzhang95/pytorch-video-recognition
2. Why recommended: covers UCF101 and HMDB51 handling, includes the classic C3D, R3D, and R(2+1)D network models, complete preprocessing functions, and pretrained weights
1. Link: https://zhuanlan.zhihu.com/p/32934943
2. Why recommended: a TensorFlow implementation of the C3D network, with code
1. Link: https://zhuanlan.zhihu.com/p/374671906
1. Link: https://paperswithcode.com/task/action-recognition-in-videos
model.py
from torch import nn


class C3D(nn.Module):
    """C3D network for UCF101 (101 classes).

    Expected input: (n_batch, n_channel, n_frame, h, w) = (n_batch, 3, 16, 240, 320).
    After the five pooling stages the feature map is (512, 1, 8, 11),
    which is why the first fully connected layer takes 512*1*8*11 inputs.
    """

    def __init__(self):
        super(C3D, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2)),

            nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2)),

            nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv3d(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2)),

            nn.Conv3d(256, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2)),

            nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.ReLU(inplace=True),
            nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=(0, 1, 1)),
        )
        self.linear = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512 * 1 * 8 * 11, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, 101),
            # No softmax/ReLU on the output: nn.CrossEntropyLoss expects raw
            # logits and applies the softmax internally.
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.linear(x)
        return x
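A quick shape sanity check for the model (a minimal sketch; the input is just a random tensor matching the documented (n_batch, 3, 16, 240, 320) shape, not real UCF101 data):

import torch
from model import C3D

# One random clip: 3 channels, 16 frames, 240x320 pixels
model = C3D()
dummy = torch.randn(1, 3, 16, 240, 320)
logits = model(dummy)
print(logits.shape)  # expected: torch.Size([1, 101])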
This is the CPU version; to train on a GPU, the model and tensors need to be moved to CUDA (see the sketch after train.py below).
train.py
import torch
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from model import *
from UCF101_dataset import *

train_set_root = "T:/Dataset/UCF101/UCF101_frames/train/"
test_set_root = "T:/Dataset/UCF101/UCF101_frames/test/"
train_set = UCF101(train_set_root)
test_set = UCF101(test_set_root)

batch_size = 4
train_data_loader = DataLoader(train_set, shuffle=True, drop_last=True, batch_size=batch_size)
test_data_loader = DataLoader(test_set, shuffle=True, drop_last=True, batch_size=batch_size)

model_c3d = C3D()
loss_function = nn.CrossEntropyLoss()
learning_rate = 1e-2
optimizer = torch.optim.Adam(model_c3d.parameters(), lr=learning_rate)

total_train_step = 0
writer = SummaryWriter("logs")

for epoch in range(0, 20):
    print("epoch: {}".format(epoch))

    # ---- training ----
    model_c3d.train()
    for clips_fold, labels in train_data_loader:
        clips = train_set.unfold_batch(clips_fold)
        onehot = train_set.onehot(labels)
        out = model_c3d(clips)
        # CrossEntropyLoss expects (prediction, target) in that order
        loss = loss_function(out, onehot)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_step += 1
        if total_train_step % 100 == 0:
            print("total_train_step: {}, loss: {}".format(total_train_step, loss.item()))
            writer.add_scalar("loss", loss.item(), total_train_step)

    # zero-padded epoch number so the file name contains no spaces
    torch.save(model_c3d, "C3D_epoch_{:05d}.pkl".format(epoch))
    print("model saved as C3D_epoch_{:05d}.pkl".format(epoch))

    # ---- evaluation ----
    model_c3d.eval()
    with torch.no_grad():
        correct = 0
        total_test_clip = 0
        for test_clips_fold, test_labels in test_data_loader:
            test_clips = test_set.unfold_batch(test_clips_fold)
            test_onehot = test_set.onehot(test_labels)
            test_out = model_c3d(test_clips)
            total_test_clip += batch_size
            correct += (torch.argmax(test_out, dim=1) == torch.argmax(test_onehot, dim=1)).sum().item()
        accuracy = correct / total_test_clip
        writer.add_scalar("accuracy", accuracy, epoch)
        print("epoch: {}, Accuracy: {}".format(epoch, accuracy))

writer.close()