Install the dependencies (Python 3 is required; PyTorch no longer supports Python 2):
python3 -m pip install -r requirements.txt
python3 sample.py [-d DATA_DIR]
e.g. python3 sample.py -d /path/to/my/data/
.
Accuracy Before Engine Refit
Got 892 correct predictions out of 10000 (8.9%)
Accuracy After Engine Refit (expecting 98.0% correct predictions)
Got 9798 correct predictions out of 10000 (98.0%)
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable
import numpy as np
import os
from random import randint
class Net(nn.Module):
    """LeNet-style CNN for 28x28 MNIST digits.

    Two conv+max-pool stages followed by two fully connected layers,
    returning log-probabilities over the 10 digit classes.
    """

    def __init__(self):
        super(Net, self).__init__()
        # 1 input channel -> 20 feature maps, then 20 -> 50; both 5x5 kernels.
        self.conv1 = nn.Conv2d(1, 20, kernel_size=5)
        self.conv2 = nn.Conv2d(20, 50, kernel_size=5)
        # After both pooling stages the 50 maps are 4x4 -> 800 flat features.
        self.fc1 = nn.Linear(800, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        pooled_one = F.max_pool2d(self.conv1(x), kernel_size=2, stride=2)
        pooled_two = F.max_pool2d(self.conv2(pooled_one), kernel_size=2, stride=2)
        flattened = pooled_two.view(-1, 800)
        hidden = F.relu(self.fc1(flattened))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)
class MnistModel(object):
    """Trains and evaluates the Net CNN on MNIST.

    Also exposes the learned weights (state_dict) and the test set as
    flattened numpy samples for consumption by the TensorRT refit sample.
    """

    def __init__(self, data_dir='/data'):
        """Set hyperparameters and build the MNIST data loaders.

        data_dir: where torchvision stores/downloads MNIST (generalizes the
        previously hard-coded '/data'; the default preserves old behavior).
        """
        self.batch_size = 64
        self.test_batch_size = 100
        self.learning_rate = 0.0025
        self.sgd_momentum = 0.9
        self.log_interval = 100  # batches between training progress reports
        # Fetch MNIST data set (downloaded on first use). Both loaders share
        # the same normalization (dataset mean/std).
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
        ])
        self.train_loader = torch.utils.data.DataLoader(
            datasets.MNIST(data_dir, train=True, download=True, transform=transform),
            batch_size=self.batch_size,
            shuffle=True
        )
        self.test_loader = torch.utils.data.DataLoader(
            datasets.MNIST(data_dir, train=False, transform=transform),
            batch_size=self.test_batch_size,
            shuffle=True
        )
        self.network = Net()
        self.latest_test_accuracy = 0.0

    def learn(self, num_epochs=2):
        """Train for num_epochs epochs, evaluating on the test set after each."""
        # Create the optimizer once so SGD momentum buffers persist across
        # epochs (previously it was rebuilt every epoch, discarding momentum).
        optimizer = optim.SGD(self.network.parameters(),
                              lr=self.learning_rate, momentum=self.sgd_momentum)

        def train(epoch):
            # One pass over the training set.
            self.network.train()
            for batch, (data, target) in enumerate(self.train_loader):
                optimizer.zero_grad()
                output = self.network(data)
                loss = F.nll_loss(output, target)
                loss.backward()
                optimizer.step()
                if batch % self.log_interval == 0:
                    print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                        epoch, batch * len(data), len(self.train_loader.dataset),
                        100. * batch / len(self.train_loader), loss.item()))

        def test(epoch):
            # Evaluate on the test set and record the accuracy.
            self.network.eval()
            test_loss = 0
            correct = 0
            with torch.no_grad():
                for data, target in self.test_loader:
                    output = self.network(data)
                    test_loss += F.nll_loss(output, target).item()
                    pred = output.max(1)[1]  # index of the max log-probability
                    correct += pred.eq(target).sum().item()
            test_loss /= len(self.test_loader)
            self.latest_test_accuracy = float(correct) / len(self.test_loader.dataset)
            print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.1f}%)\n'.format(
                test_loss, correct, len(self.test_loader.dataset),
                100. * self.latest_test_accuracy))

        for e in range(num_epochs):
            train(e + 1)
            test(e + 1)

    def get_latest_test_set_accuracy(self):
        """Return the test accuracy (0.0-1.0) from the most recent evaluation."""
        return self.latest_test_accuracy

    def get_weigths(self):
        """Return the trained network's state_dict."""
        # NOTE: misspelled name kept for backward compatibility with callers.
        return self.network.state_dict()

    def conver_to_flattened_numpy_array(self, batch_data, batch_target, sample_idx):
        """Return (flattened float32 image, integer label) for one batch sample."""
        # NOTE: misspelled name kept for backward compatibility with callers.
        test_case = batch_data.numpy()[sample_idx].ravel().astype(np.float32)
        test_name = batch_target.numpy()[sample_idx]
        return test_case, test_name

    def get_all_test_samples(self):
        """Yield every test-set sample as a (flattened image, label) pair."""
        for data, target in self.test_loader:
            for case_num in range(len(data)):
                yield self.conver_to_flattened_numpy_array(data, target, case_num)
# Imports for the TensorRT engine-refit sample.
from PIL import Image
import numpy as np
import pycuda.driver as cuda
# Importing pycuda.autoinit initializes CUDA and creates a context.
import pycuda.autoinit
import tensorrt as trt
import sys, os
# Make the shared sample helpers (common.py) importable from the parent dir.
sys.path.insert(1, os.path.join(sys.path[0], os.path.pardir))
import common
import model
# Logger shared by the builder and the refitter; reports warnings and errors.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
class ModelData(object):
    """Constants describing the TensorRT network's single input and output."""
    INPUT_NAME = "data"
    # CHW: one-channel 28x28 grayscale MNIST image.
    INPUT_SHAPE = (1,28,28)
    OUTPUT_NAME = "prob"
    # One score per digit class.
    OUTPUT_SIZE = 10
    DTYPE = trt.float32
def populate_network_with_some_dummy_weights(network, weights):
    """Describe the MNIST classifier to TensorRT, layer by layer.

    The first convolution is filled with all-zero placeholder weights and
    named "conv_1" so a Refitter can locate and replace it later; every other
    layer receives the trained PyTorch weights from `weights`.
    """
    input_tensor = network.add_input(
        name=ModelData.INPUT_NAME, dtype=ModelData.DTYPE, shape=ModelData.INPUT_SHAPE)

    # First convolution: zero-filled placeholders, to be supplied via refit.
    placeholder_kernel = np.zeros((20, 5, 5), dtype=np.float32)
    placeholder_bias = np.zeros(20, dtype=np.float32)
    conv1 = network.add_convolution(
        input=input_tensor, num_output_maps=20, kernel_shape=(5, 5),
        kernel=placeholder_kernel, bias=placeholder_bias)
    conv1.name = "conv_1"
    conv1.stride = (1, 1)

    pool1 = network.add_pooling(
        input=conv1.get_output(0), type=trt.PoolingType.MAX, window_size=(2, 2))
    pool1.stride = (2, 2)

    # Second convolution: real trained weights.
    conv2 = network.add_convolution(
        pool1.get_output(0), 50, (5, 5),
        weights['conv2.weight'].numpy(), weights['conv2.bias'].numpy())
    conv2.stride = (1, 1)

    pool2 = network.add_pooling(conv2.get_output(0), trt.PoolingType.MAX, (2, 2))
    pool2.stride = (2, 2)

    # Fully connected layers with trained weights; ReLU between them.
    fc1 = network.add_fully_connected(
        input=pool2.get_output(0), num_outputs=500,
        kernel=weights['fc1.weight'].numpy(), bias=weights['fc1.bias'].numpy())
    relu1 = network.add_activation(
        input=fc1.get_output(0), type=trt.ActivationType.RELU)
    fc2 = network.add_fully_connected(
        relu1.get_output(0), ModelData.OUTPUT_SIZE,
        weights['fc2.weight'].numpy(), weights['fc2.bias'].numpy())

    fc2.get_output(0).name = ModelData.OUTPUT_NAME
    network.mark_output(tensor=fc2.get_output(0))
def build_engine_with_some_missing_weights(weights):
    """Build a refittable CUDA engine whose conv1 weights are placeholders.

    The returned engine runs, but its first convolution holds zeros until a
    Refitter supplies the real weights.
    """
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network:
        # Mark refittable so conv1's placeholder weights can be swapped later.
        builder.refittable = True
        builder.max_workspace_size = common.GiB(1)
        populate_network_with_some_dummy_weights(network, weights)
        return builder.build_cuda_engine(network)
def load_img_to_input_buffer(img, pageloked_buffer):
    """Copy a flattened test image into the engine's pagelocked host input buffer."""
    # In-place copy into the pre-allocated buffer; numpy casts as needed.
    np.copyto(dst=pageloked_buffer, src=img)
def get_trt_test_accuracy(engine, inputs, outputs, bindings, stream, mnist_model):
    """Run every MNIST test sample through the engine; print and return accuracy."""
    correct = 0
    total = 0
    with engine.create_execution_context() as context:
        for test_img, test_name in mnist_model.get_all_test_samples():
            # Stage the image in the pagelocked input buffer, then infer.
            load_img_to_input_buffer(test_img, pageloked_buffer=inputs[0].host)
            [output] = common.do_inference(
                context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            # Predicted digit is the index of the highest score.
            correct += (test_name == np.argmax(output))
            total += 1
        accuracy = float(correct) / total
        print("Got {} correct predictions out of {} ({:.1f}%)".format(
            correct, total, 100 * accuracy))
        return accuracy
def main():
    """Demonstrate TensorRT engine refitting with a PyTorch MNIST model.

    Trains the PyTorch model, builds a TRT engine whose conv1 weights are
    zero placeholders (accuracy near chance), refits the engine in place
    with the trained weights, and checks accuracy matches PyTorch's.
    """
    common.add_help(description="Runs an MNIST network using a PyTorch model")
    mnist_model = model.MnistModel()
    mnist_model.learn()
    weights = mnist_model.get_weigths()
    with build_engine_with_some_missing_weights(weights) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        # conv1 still holds zeros, so accuracy should be near chance (~10%).
        # Fixed typo in the message: "Befor" -> "Before".
        print("Accuracy Before Engine Refit")
        get_trt_test_accuracy(engine, inputs, outputs, bindings, stream, mnist_model)
        # Swap the placeholder conv1 weights for the trained ones in place.
        with trt.Refitter(engine, TRT_LOGGER) as refitter:
            refitter.set_weights("conv_1", trt.WeightsRole.KERNEL, weights['conv1.weight'].numpy())
            refitter.set_weights("conv_1", trt.WeightsRole.BIAS, weights['conv1.bias'].numpy())
            # Every refittable weight must be supplied before refitting.
            missing_layers, _weight_roles = refitter.get_missing()
            assert len(missing_layers) == 0, "Refitter found missing weights. Call set_weights() for all missing weights"
            assert refitter.refit_cuda_engine()
        expected_correct_predictions = mnist_model.get_latest_test_set_accuracy()
        print("Accuracy After Engine Refit (expecting {:.1f}% correct predictions)".format(
            100 * expected_correct_predictions))
        assert get_trt_test_accuracy(engine, inputs, outputs, bindings, stream,
                                     mnist_model) >= expected_correct_predictions
# Run the sample only when executed directly, not when imported as a module.
if __name__ == '__main__':
    main()