In FATE 1.10, the native Homo-NN scenario only supports models built with torch Sequential, i.e. something like this:
model = t.nn.Sequential(
    t.nn.Linear(784, 32),
    t.nn.ReLU(),
    t.nn.Linear(32, 10),
    t.nn.Softmax(dim=1)
)
Models defined as a class are not supported, but a Sequential model clearly cannot cover every need, so FATE later introduced the model zoo to solve this. See: https://github.com/FederatedAI/FATE/blob/master/doc/tutorial/pipeline/nn_tutorial/Homo-NN-Customize-Model.ipynb
After reading it, the takeaway is that you need to write your customized model into federatedml/nn/model_zoo yourself. FATE officially provides a save-to-fate magic for Jupyter, %%save_to_fate model image_net.py, which makes saving convenient. If you don't use Jupyter, you either save to FATE from the command line or write the file into the model zoo by hand.
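For reference, the Jupyter shortcut is just a cell magic placed on the first line of the cell that holds the model code; FATE then saves that cell as image_net.py under federatedml/nn/model_zoo. A sketch based on the tutorial (the layer definitions here are placeholders, not the tutorial's actual ImgNet):

%%save_to_fate model image_net.py
import torch as t
from torch import nn

class ImgNet(nn.Module):
    def __init__(self, class_num=10):
        super(ImgNet, self).__init__()
        self.fc = nn.Linear(784, class_num)  # placeholder layer

    def forward(self, x):
        return self.fc(x)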
Below is the official wording:
To use your custom model in a federated task, simply place it in the federatedml/nn/model_zoo directory and specify the module and model class through the interface when submitting the task. Homo-NN will automatically search and import the model you have implemented.
Name the model code image_net.py, you can put it directly under federatedml/nn/model_zoo or use the shortcut interface of jupyter notebook to save it directly to federatedml/nn/model_zoo
Since I don't use Jupyter, I simply put the file into the zoo by hand. The path is {FATE project path}/fate/python/federatedml/nn/model_zoo. In 1.10.0 it only ships with a pretrained bert and a vision model; we add our own, saved as simple_net.py:
import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleNet(nn.Module):
    def __init__(self):
        super(SimpleNet, self).__init__()
        # define three layers;
        # fc stands for fully connected layer (a convolution layer would be nn.Conv2d())
        self.fc1 = nn.Linear(28 * 28, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
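Before wiring this into a federated job, a quick local smoke test (my own addition, not part of the FATE flow) confirms the shapes line up:

# local smoke test: a fake batch of 4 flattened 28x28 images
net = SimpleNet()
dummy = torch.randn(4, 28 * 28)
out = net(dummy)
print(out.shape)  # expect: torch.Size([4, 10])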
Note that if you skip this step, FATE will not recognize your model structure and will report an error, even if the class is written inside the .py file you run.
Then, in the development file, the model is referenced with from federatedml.nn.model_zoo.simple_net import SimpleNet. You do not instantiate the class directly, though: the model must be wrapped in a Sequential through FATE's CustModel, as the docs show (the code below cannot run and is for display only):
import torch as t
from pipeline import fate_torch_hook
from pipeline.component import HomoNN
from pipeline.component.homo_nn import DatasetParam, TrainerParam

# fate_torch_hook patches torch so that t.nn.CustModel is available
t = fate_torch_hook(t)

model = t.nn.Sequential(
    # class_num=10 is an initialization parameter for your model
    t.nn.CustModel(module_name='image_net', class_name='ImgNet', class_num=10)
)

nn_component = HomoNN(name='nn_0',
                      model=model,  # your cust model
                      loss=t.nn.CrossEntropyLoss(),
                      optimizer=t.optim.Adam(model.parameters(), lr=0.01),
                      dataset=DatasetParam(dataset_name='image'),  # use image dataset
                      trainer=TrainerParam(trainer_name='fedavg_trainer', epochs=3, batch_size=1024, validation_freqs=1),
                      torch_seed=100  # global random seed
                      )
Although that snippet doesn't run, adapting the doc's example gets most of the way there; the more complete code is pasted below.
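One detail worth spelling out: the extra keyword arguments handed to CustModel (class_num=10 above) are forwarded to your model class's __init__ when FATE instantiates it inside the job, so the class must declare matching parameters (as in the ImgNet sketch earlier, whose __init__ takes class_num). A model with a no-argument __init__, like the SimpleNet above, is used with no extra kwargs:

# sketch: SimpleNet's __init__ takes no arguments, so none are passed
model = t.nn.Sequential(
    t.nn.CustModel(module_name='simple_net', class_name='SimpleNet')
)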
To sum up: CustModel is not your model's class but a class provided by FATE (I had originally assumed you replace CustModel with your own model's name)! Also take care not to get module_name and class_name wrong.

The full script follows. First, the custom dataset:

import os
from torchvision.datasets import ImageFolder
from torchvision import transforms
from federatedml.nn.model_zoo.simple_net import SimpleNet  # added manually to the model zoo; my own model is called simple_net
from federatedml.nn.dataset.base import Dataset

class MNISTDataset(Dataset):

    def __init__(self, flatten_feature=False):  # flatten feature or not
        super(MNISTDataset, self).__init__()
        self.image_folder = None
        self.ids = None
        self.flatten_feature = flatten_feature

    def load(self, path):  # read data from path, and set sample ids
        # read using ImageFolder
        self.image_folder = ImageFolder(root=path, transform=transforms.Compose([transforms.ToTensor()]))
        # filename as the image id
        ids = []
        for image_name in self.image_folder.imgs:
            ids.append(image_name[0].split('/')[-1].replace('.jpg', ''))
        self.ids = ids
        return self

    def get_sample_ids(self):  # implement the get sample id interface, simply return ids
        return self.ids

    def __len__(self):  # return the length of the dataset
        return len(self.image_folder)

    def __getitem__(self, idx):  # get item
        ret = self.image_folder[idx]
        if self.flatten_feature:
            img = ret[0][0].flatten()  # return flattened 784-dim tensor
            return img, ret[1]  # return tensor and label
        else:
            return ret

ds = MNISTDataset(flatten_feature=True)
ds.load('mnist/')
# The new dataset must also be added manually under federatedml/nn/dataset: save this class there
# as mnist_dataset.py, since that file name is what dataset_name refers to below.
# See: https://blog.csdn.net/Yonggie/article/details/129404212
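It also pays to exercise the dataset locally first (a check of my own, not required by FATE; note that torchvision's ImageFolder expects one sub-folder per class under mnist/):

# local check: length, one flattened sample, and the filename-derived ids
print(len(ds))                   # number of images found
img, label = ds[0]
print(img.shape, label)          # expect torch.Size([784]) and an int class index
print(ds.get_sample_ids()[:5])   # first few sample ids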
# real training
import torch as t
from torch import nn
from pipeline import fate_torch_hook
from pipeline.component import HomoNN
from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader, Evaluation, DataTransform
from pipeline.interface import Data, Model
t = fate_torch_hook(t)
import os
# bind data path to name & namespace
fate_project_path = os.path.abspath('./')
host = 1
guest = 2
arbiter = 3
pipeline = PipeLine().set_initiator(role='guest', party_id=guest).set_roles(guest=guest, host=host, arbiter=arbiter)
data_0 = {"name": "mnist_guest", "namespace": "experiment"}
data_1 = {"name": "mnist_host", "namespace": "experiment"}
data_path_0 = fate_project_path + '/mnist'
data_path_1 = fate_project_path + '/mnist'
pipeline.bind_table(name=data_0['name'], namespace=data_0['namespace'], path=data_path_0)
pipeline.bind_table(name=data_1['name'], namespace=data_1['namespace'], path=data_path_1)
reader_0 = Reader(name="reader_0")
reader_0.get_party_instance(role='guest', party_id=guest).component_param(table=data_0)
reader_0.get_party_instance(role='host', party_id=host).component_param(table=data_1)
from pipeline.component.nn import DatasetParam
dataset_param = DatasetParam(dataset_name='mnist_dataset', flatten_feature=True) # specify dataset, and its init parameters
from pipeline.component.homo_nn import TrainerParam # Interface
# Wrap the model in a Sequential again; module_name is the file name in the model zoo,
# class_name is the class name.
model = t.nn.Sequential(
    # SimpleNet's __init__ takes no arguments, so no extra kwargs are passed
    t.nn.CustModel(module_name='simple_net', class_name='SimpleNet')
)
# everything below is the same as an ordinary job
nn_component = HomoNN(name='nn_0',
                      model=model,  # model
                      loss=t.nn.CrossEntropyLoss(),  # loss
                      optimizer=t.optim.Adam(model.parameters(), lr=0.01),  # optimizer
                      dataset=dataset_param,  # dataset
                      trainer=TrainerParam(trainer_name='fedavg_trainer', epochs=2, batch_size=1024, validation_freqs=1),
                      torch_seed=100  # random seed
                      )
pipeline.add_component(reader_0)
pipeline.add_component(nn_component, data=Data(train_data=reader_0.output.data))
pipeline.add_component(Evaluation(name='eval_0', eval_type='multi'), data=Data(data=nn_component.output.data))
pipeline.compile()
pipeline.fit()
# print result and summary
pipeline.get_component('nn_0').get_output_data()
pipeline.get_component('nn_0').get_summary()