% Contact us for removal if this infringes any rights.
% Paper with code:
%   Code:  https://github.com/drwuHUST
%   Paper: https://ieeexplore.ieee.org/document/8930057/references#references
%% Illustrate how MBGD_RDA, MBGD_RDA2, MBGD_RDA_T and MBGD_RDA2_T are used.
%% By Dongrui WU, drwu@hust.edu.cn
clc; clearvars; close all; %rng(0);
nMFs=2; % number of MFs in each input domain, used in MBGD_RDA and MBGD_RDA_T
alpha=.01; % initial learning rate
lambda=0.05; % L2 regularization coefficient
P=0.5; % DropRule rate
nIt=500; % number of iterations
Nbs=64; % batch size
temp=load('D:\1\MBGD_RDA-master\NO2.mat'); % NOTE(review): hard-coded local path; point this at your own copy of NO2.mat
data=temp.data;
X=data(:,1:end-1); y=data(:,end); y=y-mean(y);
% X: all columns except the last; y: last column, centered by subtracting its own mean
X = zscore(X); [N0,M]=size(X);
% z-score normalize X; N0 = number of samples, M = number of features
N=round(N0*.7); % use 70% of the samples for training
idsTrain=datasample(1:N0,N,'replace',false); % random training indices, sampled without replacement
XTrain=X(idsTrain,:); yTrain=y(idsTrain); % training split
XTest=X; XTest(idsTrain,:)=[]; % test inputs = all samples not drawn for training
yTest=y; yTest(idsTrain)=[]; % test targets
RMSEtrain=zeros(6,nIt); RMSEtest=RMSEtrain; % per-iteration train/test RMSE for the 6 runs below
% Specify the total number of rules; use the original features without dimensionality reduction
nRules=30; % number of rules, used in MBGD_RDA2 and MBGD_RDA2_T
[RMSEtrain(1,:),RMSEtest(1,:)]=MBGD_RDA2(XTrain,yTrain,XTest,yTest,alpha,lambda,P,nRules,nIt,Nbs); % Gaussian MFs
[RMSEtrain(2,:),RMSEtest(2,:)]=MBGD_RDA2_T(XTrain,yTrain,XTest,yTest,alpha,lambda,P,nRules,nIt,Nbs); % Trapezoidal MFs
% training/test errors of each run are stored into RMSEtrain/RMSEtest
maxFeatures=5; % maximum number of features to use
if M>maxFeatures % reduce the dimensionality to at most maxFeatures via PCA
    [~,XPCA,latent]=pca(X);
    realDim98=find(cumsum(latent)>=.98*sum(latent),1,'first'); % #components retaining 98% of the variance
    usedDim=min(maxFeatures,realDim98);
    X=XPCA(:,1:usedDim); [N0,M]=size(X);
end % after this block X has at most maxFeatures columns
XTrain=X(idsTrain,:); XTest=X; XTest(idsTrain,:)=[]; % re-split train/test with the reduced features (same indices as before)
% Specify the number of MFs in each input domain
% Assume x1 has two MFs X1_1 and X1_2; then, all rules involving the first FS of x1 use the same X1_1,
% and all rules involving the second FS of x1 use the same X1_2
[RMSEtrain(3,:),RMSEtest(3,:)]=MBGD_RDA(XTrain,yTrain,XTest,yTest,alpha,lambda,P,nMFs,nIt,Nbs); % Gaussian MFs
[RMSEtrain(4,:),RMSEtest(4,:),A,B,C,D]=MBGD_RDA_T(XTrain,yTrain,XTest,yTest,alpha,lambda,P,nMFs,nIt,Nbs); % Trapezoidal MFs
% compare different membership-function shapes on the reduced features
nRules=nMFs^M; % number of rules
[RMSEtrain(5,:),RMSEtest(5,:)]=MBGD_RDA2(XTrain,yTrain,XTest,yTest,alpha,lambda,P,nRules,nIt,Nbs); % Gaussian MFs
[RMSEtrain(6,:),RMSEtest(6,:)]=MBGD_RDA2_T(XTrain,yTrain,XTest,yTest,alpha,lambda,P,nRules,nIt,Nbs); % Trapezoidal MFs
% re-set the number of rules to the full grid partition and train again
%% Example implementation: MBGD_RDA
function [RMSEtrain,RMSEtest,C,Sigma,W]=MBGD_RDA(XTrain,yTrain,XTest,yTest,alpha,rr,P,nMFs,nIt,Nbs)
% Train a TSK fuzzy regression model by Mini-Batch Gradient Descent with
% DropRule, L2 Regularization, and an AdaBound-style adaptive learning rate.
% Gaussian MFs on a full grid partition (nMFs MFs per input dimension).
%
% Inputs:
%   XTrain, yTrain : training inputs (N x M) and targets (N x 1)
%   XTest,  yTest  : test inputs and targets, used only to monitor test RMSE
%   alpha          : initial learning rate
%   rr             : L2 regularization coefficient on the consequents (bias excluded)
%   P              : DropRule keep probability (a rule is kept when rand <= P)
%   nMFs           : number of MFs in each input domain
%   nIt            : number of training iterations
%   Nbs            : mini-batch size
% Outputs:
%   RMSEtrain, RMSEtest : 1 x nIt RMSE on the mini-batch / test set at each iteration
%   C, Sigma            : Gaussian MF centers and spreads (M x nMFs)
%   W                   : rule consequents (nRules x (M+1)); first column is the bias
beta1=0.9; beta2=0.999; % Adam/AdaBound exponential decay rates
[N,M]=size(XTrain); NTest=size(XTest,1); % sample and feature counts
if Nbs>N; Nbs=N; end % mini-batch size cannot exceed the number of training samples
nMFsVec=nMFs*ones(M,1); % number of MFs in each of the M input domains
nRules=nMFs^M; % number of rules
C=zeros(M,nMFs); Sigma=C; W=zeros(nRules,M+1); % antecedent (C,Sigma) and consequent (W) parameters
for m=1:M % Initialization
    C(m,:)=linspace(min(XTrain(:,m)),max(XTrain(:,m)),nMFs); % centers evenly spaced over the range of feature m
    Sigma(m,:)=std(XTrain(:,m)); % spreads initialized to the feature's standard deviation
end
minSigma=min(Sigma(:)); % lower bound on Sigma; prevents degenerate (zero-width) MFs
%% Iterative update
mu=zeros(M,nMFs); RMSEtrain=zeros(1,nIt); RMSEtest=RMSEtrain;
mC=0; vC=0; mW=0; mSigma=0; vSigma=0; vW=0; yPred=nan(Nbs,1);
% first/second moment accumulators for the Adam-style update, and batch predictions
for it=1:nIt % main training loop
    deltaC=zeros(M,nMFs); deltaSigma=deltaC; deltaW=rr*W; deltaW(:,1)=0; % consequent
    f=ones(Nbs,nRules); % firing level of rules
    idsTrain=datasample(1:N,Nbs,'replace',false); % draw a mini-batch without replacement
    idsGoodTrain=true(Nbs,1);
    for n=1:Nbs
        for m=1:M % membership grades of MFs
            mu(m,:)=exp(-(XTrain(idsTrain(n),m)-C(m,:)).^2./(2*Sigma(m,:).^2));
        end
        % fuzzification: Gaussian membership grade of sample n in every MF
        idsKeep=rand(1,nRules)<=P;
        f(n,~idsKeep)=0;
        for r=1:nRules
            if idsKeep(r)
                idsMFs=idx2vec(r,nMFsVec);
                for m=1:M
                    f(n,r)=f(n,r)*mu(m,idsMFs(m));
                end
            end
        end
        % DropRule: firing levels computed only for the randomly kept rules
        if ~sum(f(n,:)) % special case: all f(n,:)=0; no dropRule
            idsKeep=true(1,nRules);
            f(n,:)=1;
            for r=1:nRules
                idsMFs=idx2vec(r,nMFsVec);
                for m=1:M
                    f(n,r)=f(n,r)*mu(m,idsMFs(m));
                end
            end
        end
        % fallback above: if every rule was dropped or fired at 0, recompute all firing levels without DropRule
        fBar=f(n,:)/sum(f(n,:)); % normalized firing levels
        yR=[1 XTrain(idsTrain(n),:)]*W'; % per-rule consequent outputs (first-order TSK)
        yPred(n)=fBar*yR'; % prediction
        if isnan(yPred(n))
            %save2base(); return;
            idsGoodTrain(n)=false; % exclude this sample from the gradient and the training RMSE
            continue;
        end
        % Compute delta
        for r=1:nRules
            if idsKeep(r)
                temp=(yPred(n)-yTrain(idsTrain(n)))*(yR(r)*sum(f(n,:))-f(n,:)*yR')/sum(f(n,:))^2*f(n,r);
                if ~isnan(temp) && abs(temp)<inf % skip non-finite gradient contributions
                    vec=idx2vec(r,nMFsVec);
                    % delta of c, sigma, and b
                    for m=1:M
                        deltaC(m,vec(m))=deltaC(m,vec(m))+temp*(XTrain(idsTrain(n),m)-C(m,vec(m)))/Sigma(m,vec(m))^2;
                        deltaSigma(m,vec(m))=deltaSigma(m,vec(m))+temp*(XTrain(idsTrain(n),m)-C(m,vec(m)))^2/Sigma(m,vec(m))^3;
                        deltaW(r,m+1)=deltaW(r,m+1)+(yPred(n)-yTrain(idsTrain(n)))*fBar(r)*XTrain(idsTrain(n),m);
                    end
                    % delta of b0
                    deltaW(r,1)=deltaW(r,1)+(yPred(n)-yTrain(idsTrain(n)))*fBar(r);
                end
            end
        end
    end
    % AdaBound: Adam update with the per-element learning rate clipped into [lb, ub]
    lb=alpha*(1-1/((1-beta2)*it+1));
    ub=alpha*(1+1/((1-beta2)*it));
    mC=beta1*mC+(1-beta1)*deltaC;
    vC=beta2*vC+(1-beta2)*deltaC.^2;
    mCHat=mC/(1-beta1^it); % bias-corrected first moment
    vCHat=vC/(1-beta2^it); % bias-corrected second moment
    lrC=min(ub,max(lb,alpha./(sqrt(vCHat)+10^(-8))));
    C=C-lrC.*mCHat;
    mSigma=beta1*mSigma+(1-beta1)*deltaSigma;
    vSigma=beta2*vSigma+(1-beta2)*deltaSigma.^2;
    mSigmaHat=mSigma/(1-beta1^it);
    vSigmaHat=vSigma/(1-beta2^it);
    lrSigma=min(ub,max(lb,alpha./(sqrt(vSigmaHat)+10^(-8))));
    Sigma=max(minSigma,Sigma-lrSigma.*mSigmaHat); % keep spreads bounded away from zero
    mW=beta1*mW+(1-beta1)*deltaW;
    vW=beta2*vW+(1-beta2)*deltaW.^2;
    mWHat=mW/(1-beta1^it);
    vWHat=vW/(1-beta2^it);
    lrW=min(ub,max(lb,alpha./(sqrt(vWHat)+10^(-8))));
    W=W-lrW.*mWHat;
    % Training RMSE
    RMSEtrain(it)=sqrt(sum((yTrain(idsTrain(idsGoodTrain))-yPred(idsGoodTrain)).^2)/sum(idsGoodTrain));
    % Test RMSE
    f=ones(NTest,nRules); % firing level of rules
    for n=1:NTest
        for m=1:M % membership grades of MFs
            mu(m,:)=exp(-(XTest(n,m)-C(m,:)).^2./(2*Sigma(m,:).^2));
        end
        for r=1:nRules % firing levels of rules
            idsMFs=idx2vec(r,nMFsVec);
            for m=1:M
                f(n,r)=f(n,r)*mu(m,idsMFs(m));
            end
        end
    end
    yR=[ones(NTest,1) XTest]*W';
    yPredTest=sum(f.*yR,2)./sum(f,2); % prediction
    RMSEtest(it)=sqrt((yTest-yPredTest)'*(yTest-yPredTest)/NTest);
    if isnan(RMSEtest(it)) && it>1 % carry the previous RMSE forward if the prediction degenerated to NaN
        RMSEtest(it)=RMSEtest(it-1);
    end
end
end
function vec=idx2vec(idx,nMFs)
% Decode a scalar rule index into the vector of per-dimension MF indices.
% With nMFs(m) MFs in dimension m, rules are numbered 1..prod(nMFs) in a
% mixed-radix order where the last dimension varies fastest.
nDims=length(nMFs);
vec=zeros(1,nDims);
radix=[1; cumprod(nMFs(end:-1:1))]; % place values, smallest first
if idx>radix(end)
    error('Error: idx is larger than the number of rules.');
end
remainder=idx-1; % work in zero-based arithmetic, convert back with +1
for d=1:nDims
    place=radix(end-d);
    vec(d)=floor(remainder/place)+1;
    remainder=remainder-(vec(d)-1)*place;
end
end