% Viewer metadata: 282 lines, 7.0 KiB. The viewer labelled this file "Mathematica"; the content is a MATLAB script.
%% Load the labelled text and build the encoded data set
% Resets the session, reads 'labeled_data.csv', tokenises the text with the
% project helper textpreprocessing, converts the documents to fixed-length
% index sequences and packs everything the objective evaluation reads into
% the cell array datapass.
clc
clear all
close all

%%
[datanum,datatxt_final,dataraw]=xlsread('labeled_data.csv');
datatxt=datatxt_final(2:end,6);   % text column; first (header) row skipped

% At most 1000 rows are used. The count is clamped to what was actually read
% so that a shorter file does not raise an index-out-of-range error.
no_of_data_to_process=min([1000,numel(datatxt),size(datanum,1)]);
txtdataout=datatxt(1:no_of_data_to_process);
classdata=datanum(1:no_of_data_to_process,6);

% Row positions of each label value (0, 1, 2) inside the selected subset.
neitherloc=find(classdata==2);
offensiveloc=find(classdata==1);
hateloc=find(classdata==0);

txtdataout=string(txtdataout);
traindata_doc=textpreprocessing(txtdataout);
encode_word=wordEncoding(traindata_doc);

% Every document is padded/truncated to seq_maxleen word indices.
seq_maxleen=10;
traindata=doc2sequence(encode_word,traindata_doc,'Length',seq_maxleen);
% The evaluation set is the training set itself (no hold-out split): the
% same documents and encoding are used, so the sequences are reused.
testdata=traindata;

% Labels 0..2 become categories 1..3. The value set is given explicitly so
% all three categories exist even when one label is absent from the subset;
% the network built later uses a 3-unit output layer and double(trainclass)
% is compared against class indices 1..3.
trainclass=categorical(classdata+1,1:3);
numofdata=length(trainclass);

networkiter=5;   % stored in datapass{22}; used as MaxEpochs when training

% Shared inputs for the objective evaluation. Slots not listed stay empty.
datapass{1}=traindata;
datapass{2}=trainclass;
datapass{3}=testdata;
datapass{5}=numofdata;
datapass{6}=encode_word;
datapass{21}=0;            % flag: 1 selects the 'training-progress' plot
datapass{22}=networkiter;

%% Optimiser settings and initial population
pop_size=2;      % number of candidate vectors
no_of_iter=10;   % passes of the main loop

% The largest word index found in the encoded sequences fixes the length of
% every candidate vector.
traindata=datapass{1};
traindata_matrix=cell2mat(traindata);
len_data=max(max(traindata_matrix));
dim=len_data;

% All dimensions share the same lower/upper bound.
min_val1=0;
max_val1=1;
max_range=repmat(max_val1,1,dim);
min_range=repmat(min_val1,1,dim);
len=numel(max_range);

% Initial candidates come from the project helper init_pop_data.
int_pos_data=init_pop_data(pop_size,len,max_range,min_range);

% The three values below are not read again in the visible script; they are
% kept so the random-number stream and the workspace stay as before.
Positions=min_range+rand(pop_size,len).*(max_range-min_range);
a=rand(pop_size,len);
b=min_range+(max_range-min_range);

%% Score the initial population
% For every row of int_pos_data: clamp it with limit_chk_process, zero out
% the word indices whose entry in the candidate equals 0, train the LSTM
% classifier on the masked sequences and measure the accuracy of its
% predictions on testdata.
% Results:
%   fitness(indr)  - accuracy in percent
%   finalall{indr} - {accuracy, confusion matrix, one-hot targets,
%                     one-hot predictions}
%   int_pos_data   - row indr overwritten with the clamped candidate
% NOTE(review): this looks like an inlined copy of what objective_process
% does in the main loop - confirm and keep the two in sync.
data_pass_to{1}=[];
fitness=zeros(1,pop_size);
finalall=cell(1,pop_size);
for indr=1:pop_size
    dataele=int_pos_data(indr,:);
    dataele=limit_chk_process(dataele,max_val1,min_val1,data_pass_to);
    elechoose=dataele;

    % Unpack the shared inputs prepared earlier in the script.
    traindata=datapass{1};
    trainclass=datapass{2};
    testdata=datapass{3};
    lengthdata=datapass{5};
    encode_word=datapass{6};
    flag=datapass{21};
    networkiter=datapass{22};

    % Positions of the candidate that are exactly 0 name the word indices to
    % remove; those indices are replaced by 0 in the training sequences.
    datanum1=find(elechoose==0);
    traindata_matrix=cell2mat(traindata);
    traindata_matrix(ismember(traindata_matrix,datanum1))=0;
    % One row per cell, returned as an N-by-1 cell array (the same layout
    % doc2sequence produced) instead of a row cell grown in a loop.
    traindata=num2cell(traindata_matrix,2);

    datain_size=1;
    dim_data=50;
    hidden_len=80;
    total_word=encode_word.NumWords;
    no_of_class=3;
    network_layer_infor=[ ...
        sequenceInputLayer(datain_size)
        wordEmbeddingLayer(dim_data,total_word)
        lstmLayer(hidden_len,'OutputMode','last')
        fullyConnectedLayer(no_of_class)
        softmaxLayer
        classificationLayer];

    % Only the plot mode depends on the flag; everything else is shared.
    if flag==1
        plot_mode='training-progress';
    else
        plot_mode='none';
    end
    % MaxEpochs is passed at construction instead of being assigned to the
    % options object afterwards.
    train_opt=trainingOptions('adam', ...
        'MaxEpochs',networkiter, ...
        'MiniBatchSize',16, ...
        'GradientThreshold',2, ...
        'Shuffle','every-epoch', ...
        'Plots',plot_mode, ...
        'Verbose',false);

    net=trainNetwork(traindata,trainclass,network_layer_infor,train_opt);
    resultout=predict(net,testdata);

    % Predicted class = column with the highest score. The scores are not
    % rounded first: rounding turns every row without a probability >= 0.5
    % into an all-zero tie that max resolves to class 1. The column index
    % is used directly as the class index; converting it through a new
    % categorical would renumber the classes whenever one is never
    % predicted.
    [~,sout]=max(resultout,[],2);
    true_idx=double(trainclass);   % category index of each true label

    % One-hot encode targets and predictions (classes in rows, samples in
    % columns) for confusion().
    class_ids=(1:no_of_class).';
    tardata=double(class_ids==true_idx(:).');
    resdata=double(class_ids==sout(:).');
    [~,confu_result]=confusion(tardata,resdata);

    %% find accuracy
    accuracy=(sum(diag(confu_result))/sum(confu_result(:)))*100;
    final_result={accuracy,confu_result,tardata,resdata};

    int_pos_data(indr,:)=dataele;
    fitness(indr)=final_result{1};
    finalall{indr}=final_result;
end

%% Seed the best-so-far records and the velocities
% The candidate with the highest initial fitness is stored twice: as
% bestdata/bestfit/xg and as gbestdata/gbestfit/xhi.
[maxval,maxloc]=max(fitness);
gbestfit=maxval;
bestfit=maxval;
gbestdata=int_pos_data(maxloc(1),:);
bestdata=int_pos_data(maxloc(1),:);
xhi=gbestdata;
xg=bestdata;

% Every velocity component starts at 0.01.
[rr,cc]=size(int_pos_data);
initvel=repmat(0.01,rr,cc);

% Value handed to limit_chk_process from here on (it was [] for the
% initial population).
data_pass_to{1}=0;

% Main loop counter.
iter_inc=1;

%% Main optimisation loop
% One pass does the following:
%   1. update the velocities and move int_pos_data;
%   2. clamp and score the moved candidates (int_pos_data1 / fitnessl);
%   3. update the velocities again and move int_pos_data1;
%   4. blend both position sets with a random weight L, clamp and score the
%      blend (int_pos_data11 / fitnessl1);
%   5. log the best blended candidate of the pass, carrying the previous
%      entry forward when the new best accuracy is lower.
while iter_inc<=no_of_iter
    gdata=linspace(1,0.1,pop_size);   % per-candidate weight on the old velocity

    % ---- 1. first velocity update ----
    for kpop=1:pop_size
        xit=int_pos_data(kpop,:);
        % NOTE(review): gbestfit is set to max(fitness) before the loop and
        % neither fitness nor gbestfit is assigned inside it, so
        % fitness(kpop)>gbestfit looks unreachable and only the random
        % branch runs - confirm the intended comparison.
        if ~(fitness(kpop)>gbestfit)
            % Old velocity scaled by gdata plus one random scalar offset.
            dval=rand;
            rval=randn;
            newvel=gdata(kpop)*initvel(kpop,:)+dval*rval;
        else
            a1=rand;
            a2=rand;
            betaval=rand;
            rpdata=norm(bestdata-xit,2);
            rgdata=norm(gbestdata-xit,2);
            f1=gdata(kpop)*initvel(kpop,:);
            f2=a1*exp(-betaval/rpdata)*(xhi-xit);
            f3=a2*exp(-betaval/rgdata)*(xg-xit);
            newvel=f1+f2+f3;
        end
        initvel(kpop,:)=newvel;
    end
    int_pos_data=int_pos_data+initvel;

    % ---- 2. clamp and score the moved candidates ----
    for indr=1:pop_size
        dataele=limit_chk_process(int_pos_data(indr,:), ...
            max_val1,min_val1,data_pass_to);
        elechoose=dataele;
        obj_result=objective_process(datapass,elechoose);
        int_pos_data1(indr,:)=dataele;
        fitnessl(indr)=obj_result{1};
        finalall{indr}=obj_result;
    end

    % ---- 3. second velocity update ----
    for kpop=1:pop_size
        xit=int_pos_data(kpop,:);    % unclamped position
        yit=int_pos_data1(kpop,:);   % clamped position
        if ~(fitness(kpop)<fitnessl(kpop))
            % Old velocity scaled by gdata plus one random scalar offset.
            flval=rand;
            r2val=randn;
            newvel=gdata(kpop)*initvel(kpop,:)+flval*r2val;
        else
            a3=rand;
            betaval=rand;
            rmfdata=norm(xit-yit,2);
            f1=gdata(kpop)*initvel(kpop,:);
            f3=a3*exp(-betaval/rmfdata)*(xit-yit);
            newvel=f1+f3;
        end
        initvel(kpop,:)=newvel;
    end
    int_pos_data1=int_pos_data1+initvel;

    % ---- 4. random convex blend of both position sets, then score ----
    L=rand;
    int_pos_data11=L*int_pos_data1+(1-L)*int_pos_data;

    for indr=1:pop_size
        dataele=limit_chk_process(int_pos_data11(indr,:), ...
            max_val1,min_val1,data_pass_to);
        elechoose=dataele;

        % Slot 21 is 1 only for the very last evaluation of the run. The
        % inline evaluation earlier in the script reads this slot as the
        % flag that selects the 'training-progress' plot; presumably
        % objective_process does the same - verify.
        datapass{21}=double(iter_inc==no_of_iter && indr==pop_size);

        obj_result=objective_process(datapass,elechoose);
        int_pos_data11(indr,:)=dataele;
        fitnessl1(indr)=obj_result{1};
        finalall{indr}=obj_result;
    end

    % ---- 5. bookkeeping for this pass ----
    [maxval,maxloc]=max(fitnessl1);
    bestdata=int_pos_data11(maxloc(1),:);
    bestdatafinal=finalall{maxloc(1)};
    xg=bestdata;

    best_conver_data(iter_inc)=maxval;
    best_location(iter_inc)=maxloc;
    final_data{iter_inc}=bestdata;
    final_alldata{iter_inc}=bestdatafinal;

    % Keep the logged curve non-decreasing: when this pass is worse than the
    % previous entry, repeat the previous entry instead.
    if iter_inc>=2 && best_conver_data(iter_inc)<best_conver_data(iter_inc-1)
        final_alldata{iter_inc}=final_alldata{iter_inc-1};
        final_data{iter_inc}=final_data{iter_inc-1};
        best_conver_data(iter_inc)=best_conver_data(iter_inc-1);
    end

    iter_inc=iter_inc+1;
end

%% Report the outcome of the search
% Takes the last logged entry as the final result, shows its confusion
% matrix and plots the logged best accuracy per iteration.
conver_result=best_conver_data;
Final_result=final_data{end};
all_result=final_alldata{end};

% Slots 3 and 4 hold the one-hot target and prediction matrices.
tardata=all_result{3};
resdata=all_result{4};

% Confusion matrix (slot 2) with row and column percentages.
figure
chadata=confusionchart(all_result{2},{'Hate speech','offensive' , 'neither'});
chadata.Title = 'Improved Mayfly Algorithm';
chadata.RowSummary = 'row-normalized';
chadata.ColumnSummary = 'column-normalized';

% Convergence curve.
figure
plot(conver_result,'r-s','linewidth',2);
grid on;
xlabel('iteration');
ylabel('Accuracy');
title('Convergence graph for Improved Mayfly Algorithm');