Hate_Speech_IMAYFLY_and_HOR.../FINAL_DESIGN_RUN_IMAYFLY.m

282 lines
7.0 KiB
Mathematica
Raw Normal View History

2022-07-14 07:27:48 +00:00
clc
clear all
close all
%% Load the labelled data and build the inputs shared by every evaluation
% xlsread returns the numeric cells, the text cells and the raw cells.
[datanum,datatxt_final,dataraw]=xlsread('labeled_data.csv');
% The first row of the text output is skipped (presumably a header row --
% confirm against the CSV); column 6 supplies the text to classify.
datatxt=datatxt_final(2:end,6);
no_of_data_to_process=1000;
% Never request more rows than were actually read, otherwise the indexing
% below fails with an out-of-range error on a shorter file.
no_of_data_to_process=min([no_of_data_to_process,numel(datatxt),size(datanum,1)]);
txtdataout=datatxt(1:no_of_data_to_process);
% Column 6 of the numeric output is the class label (0, 1 or 2).
classdata=datanum(1:no_of_data_to_process,6);
% Row positions of each label value (not used further in this section).
neitherloc=find(classdata==2);
offensiveloc=find(classdata==1);
hateloc=find(classdata==0);

%% Text -> integer sequences
txtdataout=string(txtdataout);
traindata_doc=textpreprocessing(txtdataout);   % project helper
encode_word=wordEncoding(traindata_doc);
seq_maxleen=10;                                % fixed sequence length
traindata=doc2sequence(encode_word,traindata_doc,'Length',seq_maxleen);
% The test sequences are produced from the same documents as the training
% sequences, so accuracy is measured on the training set.
testdata=doc2sequence(encode_word,traindata_doc,'Length',seq_maxleen);
% Labels are shifted from 0..2 to 1..3 before becoming categories.
trainclass=categorical(classdata+1);
numofdata=length(trainclass);
networkiter=5;                                 % later used as MaxEpochs

%% Bundle everything the objective evaluation reads
datapass{1}=traindata;
datapass{2}=trainclass;
datapass{3}=testdata;
datapass{5}=numofdata;
datapass{6}=encode_word;
datapass{21}=0;            % 1 switches the training-progress plot on
datapass{22}=networkiter;

%% Optimiser settings and initial population
pop_size=2;
no_of_iter=10;
traindata=datapass{1};
traindata_matrix=cell2mat(traindata);
% One decision variable per word index: the largest index that occurs in
% the encoded sequences sets the dimension of the search space.
len_data=max(max(traindata_matrix));
max_val1=1;
min_val1=0;
dim=len_data;
max_range=repmat(max_val1,[1 dim]);
min_range=repmat(min_val1,[1 dim]);
len=length(max_range);
int_pos_data=init_pop_data(pop_size,len,max_range,min_range);   % project helper
% Positions, a and b are not read anywhere in this section. They are kept
% because the two rand calls advance the random stream that the remaining
% steps draw from.
Positions=rand(pop_size,len).*(max_range-min_range)+min_range;
a =rand(pop_size,len);
b= (max_range-min_range)+min_range;
%% Score every member of the initial population
% For each candidate: zero out the word indices whose entry in the
% candidate is 0, train the LSTM classifier on the masked sequences,
% predict on the test sequences and use the accuracy (in percent) as the
% fitness value.
data_pass_to{1}=[];
fitness=zeros(1,pop_size);
finalall=cell(1,pop_size);
for indr=1:pop_size
    dataele=(int_pos_data(indr,:));
    dataele=limit_chk_process(dataele,max_val1,min_val1,data_pass_to);   % project helper
    elechoose=dataele;

    % Unpack the shared inputs.
    traindata=datapass{1};
    trainclass=datapass{2};
    testdata=datapass{3};
    lengthdata=datapass{5};
    encode_word=datapass{6};
    flag=datapass{21};
    networkiter=datapass{22};

    % Positions of the candidate that are exactly 0 name the word indices
    % to suppress; every occurrence of those indices becomes 0.
    datanum1=find(elechoose==0);
    traindata_matrix=cell2mat(traindata);
    ckloc = ismember(traindata_matrix,datanum1);
    loc=find(ckloc);
    traindata_matrix(loc)=0;
    [rr,cc]=size(traindata_matrix);
    % One row per observation, returned as an N-by-1 cell array (the
    % layout documented for sequence input to trainNetwork).
    traindata1=num2cell(traindata_matrix,2);
    traindata=traindata1;

    % Network definition.
    datain_size=1;
    dim_data=50;
    hidden_len=80;
    total_word=encode_word.NumWords;
    no_of_class=3;
    network_layer_infor=[ ...
        sequenceInputLayer(datain_size)
        wordEmbeddingLayer(dim_data,total_word)
        lstmLayer(hidden_len,'OutputMode','last')
        fullyConnectedLayer(no_of_class)
        softmaxLayer
        classificationLayer];

    % The two original option sets differed only in the 'Plots' value.
    if(flag==1)
        plot_mode='training-progress';
    else
        plot_mode='none';
    end
    train_opt=trainingOptions('adam', ...
        'MaxEpochs',networkiter, ...
        'MiniBatchSize',16, ...
        'GradientThreshold',2, ...
        'Shuffle','every-epoch', ...
        'Plots',plot_mode, ...
        'Verbose',false);

    net=trainNetwork(traindata,trainclass,network_layer_infor,train_opt);
    % NOTE(review): the network is trained on the masked sequences but
    % predicts on the unmasked testdata -- confirm this is intended.
    resultout=predict(net,testdata);

    % Predicted class = column with the largest score. The scores must not
    % be rounded first: when no score reaches 0.5 every rounded value is 0
    % and the maximum always lands on class 1.
    [maxval,maxlc]=max(resultout,[],2);
    % Fix the category set to 1..no_of_class so that double() returns the
    % class id itself even when some class is never predicted.
    ypred1=categorical(maxlc,1:no_of_class);
    % NOTE: the name 'sin' hides the built-in sin function for the rest of
    % the script; it is kept in case later code refers to it.
    sin=double(trainclass);
    sout=double(ypred1);

    % One-hot matrices (classes x observations) for confusion().
    tardata=double((1:no_of_class).'==sin(:).');
    resdata=double((1:no_of_class).'==sout(:).');
    [~,confu_result]=confusion(tardata,resdata);
    %% find accuracy
    accuracy=(sum(diag(confu_result))/sum(confu_result(:)))*100;
    final_result{1}=accuracy;
    final_result{2}=confu_result;
    final_result{3}=tardata;
    final_result{4}=resdata;

    % Store the bound-checked candidate together with its score.
    % NOTE(review): objective_process, used by the main loop, is not
    % visible here; if it repeats this evaluation it presumably needs the
    % same prediction handling -- confirm.
    int_pos_data(indr,:)=dataele;
    fitness(indr)=final_result{1};
    finalall{indr}=final_result;
end
%% Seed the search state from the initial population
% Highest fitness among the initial candidates and where it occurs.
[maxval,maxloc]=max(fitness);
bestfit=maxval;
gbestfit=maxval;

% The same row serves as both the current best and the global best.
bestdata=int_pos_data(maxloc(1),:);
gbestdata=int_pos_data(maxloc(1),:);
xg=bestdata;
xhi=gbestdata;

% Every velocity component starts at 0.01.
[rr,cc]=size(int_pos_data);
initvel=repmat(0.01,rr,cc);

iter_inc=1;% Loop counter
% Main loop
data_pass_to{1}=0;
%% Optimisation loop
% Each pass:
%   1. refresh the velocities from the current positions and move them,
%   2. bound-check and score the moved positions (int_pos_data1/fitnessl),
%   3. refresh the velocities again by comparing both position sets,
%   4. blend the two position sets with a random weight and score the
%      blend (int_pos_data11/fitnessl1),
%   5. record the best blended candidate; the recorded best never gets
%      worse from one pass to the next.
% Per-member weight applied to the previous velocity; it does not change
% between passes, so it is computed once.
gdata=linspace(1,0.1,pop_size);
while iter_inc<=no_of_iter
    % ---- 1. first velocity refresh ---------------------------------
    for kpop=1:pop_size
        xit=int_pos_data(kpop,:);
        % Fitness is an accuracy that is maximised, so a member whose
        % fitness is BELOW the best one is pulled towards the stored best
        % positions. The previous test (fitness > gbestfit) could never
        % succeed because gbestfit is max(fitness) and neither value is
        % reassigned inside the loop, which left this branch dead.
        if(fitness(kpop)<gbestfit)
            a1=rand;a2=rand;betaval=rand;
            rpdata=norm(bestdata-int_pos_data(kpop,:),2);
            rgdata=norm(gbestdata-int_pos_data(kpop,:),2);
            f1=gdata(kpop)*initvel(kpop,:);
            f2=a1*exp(-betaval/rpdata)*(xhi-xit);
            f3=a2*exp(-betaval/rgdata)*(xg-xit);
            newvel=f1+f2+f3;
        else
            % Otherwise: damped velocity plus one random scalar offset
            % that is added to every component.
            dval=rand;rval=randn;
            newvel=gdata(kpop)*initvel(kpop,:)+dval*rval;
        end
        initvel(kpop,:)=newvel;
    end
    int_pos_data=int_pos_data+initvel;

    % ---- 2. score the moved positions ------------------------------
    % The bound-checked copy goes to int_pos_data1; int_pos_data itself
    % keeps the unchecked values.
    for indr=1:pop_size
        dataele=(int_pos_data(indr,:));
        dataele=limit_chk_process(dataele,...
            max_val1,min_val1,data_pass_to);
        elechoose=dataele;
        obj_result=objective_process(datapass,elechoose);
        int_pos_data1(indr,:)=dataele;
        fitnessl(indr)=obj_result{1};
        finalall{indr}=obj_result;
    end

    % ---- 3. second velocity refresh --------------------------------
    for kpop=1:pop_size
        xit=int_pos_data(kpop,:);
        yit=int_pos_data1(kpop,:);
        % NOTE(review): 'fitness' still holds the scores of the initial
        % population, and this comparison may also need flipping for a
        % maximised fitness -- confirm the intended rule.
        if(fitness(kpop)<fitnessl(kpop))
            a3=rand;betaval=rand;
            rmfdata=norm(xit-yit,2);
            f1=gdata(kpop)*initvel(kpop,:);
            f3=a3*exp(-betaval/rmfdata)*(xit-yit);
            newvel=f1+f3;
        else
            flval=rand;r2val=randn;
            newvel=gdata(kpop)*initvel(kpop,:)+flval*r2val;
        end
        initvel(kpop,:)=newvel;
    end
    int_pos_data1=int_pos_data1+initvel;

    % ---- 4. blend both position sets and score the blend -----------
    L=rand;
    int_pos_data11=L*int_pos_data1+(1-L)*int_pos_data;
    for indr=1:pop_size
        dataele=(int_pos_data11(indr,:));
        dataele=limit_chk_process(dataele,...
            max_val1,min_val1,data_pass_to);
        elechoose=dataele;
        % The flag in slot 21 is raised only for the very last evaluation
        % of the run.
        if(iter_inc==no_of_iter && indr==pop_size)
            datapass{21}=1;
        else
            datapass{21}=0;
        end
        obj_result=objective_process(datapass,elechoose);
        int_pos_data11(indr,:)=dataele;
        fitnessl1(indr)=obj_result{1};
        finalall{indr}=obj_result;
    end

    % ---- 5. bookkeeping --------------------------------------------
    [maxval,maxloc]=max(fitnessl1);
    bestdata=int_pos_data11(maxloc(1),:);
    bestdatafinal=finalall{maxloc(1)};
    xg=bestdata;
    % Reuse the maximum found above instead of searching a second time.
    best_conver_data(iter_inc)=maxval;
    best_location(iter_inc)=maxloc(1);
    final_data{iter_inc}=bestdata;
    final_alldata{iter_inc}=bestdatafinal;
    % Carry the previous record forward when this pass did worse.
    if iter_inc>=2 && best_conver_data(iter_inc)<best_conver_data(iter_inc-1)
        best_conver_data(iter_inc)=best_conver_data(iter_inc-1);
        final_data{iter_inc}=final_data{iter_inc-1};
        final_alldata{iter_inc}=final_alldata{iter_inc-1};
    end
    iter_inc=iter_inc+1;
end
%% Collect and display the outcome
% Best-so-far accuracy per pass, plus the records stored for the last pass.
conver_result=best_conver_data;
Final_result=final_data{end};
all_result=final_alldata{end};
tardata=all_result{3};
resdata=all_result{4};

% Confusion chart built from the stored confusion matrix, with row and
% column summaries.
figure
chadata=confusionchart(all_result{2},{'Hate speech','offensive' , 'neither'});
chadata.Title = 'Improved Mayfly Algorithm';
chadata.RowSummary = 'row-normalized';
chadata.ColumnSummary = 'column-normalized';

% Convergence curve: recorded accuracy against pass number.
figure
plot(conver_result,'r-s','linewidth',2);
grid on
xlabel('iteration');
ylabel('Accuracy');
title('Convergence graph for Improved Mayfly Algorithm');