updated files

This commit is contained in:
Mannohargs 2022-07-14 12:57:48 +05:30
commit d6f8e56ae7
12 changed files with 27694 additions and 0 deletions

View File

@ -0,0 +1,58 @@
% Driver script: LSTM tweet classifier whose word mask is tuned by the
% Horse Herd optimisation routine (horseherd_process).
%
% Steps: read labeled_data.csv, convert the first tweets into fixed-length
% word-index sequences, run the optimiser, then draw the confusion chart and
% the convergence curve of the best result found.
clc
clear all
close all
%% Load the labelled tweets
[datanum,datatxt_final,dataraw]=xlsread('labeled_data.csv');
datatxt=datatxt_final(2:end,6);          % column 6 of the text output, header row dropped

% Use at most 1000 samples, but never more rows than were actually read:
% indexing 1:1000 on a shorter file aborts with an out-of-range error.
no_of_data_to_process=min([1000,numel(datatxt),size(datanum,1)]);
if no_of_data_to_process<1
    error('No samples could be read from labeled_data.csv.');
end

txtdataout=datatxt(1:no_of_data_to_process);
classdata=datanum(1:no_of_data_to_process,6);   % class codes 0, 1 and 2

% Row positions of each class code (kept in the workspace for inspection).
neitherloc=find(classdata==2);
offensiveloc=find(classdata==1);
hateloc=find(classdata==0);

%% Text -> fixed-length word-index sequences
txtdataout=string(txtdataout);
traindata_doc=textpreprocessing(txtdataout);
encode_word=wordEncoding(traindata_doc);
seq_maxleen=10;                          % every sequence is padded/truncated to this length
traindata=doc2sequence(encode_word,traindata_doc,'Length',seq_maxleen);
% NOTE(review): the evaluation sequences are built from the same documents
% as the training sequences, so the reported accuracy is a training accuracy.
testdata=doc2sequence(encode_word,traindata_doc,'Length',seq_maxleen);
trainclass=categorical(classdata+1);     % labels become 1, 2, 3
numofdata=numel(trainclass);
networkiter=5;                           % epochs per network training

%% Pack the arguments read by objective_process
datapass=cell(1,22);
datapass{1}=traindata;
datapass{2}=trainclass;
datapass{3}=testdata;
datapass{5}=numofdata;
datapass{6}=encode_word;
datapass{21}=0;                          % plot flag, overwritten by the optimiser
datapass{22}=networkiter;

%% Optimise
pop_size=2;
no_of_iter=10;
[conver_result,Final_result,all_result]=horseherd_process(datapass,pop_size,no_of_iter);
tardata=all_result{3};resdata=all_result{4};

%% Report
figure,chadata=confusionchart(all_result{2},{'Hate speech','offensive' , 'neither'});
chadata.Title='Horse Herd optimization Algorithm';
chadata.RowSummary = 'row-normalized';
chadata.ColumnSummary = 'column-normalized';
figure,plot(conver_result,'r-s','linewidth',2);
grid on;xlabel('iteration');ylabel('Accuracy');
title('Convergence graph for Horse Herd optimization Algorithm');

281
FINAL_DESIGN_RUN_IMAYFLY.m Normal file
View File

@ -0,0 +1,281 @@
% Driver script: LSTM tweet classifier whose word mask is tuned by the
% improved Mayfly optimisation routine (imayfly_process).
%
% Steps: read labeled_data.csv, convert the first tweets into fixed-length
% word-index sequences, run the optimiser, then draw the confusion chart and
% the convergence curve of the best result found.
%
% The optimiser and the network training/scoring used to be pasted inline
% here; they are the same statements as imayfly_process.m and
% objective_process.m, so the script now calls imayfly_process directly (the
% Horse Herd driver calls horseherd_process the same way).
clc
clear all
close all
%% Load the labelled tweets
[datanum,datatxt_final,dataraw]=xlsread('labeled_data.csv');
datatxt=datatxt_final(2:end,6);          % column 6 of the text output, header row dropped

% Use at most 1000 samples, but never more rows than were actually read:
% indexing 1:1000 on a shorter file aborts with an out-of-range error.
no_of_data_to_process=min([1000,numel(datatxt),size(datanum,1)]);
if no_of_data_to_process<1
    error('No samples could be read from labeled_data.csv.');
end

txtdataout=datatxt(1:no_of_data_to_process);
classdata=datanum(1:no_of_data_to_process,6);   % class codes 0, 1 and 2

% Row positions of each class code (kept in the workspace for inspection).
neitherloc=find(classdata==2);
offensiveloc=find(classdata==1);
hateloc=find(classdata==0);

%% Text -> fixed-length word-index sequences
txtdataout=string(txtdataout);
traindata_doc=textpreprocessing(txtdataout);
encode_word=wordEncoding(traindata_doc);
seq_maxleen=10;                          % every sequence is padded/truncated to this length
traindata=doc2sequence(encode_word,traindata_doc,'Length',seq_maxleen);
% NOTE(review): the evaluation sequences are built from the same documents
% as the training sequences, so the reported accuracy is a training accuracy.
testdata=doc2sequence(encode_word,traindata_doc,'Length',seq_maxleen);
trainclass=categorical(classdata+1);     % labels become 1, 2, 3
numofdata=numel(trainclass);
networkiter=5;                           % epochs per network training

%% Pack the arguments read by objective_process
datapass=cell(1,22);
datapass{1}=traindata;
datapass{2}=trainclass;
datapass{3}=testdata;
datapass{5}=numofdata;
datapass{6}=encode_word;
datapass{21}=0;                          % plot flag, overwritten by the optimiser
datapass{22}=networkiter;

%% Optimise
pop_size=2;
no_of_iter=10;
[conver_result,Final_result,all_result]=imayfly_process(datapass,pop_size,no_of_iter);
tardata=all_result{3};resdata=all_result{4};

%% Report
figure,chadata=confusionchart(all_result{2},{'Hate speech','offensive' , 'neither'});
chadata.Title = 'Improved Mayfly Algorithm';
chadata.RowSummary = 'row-normalized';
chadata.ColumnSummary = 'column-normalized';
figure,plot(conver_result,'r-s','linewidth',2);
grid on;xlabel('iteration');ylabel('Accuracy');
title('Convergence graph for Improved Mayfly Algorithm');

BIN
horseherd.pdf Normal file

Binary file not shown.

124
horseherd_process.m Normal file
View File

@ -0,0 +1,124 @@
function [conver_result,Final_result,all_result]=horseherd_process(datapass,pop_size,no_of_iter)
%HORSEHERD_PROCESS Horse-herd style population search over a word mask.
%
%   [conver_result,Final_result,all_result] = ...
%       horseherd_process(datapass,pop_size,no_of_iter)
%
%   Each candidate is a row vector with one entry per word index; its length
%   is the largest value found in the training sequences. Candidates are
%   passed through limit_chk_process and scored with objective_process, and
%   element 1 of the returned cell is used as the fitness to maximise.
%
%   Inputs
%     datapass   - cell array forwarded to objective_process. Slot {1} is
%                  read here (converted with cell2mat) and slot {21} is
%                  overwritten before every evaluation in the main loop.
%     pop_size   - number of candidates in the population.
%     no_of_iter - number of main-loop iterations.
%
%   Outputs
%     conver_result - best fitness recorded per iteration (non-decreasing
%                     from iteration 2 onwards).
%     Final_result  - candidate stored for the last iteration.
%     all_result    - objective_process result cell stored for the last
%                     iteration.
%
%   NOTE(review): with no_of_iter < 1 the loop never runs, so
%   best_conver_data/final_data/final_alldata are undefined when the outputs
%   are assigned at the end of the function.
traindata=datapass{1};
traindata_matrix=cell2mat(traindata);
% Largest word index present -> number of mask entries per candidate.
len_data=max(max(traindata_matrix));
max_val1=1;
min_val1=0;
dim=len_data;
max_range=[repmat(max_val1,[1 dim])];
min_range=[repmat(min_val1,[1 dim])];
len=length(max_range);
int_pos_data=init_pop_data(pop_size,len,max_range,min_range);
data_pass_to{1}=[];
% Score the initial population; the limited vector is written back.
for indr=1:pop_size
dataele=(int_pos_data(indr,:));
dataele=limit_chk_process(dataele,...
max_val1,min_val1,data_pass_to);
elechoose=dataele;
obj_result=objective_process(datapass,elechoose);
int_pos_data(indr,:)=dataele;
fitness(indr)=obj_result{1};
finalall{indr}=obj_result;
end
% Best candidate of the initial population.
% NOTE(review): bestfit, xg, gbestfit and xhi are assigned here but never
% read again in this function; only bestdata is used later.
[maxval,maxloc]=max(fitness);
bestdata=int_pos_data(maxloc(1),:);
bestfit=maxval;
xg=bestdata;
gbestdata=int_pos_data(maxloc(1),:);
gbestfit=maxval;
xhi=gbestdata;
[rr,cc]=size(int_pos_data);
initvel=ones(rr,cc)*0.01;
iter_inc=1;% Loop counter
% Main loop
data_pass_to{1}=0;
% Every horse gets an age class 1..4, drawn with probabilities 10/20/30/40 %.
percentagehorse=[10 20 30 40]/100;
agedata=randsrc(1,pop_size,[1 2 3 4;percentagehorse]);
% Per-iteration decay factors (w*) and the coefficients they shrink (*iter).
wg=0.95;wh=0.9;wsoc=0.9;wim=0.8;wdefmec=0.9;wro=0.9;
giter=1;hmiter=1;sociter=1;imiter=1;roiter=1;defmeciter=1;
while iter_inc<=no_of_iter
low=0.95;upp=1.05;
giter=wg*giter;
hmiter=hmiter*wh;
sociter=sociter*wsoc;
imiter=imiter*wim;
defmeciter=defmeciter*wdefmec;
roiter=roiter*wro;
% Two sets of 5 random population indices, drawn once per iteration and
% shared by every horse in this iteration.
newpos=randsrc(1,5,1:pop_size);
newpos2=randsrc(1,5,1:pop_size);
for kpop=1:pop_size
pit=int_pos_data(kpop,:);
r=rand;
% Velocity terms: gram and ro scale the current position; hm, socm, imm and
% defmec point from the position to, respectively, bestdata, the population
% mean, the mean of rows newpos and the mean of rows newpos2.
gram=giter*(low+r*upp)*pit;
hm=hmiter*(bestdata-pit);
socm=sociter*(mean(int_pos_data)-pit);
imm=imiter*(mean(int_pos_data(newpos,:))-pit);
defmec=defmeciter*(mean(int_pos_data(newpos2,:))-pit);
ro=roiter*(pit);
velmalpha=gram+defmec;
velmbeta=gram+hm+socm+defmec;
velmgamma=gram+hm+socm+defmec+imm+ro;
velmdel=gram+imm+ro;
% The age class selects which combination of terms becomes the velocity.
if(agedata(kpop)==1)
initvel(kpop,:)=velmalpha;
end
if(agedata(kpop)==2)
initvel(kpop,:)=velmbeta;
end
if(agedata(kpop)==3)
initvel(kpop,:)=velmgamma;
end
if(agedata(kpop)==4)
initvel(kpop,:)=velmdel;
end
end
int_pos_data=int_pos_data+initvel;
% Score the moved population.
% NOTE(review): the limited vectors are stored in int_pos_data11, not written
% back to int_pos_data, so the next iteration starts from the unlimited
% positions.
for indr=1:pop_size
dataele=(int_pos_data(indr,:));
dataele=limit_chk_process(dataele,...
max_val1,min_val1,data_pass_to);
elechoose=dataele;
% Slot 21 is set to 1 only for the last candidate of the last iteration;
% objective_process reads it as the flag that enables its training plot.
if(iter_inc==no_of_iter && indr==pop_size)
datapass{21}=1;
else
datapass{21}=0;
end
obj_result=objective_process(datapass,elechoose);
int_pos_data11(indr,:)=dataele;
fitnessl1(indr)=obj_result{1};
finalall{indr}=obj_result;
end
[maxval,maxloc]=max(fitnessl1);
bestdata=int_pos_data11(maxloc(1),:);
bestdatafinal=finalall{maxloc(1)};
[best_conver_data(iter_inc),best_location(iter_inc)]=max(fitnessl1);
final_data{iter_inc}=bestdata;
final_alldata{iter_inc}=bestdatafinal;
% Keep the previous iteration's record when this iteration is worse.
% NOTE(review): iteration 1 is never compared with the best of the initial
% population, so that initial best can be lost.
if iter_inc>=2 && best_conver_data(iter_inc)<best_conver_data(iter_inc-1)
best_conver_data(iter_inc)=best_conver_data(iter_inc-1);
final_data{iter_inc}=final_data{iter_inc-1};
final_alldata{iter_inc}=final_alldata{iter_inc-1};
end
iter_inc=iter_inc+1;
end
conver_result=best_conver_data;
Final_result=final_data{end};
all_result=final_alldata{end};

134
imayfly_process.m Normal file
View File

@ -0,0 +1,134 @@
function [conver_result,Final_result,all_result]=imayfly_process(datapass,pop_size,no_of_iter)
%IMAYFLY_PROCESS Mayfly-style population search over a word mask.
%
%   [conver_result,Final_result,all_result] = ...
%       imayfly_process(datapass,pop_size,no_of_iter)
%
%   Each candidate is a row vector with one entry per word index; its length
%   is the largest value found in the training sequences. Candidates are
%   passed through limit_chk_process and scored with objective_process, and
%   element 1 of the returned cell is used as the fitness to maximise. Every
%   iteration evaluates the population twice (the "Male" and "Female" passes
%   marked below).
%
%   Inputs
%     datapass   - cell array forwarded to objective_process. Slot {1} is
%                  read here (converted with cell2mat) and slot {21} is
%                  overwritten before every evaluation of the second pass.
%     pop_size   - number of candidates in the population.
%     no_of_iter - number of main-loop iterations.
%
%   Outputs
%     conver_result - best fitness recorded per iteration (non-decreasing
%                     from iteration 2 onwards).
%     Final_result  - candidate stored for the last iteration.
%     all_result    - objective_process result cell stored for the last
%                     iteration.
%
%   NOTE(review): with no_of_iter < 1 the loop never runs, so
%   best_conver_data/final_data/final_alldata are undefined when the outputs
%   are assigned at the end of the function.
traindata=datapass{1};
traindata_matrix=cell2mat(traindata);
% Largest word index present -> number of mask entries per candidate.
len_data=max(max(traindata_matrix));
max_val1=1;
min_val1=0;
dim=len_data;
max_range=[repmat(max_val1,[1 dim])];
min_range=[repmat(min_val1,[1 dim])];
len=length(max_range);
int_pos_data=init_pop_data(pop_size,len,max_range,min_range);
data_pass_to{1}=[];
% Score the initial population; the limited vector is written back.
for indr=1:pop_size
dataele=(int_pos_data(indr,:));
dataele=limit_chk_process(dataele,...
max_val1,min_val1,data_pass_to);
elechoose=dataele;
obj_result=objective_process(datapass,elechoose);
int_pos_data(indr,:)=dataele;
fitness(indr)=obj_result{1};
finalall{indr}=obj_result;
end
% Best candidate of the initial population. fitness, gbestdata, gbestfit and
% xhi are not reassigned anywhere below.
[maxval,maxloc]=max(fitness);
bestdata=int_pos_data(maxloc(1),:);
bestfit=maxval;
xg=bestdata;
gbestdata=int_pos_data(maxloc(1),:);
gbestfit=maxval;
xhi=gbestdata;
[rr,cc]=size(int_pos_data);
initvel=ones(rr,cc)*0.01;
iter_inc=1;% Loop counter
% Main loop
data_pass_to{1}=0;
while iter_inc<=no_of_iter
% Velocity weight per candidate, falling linearly from 1 to 0.1 with its index.
gdata=linspace(1,0.1,pop_size);
for kpop=1:pop_size
xit=int_pos_data(kpop,:);
% NOTE(review): gbestfit is max(fitness) and neither variable changes inside
% the loop, so fitness(kpop)>gbestfit can never be true; only the else
% branch runs.
if(fitness(kpop)>gbestfit)
a1=rand;a2=rand;betaval=rand;
rpdata=norm(bestdata-int_pos_data(kpop,:),2);
rgdata=norm(gbestdata-int_pos_data(kpop,:),2);
f1=gdata(kpop)*initvel(kpop,:);
f2=a1*exp(-betaval/rpdata)*(xhi-xit);
f3=a2*exp(-betaval/rgdata)*(xg-xit);
newvel=f1+f2+f3;
else
% dval*rval is one scalar, added to every element of the velocity row.
dval=rand;rval=randn;
newvel=gdata(kpop)*initvel(kpop,:)+dval*rval;
end
initvel(kpop,:)=newvel;
end
int_pos_data=int_pos_data+initvel;
%Male
% First pass: score the moved positions; limited vectors go to int_pos_data1.
for indr=1:pop_size
dataele=(int_pos_data(indr,:));
dataele=limit_chk_process(dataele,...
max_val1,min_val1,data_pass_to);
elechoose=dataele;
obj_result=objective_process(datapass,elechoose);
int_pos_data1(indr,:)=dataele;
fitnessl(indr)=obj_result{1};
finalall{indr}=obj_result;
end
for kpop=1:pop_size
xit=int_pos_data(kpop,:);
yit=int_pos_data1(kpop,:);
% Compares the initial-population fitness (fitness is never refreshed) with
% the fitness from the first pass of this iteration.
if(fitness(kpop)<fitnessl(kpop))
a3=rand;betaval=rand;
rmfdata=norm(xit-yit,2);
f1=gdata(kpop)*initvel(kpop,:);
f3=a3*exp(-betaval/rmfdata)*(xit-yit);
newvel=f1+f3;
else
% flval*r2val is one scalar, added to every element of the velocity row.
flval=rand;r2val=randn;
newvel=gdata(kpop)*initvel(kpop,:)+flval*r2val;
end
initvel(kpop,:)=newvel;
end
int_pos_data1=int_pos_data1+initvel;
% Random convex blend of the two position sets.
L=rand;
int_pos_data11=L*int_pos_data1+(1-L)*int_pos_data;
%Female
% Second pass: score the blended positions; these results feed the records.
for indr=1:pop_size
dataele=(int_pos_data11(indr,:));
dataele=limit_chk_process(dataele,...
max_val1,min_val1,data_pass_to);
elechoose=dataele;
% Slot 21 is set to 1 only for the last candidate of the last iteration;
% objective_process reads it as the flag that enables its training plot.
if(iter_inc==no_of_iter && indr==pop_size)
datapass{21}=1;
else
datapass{21}=0;
end
obj_result=objective_process(datapass,elechoose);
int_pos_data11(indr,:)=dataele;
fitnessl1(indr)=obj_result{1};
finalall{indr}=obj_result;
end
[maxval,maxloc]=max(fitnessl1);
bestdata=int_pos_data11(maxloc(1),:);
bestdatafinal=finalall{maxloc(1)};
xg=bestdata;
[best_conver_data(iter_inc),best_location(iter_inc)]=max(fitnessl1);
final_data{iter_inc}=bestdata;
final_alldata{iter_inc}=bestdatafinal;
% Keep the previous iteration's record when this iteration is worse.
% NOTE(review): iteration 1 is never compared with the best of the initial
% population, so that initial best can be lost.
if iter_inc>=2 && best_conver_data(iter_inc)<best_conver_data(iter_inc-1)
best_conver_data(iter_inc)=best_conver_data(iter_inc-1);
final_data{iter_inc}=final_data{iter_inc-1};
final_alldata{iter_inc}=final_alldata{iter_inc-1};
end
iter_inc=iter_inc+1;
end
conver_result=best_conver_data;
Final_result=final_data{end};
all_result=final_alldata{end};

Binary file not shown.

16
init_pop_data.m Normal file
View File

@ -0,0 +1,16 @@
function Positions=init_pop_data(SearchAgents_no,dim,upper_lmt,lower_lmt)
%INIT_POP_DATA Draw a random initial population inside the given bounds.
%
%   Positions = init_pop_data(SearchAgents_no,dim,upper_lmt,lower_lmt)
%
%   Inputs
%     SearchAgents_no - number of candidates (rows of the result).
%     dim             - number of variables per candidate (columns).
%     upper_lmt       - upper bound: a scalar shared by all variables, or a
%                       row vector with one bound per variable.
%     lower_lmt       - lower bound, same layout as upper_lmt.
%
%   Output
%     Positions - SearchAgents_no-by-dim matrix. With scalar bounds the
%                 entries are uniform real numbers in [lower, upper]; with
%                 per-variable bounds every entry is additionally rounded to
%                 the nearest integer.
%
%   When the bound vectors are empty the result is an all-zero
%   SearchAgents_no-by-dim matrix instead of an undefined output.
Boundary_no=size(upper_lmt,2);

% Allocate once; this also defines the output when no branch below applies.
Positions=zeros(SearchAgents_no,dim);

if Boundary_no==1
    % One common bound for every variable: continuous uniform draw.
    Positions=rand(SearchAgents_no,dim).*(upper_lmt-lower_lmt)+lower_lmt;
elseif Boundary_no>1
    % Individual bounds: draw each column in its own range and round it.
    for i=1:dim
        ub_i=upper_lmt(i);
        lb_i=lower_lmt(i);
        Positions(:,i)=round(rand(SearchAgents_no,1)*(ub_i-lb_i)+lb_i);
    end
end

26954
labeled_data.csv Normal file

File diff suppressed because it is too large Load Diff

22
limit_chk_process.asv Normal file
View File

@ -0,0 +1,22 @@
function dataout2=limit_chk_process(datain,upper_lmt,lower_lmt,data_pass_to_loadflow)
%LIMIT_CHK_PROCESS Round a candidate vector and replace it when invalid.
%
%   dataout2 = limit_chk_process(datain,upper_lmt,lower_lmt,data_pass_to_loadflow)
%
%   Inputs
%     datain                - candidate vector.
%     upper_lmt, lower_lmt  - bounds compared element-wise with the rounded
%                             candidate; element (1) of each is used as the
%                             two-symbol alphabet for random replacement.
%     data_pass_to_loadflow - accepted for interface compatibility; not used.
%
%   Output
%     dataout2 - the rounded candidate when every element lies within the
%                bounds. If any element is out of range, or if all elements
%                are identical, a fresh 1-by-length(datain) vector drawn by
%                randsrc from {lower_lmt(1), upper_lmt(1)} is returned
%                instead (a single redraw; it is not re-checked).
rounded=round(datain);
too_high=rounded>upper_lmt;
too_low=rounded<lower_lmt;

if any(too_high(:)) || any(too_low(:))
    % One violation discards the whole candidate.
    dataout2=randsrc(1,length(datain),[lower_lmt(1) upper_lmt(1)]);
else
    % In range: return the rounded values directly. Blending them with
    % limit.*mask, as was done before, yields NaN for an infinite limit
    % because Inf.*0 is NaN.
    dataout2=rounded;
end

% A vector whose entries are all equal is replaced as well.
if length(unique(dataout2))==1
    dataout2=randsrc(1,length(datain),[lower_lmt(1) upper_lmt(1)]);
end

22
limit_chk_process.m Normal file
View File

@ -0,0 +1,22 @@
function dataout2=limit_chk_process(datain,upper_lmt,lower_lmt,data_pass_to_loadflow)
%LIMIT_CHK_PROCESS Round a candidate vector and replace it when invalid.
%
%   dataout2 = limit_chk_process(datain,upper_lmt,lower_lmt,data_pass_to_loadflow)
%
%   Inputs
%     datain                - candidate vector.
%     upper_lmt, lower_lmt  - bounds compared element-wise with the rounded
%                             candidate; element (1) of each is used as the
%                             two-symbol alphabet for random replacement.
%     data_pass_to_loadflow - accepted for interface compatibility; not used.
%
%   Output
%     dataout2 - the rounded candidate when every element lies within the
%                bounds. If any element is out of range, or if all elements
%                are identical, a fresh 1-by-length(datain) vector drawn by
%                randsrc from {lower_lmt(1), upper_lmt(1)} is returned
%                instead (a single redraw; it is not re-checked).
rounded=round(datain);
too_high=rounded>upper_lmt;
too_low=rounded<lower_lmt;

if any(too_high(:)) || any(too_low(:))
    % One violation discards the whole candidate.
    dataout2=randsrc(1,length(datain),[lower_lmt(1) upper_lmt(1)]);
else
    % In range: return the rounded values directly. Blending them with
    % limit.*mask, as was done before, yields NaN for an infinite limit
    % because Inf.*0 is NaN.
    dataout2=rounded;
end

% A vector whose entries are all equal is replaced as well.
if length(unique(dataout2))==1
    dataout2=randsrc(1,length(datain),[lower_lmt(1) upper_lmt(1)]);
end

75
objective_process.m Normal file
View File

@ -0,0 +1,75 @@
function final_result=objective_process(datapass,elechoose)
%OBJECTIVE_PROCESS Train and score an LSTM classifier for one word mask.
%
%   final_result = objective_process(datapass,elechoose)
%
%   Inputs
%     datapass  - cell array; the slots read here are
%                   {1}  training sequences: cell array of equal-length
%                        numeric row vectors of word indices
%                   {2}  categorical training labels (three classes)
%                   {3}  sequences passed to predict
%                   {6}  word encoding object (only NumWords is read)
%                   {21} 1 -> show the training-progress plot, else no plot
%                   {22} number of training epochs
%     elechoose - numeric vector; elechoose(k)==0 switches word index k off
%                 (that index is replaced by 0 in the training sequences).
%
%   Output
%     final_result - 1-by-4 cell:
%                   {1} accuracy in percent
%                   {2} confusion matrix returned by confusion()
%                   {3} one-hot targets, 3-by-N
%                   {4} one-hot predictions, 3-by-N
%
%   The predictions for datapass{3} are compared position by position with
%   the labels in datapass{2}, so both must describe the same samples in the
%   same order.
traindata=datapass{1};
trainclass=datapass{2};
testdata=datapass{3};
encode_word=datapass{6};
show_plot=datapass{21};
networkiter=datapass{22};

% Apply the mask: every occurrence of a switched-off word index becomes 0.
% NOTE(review): only the training sequences are masked; testdata is scored
% unmasked. Confirm that this is intended.
dropped_words=find(elechoose==0);
traindata_matrix=cell2mat(traindata);
traindata_matrix(ismember(traindata_matrix,dropped_words))=0;
traindata=num2cell(traindata_matrix,2);   % back to one row vector per sample

% Network: word embedding -> LSTM (last state) -> 3-way softmax classifier.
datain_size=1;
dim_data=50;
hidden_len=80;
total_word=encode_word.NumWords;
no_of_class=3;
network_layer_infor=[ ...
    sequenceInputLayer(datain_size)
    wordEmbeddingLayer(dim_data,total_word)
    lstmLayer(hidden_len,'OutputMode','last')
    fullyConnectedLayer(no_of_class)
    softmaxLayer
    classificationLayer];

% The two option sets differed only in the 'Plots' value.
if show_plot==1
    plot_mode='training-progress';
else
    plot_mode='none';
end
train_opt=trainingOptions('adam', ...
    'MaxEpochs',networkiter, ...
    'MiniBatchSize',16, ...
    'GradientThreshold',2, ...
    'Shuffle','every-epoch', ...
    'Plots',plot_mode, ...
    'Verbose',false);

net=trainNetwork(traindata,trainclass,network_layer_infor,train_opt);
scores=predict(net,testdata);             % one row of class scores per sample

% Arg-max on the raw scores. Rounding them first turns every score below
% 0.5 into 0, so a sample without a dominant class was always reported as
% class 1.
[~,predicted_idx]=max(scores,[],2);

% Both vectors are positions in the label category list. The predicted
% position is used as is: converting it through categorical() and double()
% renumbers the labels whenever some class is never predicted.
target_idx=double(trainclass);
target_idx=target_idx(:);
predicted_idx=predicted_idx(:);

% One-hot encode (classes in rows, samples in columns) for confusion().
class_ids=(1:no_of_class).';
tardata=double(class_ids==target_idx.');
resdata=double(class_ids==predicted_idx.');
[~,confu_result]=confusion(tardata,resdata);

%% find accuracy
accuracy=(sum(diag(confu_result))/sum(confu_result(:)))*100;

final_result=cell(1,4);
final_result{1}=accuracy;
final_result{2}=confu_result;
final_result{3}=tardata;
final_result{4}=resdata;

8
textpreprocessing.m Normal file
View File

@ -0,0 +1,8 @@
function documents=textpreprocessing(textData)
%TEXTPREPROCESSING Turn raw text into cleaned, tokenized documents.
%
%   documents = textpreprocessing(textData)
%
%   Processing order: URLs are erased from the raw text, the text is
%   tokenized with at-mention detection, stop words are removed, the tokens
%   are lower-cased and finally punctuation is erased.
urlFreeText=eraseURLs(textData);
tokens=tokenizedDocument(urlFreeText,'DetectPatterns','at-mention');
documents=erasePunctuation(lower(removeStopWords(tokens)));
end