%
% WNNetsOverPQ_Kmeans
% Approximate nearest-neighbour search on MNIST: a fine Product Quantizer
% (PQ) re-ranks candidates pre-selected by coarse Willshaw Neural Networks.
% Originally a function:
%   [NNeighPerf, NormCpxty, NormPQCpxty] = WNNetsOverPQ_Kmeans(kk, mm, nRecall, Npeaks)
clc

addpath('./PQ Library');
addpath('./WNN Library');
addpath('./PQ Library/Yael Library');
set(0,'defaulttextinterpreter','latex');

% ---- Quantities of interest for the input Data Set --------------
nDim=784;    % Dimensionality of the data (28x28 MNIST images)
NLearn=6e4;  % Number of descriptors in the learn set
Ntrain=6e4;  % Number of descriptors in the train set
Ntests=1e4;  % Number of descriptors in the test set

% ---- Quantities of interest for Fine Product Quantization -------
kkf=256;        % Number of k-means clusters per fine sub-quantizer
mmf=16;         % Number of splits for fine PQ (must divide nDim)
mDimf=nDim/mmf; % Dimensionality of the split sub-vectors
nRecall=100;    % Length of the retrieved candidate list
recAtR=[1 2 5 10 20 50 100 200 500 1000 2000 5000 10000];
recAtR=recAtR(recAtR<=nRecall);  % keep only the recall@R values we can measure

% ---- Quantities of interest for Coarse Product Quantization -----
kkc=41;         % Number of k-means clusters per coarse sub-quantizer
mmc=2;          % Number of splits for coarse PQ (must divide nDim)
mDimc=nDim/mmc; % Dimensionality of the split sub-vectors

% ---- Quantities of interest for Willshaw Networks ---------------
nn=10;          % Number of vectors stored per WNN
qq=Ntrain/nn;   % Number of different WNNs
% Numbers of highest-scoring networks (peaks) to probe, from 1 up to all qq.
Npeaks=round([1 qq./[1000 600 300 200 100 20 10 5 4 3 2 4/3 1]]);

binaryLab=0;    % 1: hard (binary) coarse assignments; 0: soft similarity scores
Zoomed=0;       % 1: use the "zoomed" results-file variant

% Compose the cached-results file name from the two flags instead of
% enumerating all four flag combinations with duplicated sprintf calls.
if binaryLab
    resultsDir='./Binary Result Sets';
else
    resultsDir='./Result Sets';
end
if Zoomed
    zoomSuffix='_Zoomed';
else
    zoomSuffix='';
end
resultsFileName=sprintf('%s/finePQ_m%d_k%d_coarseWNN_n%d_m%d_k%d%s.mat',...
    resultsDir,mmf,kkf,nn,mmc,kkc,zoomSuffix);
% Results are cached on disk: if a matching results file already exists,
% load it and jump straight to the visualization sections; otherwise run
% the full pipeline inside the else branch below.
if exist(resultsFileName,'file')==2
%% LOADING RESULTS SECTION
load(resultsFileName);
else
%% LOADING DATA SECTION
% Read the raw MNIST IDX files: image files carry a 16-byte header and
% label files an 8-byte header; all payloads are unsigned bytes.
% Each image becomes one nDim-long column of the data matrices.

fileOffset = 16; % bytes of file offset (IDX3 image header)

fileID = fopen('../Data/train-images.idx3-ubyte');
if fileID == -1
    error('WNNetsOverPQ:io','Cannot open ../Data/train-images.idx3-ubyte');
end
bWSetVec = fread(fileID,nDim*Ntrain+fileOffset,'uint8');
bWSetVec = bWSetVec(fileOffset+1:end);     % strip the header bytes
bWSetMat = reshape(bWSetVec,nDim,Ntrain);  % one train image per column
fclose(fileID);

fileID = fopen('../Data/t10k-images.idx3-ubyte');
if fileID == -1
    error('WNNetsOverPQ:io','Cannot open ../Data/t10k-images.idx3-ubyte');
end
bZSetVec = fread(fileID,nDim*Ntests+fileOffset,'uint8');
bZSetVec = bZSetVec(fileOffset+1:end);
bZSetMat = reshape(bZSetVec,nDim,Ntests);  % one test image per column
fclose(fileID);

fileOffsetL = 8; % bytes of file offset (IDX1 label header)

fileIDL = fopen('../Data/train-labels.idx1-ubyte');
if fileIDL == -1
    error('WNNetsOverPQ:io','Cannot open ../Data/train-labels.idx1-ubyte');
end
bWLab = fread(fileIDL,Ntrain+fileOffsetL,'uint8');
bWLab = bWLab(fileOffsetL+1:end);  % train labels, one per image
fclose(fileIDL);

fileIDL = fopen('../Data/t10k-labels.idx1-ubyte');
if fileIDL == -1
    error('WNNetsOverPQ:io','Cannot open ../Data/t10k-labels.idx1-ubyte');
end
bZLab = fread(fileIDL,Ntests+fileOffsetL,'uint8');
bZLab = bZLab(fileOffsetL+1:end);  % test labels, one per image
fclose(fileIDL);

clear bWSetVec bZSetVec fileID fileIDL fileOffset fileOffsetL
%imshow(reshape(bWSetMat(:,1),sqrt(nDim),sqrt(nDim))')

% SIFT1M alternative input (requires the Yael fvecs/ivecs readers):
%bVSetMat=double(fvecs_read('../Data/sift/sift_learn.fvecs'));
%bWSetMat=double(fvecs_read('../Data/sift/sift_base.fvecs'));
%bZSetMat=double(fvecs_read('../Data/sift/sift_query.fvecs'));
%iMinDistExh=ivecs_read('../Data/sift/sift_groundtruth.ivecs')+1;
-
%% COARSE PRODUCT QUANTIZATION SECTION
% Coarse PQ: split each vector into mmc chunks and k-means-quantize each
% chunk with kkc centroids (Yael library). Results are cached on disk.
% binaryLab==1: keep hard centroid indices of the test set (CZidxc);
% binaryLab==0: keep soft per-centroid similarity scores (sZSetMatc).

if(binaryLab)
coarseFileName=sprintf('./Coarse Quantization Indices and Dissimilarities/coarse_CWidx_CZidx_k%d_m%d.mat',kkc,mmc);
else
coarseFileName=sprintf('./Coarse Quantization Indices and Dissimilarities/coarse_CWidx_sZSetMat_k%d_m%d.mat',kkc,mmc);
end
if exist(coarseFileName,'file')==2
load(coarseFileName);
else
str=sprintf('Coarse PQ: computing m=%1.0f, k=%1.0f subcentroids ',mmc,kkc);
% ('.')*ones(1,n) is a numeric row of char code 46; %s renders it as n dots.
fprintf('%s%s ',str,('.')*ones(1,55-length(str)));
CSubSetMatc=zeros(mDimc,kkc,mmc);  % coarse sub-centroids, one page per split
CWidxc=zeros(Ntrain,mmc);          % coarse centroid index of each train vector
CZidxc=zeros(Ntests,mmc);          % coarse centroid index of each test vector (binary mode)
sZSetMatc=zeros(kkc*mmc,Ntests);   % soft scores of each test vector (soft mode)
pperc=[];
nsplits=10;       % quantize the train set in 10 batches to bound memory use
nnsp=Ntrain/nsplits;
for jj=1:mmc
[CSubSetMatc(:,:,jj), ~ ]=yael_kmeans(...
single(bWSetMat((jj-1)*mDimc+1:jj*mDimc,:)),kkc, 'niter', 100, 'verbose', 0);
for rr=1:nsplits
[~,CWidxc((rr-1)*nnsp+1:rr*nnsp,jj)]=Quantization(...
bWSetMat((jj-1)*mDimc+1:jj*mDimc,(rr-1)*nnsp+1:rr*nnsp),...
CSubSetMatc(:,:,jj));
end
if(binaryLab)
[~,CZidxc(:,jj)]=Quantization(...
bZSetMat((jj-1)*mDimc+1:jj*mDimc,:),...
CSubSetMatc(:,:,jj));
else
ZDistMatc=EuclideanDistancesMat(bZSetMat((jj-1)*mDimc+1:jj*mDimc,:),CSubSetMatc(:,:,jj));
%sZSetMatc((jj-1)*kkc+1:jj*kkc,:)=exp(-sqrt(ZDistMatc)');
%sZSetMatc((jj-1)*kkc+1:jj*kkc,:)=exp(1-bsxfun(@rdivide, abs(ZDistMatc)', min(abs(ZDistMatc)')));
% Score = min(dist)/dist per query (ldivide: A.\B = B./A), so the closest
% centroid of each split scores 1 and farther centroids score less.
sZSetMatc((jj-1)*kkc+1:jj*kkc,:)=(bsxfun(@ldivide, abs(ZDistMatc)', min(abs(ZDistMatc)')));
end
% In-place textual progress indicator: erase the previous percentage
% with backspaces (char 8) and print the new one.
perc=sprintf('%2.2f%%',jj/mmc*100);
fprintf(1,'%s',sprintf('\b')*ones(1,length(pperc)));
fprintf(1,'%s',perc); pperc=perc;
end
%clearvars CSubSetMatc ZDistMatc
fprintf(1,'%s',sprintf('\b')*ones(1,length(pperc)));
fprintf('Done.\n');
if(binaryLab)
save(coarseFileName,'CWidxc','CZidxc');
else
save(coarseFileName,'CWidxc','sZSetMatc');
end
end
-
%% FINE PRODUCT QUANTIZATION SECTION
% Fine PQ: mmf splits with kkf centroids each. For the test set we keep the
% full test-to-centroid distance tables (ZDistMatf), later consumed by the
% Asymmetric Distance Computation (ADC) re-ranking. Cached on disk.

fineFileName=sprintf('./Fine Quantization Indices and Distances/fine_CWidx_ZDistMat_k%d_m%d.mat',kkf,mmf);
if exist(fineFileName,'file')==2
load(fineFileName);
else
str=sprintf('Fine PQ: computing m=%1.0f, k=%1.0f subcentroids ',mmf,kkf);
fprintf('%s%s ',str,('.')*ones(1,55-length(str)));
CSubSetMatf=zeros(mDimf,kkf,mmf);  % fine sub-centroids, one page per split
CWidxf=zeros(Ntrain,mmf);          % fine centroid index of each train vector
ZDistMatf=zeros(Ntests,kkf,mmf);   % test-to-centroid distances per split
pperc=[];
for jj=1:mmf
[CSubSetMatf(:,:,jj), ~ ]=yael_kmeans(...
single(bWSetMat((jj-1)*mDimf+1:jj*mDimf,:)),kkf, 'niter', 100, 'verbose', 0);
ZDistMatf(:,:,jj)=EuclideanDistancesMat(bZSetMat((jj-1)*mDimf+1:jj*mDimf,:),CSubSetMatf(:,:,jj));
[~,CWidxf(:,jj)]=Quantization(bWSetMat((jj-1)*mDimf+1:jj*mDimf,:),CSubSetMatf(:,:,jj));
% In-place textual progress indicator (backspace trick).
perc=sprintf('%2.2f%%',jj/mmf*100);
fprintf(1,'%s',sprintf('\b')*ones(1,length(pperc)));
fprintf(1,'%s',perc);pperc=perc;
end
clearvars CSubSetMatf bZSetMat
fprintf(1,'%s',sprintf('\b')*ones(1,length(pperc)));
fprintf('Done.\n');
save(fineFileName,'CWidxf','ZDistMatf');
end
clearvars bWSetMat bVSetMat
%% PREPROCESSING DATA TO STORE IN WNNets SECTION
% Assign every train vector to one of the qq Willshaw networks from its
% coarse code, then group per network the original train indices
% (RunPermIndices) and the coarse centroid indices (RunCentroidsIndices).
str=sprintf('Building Learning Data Sets ');
fprintf('%s%s ',str,('.')*ones(1,55-length(str)));
[minDh,learningNetsIdx]=QuantHammingCanonicalIndices_ceil(CWidxc,qq);
nStoredVec=hist(learningNetsIdx,1:qq);  % occupancy (vector count) of each network
fprintf('Done.\n');
%break;

eWDiffFeatsSplittedRpi(1:qq)=struct('RunPermIndices',[]);
eWDiffFeatsSplittedRci(1:qq)=struct('RunCentroidsIndices',[]);
multiCount=ones(1,qq);  % next free slot inside each network's arrays
str=sprintf('Preprocessing Data for Learning ');
fprintf('%s%s ',str,('.')*ones(1,55-length(str)));
pperc=[];
for ii=1:Ntrain
eWDiffFeatsSplittedRpi(learningNetsIdx(ii)).RunPermIndices(multiCount(learningNetsIdx(ii)))=ii;
eWDiffFeatsSplittedRci(learningNetsIdx(ii)).RunCentroidsIndices(:,multiCount(learningNetsIdx(ii)))=CWidxc(ii,:);
multiCount(learningNetsIdx(ii))=multiCount(learningNetsIdx(ii))+1;
% In-place textual progress indicator (backspace trick).
perc=sprintf('%2.2f%%',ii/Ntrain*100);
fprintf(1,'%s',sprintf('\b')*ones(1,length(pperc)));
fprintf(1,'%s',perc); pperc=perc;
end
fprintf(1,'%s',sprintf('\b')*ones(1,length(pperc)));
fprintf('Done.\n');
clearvars eWSetMat permRand CWidxc %multiCount nStoredVec
-
%% BUILDING WNNets AND SCORES COMPUTATION SECTION
% Build one Willshaw associative matrix per network from the unique coarse
% codes it stores, then score every test query against every network:
% sX0(i,l) is the normalized score of query i on network l.

str=sprintf('Building %d Willshaw NNets and Computing Scores ',qq);
fprintf('%s%s ',str,('.')*ones(1,55-length(str)));
II=eye(kkc);  % one-hot encoding of the kkc coarse centroids
P0=0;         % accumulates the networks' mean connection density
%Ntests=1;
sX0=zeros(Ntests,qq);
pperc=[];
zc=zeros(kkc*mmc,Ntests);  % one-hot-stacked test codes (binary mode only)
if(binaryLab)
for jj=1:mmc
zc((jj-1)*kkc+1:jj*kkc,:)=II(CZidxc(1:Ntests,jj),:)';
end
end

for jj=1:qq
% Duplicate codes stored in the same network add no new connections.
uniqCent=unique(eWDiffFeatsSplittedRci(jj).RunCentroidsIndices','rows');
WXsmatrix=BuildNetworkfromIdxs(uniqCent',II);
P0=P0+mean(mean(WXsmatrix));
if(binaryLab)
%sX0(:,jj)=diag(zc(:,1:Ntests)'*double(WXsmatrix)*zc(:,1:Ntests));
% Quadratic-form score z'*W*z, normalized by the query energy sum(z.^2).
sX0(:,jj)=1./(sum(zc(:,1:Ntests).^2))'.*diag(zc(:,1:Ntests)'*double(WXsmatrix)*zc(:,1:Ntests));
else
%sX0(:,jj)=diag(sZSetMatc(:,1:Ntests)'*double(WXsmatrix)*sZSetMatc(:,1:Ntests));
% Same quadratic form on the soft scores; note the normalization here is
% (sum s)^2 rather than sum(s.^2) as in the binary branch.
sX0(:,jj)=1./(sum(sZSetMatc(:,1:Ntests)).^2)'.*diag(sZSetMatc(:,1:Ntests)'*double(WXsmatrix)*sZSetMatc(:,1:Ntests));
end
% In-place textual progress indicator (backspace trick).
perc=sprintf('%2.2f%%',jj/qq*100);
fprintf(1,'%s',sprintf('\b')*ones(1,length(pperc)));
fprintf(1,'%s',perc);pperc=perc;
end
fprintf(1,'%s',sprintf('\b')*ones(1,length(pperc)));
fprintf('Done.\n');
avgP0=P0/qq;  % average network density, consumed by the cost model
clearvars sZSetMatc eWDiffFeatsSplittedRsm WXsmatrix eWDiffFeatsSplittedRci uniqCent
-
%% ADC COMPUTATION FOR PERFORMANCES COMPUTATION SECTION
% Prepare the per-query candidate networks for the performance loop.

str=sprintf('Computing Performances: L=[%d:%d], recalls@[%d:%d] ',Npeaks(1),Npeaks(end),recAtR(1),recAtR(end));
fprintf('%s%s ',str,('.')*ones(1,55-length(str)));
NNClassPerf=zeros(length(recAtR),length(Npeaks));  % recall@R x L0 performance table
iXMinDistADC=zeros(1,nRecall);     % ADC ranking within the candidate list
actIXMinDistADC=zeros(1,nRecall);  % ranking mapped back to train-set indices
% The original code sorted sX0 twice with identical arguments; sort once
% and derive the top-Npeaks(end) networks per query from that result.
[ssxo,isxo]=sort(sX0,2,'descend');
NNmostLikely=isxo(:,1:Npeaks(end))';
- %%
- % close all;
- % for itest=500
- %
- % lrp2=0;
- % rpixcs2=[];
- % for jj=1:qq
- % lrp2(jj)=length(eWDiffFeatsSplittedRpi(jj).RunPermIndices);
- % rpixcs2=[ rpixcs2 eWDiffFeatsSplittedRpi(jj).RunPermIndices];
- % end
- %
- % %for jj=1:qq
- % % exhNN(jj)=any(eWDiffFeatsSplittedRpi(jj).RunPermIndices == iMinDistExh(1,itest));
- % %end
- % %exhNNidx=find(exhNN==1);
- %
- % interl=randperm(qq);
- % revint(interl)=1:qq;
- %
- % figure()
- % plot((sX0(itest,interl))); hold on;
- %
- % [~,indL0]=sort(sX0(itest,:),'descend');
- % stem(revint(indL0(1:10)),(sX0(itest,indL0(1:10)))); hold on;
- %
- % stem(revint(exhNNidx)*[1 1],[0 sX0(itest,exhNNidx)]);
- % end
- % xlabel('Network Index $l=1,\dots,L$','interpreter','latex');
- % ylabel('Scores $s(\mathbf{z}_0,\mathcal{Z}_l)$','interpreter','latex');
- % h=legend('Score of the $l$-th Network','$L_0$ Highest Scores Networks','Score of the NN$(\mathbf{x}_0)$ Network');
- % set(h,'interpreter','latex');
-
- %%
% Occupancy plot: how many train vectors landed in each of the qq networks,
% with the mean occupancy overlaid as a flat reference line.
plot(nStoredVec); hold on;
plot(repmat(mean(nStoredVec),size(nStoredVec)));
title(sprintf('Fine PQ $(k_f=%d, m_f=%d)$ // Coarse WNNets $(k_c=%d,m_c=%d,q=%d,n=%d)$',kkf,mmf,kkc,mmc,qq,nn));
xlabel('Network Index $l=1,\dots,L$','interpreter','latex');
ylabel('Number of vectors stored in each Neural Network $\mathcal{Z}_l$','interpreter','latex');
-
-
clearvars sortemp
pperc=[];
% For every query and every probed-network count Npeaks(tt): gather the
% train indices stored in the top-scoring networks, re-rank them with
% fine-PQ Asymmetric Distance Computation, and accumulate recall@R hits
% (label match within the first R retrieved) into NNClassPerf.
for jj=1:Ntests
for tt=1:length(Npeaks)
if tt>1
% Incremental candidate set: keep the previous best nRecall candidates
% and add only the vectors of the newly probed networks.
retIdxs=actIXMinDistADC;
for ll=Npeaks(tt-1)+1:Npeaks(tt)
retIdxs=[retIdxs, eWDiffFeatsSplittedRpi(NNmostLikely(ll,jj)).RunPermIndices];
end
else
retIdxs=[];
for ll=1:Npeaks(tt)
retIdxs=[retIdxs, eWDiffFeatsSplittedRpi(NNmostLikely(ll,jj)).RunPermIndices];
end
end
iXMinDistADC=AsymmetricDistanceComputationWDist(CWidxf(retIdxs,:)', ZDistMatf(jj,:,:), nRecall);
numRetIdxs=length(retIdxs);
if numRetIdxs >= nRecall
actIXMinDistADC=retIdxs(iXMinDistADC);
else
% Fewer candidates than nRecall: pad with train index 1 so the
% recall@R slicing below stays well defined.
actIXMinDistADC(1:numRetIdxs)=retIdxs(iXMinDistADC(1:numRetIdxs));
actIXMinDistADC(numRetIdxs+1:end)=ones(1,nRecall-numRetIdxs);
end

for rr=1:length(recAtR)
if recAtR(rr)==1
NNClassPerf(rr,tt)=NNClassPerf(rr,tt)+sum(bWLab(actIXMinDistADC(:,1))==bZLab(jj));
else
% Hit if any of the first recAtR(rr) retrieved train vectors
% carries the query's label.
NNClassPerf(rr,tt)=NNClassPerf(rr,tt)+sum(max((bWLab(actIXMinDistADC(:,1:recAtR(rr)))==repmat(bZLab(jj),recAtR(rr),1))'));
end
end

%NNClassPerf(:,tt) = NNClassPerf(:,tt)+ RecallTest(actIXMinDistADC,nRecall,iMinDistExh(:,jj))';
% In-place textual progress indicator (backspace trick).
perc=sprintf('%2.2f%%',((jj-1)*length(Npeaks)+tt)/(Ntests*length(Npeaks))*100);
fprintf(1,'%s',sprintf('\b')*ones(1,length(pperc)));
fprintf(1,'%s',perc);pperc=perc;
end
end
NNClassPerf=NNClassPerf/Ntests;  % convert hit counts to rates
fprintf(1,'%s',sprintf('\b')*ones(1,length(pperc)));
fprintf('Done.\n');

clearvars SortQueryNetIdx actIXMinDistADC RetrievedIdxs bZSetMat CWidxf NNMostLikely eWDiffFeatsSplittedRpi sX0
-
%% COMPUTATIONAL COST EVALUATION SECTION
% Cost model per query: fine + coarse centroid distance tables
% (kkf*nDim + kkc*nDim), network scoring weighted by the average network
% density avgP0, plus ADC re-ranking of Npeaks*nn candidates over mmf
% splits. Since Npeaks is a vector, Cpxty has one entry per L0 value.
% NOTE(review): the scoring term avgP0*(mmc*kkc)^2*qq presumably follows
% the accompanying paper's derivation — not verifiable from this file.

Cpxty=kkf*nDim+kkc*nDim+avgP0*(mmc*kkc)^2*qq+Npeaks*nn*mmf;
NormCpxty=Cpxty/(nDim*Ntrain);                     % normalized to exhaustive search
NormPQCpxty=(kkf*nDim+mmf*Ntrain)/(nDim*Ntrain);   % plain fine-PQ baseline cost
NormNormCpxty=NormCpxty/NormPQCpxty;               % cost relative to plain PQ

%% SAVING RESULTS SECTION

save(resultsFileName,'NormCpxty','NormPQCpxty','NNClassPerf','Npeaks');
end
-
%% RESULTS VISUALIZATION SECTION
% Recall@R versus normalized computational cost, one curve per recall level.

figure();
plot((NormCpxty(1:end)/NormPQCpxty),NNClassPerf(:,1:end)','x-'); hold on;
%text(NormCpxty(jj)/NormPQCpxty,1.005,sprintf('L=%d',Npeaks(jj)),'interpreter','latex');
%plot((NormPQCpxty),NNClassPerf(:,end),'*');
%text((NormPQCpxty),1.005,'PQ','interpreter','latex');
title(sprintf('Fine PQ $(k_f=%d, m_f=%d)$ + Coarse WNNets $(k_c=%d,m_c=%d,q=%d,n=%d)$',kkf,mmf,kkc,mmc,qq,nn));
ylabel('Nearest Neighbour Search Performances','interpreter','latex');
xlabel('Computational Cost (Normalized to Exhaustive Search)','interpreter','latex');
% Build the legend from recAtR so it always matches the rows of
% NNClassPerf instead of hard-coding seven recall values.
recallLabels=arrayfun(@(r) sprintf('recall @%d',r),recAtR,'UniformOutput',false);
h=legend(recallLabels{:},'Location','southeast');
grid on; grid minor;
set(h,'interpreter','latex');
%ylim([0.84 1.002]);
%xlim([NormCpxty(1)-0.0005 NormPQCpxty(end)+.0005])
ylim([0.84 1.01]);
xlim([0.21 1.1]);
-
%% Lower RESULTS VISUALIZATION SECTION
% Zoomed view over a few representative L0 (number of probed networks) values.
MostRelPeaks=[1 6 60 300 600];

% Position of each representative peak inside Npeaks. find(...,1) keeps the
% arrayfun output scalar even if Npeaks ever contains duplicates.
mri=arrayfun(@(ll) find(Npeaks==MostRelPeaks(ll),1),1:length(MostRelPeaks));

figure();
plot((NormCpxty(mri)/NormPQCpxty),NNClassPerf(:,mri)','x'); hold on;
plot((NormCpxty(1:mri(end))/NormPQCpxty),NNClassPerf(:,1:mri(end))'); hold on;
for jj=mri
    % BUG FIX: the original format string '$L_0=\\frac{d}{d}$' contained no
    % conversion specifier, so Npeaks(jj) was ignored and every marker was
    % labeled with the literal text "$L_0=\frac{d}{d}$". Print the value.
    text(NormCpxty(jj)/NormPQCpxty+0.001,NNClassPerf(1,jj)-0.002,sprintf('$L_0=%d$',Npeaks(jj)),'interpreter','latex');
end
%plot((NormPQCpxty),NNClassPerf(:,end),'*');
%text((NormPQCpxty),1.005,'PQ','interpreter','latex');
%title(sprintf('Fine PQ $(k_f=%d, m_f=%d)$ + Coarse WNNets $(k_c=%d,m_c=%d,q=%d,n=%d)$',kkf,mmf,kkc,mmc,qq,nn));
ylabel('Classification Performances','interpreter','latex');
xlabel('Computational Cost (Normalized to Fine PQ)','interpreter','latex');
% Legend generated from recAtR so it stays in sync with NNClassPerf rows.
recallLabels=arrayfun(@(r) sprintf('Recall@%d',r),recAtR,'UniformOutput',false);
h=legend(recallLabels{:},'Location','southeast');
grid on; grid minor;
set(h,'interpreter','latex');
%ylim([0.84 1.002]);
%xlim([NormCpxty(1)-0.0005 NormPQCpxty(end)+.0005])
ylim([0.864 1.006]);
xlim([0.22 .31]);
|