56Fe
/
Nearest-Neighbour-Search-Neural-Networks-Product-Quantization


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
							function [iNN,cand] = lshlookup(x0,x,T,varargin)
% [iNN,cand] = lshlookup(x0,x,T)
%
%   iNN contains indices of matches in T for a single query x0;
%   x is the representation in the feature space; assumes to be a cell
%   array with equal size cells (this is a hack around Matlab's problem
%   with allocating large contiguous chunks of memory);
%   dfun is the dist. function ('l1','l2','cos')
%   
%   returns in iNN the indices of the found NN; cand is the # of 
%   examined candidates (i.e., size of the union of the matching buckets
%   in all tables)
%
% Optional arguments:
%
%   'k' : if given, return this many neighbors (default 1)
%   'sel' : if 'random', select random neighbors matching other
%     criteria. If 'best', select best (closest) matches. Default is 'best'.
%   'r' : max. distance cut-off
%   'distfun', 'distargs' : distance function (and additional args.) to
%     use. Default: L1 if T.type is 'lsh' and L2 if it's 'e2lsh'.
%   'verb' : verbosity (overrides T.verbose)
%
% (C) Greg Shakhnarovich, TTI-Chicago (2008)

distfun='lpnorm';
switch T(1).type,
 case 'lsh', distargs={1};
 case 'e2lsh', distargs={2};
end
k=1;
r=inf;
sel='best';
f=[];
fargs=[];
verb=T(1).verbose;

% parse args.
for a=1:2:length(varargin)
  eval(sprintf('%s = varargin{a+1};',varargin{a}));
end


l = length(T);

iNN=[];

% find the union of buckets in all tables that match query
for j=1:l
  % look up T_j
  % buck is the # of bucket in T{j}
  buck = findbucket(T(j).type,x0,T(j).I);
  % find the bucket in j-th table
  key = lshhash(buck);
  ihash = T(j).bhash{key}; % possible matching buckets
  if (~isempty(ihash)) % nothing matches
    b = ihash(find(all(bsxfun(@eq,buck,T(j).buckets(ihash,:)),2)));
    if (~isempty(b))
      iNN = [iNN T(j).Index{b}];
    end
  end
end

% delete duplicates
[iNN,iu]=unique(iNN);
cand = length(iNN);

% now iNN has the collection of candidate indices 
% we can start examining them

if (verb > 0)
  fprintf('Examining %d candidates\n',cand);
end

if (~isempty(iNN))
  
  if (strcmp(sel,'best'))

    D=feval(distfun,x0,Xsel(x,iNN),distargs{:});
    [dist,sortind]=sort(D);
    ind = find(dist(1:min(k,length(dist)))<=r);
    iNN=iNN(sortind(ind));
    
  else % random
    
    rp=randperm(cand);
    choose=[];
    for i=1:length(rp)
      d = feval(distfun,x0,Xsel(x,iNN(rp(i))),distargs{:});
      if (d <= r)
	choose = [choose iNN(rp(i))];
	if (length(choose) == k)
	  break;
	end
      end
    end
    iNN = choose;
  end
  
end


%%%%%%%%%%%%%%%%%%%%%%%%55 
function x=Xsel(X,ind)
% x=Xsel(X,ind)
% selects (i.e. collects) columns of cell array X
% (automatically determining the class, and looking for each column in
% the right cell.)

if (~iscell(X))
  x=X(:,ind);
  return;
end

d=size(X{1},1);

if (strcmp(class(X{1}),'logical'))
  x=false(d,length(ind));
else
  x=zeros(d,length(ind),class(X{1}));
end
sz=0; % offset of the i-th cell in X
collected=0; % offset within x
for i=1:length(X)
  thisCell=find(ind > sz & ind <= sz+size(X{i},2));
  if (~isempty(thisCell))
    x(:,thisCell)=X{i}(:,ind(thisCell)-sz);
  end
  collected=collected+length(thisCell);
  sz=sz+size(X{i},2);      
end