lshlookup.m 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. function [iNN,cand] = lshlookup(x0,x,T,varargin)
  2. % [iNN,cand] = lshlookup(x0,x,T)
  3. %
  4. % iNN contains indices of matches in T for a single query x0;
  5. % x is the representation in the feature space; assumes to be a cell
  6. % array with equal size cells (this is a hack around Matlab's problem
  7. % with allocating large contiguous chunks of memory);
  8. % dfun is the dist. function ('l1','l2','cos')
  9. %
  10. % returns in iNN the indices of the found NN; cand is the # of
  11. % examined candidates (i.e., size of the union of the matching buckets
  12. % in all tables)
  13. %
  14. % Optional arguments:
  15. %
  16. % 'k' : if given, return this many neighbors (default 1)
  17. % 'sel' : if 'random', select random neighbors matching other
  18. % criteria. If 'best', select best (closest) matches. Default is 'best'.
  19. % 'r' : max. distance cut-off
  20. % 'distfun', 'distargs' : distance function (and additional args.) to
  21. % use. Default: L1 if T.type is 'lsh' and L2 if it's 'e2lsh'.
  22. % 'verb' : verbosity (overrides T.verbose)
  23. %
  24. % (C) Greg Shakhnarovich, TTI-Chicago (2008)
  25. distfun='lpnorm';
  26. switch T(1).type,
  27. case 'lsh', distargs={1};
  28. case 'e2lsh', distargs={2};
  29. end
  30. k=1;
  31. r=inf;
  32. sel='best';
  33. f=[];
  34. fargs=[];
  35. verb=T(1).verbose;
  36. % parse args.
  37. for a=1:2:length(varargin)
  38. eval(sprintf('%s = varargin{a+1};',varargin{a}));
  39. end
  40. l = length(T);
  41. iNN=[];
  42. % find the union of buckets in all tables that match query
  43. for j=1:l
  44. % look up T_j
  45. % buck is the # of bucket in T{j}
  46. buck = findbucket(T(j).type,x0,T(j).I);
  47. % find the bucket in j-th table
  48. key = lshhash(buck);
  49. ihash = T(j).bhash{key}; % possible matching buckets
  50. if (~isempty(ihash)) % nothing matches
  51. b = ihash(find(all(bsxfun(@eq,buck,T(j).buckets(ihash,:)),2)));
  52. if (~isempty(b))
  53. iNN = [iNN T(j).Index{b}];
  54. end
  55. end
  56. end
  57. % delete duplicates
  58. [iNN,iu]=unique(iNN);
  59. cand = length(iNN);
  60. % now iNN has the collection of candidate indices
  61. % we can start examining them
  62. if (verb > 0)
  63. fprintf('Examining %d candidates\n',cand);
  64. end
  65. if (~isempty(iNN))
  66. if (strcmp(sel,'best'))
  67. D=feval(distfun,x0,Xsel(x,iNN),distargs{:});
  68. [dist,sortind]=sort(D);
  69. ind = find(dist(1:min(k,length(dist)))<=r);
  70. iNN=iNN(sortind(ind));
  71. else % random
  72. rp=randperm(cand);
  73. choose=[];
  74. for i=1:length(rp)
  75. d = feval(distfun,x0,Xsel(x,iNN(rp(i))),distargs{:});
  76. if (d <= r)
  77. choose = [choose iNN(rp(i))];
  78. if (length(choose) == k)
  79. break;
  80. end
  81. end
  82. end
  83. iNN = choose;
  84. end
  85. end
  86. %%%%%%%%%%%%%%%%%%%%%%%%55
  87. function x=Xsel(X,ind)
  88. % x=Xsel(X,ind)
  89. % selects (i.e. collects) columns of cell array X
  90. % (automatically determining the class, and looking for each column in
  91. % the right cell.)
  92. if (~iscell(X))
  93. x=X(:,ind);
  94. return;
  95. end
  96. d=size(X{1},1);
  97. if (strcmp(class(X{1}),'logical'))
  98. x=false(d,length(ind));
  99. else
  100. x=zeros(d,length(ind),class(X{1}));
  101. end
  102. sz=0; % offset of the i-th cell in X
  103. collected=0; % offset within x
  104. for i=1:length(X)
  105. thisCell=find(ind > sz & ind <= sz+size(X{i},2));
  106. if (~isempty(thisCell))
  107. x(:,thisCell)=X{i}(:,ind(thisCell)-sz);
  108. end
  109. collected=collected+length(thisCell);
  110. sz=sz+size(X{i},2);
  111. end