1 |
%%% data generator
clear
close all
%% Configs
numDims = 100;        % dimensionality of every generated point
numClusters = 10;     % number of Gaussian blobs to generate
stdClust = 0.5;       % std within cluster - not so bad at 1
clusterSep = 25;      % cluster means are drawn uniformly from (0, clusterSep) in each dimension
ptsPerCluster = 250;  % number of points sampled for each cluster
%% Generate the raw clusters (points are stored cluster after cluster)
stds = stdClust .* ones(numClusters, 1); % per-cluster std (all equal here)
clustMeans = clusterSep*rand(numClusters, numDims); % cluster means are strictly positive

% Preallocate the outputs instead of growing them inside the loops.
numPts = numClusters * ptsPerCluster;
allPts_unshuff = zeros(numPts, numDims);
clustIDs_unshuff = zeros(1, numPts); % row vector of cluster labels

for clustIdx = 1:numClusters
    % Rows occupied by this cluster in the unshuffled matrix.
    rows = (clustIdx-1)*ptsPerCluster + (1:ptsPerCluster);
    % One randn call per cluster: the matrix is filled column by column, so
    % the random stream is consumed in the same order as drawing one
    % ptsPerCluster-by-1 vector per dimension.
    noise = stds(clustIdx) .* randn(ptsPerCluster, numDims);
    % ptsPerCluster points in numDims-dimensional space, centred on the cluster mean.
    allPts_unshuff(rows, :) = bsxfun(@plus, clustMeans(clustIdx, :), noise);
    clustIDs_unshuff(rows) = clustIdx;
end
% De-mean the data: subtract from every dimension its mean over all points.
allPts_dm_unshuff = allPts_unshuff;
for d = 1:numDims
    colMean = mean(allPts_unshuff(:, d));
    allPts_dm_unshuff(:, d) = allPts_unshuff(:, d) - colMean;
end

% Replace clustMeans with the unit-length mean of each de-meaned cluster;
% these are what the dot-product calculation uses.
clustMeans = zeros(size(clustMeans));
for k = 1:numClusters
    % Rows of cluster k: points are still stored cluster after cluster here.
    rowsK = (k-1)*ptsPerCluster + (1:ptsPerCluster);
    centroid = mean(allPts_dm_unshuff(rowsK, :), 1);
    clustMeans(k, :) = centroid / norm(centroid); % normalized
end

% Apply one random permutation to the raw points, the de-meaned points and
% the labels so that they stay aligned with each other.
numPtsTotal = ptsPerCluster*numClusters;
shuffIdx = randperm(numPtsTotal);
allPts = allPts_unshuff(shuffIdx, :);
allPts_dm = allPts_dm_unshuff(shuffIdx, :);
clustIDs = clustIDs_unshuff(shuffIdx);
% Cluster-to-cluster comparisons of the normalized cluster means.
% ccDot(i,j)  = |<mean_i, mean_j>|  (absolute dot product)
% ccDist(i,j) = ||mean_i - mean_j|| (Euclidean distance)
ccDot = abs(clustMeans * clustMeans');
ccDist = zeros(numClusters);
for clust_i = 1:numClusters
    % Distances from every cluster mean to mean clust_i in one shot.
    meanDiffs = bsxfun(@minus, clustMeans, clustMeans(clust_i, :));
    ccDist(:, clust_i) = sqrt(sum(meanDiffs.^2, 2));
end

% Point-to-point Euclidean distances. The UNSHUFFLED points are used on
% purpose so that points of the same cluster occupy contiguous rows/columns.
% Each iteration fills a whole column, avoiding one norm() call per pair.
numPtsAll = size(allPts_unshuff, 1);
ptDist = zeros(numPtsAll);
for ptIdx = 1:numPtsAll
    ptDiffs = bsxfun(@minus, allPts_unshuff, allPts_unshuff(ptIdx, :));
    ptDist(:, ptIdx) = sqrt(sum(ptDiffs.^2, 2));
end
% Scatter plot of the de-meaned points; only possible for 2-D data.
if numDims == 2
    figure(10);
    plot(allPts_dm(:,1)', allPts_dm(:,2)', '.')
    grid on;
    hold on;
    plot(0, 0, 'r*') % mark the origin (the mean of the de-meaned data)
    hold off;
end

figure(1); % useful bc weight vectors should be as separate as possible
imagesc(ccDot); colorbar
title('Dot Products of Cluster Means')
xlabel('Cluster')
ylabel('Cluster')

figure(2)
imagesc(ccDist); colorbar
% ccDist holds distances between the unit-normalized cluster means, not an
% average of point distances, so the title says exactly that.
title('Distance between Normalized Cluster Means')
xlabel('Cluster')
ylabel('Cluster')

figure(3)
imagesc(ptDist); colorbar
title('Pointwise Distances')
% ptDist is indexed by the unshuffled point order (cluster after cluster).
xlabel('Point (unshuffled order)')
ylabel('Point (unshuffled order)')
% Output file name encodes the dimensionality, cluster count and points per
% cluster, e.g. genData100D_10c_250ppc.mat
savename = sprintf('genData%dD_%dc_%dppc.mat', numDims, numClusters, ptsPerCluster);

% Keep only the generated data sets, their labels and the file name; every
% other workspace variable (including keepVars itself) is cleared, so
% save() writes exactly the variables listed here.
keepVars = {'allPts', 'allPts_dm', 'allPts_unshuff', ...
            'allPts_dm_unshuff', 'clustIDs', 'clustIDs_unshuff', 'savename'};
clearvars('-except', keepVars{:});
save(savename)
|