123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
% Studyforrest2Arff.m
%
% Converts gaze data from the studyforrest file format to ARFF. The input
% consists of a gaze file and a frame timing (events) file.
%
% input:
%   eventsFile  - tab-separated file containing events (onset, duration,
%                 frameidx, videotime, audiotime, lasttrigger)
%   gazeFile    - tab-separated file containing gaze (x, y, pupil dilation
%                 (units?), frameId reference)
%   metadata    - metadata in the form accepted by SaveArff.m. The fields
%                 read here are width_px, height_px, width_mm, height_mm,
%                 distance_mm and extra (cell array with one row per entry;
%                 columns 1 and 2 are printed with %s)
%   outputFile  - (optional) name of the ARFF file. If it is not provided
%                 the data is stored in the same directory as eventsFile,
%                 using its name up to the last '_events' (or the whole
%                 name if that part is absent) with an .arff extension
%
% output:
%   none. The ARFF file is written to disk. An error is raised if the
%   output file cannot be opened for writing.
function Studyforrest2Arff(eventsFile, gazeFile, metadata, outputFile)
    % derive the output name from the events file when none is given
    if (nargin < 4)
        [folder, baseName] = fileparts(eventsFile);
        suffixPos = strfind(baseName, '_events');
        if (~isempty(suffixPos))
            baseName = baseName(1:suffixPos(end)-1);
        end
        outputFile = fullfile(folder, [baseName '.arff']);
    end

    frames = importdata(eventsFile, '\t');
    frames = frames.data;

    % NOTE: a stricter check used to live here (currently disabled). It
    % aborted without writing an ARFF file when the frame indices in
    % column 3 of the events file were not consecutive:
    %   frame_step = frames(2:end,3) - frames(1:end-1,3);
    %   if (size(find(frame_step>1),1) > 0)
    %       warning(['Missing frames in ' eventsFile '. No ARFF file is written']);
    %       return;
    %   end

    gaze = importdata(gazeFile, '\t');
    if (IsOctave())
        % at points where confidence is 0 we have 2 entries in the .tsv file.
        % This translates to 2 columns from importdata with data. The last 2
        % columns are zeros. Below we move the frame numbering to the last
        % column. Also when the 3rd column is 0 the confidence is 0 too
        lostRows = gaze(:,3)==0;
        gaze(lostRows,4) = gaze(lostRows,2);
        gaze(lostRows,2) = 0; % remove frameId values
    else
        % in matlab the first two columns are NaN
        gaze(isnan(gaze)) = 0;
    end

    % get gaze points only until the last frame
    if (gaze(end,4) > frames(end,3))
        warning(['Gaze entries point to more frames than they actually exist in ' eventsFile]);
        gaze = gaze(gaze(:,4)<=frames(end,3),:);
    end

    % allocate and assign arff values
    arffData = zeros(size(gaze,1),5); % (time, x, y, confidence, frame_id)
    arffData(:,2) = gaze(:,1); % x
    arffData(:,3) = gaze(:,2); % y
    arffData(:,4) = 1.0; % confidence
    arffData(gaze(:,3)==0,4) = 0.0; % 3rd gaze column equal to 0 marks a lost sample
    arffData(:,5) = gaze(:,4); % frame id

    % calculate timestamps: the samples that reference one frame are spread
    % evenly over that frame's duration, starting at the frame onset.
    % gazeCount holds the gaze count for each frame
    gazeCount = histc(gaze(:,4), frames(:,3));
    prevFrameId = -1;
    gazeCounter = 0;
    gazeStep = 0;
    for i=1:size(arffData,1)
        % frameId is used directly as a row index into frames and gazeCount,
        % i.e. row k of the events file is taken to describe frame k
        frameId = gaze(i,4);
        if (frameId ~= prevFrameId)
            gazeCounter = 0;
            gazeStep = frames(frameId,2)/gazeCount(frameId);
        end
        timestamp = frames(frameId,1) + gazeCounter*gazeStep;
        % make sure timestamps are monotonous. i.e. we don't move past next frame's time
        if (frameId<size(frames,1) && timestamp > frames(frameId+1,1))
            timestamp = frames(frameId+1,1);
        end
        arffData(i,1) = timestamp;
        gazeCounter = gazeCounter + 1;
        prevFrameId = frameId;
    end
    % convert to microseconds and round down to int
    arffData(:,1) = floor(arffData(:,1)*1000000);

    % if there is a difference in the amount of frames referenced in gazeFile
    % and those in eventsFile then we get inf and nan values. The solution is to
    % keep entries just before those observations appear. (The reason for the frame
    % difference is not clear)
    firstBad = find(~isfinite(arffData(:,1)), 1);
    if (~isempty(firstBad))
        arffData = arffData(1:firstBad-1,:);
        warning(['Missing frames in ' eventsFile '. Written timestamps might be erroneous']);
    end

    % save data
    relation = 'studyforrest_gaze';
    attributes = {'time', 'INTEGER';
                  'x', 'NUMERIC';
                  'y', 'NUMERIC';
                  'confidence', 'NUMERIC';
                  'frame_id', 'INTEGER'};

    % The file is written directly instead of calling
    % SaveArff(outputFile, arffData, metadata, attributes, relation)
    % to speed up the process. 7 times faster based on measurements
    fid = fopen(outputFile, 'w+');
    if (fid == -1)
        error('Studyforrest2Arff:cannotOpenFile', ...
              'Could not open %s for writing', outputFile);
    end
    % close the file when the function exits, including on errors below
    closeFile = onCleanup(@() fclose(fid));

    % write relation
    fprintf(fid, '@RELATION %s\n\n', relation);

    % write metadata
    fprintf(fid, '%%@METADATA width_px %d\n', metadata.width_px);
    fprintf(fid, '%%@METADATA height_px %d\n', metadata.height_px);
    fprintf(fid, '%%@METADATA width_mm %.2f\n', metadata.width_mm);
    fprintf(fid, '%%@METADATA height_mm %.2f\n', metadata.height_mm);
    fprintf(fid, '%%@METADATA distance_mm %.2f\n\n', metadata.distance_mm);

    % write metadata extras. Those are data that vary between experiments
    for i=1:size(metadata.extra,1)
        fprintf(fid, '%%@METADATA %s %s\n', metadata.extra{i,1}, metadata.extra{i,2});
    end
    % print an empty line
    fprintf(fid, '\n');

    % write attributes
    for i=1:size(attributes,1)
        fprintf(fid, '@ATTRIBUTE %s %s\n', attributes{i,1}, upper(attributes{i,2}));
    end

    % write data keyword
    fprintf(fid,'\n@DATA\n');

    % write actual data. fprintf consumes its data argument column by column,
    % so passing the transpose emits one ARFF row per sample; this is simpler
    % and faster than a per-row loop
    fprintf(fid, '%d,%.2f,%.2f,%.2f,%d\n', arffData');
end
|