123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115 |
- % LoadArff.m
- %
- % This function loads data from an ARFF file and returns the data, metadata,
- % attributes, relation and comments. All returned strings are lower case.
- %
- % input:
- % arffFile - path to ARFF file to read
- %
- % output:
- % data - data stored in the ARFF file
- % metadata - structure holding metadata in the form: metadata.{width_px, height_px, width_mm, height_mm, distance_mm}; each field is -1 if not available. Extra metadata are stored in metadata.extra, which is an nx2 cell array holding name-value pairs
- % attributes - nx2 cell array with attribute names and types, where n is the number of attributes
- % relation - relation described in ARFF
- % comments - 1xn cell array (row) containing one comment line per cell
function [data, metadata, attributes, relation, comments] = LoadArff(arffFile)
    % LOADARFF  Read the header and data section of an ARFF file.
    %
    % input:
    %   arffFile    - path to the ARFF file to read
    %
    % output:
    %   data        - numeric matrix with one column per attribute; values of
    %                 nominal attributes are replaced by whatever the map
    %                 returned by IsNomAttribute yields for them
    %   metadata    - struct with numeric fields width_px, height_px, width_mm,
    %                 height_mm and distance_mm (-1 when the file does not
    %                 provide them) plus 'extra', an nx2 cell array of
    %                 name/value strings for every other "%@metadata" line
    %   attributes  - nx2 cell array {lower-cased name, type}
    %   relation    - lower-cased relation name ('' if none was found)
    %   comments    - 1xn cell array with one comment line per cell
    %
    % errors:
    %   LoadArff:fileOpenFailed when arffFile cannot be opened for reading.
    %
    % The file is closed on every exit path, including errors raised while
    % parsing.

    % metadata keys that are stored as numeric struct fields
    numericMetaKeys = {'width_px', 'height_px', 'width_mm', 'height_mm', 'distance_mm'};

    % initialize outputs
    data = [];
    metadata.width_px = -1;
    metadata.height_px = -1;
    metadata.width_mm = -1;
    metadata.height_mm = -1;
    metadata.distance_mm = -1;
    metadata.extra = {};
    attributes = {};
    relation = '';
    comments = {};

    % nominal attribute handling: nomMat(i) flags attribute i as nominal and
    % nomMaps{i} holds its value map ([] for non-nominal attributes)
    nomMat = logical([]);
    nomMaps = {};

    % open the file and fail early with a readable message
    [fid, openMsg] = fopen(arffFile, 'r');
    if (fid == -1)
        error('LoadArff:fileOpenFailed', ...
              'Could not open ARFF file "%s": %s', arffFile, openMsg);
    end
    % close the file whenever this function exits, even through an error
    closeFile = onCleanup(@() fclose(fid));

    % read header
    fline = fgetl(fid);
    while (ischar(fline))
        % split line into words
        words = strsplit(fline, ' ');
        numWords = size(words, 2);
        isMetadata = numWords > 2 && strcmpi(words{1}, '%@metadata');

        if (numWords > 1 && strcmpi(words{1}, '@relation'))
            relation = lower(words{2});
        elseif (isMetadata && any(strcmpi(words{2}, numericMetaKeys)))
            % str2double instead of str2num: str2num evaluates the text with
            % eval, which must not be done on file content. Unparsable
            % values become NaN.
            metadata.(lower(words{2})) = str2double(words{3});
        elseif (isMetadata)
            % any other metadata entry is kept verbatim as a name/value pair
            pos = size(metadata.extra, 1) + 1;
            metadata.extra{pos, 1} = words{2};
            metadata.extra{pos, 2} = words{3};
        elseif (numWords > 2 && strcmpi(words{1}, '@attribute'))
            index = size(attributes, 1) + 1;
            attributes{index, 1} = lower(words{2});
            attributes{index, 2} = words{3};
            [isNom, nominalMap] = IsNomAttribute(fline);
            nomMat = [nomMat; isNom];
            if (isNom)
                nomMaps = [nomMaps; {nominalMap}];
                % nominal attributes store the value returned by
                % GetNomAttValue as their type instead of the third word
                attributes{index, 2} = GetNomAttValue(fline);
            else
                nomMaps = [nomMaps; {[]}];
            end
        elseif (~isempty(fline) && fline(1) == '%')
            % remaining lines starting with '%' are plain comments
            comments{end+1} = fline;
        elseif (numWords > 0 && strcmpi(words{1}, '@data'))
            % header finished, the file position is now at the first data row
            break;
        end

        fline = fgetl(fid);
    end

    % build the textscan format: strings for nominal columns, floats otherwise
    numAtts = size(attributes, 1);
    readFormat = '';
    for ind = 1:numAtts
        if (nomMat(ind))
            readFormat = [readFormat '%s '];
        else
            readFormat = [readFormat '%f '];
        end
    end

    columns = textscan(fid, readFormat, 'Delimiter', ',');

    % translate nominal strings through their maps so every column is numeric;
    % reshape guarantees a row vector so the loop body always sees a scalar
    for nomInd = reshape(find(nomMat), 1, [])
        valueMap = nomMaps{nomInd, 1};
        for ind = 1:size(columns{1, nomInd}, 1)
            columns{1, nomInd}{ind} = valueMap(columns{1, nomInd}{ind});
        end
        columns{1, nomInd} = cell2mat(columns{1, nomInd});
    end

    data = cell2mat(columns);
end
|