1 month ago · 9effb2ef38
--- a/code/classification_analysis.py
+++ b/code/classification_analysis.py
@@ -6,31 +6,31 @@ from sklearn.neighbors import KNeighborsClassifier
 
				 from sklearn.metrics import classification_report
			
 
				 
			
 
				 parser = argparse.ArgumentParser(description="Analyze iris data")
			
 
				-parser.add_argument('data', help="Input data (CSV) to process")
			
 
				-parser.add_argument('output_figure', help="Output figure path")
			
 
				-parser.add_argument('output_report', help="Output report path")
			
 
				+parser.add_argument("data", help="Input data (CSV) to process")
			
 
				+parser.add_argument("output_figure", help="Output figure path")
			
 
				+parser.add_argument("output_report", help="Output report path")
			
 
				 args = parser.parse_args()
			
 
				 
			
 
				 # prepare the data as a pandas dataframe
			
 
				 df = pd.read_csv(args.data)
			
 
				-attributes = ["sepal_length", "sepal_width", "petal_length","petal_width", "class"]
			
 
				+attributes = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
			
 
				 df.columns = attributes
			
 
				 
			
 
				 # create a pairplot to plot pairwise relationships in the dataset
			
 
				-plot = sns.pairplot(df, hue='class', palette='muted')
			
 
				+plot = sns.pairplot(df, hue="class", palette="muted")
			
 
				 plot.savefig(args.output_figure)
			
 
				 
			
 
				 # perform a K-nearest-neighbours classification with scikit-learn
			
 
				 
			
 
				 # Step 1: split data in test and training dataset (20:80)
			
 
				 array = df.values
			
 
				-X = array[:,0:4]
			
 
				-Y = array[:,4]
			
 
				+X = array[:, 0:4]
			
 
				+Y = array[:, 4]
			
 
				 test_size = 0.20
			
 
				 seed = 7
			
 
				-X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y,
			
 
				-                                                                    test_size=test_size,
			
 
				-                                                                    random_state=seed)
			
 
				+X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
			
 
				+    X, Y, test_size=test_size, random_state=seed
			
 
				+)
			
 
				 
			
 
				 # Step 2: Fit the model and make predictions on the test dataset
			
 
				 knn = KNeighborsClassifier()