|
@@ -6,31 +6,31 @@ from sklearn.neighbors import KNeighborsClassifier
|
|
from sklearn.metrics import classification_report
|
|
from sklearn.metrics import classification_report
|
|
|
|
|
|
parser = argparse.ArgumentParser(description="Analyze iris data")
|
|
parser = argparse.ArgumentParser(description="Analyze iris data")
|
|
-parser.add_argument('data', help="Input data (CSV) to process")
|
|
|
|
-parser.add_argument('output_figure', help="Output figure path")
|
|
|
|
-parser.add_argument('output_report', help="Output report path")
|
|
|
|
|
|
+parser.add_argument("data", help="Input data (CSV) to process")
|
|
|
|
+parser.add_argument("output_figure", help="Output figure path")
|
|
|
|
+parser.add_argument("output_report", help="Output report path")
|
|
args = parser.parse_args()
|
|
args = parser.parse_args()
|
|
|
|
|
|
# prepare the data as a pandas dataframe
|
|
# prepare the data as a pandas dataframe
|
|
df = pd.read_csv(args.data)
|
|
df = pd.read_csv(args.data)
|
|
-attributes = ["sepal_length", "sepal_width", "petal_length","petal_width", "class"]
|
|
|
|
|
|
+attributes = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
|
|
df.columns = attributes
|
|
df.columns = attributes
|
|
|
|
|
|
# create a pairplot to plot pairwise relationships in the dataset
|
|
# create a pairplot to plot pairwise relationships in the dataset
|
|
-plot = sns.pairplot(df, hue='class', palette='muted')
|
|
|
|
|
|
+plot = sns.pairplot(df, hue="class", palette="muted")
|
|
plot.savefig(args.output_figure)
|
|
plot.savefig(args.output_figure)
|
|
|
|
|
|
# perform a K-nearest-neighbours classification with scikit-learn
|
|
# perform a K-nearest-neighbours classification with scikit-learn
|
|
|
|
|
|
# Step 1: split data in test and training dataset (20:80)
|
|
# Step 1: split data in test and training dataset (20:80)
|
|
array = df.values
|
|
array = df.values
|
|
-X = array[:,0:4]
|
|
|
|
-Y = array[:,4]
|
|
|
|
|
|
+X = array[:, 0:4]
|
|
|
|
+Y = array[:, 4]
|
|
test_size = 0.20
|
|
test_size = 0.20
|
|
seed = 7
|
|
seed = 7
|
|
-X_train, X_test, Y_train, Y_test = model_selection.train_test_split(X, Y,
|
|
|
|
- test_size=test_size,
|
|
|
|
- random_state=seed)
|
|
|
|
|
|
+X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
|
|
|
|
+ X, Y, test_size=test_size, random_state=seed
|
|
|
|
+)
|
|
|
|
|
|
# Step 2: Fit the model and make predictions on the test dataset
|
|
# Step 2: Fit the model and make predictions on the test dataset
|
|
knn = KNeighborsClassifier()
|
|
knn = KNeighborsClassifier()
|