repl.it
@Caoyq1992/

Machine Learning

Python

No description

fork
loading
Files
  • main.py
  • graph.png
  • iris.csv
  • Packager files
  • poetry.lock
  • pyproject.toml
  • requirements.txt
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# Load libraries
import pandas

from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

# Select the non-interactive Agg backend so figures are rendered to image
# files rather than to a display window.
mpl.use('Agg')

# Clear any graph left over from a previous run. Attempting the removal and
# tolerating a missing file avoids the check-then-delete race of testing
# os.path.exists() first.
try:
    os.remove('graph.png')
except FileNotFoundError:
    pass

# Column labels applied to the CSV: four measurements followed by the label.
names = [
    'sepal-length',
    'sepal-width',
    'petal-length',
    'petal-width',
    'class',
]

# Load the data set from the working directory. Because explicit column
# names are supplied, pandas reads the first row of the file as data
# (assumes iris.csv has no header row -- confirm against the file).
filename = "iris.csv"
dataset = pandas.read_csv(filename, names=names)

# # Shape
# print(dataset.shape)

# # Head
# print(dataset.head(20))

# # Descriptions
# print(dataset.describe())

# print(dataset.groupby('class').size())

# box and whisker plots
# dataset.plot(kind='box',subplots=True,layout=(2,2),sharex=False,sharey=False)
# plt.show()
# # the following is just for our online python interpreter
# plt.savefig('graph.png')

# histograms
# dataset.hist()
# plt.show()
# # the following is just for our online python interpreter
# plt.savefig('graph.png')

# scatter plot matrix
# scatter_matrix(dataset)
# plt.show()
# # the following is just for our online python interpreter
# plt.savefig('graph.png')

# Hold back a validation set: 20% of the rows, chosen with a fixed seed so
# the same split is produced on every run.
validation_size = 0.20
seed = 7

# The first four columns are the features; the fifth column is the label.
array = dataset.values
X, Y = array[:, 0:4], array[:, 4]

(X_train, X_validation,
 Y_train, Y_validation) = model_selection.train_test_split(
    X,
    Y,
    test_size=validation_size,
    random_state=seed,
)

# Settings for the evaluation: the random seed and the metric to score by.
seed = 7
scoring = 'accuracy'

# Candidate classifiers to spot check, as (short label, estimator) pairs.
models = [
    ('LR', LogisticRegression(solver='liblinear', multi_class='ovr')),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC(gamma='auto')),
]

print(models)

# Evaluate each model in turn with 10-fold cross-validation on the training
# split, printing the mean and standard deviation of its fold scores.
#
# KFold only uses random_state when shuffle=True, and recent scikit-learn
# releases raise ValueError when a seed is given without shuffling, so
# shuffling is enabled explicitly. The splitter does not depend on the model,
# so it is built once; with a fixed integer seed every model is scored on the
# same folds.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)

results = []  # one array of fold scores per model
names = []    # model labels, parallel to results (rebinds the column-name list)
for name, model in models:
    cv_results = model_selection.cross_val_score(
        model, X_train, Y_train, cv=kfold, scoring=scoring)
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

# # # Compare Algorithms
# # fig = plt.figure()
# # fig.suptitle('Algorithm Comparison')
# # ax = fig.add_subplot(111)
# # plt.boxplot(results)
# # ax.set_xticklabels(names)
# # plt.show()
# # # the following is just for our online python interpreter
# # plt.savefig('graph.png')

# # Make predictions on validation dataset
# knn = KNeighborsClassifier()
# knn.fit(X_train, Y_train)
# predictions = knn.predict(X_validation)
# print(accuracy_score(Y_validation, predictions))
# print(confusion_matrix(Y_validation, predictions))
# print(classification_report(Y_validation, predictions))