repl.it
@justinholman/

NY-HondaAccord

Python

No description

fork
loading
Files
  • main.py
  • hist1.png
  • honda-accord.csv
  • swarm1.png
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import matplotlib as mpl
import pandas as pd
#import numpy as np

# Select the non-interactive Agg backend *before* pyplot is imported (seaborn
# imports pyplot as well). Older Matplotlib releases ignore a backend switch
# that is requested after pyplot has already been loaded.
mpl.use('Agg')

import matplotlib.pyplot as plt  # noqa: E402
import seaborn as sns  # noqa: E402

# Load the CSV and keep the 'vpop' column as a one-column DataFrame, so every
# aggregate below comes back as a one-element Series.
vehdata = pd.read_csv('honda-accord.csv')
vehpop = vehdata[['vpop']]

# Median, mean and standard deviation, rounded to one decimal place.
vehmed = vehpop.median().round(1)
vehmean = vehpop.mean().round(1)
vehstd = vehpop.std().round(1)

# Minimum, maximum and total, rounded to zero decimals.
vehmin = vehpop.min().round()
vehmax = vehpop.max().round()
vehsum = vehpop.sum().round()

# Text form of each statistic (first and only entry of each Series), used
# later as plot annotations.
strvehmed, strvehmean, strvehstd, strvehmin, strvehmax, strvehsum = (
    str(stat.iloc[0])
    for stat in (vehmed, vehmean, vehstd, vehmin, vehmax, vehsum)
)

# Histogram of 'vpop' (counts only, with a rug), annotated with the summary
# statistics computed above, written to hist1.png.
# NOTE(review): sns.distplot is deprecated in newer seaborn releases; it is
# kept here because the hard-coded annotation coordinates below were placed
# against its binning. Revisit the positions if migrating to histplot/rugplot.
sns.distplot(vehpop, kde=False, rug=True)
plt.title('Honda Accord VIO by County in New York State')

# One row per statistic: (label, value text, x of the value, y of the row).
# Coordinates are in data units and were positioned by hand.
for label, value, value_x, row_y in (
    ("median: ", strvehmed, 30700, 38),
    ("mean: ", strvehmean, 30000, 36),
    ("std dev: ", strvehstd, 30000, 34),
    ("min: ", strvehmin, 30000, 31),
    ("max: ", strvehmax, 30000, 29),
    ("sum: ", strvehsum, 30000, 27),
):
    plt.text(22000, row_y, label)
    plt.text(value_x, row_y, value)

plt.xlabel('VIO')
plt.ylabel('Number of Counties')

# Save before showing: on an interactive backend the figure may already be
# gone once show() returns, and a later savefig would write a blank image.
plt.savefig('hist1.png')
plt.show()
plt.clf()  # reset the current figure so the next plot starts clean

# Swarm plot of the 'vpop' column (one point per row of vehdata), written to
# swarm1.png.
sns.swarmplot(x='vpop', data=vehdata)
plt.title('Honda Accord VIO by County in New York State')
plt.xlabel('VIO')

# Save before showing: on an interactive backend the figure may already be
# gone once show() returns, and a later savefig would write a blank image.
plt.savefig('swarm1.png')
plt.show()
plt.clf()  # clear the current figure after it has been written