@justinholman/

TermProject-EDA_VehPop

Python

No description

fork
loading
Files
  • main.py
  • box1.png
  • hist1.png
  • strip1.png
  • swarm1.png
  • vehpop.csv
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
mpl.use('Agg')

# Load the vehicle population table. The CSV must provide a 'Veh' column
# (summarised here) and a 'State' column (used by the per-state plots below).
vehdata = pd.read_csv('vehpop.csv')
# Single-column DataFrame; kept under this name for the histogram section.
vehpop = vehdata[['Veh']]
# 1-D Series of the same column, used for the summary statistics. Reducing
# the one-column DataFrame with np.mean / np.std can hand back a Series
# (depending on the pandas version) rather than a scalar, which made the
# printed report inconsistent with the median and percentile lines.
veh_values = vehdata['Veh']

# Descriptive summary statistics. Every reduction below is a pandas Series
# method, so each result is a scalar and missing values are skipped
# uniformly across all five statistics.
vehmean = veh_values.mean()
vehstd = veh_values.std(ddof=0)  # ddof=0: population std dev, matching np.std
vehmed = veh_values.median()
# The 2.5th and 97.5th percentiles bracket the central 95% of the values.
vehlow = veh_values.quantile(0.025)
vehhigh = veh_values.quantile(0.975)

print("mean: " + str(vehmean))
print("std dev: " + str(vehstd))
print("median: " + str(vehmed))
print("95th percentile lower bound: " + str(vehlow))
print("95th percentile upper bound: " + str(vehhigh))

# Histogram of the vehicle population values, written to hist1.png.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases in
# favour of histplot/displot -- confirm the installed seaborn version
# before migrating.
# NOTE(review): the y-axis is labelled '% of Counties', but distplot's
# default output appears to be density-scaled rather than a percentage --
# confirm and relabel if so.
sns.distplot(vehpop)
plt.xlabel('Vehicle Population')
plt.ylabel('% of Counties')
# Save before show(): with an interactive backend, show() blocks and the
# figure may be discarded when its window closes, so saving afterwards can
# write an empty image. With the Agg backend selected at the top of the
# script, show() does not open a window, so this order is safe either way.
plt.savefig('hist1.png')
plt.show()
# Clear the current figure so the next plot starts from a blank canvas.
plt.clf()

# Box plot of 'Veh' grouped by 'State', written to box1.png.
sns.boxplot(x='State', y='Veh', data=vehdata)
plt.xlabel('States')
plt.ylabel('Vehicle Population by County')
# Save before show(): with an interactive backend, show() blocks and the
# figure may be discarded when its window closes, so saving afterwards can
# write an empty image.
plt.savefig('box1.png')
plt.show()
# Clear the current figure so the next plot starts from a blank canvas.
plt.clf()

# Strip plot of 'Veh' grouped by 'State', written to strip1.png.
# jitter=True spreads the points sideways within each state's column so
# they overlap less.
sns.stripplot(x='State', y='Veh', data=vehdata, jitter=True)
plt.xlabel('States')
plt.ylabel('Vehicle Population by County')
# Save before show(): with an interactive backend, show() blocks and the
# figure may be discarded when its window closes, so saving afterwards can
# write an empty image.
plt.savefig('strip1.png')
plt.show()
# Clear the current figure so the next plot starts from a blank canvas.
plt.clf()

# Swarm plot of 'Veh' grouped by 'State', written to swarm1.png.
sns.swarmplot(x='State', y='Veh', data=vehdata)
plt.xlabel('States')
plt.ylabel('Vehicle Population by County')
# Save before show(): with an interactive backend, show() blocks and the
# figure may be discarded when its window closes, so saving afterwards can
# write an empty image.
plt.savefig('swarm1.png')
plt.show()
# Clear the current figure; this is the last plot the script produces.
plt.clf()