@justinholman/

TermProject-VehCorrelations

Python

No description

fork
loading
Files
  • main.py
  • cmx.csv
  • hist1.png
  • scatter1.png
  • veh.csv
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
mpl.use('Agg')

# Load the vehicle-population table; the later sections of this script all
# read from `vehdata`.
vehdata = pd.read_csv('veh.csv')

# Histogram of the 'Veh Per 10k' column, written to hist1.png.
vehper10k = vehdata['Veh Per 10k']
# NOTE(review): sns.distplot is deprecated in newer seaborn releases. It is
# kept here so the figure does not change on the seaborn version this project
# runs against -- confirm the installed version before migrating to histplot.
sns.distplot(vehper10k)
plt.xlabel('Veh per 10k')
plt.ylabel('% of Counties')
# Save BEFORE show(): on an interactive backend show() blocks until the window
# is closed, after which the figure is gone and savefig() would write a blank
# image. Saving first is correct on every backend.
plt.savefig('hist1.png')
plt.show()
plt.clf()  # reset the current figure so the next plot starts clean

# Correlation matrix: take every column from the third one onward (the first
# two are skipped -- presumably non-numeric identifier columns; verify against
# veh.csv), compute pairwise Pearson correlations, and write them to cmx.csv.
FIRST_VARIABLE_COLUMN = 2
nvars = vehdata.iloc[:, FIRST_VARIABLE_COLUMN:]
cmx = nvars.corr()  # DataFrame.corr defaults to method='pearson'
cmx.to_csv('cmx.csv')

# Scatter plot with a fitted regression line: 'popest' on the x axis against
# 'Veh Pop' on the y axis, written to scatter1.png.
# x and y are passed by keyword: newer seaborn releases make them keyword-only
# and raise TypeError on the old positional form, while keywords work on both
# old and new versions.
sns.regplot(x='popest', y='Veh Pop', data=vehdata)
# Save BEFORE show(): on an interactive backend the figure no longer exists
# once show() returns, so saving afterwards would produce a blank image.
plt.savefig('scatter1.png')
plt.show()
plt.clf()  # reset the current figure so later plots start clean

# test a model
# Predictor columns and the county identifier column pulled from the table.
pct_bachelororhigher = vehdata['pct_bachelorsorhigher']
poppct_urban = vehdata['poppct_urban']
pct_racewhite = vehdata['pct_racewhite']
countyid = vehdata['county']

# Linear-model coefficients and intercept.
# NOTE(review): presumably taken from a regression fitted outside this script;
# the source of these numbers is not visible here -- confirm.
bachc = 2.40748873043362
urbanc = 0.0844764032499319
racec = -0.323391806760662
yintercept = 4.91914408164027

# Walk the table one row at a time and print each row with its position.
# Rows are obtained through iterrows(); the previous `vehdata[i,:]` is not
# valid DataFrame row access (it is treated as a column-key lookup and raises),
# so the loop used to die on its first iteration.
for i, (_, temp) in enumerate(vehdata.iterrows()):
    print(i)
    print(temp)
    # TODO: compute and report the model prediction for this row, e.g.
    #   yhat = (temp['pct_bachelorsorhigher'] * bachc
    #           + temp['poppct_urban'] * urbanc
    #           + temp['pct_racewhite'] * racec
    #           + yintercept)
    #   print(temp['county'], yhat)