@justinholman/

TermProject-ModelTest

Python

No description

fork
loading
Files
  • main.py
  • colorado-profile-county.csv
  • colorado.csv
  • estimates.csv
  • hist1.png
  • hist2.png
  • out.csv
  • scatter1.png
  • scatter2.png
  • veh.csv

This Plugin Crashed!

Error: Error: must not create an existing file {"type":"CREATE_FILE","wid":"0.7014306012767568","path":"main.py","file":{"path":"main.py","content":{"asEncoding":{"base64":"aW1wb3J0IG1hdHBsb3RsaWIgYXMgbXBsCmltcG9ydCBtYXRwbG90bGliLnB5cGxvdCBhcyBwbHQKaW1wb3J0IHBhbmRhcyBhcyBwZAppbXBvcnQgbnVtcHkgYXMgbnAKaW1wb3J0IHNlYWJvcm4gYXMgc25zCmltcG9ydCBjc3YKbXBsLnVzZSgnQWdnJykKCiMgcmVhZCB2ZWhwb3AgZGF0YQp2ZWhkYXRhID0gcGQucmVhZF9jc3YoJ3ZlaC5jc3YnKQoKIyB0ZXN0IGEgbW9kZWwKYmFjaGMgPSAyLjQwNzQ4ODczMDQzMzYyCnVyYmFuYyA9IDAuMDg0NDc2NDAzMjQ5OTMxOQpyYWNlYyA9IC0wLjMyMzM5MTgwNjc2MDY2Mgp5aW50ZXJjZXB0ID0gNC45MTkxNDQwODE2NDAyNwoKb3V0cHV0ID0gW10KcHJlZCA9IFtdCnZlaGFjdHVhbCA9IFtdCmZvciBpLCByb3cgaW4gdmVoZGF0YS5pdGVycm93cygpIDoKICBiYWNoID0gcm93WydwY3RfYmFjaGVsb3Jzb3JoaWdoZXInXQogIHVyYmFuID0gcm93Wydwb3BwY3RfdXJiYW4nXQogIHJhY2UgPSByb3dbJ3BjdF9yYWNld2hpdGUnXQogIGNvdW50eSA9IHJvd1snY291bnR5J10KICBwb3Blc3QgPSByb3dbJ3BvcGVzdCddCiAgdmVocG9wID0gcm93WydWZWggUG9wJ10KICB5aGF0ID0gcm91bmQoYmFjaCpiYWNoYyArIHVyYmFuKnVyYmFuYyArIHJhY2UqcmFjZWMgKyB5aW50ZXJjZXB0LDEpCiAgYWN0dWFsID0gcm93WydWZWggUGVyIDEwayddCiAgcmVzaWQgPSByb3VuZChhY3R1YWwgLSB5aGF0LDEpCiAgcHJlZHZlaHBvcCA9IHJvdW5kKCh5aGF0LzEwMDAwKSpwb3Blc3QsMCkKICByZXNpZHBvcCA9IHJvdW5kKHZlaHBvcC1wcmVkdmVocG9wLDEpCiAgb3V0cHV0LmFwcGVuZChbY291bnR5LGFjdHVhbCx5aGF0LHJlc2lkLHZlaHBvcCxwcmVkdmVocG9wLHJlc2lkcG9wXSkKCiMgd3JpdGUgb3V0cHV0IHRvIGNzdiBhbmQgY3JlYXRlIHNvbWUgZ3JhcGhpY3MKZGYgPSBwZC5EYXRhRnJhbWUob3V0cHV0KQpteWNvbHVtbnMgPSBbJ2NvdW50eScsJ2FjdHVhbCcsJ3loYXQnLCdyZXNpZCcsJ3ZlaHBvcCcsJ3ByZWR2ZWhwb3AnLCdyZXNpZHBvcCddCmRmLmNvbHVtbnMgPSBteWNvbHVtbnMKZGYudG9fY3N2KCdvdXQuY3N2JykKCnNucy5kaXN0cGxvdChkZlsneWhhdCddKQpwbHQuc2hvdygpCnBsdC5zYXZlZmlnKCdoaXN0MS5wbmcnKQpwbHQuY2xmKCkKCnNucy5kaXN0cGxvdChkZlsncmVzaWQnXSkKcGx0LnNob3coKQpwbHQuc2F2ZWZpZygnaGlzdDIucG5nJykKcGx0LmNsZigpCgpzbnMucmVncGxvdCgnYWN0dWFsJywneWhhdCcsZGF0YT1kZikKcGx0LnNob3coKQpwbHQuc2F2ZWZpZygnc2NhdHRlcjEucG5nJykKcGx0LmNsZigpCgpzbnMucmVncGxvdCgndmVocG9wJywncHJlZHZlaHBvcCcsZGF0YT1kZikKcGx0LnNob3coKQpwbHQuc2F2ZWZpZygnc2NhdHRlcjIucG5nJyk
KcGx0LmNsZigpCgojIHNvcnQgYW5kIHByaW50IHJlc2lkdWFscwpwcmludChkZi5zb3J0X3ZhbHVlcyhieT0ncmVzaWRwb3AnKSkKCiMgbm93IGNhbGN1bGF0ZSBlc3RpbWF0ZXMgZm9yIENvbG9yYWRvCmNvbG9yYWRvID0gcGQucmVhZF9jc3YoJ2NvbG9yYWRvLmNzdicpCmNvbG9yYWRvX2VzdGltYXRlcyA9IFtdCmZvciBpLCByb3cgaW4gY29sb3JhZG8uaXRlcnJvd3MoKSA6CiAgYmFjaCA9IHJvd1sncGN0X2JhY2hlbG9yc29yaGlnaGVyJ10KICB1cmJhbiA9IHJvd1sncG9wcGN0X3VyYmFuJ10KICByYWNlID0gcm93WydwY3RfcmFjZXdoaXRlJ10KICBjb3VudHkgPSByb3dbJ2NvdW50eSddCiAgcG9wZXN0ID0gcm93Wydwb3Blc3QnXQogIHloYXQgPSByb3VuZChiYWNoKmJhY2hjICsgdXJiYW4qdXJiYW5jICsgcmFjZSpyYWNlYyArIHlpbnRlcmNlcHQsMSkKICBwcmVkdmVocG9wID0gcm91bmQoKHloYXQvMTAwMDApKnBvcGVzdCwwKQogIGNvbG9yYWRvX2VzdGltYXRlcy5hcHBlbmQoW2NvdW50eSxwb3Blc3QseWhhdCxwcmVkdmVocG9wXSkKICAKZGYgPSBwZC5EYXRhRnJhbWUoY29sb3JhZG9fZXN0aW1hdGVzKQpteWNvbHVtbnMgPSBbJ2NvdW50eScsJ3BvcGVzdCcsJ3loYXQnLCdwcmVkdmVocG9wJ10KZGYuY29sdW1ucyA9IG15Y29sdW1ucwpkZi50b19jc3YoJ2VzdGltYXRlcy5jc3YnKQoKcHJpbnQoIm91dHB1dCB3cml0dGVuIHRvIGZpbGUiKQ=="},"asBuffer":null},"loaded":true}}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import matplotlib as mpl

# Select the non-interactive Agg backend *before* pyplot is imported. Older
# matplotlib releases ignore (and only warn about) a backend change requested
# once pyplot has been loaded, so the call has to come first to be reliable.
mpl.use('Agg')

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import csv

# Load the observed vehicle data. Each row supplies a county label, the three
# predictor columns, a population estimate and the observed vehicle figures
# that are read in the loop below.
vehdata = pd.read_csv('veh.csv')

# Coefficients of the linear model under test.
bachc = 2.40748873043362       # weight applied to pct_bachelorsorhigher
urbanc = 0.0844764032499319    # weight applied to poppct_urban
racec = -0.323391806760662     # weight applied to pct_racewhite
yintercept = 4.91914408164027  # constant term

# Score every row: predicted rate, residual against the observed rate, and the
# same comparison expressed as vehicle counts.
output = []
for _, row in vehdata.iterrows():
    county = row['county']
    popest = row['popest']
    vehpop = row['Veh Pop']
    actual = row['Veh Per 10k']

    # Predicted value on the same scale as the 'Veh Per 10k' column.
    yhat = round(
        row['pct_bachelorsorhigher'] * bachc
        + row['poppct_urban'] * urbanc
        + row['pct_racewhite'] * racec
        + yintercept,
        1,
    )
    resid = round(actual - yhat, 1)

    # Convert the per-10,000 prediction into a count using the population
    # estimate, then compare it with the observed count.
    predvehpop = round((yhat / 10000) * popest, 0)
    residpop = round(vehpop - predvehpop, 1)

    output.append([county, actual, yhat, resid, vehpop, predvehpop, residpop])

# Collect the per-row results into a table and save it as out.csv.
# The column names are passed to the constructor rather than assigned to
# df.columns afterwards: a frame built from an empty `output` list has zero
# columns, so assigning seven names to it raises a length-mismatch ValueError.
# With the names given up front an empty input yields a header-only file.
mycolumns = ['county','actual','yhat','resid','vehpop','predvehpop','residpop']
df = pd.DataFrame(output, columns=mycolumns)
df.to_csv('out.csv')

# Diagnostic plots for the model, each written to its own PNG.
#
# savefig() is called before show() on purpose: with an interactive backend
# the figure can be torn down once show() returns, which would leave a blank
# image on disk. Under the Agg backend show() does not open a window.
#
# NOTE(review): sns.distplot is deprecated in seaborn >= 0.11 in favour of
# histplot/displot. It is kept so the script still runs on older seaborn
# releases; switch once the minimum seaborn version is known.

# Distribution of the predicted values.
sns.distplot(df['yhat'])
plt.savefig('hist1.png')
plt.show()
plt.clf()

# Distribution of the residuals (actual - predicted).
sns.distplot(df['resid'])
plt.savefig('hist2.png')
plt.show()
plt.clf()

# Observed vs. predicted rate. x and y are passed by keyword because
# seaborn 0.12+ accepts only `data` positionally; keywords work on every
# seaborn version.
sns.regplot(x='actual', y='yhat', data=df)
plt.savefig('scatter1.png')
plt.show()
plt.clf()

# Observed vs. predicted vehicle counts.
sns.regplot(x='vehpop', y='predvehpop', data=df)
plt.savefig('scatter2.png')
plt.show()
plt.clf()

# Print the results table ordered by count residual (ascending, so the
# smallest `residpop` values come first).
by_residpop = df.sort_values('residpop')
print(by_residpop)

# Apply the same model to the rows of colorado.csv. That file supplies only
# the predictors and a population estimate, so no residuals are computed.
colorado = pd.read_csv('colorado.csv')
colorado_estimates = []
for _, row in colorado.iterrows():
    county = row['county']
    popest = row['popest']

    # Same linear combination and rounding as used for the test data.
    yhat = round(
        row['pct_bachelorsorhigher'] * bachc
        + row['poppct_urban'] * urbanc
        + row['pct_racewhite'] * racec
        + yintercept,
        1,
    )
    # Scale the per-10,000 prediction by population to get a count.
    predvehpop = round((yhat / 10000) * popest, 0)

    colorado_estimates.append([county, popest, yhat, predvehpop])

# Column names go to the constructor so that an input file with no data rows
# produces a header-only estimates.csv instead of a length-mismatch
# ValueError from assigning df.columns on an empty frame.
mycolumns = ['county','popest','yhat','predvehpop']
df = pd.DataFrame(colorado_estimates, columns=mycolumns)
df.to_csv('estimates.csv')

print("output written to file")