repl.it
@justinholman/

SeabornHistograms

Python

No description

fork
loading
Files
  • main.py
  • 2008election.csv
  • blank
  • foursquare.png
  • hist1.png
  • hist2.png
  • hist3.png
  • hist4.png
  • hist5.png
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# first we import matplotlib and use('Agg') so we can "savefig" below
import matplotlib as mpl
mpl.use('Agg')

# now lets import the other modules we need
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm

# with all our modules in place, let's read csv data
e08 = pd.read_csv('2008election.csv')
dvote = e08[['dem_share']]

# now let's use Seaborn to make some graphics
#sns.set(style="white") - this changes background from grey to white
sns.set()

# axis labels shared by the single-panel plots below
OBAMA_VOTE_LABEL = '% Obama Vote in 2008'
COUNTY_COUNT_LABEL = 'Number of US Counties'
COUNTY_SHARE_LABEL = '% of US Counties'


def _finish_plot(filename, xlabel=None, ylabel=None):
    """Label (optionally), save, show and close the current figure.

    The figure is written to disk *before* plt.show() is called. With the
    'Agg' backend show() does nothing, but with an interactive backend the
    figure may be gone once show() returns, and a later savefig() would
    write an empty image.

    filename -- path of the image file to write
    xlabel   -- x-axis label for the current axes, or None to leave it alone
    ylabel   -- y-axis label for the current axes, or None to leave it alone
    """
    if xlabel is not None:
        plt.xlabel(xlabel)
    if ylabel is not None:
        plt.ylabel(ylabel)
    plt.savefig(filename)
    plt.show()
    # close (rather than just clear) so finished figures don't pile up
    plt.close()


# 1. Histogram of % votes for Obama in 2008
sns.distplot(dvote, kde=False)
_finish_plot('hist1.png', OBAMA_VOTE_LABEL, COUNTY_COUNT_LABEL)

# 2. Same as 1 but with a cool density curve, you know, just to show off
# note the change in the y-axis: with a density curve drawn, the bars are
# normalized instead of showing raw counts
# NOTE(review): the y-axis here is a density, not literally a percentage -
# confirm the '% of US Counties' label is what we want
sns.distplot(dvote)
_finish_plot('hist2.png', OBAMA_VOTE_LABEL, COUNTY_SHARE_LABEL)

# 3. Now let's just plot the density curve without the vertical bars
sns.distplot(dvote, hist=False)
_finish_plot('hist3.png', OBAMA_VOTE_LABEL, COUNTY_SHARE_LABEL)

# 4. You can add a "rug" to the density curve, like a toupee on a bald head
# it works better with fewer observations
sns.distplot(dvote, hist=False, rug=True)
_finish_plot('hist4.png', OBAMA_VOTE_LABEL, COUNTY_SHARE_LABEL)

# 5. Check this out - you can overlay the normal distribution curve
# to impress a date, call this the "maximum likelihood gaussian distribution fit"
# here I'll also change the color of the histogram so you can see how to do that
sns.distplot(dvote, fit=norm, kde=False, color='g')
_finish_plot('hist5.png', OBAMA_VOTE_LABEL, COUNTY_SHARE_LABEL)

# Here I'll try to make a matrix of plots
# I'm pretty much just copying this: https://seaborn.pydata.org/examples/distplot_options.html
f, axes = plt.subplots(2,2)
sns.distplot(dvote, color="b", ax=axes[0,0])
sns.distplot(dvote, hist=False, rug=True, color="r", ax=axes[0,1])
sns.distplot(dvote, hist=False, color="g", kde_kws={"shade": True}, ax=axes[1,0])
sns.distplot(dvote, color="m", ax=axes[1,1])
# hide the y tick marks on all four panels, then tidy up the spacing
plt.setp(axes, yticks=[])
plt.tight_layout()
_finish_plot('foursquare.png')