repl.it
@MaxVandervelden/

Medium Scraping

Python

No description

fork
loading
Files
  • main.py
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import cgi
import re
from contextlib import closing
from html import escape

from bs4 import BeautifulSoup
from requests import get
from requests.exceptions import RequestException

def is_good_response(resp):
    """Return True when *resp* is a successful HTML response.

    Args:
        resp: Response-like object exposing ``status_code`` and a
            ``headers`` mapping.

    Returns:
        True when the status code is 200 and the ``Content-Type`` header
        contains ``html`` (case-insensitive); False otherwise, including
        when the response carries no ``Content-Type`` header at all.
    """
    # The header is optional: treat a missing (or empty) value as an empty
    # string so the check yields False instead of raising KeyError.
    content_type = (resp.headers.get('Content-Type') or '').lower()
    return resp.status_code == 200 and 'html' in content_type
def log_error(e):
    """Report an error by printing it to standard output.

    Args:
        e: The error or message to display; its string form is printed.
    """
    message = str(e)
    print(message)
def simple_get(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the raw body on success.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body when ``is_good_response`` accepts the response;
        ``None`` otherwise, including when the request raises
        ``RequestException`` (the error is reported via ``log_error``).
    """
    try:
        # requests never times out unless told to, so a stalled server
        # would hang the script; a timeout surfaces as a RequestException
        # subclass and is handled below like any other request failure.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None
    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None

# Download the followers page; simple_get returns None on any failure.
raw_html = simple_get("https://medium.com/@21mvandervelden/followers")
if raw_html is None:
    # Stop with a clear message instead of handing None to BeautifulSoup.
    raise SystemExit("Could not retrieve the Medium followers page")

html = BeautifulSoup(raw_html, 'html.parser')
# Stringify the list of matching elements, then strip comments and tags.
followers = str(html.find_all(class_='ui-captionStrong'))
tag_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
no_tags1 = tag_re.sub("", followers)
# cgi.escape was removed in Python 3.8; html.escape with quote=False
# escapes the same characters (&, < and >).
followers1 = escape(no_tags1, quote=False)

# NOTE(review): followers1 is a string, so this is the number of characters
# in the stripped text, not the number of matched elements. If a follower
# count is intended, len() of the find_all() result is probably what is
# wanted -- confirm before changing the printed value.
c = len(followers1)

print(c)