@MaxVandervelden/

Medium Scraping

Python

No description

fork
loading
Files
  • main.py

This Plugin Crashed!

Error: Error: must not create an existing file {"type":"CREATE_FILE","wid":"0.14359534000386454","path":"main.py","file":{"path":"main.py","content":{"asEncoding":{"base64":"ZnJvbSByZXF1ZXN0cyBpbXBvcnQgZ2V0CmZyb20gcmVxdWVzdHMuZXhjZXB0aW9ucyBpbXBvcnQgUmVxdWVzdEV4Y2VwdGlvbgpmcm9tIGNvbnRleHRsaWIgaW1wb3J0IGNsb3NpbmcKZnJvbSBiczQgaW1wb3J0IEJlYXV0aWZ1bFNvdXAKaW1wb3J0IHJlLCBjZ2kKCmRlZiBpc19nb29kX3Jlc3BvbnNlKHJlc3ApOgogICAgY29udGVudF90eXBlID0gcmVzcC5oZWFkZXJzWydDb250ZW50LVR5cGUnXS5sb3dlcigpCiAgICByZXR1cm4gKHJlc3Auc3RhdHVzX2NvZGUgPT0gMjAwIAogICAgICAgICAgICBhbmQgY29udGVudF90eXBlIGlzIG5vdCBOb25lIAogICAgICAgICAgICBhbmQgY29udGVudF90eXBlLmZpbmQoJ2h0bWwnKSA+IC0xKQpkZWYgbG9nX2Vycm9yKGUpOgogICAgcHJpbnQoZSkKZGVmIHNpbXBsZV9nZXQodXJsKToKICAgIHRyeToKICAgICAgICB3aXRoIGNsb3NpbmcoZ2V0KHVybCwgc3RyZWFtPVRydWUpKSBhcyByZXNwOgogICAgICAgICAgICBpZiBpc19nb29kX3Jlc3BvbnNlKHJlc3ApOgogICAgICAgICAgICAgICAgcmV0dXJuIHJlc3AuY29udGVudAogICAgICAgICAgICBlbHNlOgogICAgICAgICAgICAgICAgcmV0dXJuIE5vbmUKICAgIGV4Y2VwdCBSZXF1ZXN0RXhjZXB0aW9uIGFzIGU6CiAgICAgICAgbG9nX2Vycm9yKCdFcnJvciBkdXJpbmcgcmVxdWVzdHMgdG8gezB9IDogezF9Jy5mb3JtYXQodXJsLCBzdHIoZSkpKQogICAgICAgIHJldHVybiBOb25lCgpyYXdfaHRtbCA9IHNpbXBsZV9nZXQoImh0dHBzOi8vbWVkaXVtLmNvbS9AMjFtdmFuZGVydmVsZGVuL2ZvbGxvd2VycyIpCmh0bWwgPSBCZWF1dGlmdWxTb3VwKHJhd19odG1sLCAnaHRtbC5wYXJzZXInKQpmb2xsb3dlcnMgPSBzdHIoaHRtbC5maW5kX2FsbChjbGFzc189J3VpLWNhcHRpb25TdHJvbmcnKSkKdGFnX3JlID0gcmUuY29tcGlsZSgnKDwhLS0uKj8tLT58PFtePl0qPiknKQpub190YWdzMSA9IHRhZ19yZS5zdWIoIiIsZm9sbG93ZXJzKQpmb2xsb3dlcnMxID0gY2dpLmVzY2FwZShub190YWdzMSkKCmMgPSAwCmZvciBpdGVtIGluIGZvbGxvd2VyczE6CiAgYyArPSAxCgpwcmludChjKQoK"},"asBuffer":null},"loaded":true}}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
import re, cgi

def is_good_response(resp):
    """Return True when *resp* looks like a successful HTML response.

    Args:
        resp: A response object exposing ``status_code`` and a ``headers``
            mapping (for example a ``requests.Response``).

    Returns:
        bool: True only if the status code is 200 and the ``Content-Type``
        header is present and contains ``html`` (case-insensitive).
    """
    # Use .get() so a response without a Content-Type header is reported as
    # "not good" instead of raising KeyError before the None check runs.
    content_type = resp.headers.get('Content-Type')
    if content_type is None:
        return False
    return resp.status_code == 200 and 'html' in content_type.lower()
def log_error(e):
    """Report an error by writing it to standard output.

    Args:
        e: The message (or exception) to display.
    """
    message = str(e)
    print(message)
def simple_get(url, timeout=10):
    """Fetch *url* with an HTTP GET and return the body if it is HTML.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Defaults to 10.

    Returns:
        ``resp.content`` when ``is_good_response`` accepts the response,
        otherwise None. None is also returned when the request raises a
        ``RequestException`` (the error is logged first).
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        # A timeout surfaces as a RequestException and is handled below.
        # closing() releases the streamed connection when the block exits.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None
    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None

# cgi.escape was removed in Python 3.8; html.escape(..., quote=False) is the
# documented equivalent. It is imported under an alias because this script
# binds the name `html` to the parsed document below.
from html import escape as html_escape

raw_html = simple_get("https://medium.com/@21mvandervelden/followers")
if raw_html is None:
    # simple_get returns None on a request error or a non-HTML response;
    # stop with a clear message instead of failing inside BeautifulSoup.
    raise SystemExit("Could not fetch the followers page")

html = BeautifulSoup(raw_html, 'html.parser')
# Stringify the list of matching elements, then strip comments and tags.
followers = str(html.find_all(class_='ui-captionStrong'))
tag_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
no_tags1 = tag_re.sub("", followers)
followers1 = html_escape(no_tags1, quote=False)

# NOTE(review): this is the number of characters in the escaped text, not the
# number of matched elements. Kept as-is to preserve the script's output;
# confirm whether len(html.find_all(...)) was the intent.
c = len(followers1)

print(c)