@21natzil/

repl scrapper

Python

This is a web scraper targeted at farming repls. I tried to parse the dates back into a better format, but that proved to be a challenge.

Files
  • main.py
  • results.csv
main.py
import csv

import demiurge


class ReplScrapper(demiurge.Item):
    """Scrapes one entry from the public repls listing on repl.it."""

    name = demiurge.TextField(selector="a:nth-child(1) > span:nth-child(2)")
    author = demiurge.TextField(selector="a:nth-child(1) > div:nth-child(3)")
    language = demiurge.TextField(selector="a:nth-child(1) > div:nth-child(4)")
    creation_date = demiurge.TextField(selector="a:nth-child(1) > div:nth-child(5)")

    class Meta:
        # Each repl in the listing is rendered as one of these <li> elements.
        selector = "li.jsx-3299880996"
        base_url = "https://repl.it"


repls = ReplScrapper.all("/site/repls")

# newline="" lets the csv module control line endings itself.
with open("results.csv", "w", newline="") as file:
    writer = csv.DictWriter(file, ["url", "name", "author", "profile", "language", "creation_date"])
    writer.writeheader()
    for repl in repls:
        # The scraped author text reads "by <username>", so drop the "by " prefix.
        author = repl.author[3:]
        writer.writerow(
            {
                "url": "https://repl.it/repls/" + repl.name,
                "name": repl.name,
                "author": author,
                # Anonymous repls have no profile page to link to.
                "profile": None if author == "anonymous" else "https://repl.it/@" + author,
                "language": repl.language,
                "creation_date": repl.creation_date,
            }
        )
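
On the date problem mentioned in the description: demiurge only returns the raw text of the date cell, and the listing appears to show relative dates such as "3 days ago" (an assumption, not something verified here). A minimal sketch of turning that text back into an approximate ISO timestamp with just the standard library could look like this; parse_relative_date is a hypothetical helper, not part of the scraper above.

from datetime import datetime, timedelta, timezone

# Rough seconds-per-unit table; months and years are averages, which is
# good enough for an approximate creation date.
UNIT_SECONDS = {
    "second": 1,
    "minute": 60,
    "hour": 3600,
    "day": 86400,
    "week": 604800,
    "month": 2629800,
    "year": 31557600,
}


def parse_relative_date(text):
    """Convert strings like "3 days ago" to an ISO timestamp.

    Anything that does not match the expected "<number> <unit> ago" shape
    is returned unchanged, so no information is lost in the CSV.
    """
    parts = text.strip().lower().split()
    if len(parts) == 3 and parts[2] == "ago":
        amount, unit = parts[0], parts[1].rstrip("s")
        if unit in UNIT_SECONDS:
            if amount in ("a", "an"):
                count = 1
            elif amount.isdigit():
                count = int(amount)
            else:
                return text
            delta = timedelta(seconds=count * UNIT_SECONDS[unit])
            return (datetime.now(timezone.utc) - delta).isoformat()
    return text

If the listing really does use that format, the loop above could write parse_relative_date(repl.creation_date) into the creation_date column instead of the raw text, and unrecognised strings would simply pass through unchanged.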