repl.it
@TonySiu/

web scraping

Python

No description

fork
loading
Files
  • main.py
  • dealers.csv
  • poetry.lock
  • pyproject.toml
  • requirements.txt
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import csv
import urllib.request

from bs4 import BeautifulSoup

# Load the list of state dealer-page URLs, one URL per line.
# NOTE: readlines() keeps each line's trailing newline; the scraping loop
# below strips it before use and relies on it when slicing the state name.
with open('cars.txt') as f:
  states = f.readlines()

# Scrape every state page and collect its dealers into
# final_data[state_name] = {'dealers': [{'name', 'city', 'inventory'}, ...]}.
final_data = {}
for state in states:
  # readlines() keeps the trailing newline; http.client rejects URLs that
  # contain control characters, so the request must use the stripped form.
  url = state.strip()
  # NOTE(review): magic slice — assumes every URL shares a fixed 41-char
  # prefix and ends with "/\n"; confirm against the actual cars.txt format.
  state_name = state[41:-2]
  print(state_name)
  state_data = {}
  final_data[state_name] = state_data
  page = urllib.request.urlopen(url).read()
  # Name the parser explicitly: omitting it emits a bs4 warning and makes
  # output depend on which parsers happen to be installed.
  soup = BeautifulSoup(page, 'html.parser')
  dealers = []
  state_data['dealers'] = dealers
  for table in soup.find_all('table'):
    rows = table.find_all('tr')
    # The first three rows are header/formatting rows; data starts at row 3.
    for r in rows[3:]:
      data = r.find_all('td')
      if len(data) < 3:
        continue  # skip spacer/malformed rows instead of raising IndexError
      dealer = {}
      dealer['name'] = data[0].text.strip()
      dealer['city'] = data[1].text.strip()
      links = data[2].find_all('a', href=True)
      # Only build an inventory URL when the cell actually contains a link;
      # the original indexed links[0] unguarded and could raise IndexError.
      if data[2].text.strip() and links:
        dealer['inventory'] = 'https://www.globalautosports.com' + links[0]['href']
      else:
        dealer['inventory'] = 'N/A'
      dealers.append(dealer)

# Flatten final_data into dealers.csv, one row per dealer.
# newline='' is required by the csv module; without it the writer emits
# blank rows on Windows (csv controls its own line endings).
with open('dealers.csv', 'w', newline='') as csvfile:
  writer = csv.writer(csvfile)
  writer.writerow(['State', 'Dealer', 'City', 'Inventory'])
  # Iterate items() to avoid a second dict lookup per state.
  for state, state_data in final_data.items():
    for dealer in state_data['dealers']:
      writer.writerow([state, dealer['name'], dealer['city'], dealer['inventory']])