repl.it
@notbehind_you/

repl talk question answer finder

Python

Finds answers to questions on Repl Talk by searching various sources.

fork
loading
Files
  • main.py
  • a.html
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import repltalk
import asyncio
import aiohttp
import urllib.parse
import re
from bs4 import BeautifulSoup

# Module-level repltalk client shared by the script; main() calls its
# get_post() coroutine to fetch the post being answered.
client = repltalk.Client()

def html_to_str(html):
	"""Decode the HTML character references in *html* and return the text.

	All named and numeric references (``&lt;``, ``&gt;``, ``&amp;``,
	``&quot;``, ``&#39;`` ...) are decoded in a single pass, so an escaped
	ampersand such as ``&amp;lt;`` correctly becomes ``&lt;``. Tags are
	left untouched.

	Args:
		html: The markup fragment to decode.

	Returns:
		The decoded string, with the typographic minus sign replaced by an
		ASCII hyphen-minus.
	"""
	# Imported locally by function name: the parameter is called ``html``
	# and would shadow the standard-library module inside this function.
	from html import unescape

	text = unescape(html)
	# Replace the typographic minus sign with an ASCII hyphen-minus. This is
	# done after decoding so a minus produced by a reference is covered too.
	return text.replace('−', '-')

async def get_results_ddg(query):
	"""Search DuckDuckGo's HTML endpoint and yield one tuple per result.

	The page is downloaded in full and the HTTP session is closed before any
	result is yielded, so the consumer can take as long as it likes (or stop
	early) without a connection being held open.

	Args:
		query: Free-text search string; it is URL-encoded before sending.

	Yields:
		``(title, description, website_url)`` tuples. ``<b>`` / ``</b>``
		markup in the title and description is replaced by the ANSI escape
		sequences ``\\033[1m`` / ``\\033[22m``. ``website_url`` is the
		``href`` of the result's ``result__url`` link. Results without a
		title link or a url link are skipped; a result without a snippet
		gets an empty description.
	"""
	def bold_to_ansi(markup):
		# Swap HTML bold tags for the terminal escape sequences.
		return markup.replace('<b>', '\033[1m').replace('</b>', '\033[22m')

	def inner_html(tag):
		# Serialise the children of ``tag`` back to a markup string.
		return ''.join(map(str, tag.contents))

	query_escaped = urllib.parse.quote_plus(query)
	async with aiohttp.ClientSession() as s:
		async with s.get(
			f'https://duckduckgo.com/html/?q={query_escaped}',
			headers={
				'user-agent': 'Mozilla/5.0 (https://repl.it/talk) Firefox/66.0'
			}
		) as r:
			page = await r.text()

	soup = BeautifulSoup(page, 'html.parser')
	for result in soup.find_all('div', {'class': 'result__body'}):
		# Not every result block is guaranteed to carry every element;
		# guard each lookup instead of crashing on the first odd one.
		title_link = result.h2.a if result.h2 is not None else None
		url_link = result.find('a', {'class': 'result__url'})
		website_url = url_link.get('href') if url_link is not None else None
		if title_link is None or website_url is None:
			continue

		snippet = result.find('a', {'class': 'result__snippet'})
		description_html = inner_html(snippet) if snippet is not None else ''

		title = bold_to_ansi(inner_html(title_link))
		description = bold_to_ansi(description_html)
		yield title, description, website_url


async def tutorialspoint(url):
	"""Fetch *url* and return its "Syntax"/"Example" sections as Markdown.

	The page is scanned for ``<pre>``, ``<h2>`` and ``<p>`` elements in
	document order. Collection starts at an ``<h2>`` whose text is exactly
	``Syntax`` or ``Example`` (emitted as a ``# `` heading). While
	collecting, ``<p>`` elements are emitted as plain lines; the first
	``<pre>`` is emitted as a fenced code block and ends the section.

	Args:
		url: Address of the page to download.

	Returns:
		The collected pieces joined with newlines; an empty string when no
		matching section is found.
	"""
	async with aiohttp.ClientSession() as s:
		# Context-manage the response so the connection is released even
		# if reading the body fails.
		async with s.get(url) as r:
			html = await r.text()

	# ``[\s\S]`` matches any character including newlines, without the
	# per-character capturing alternation of ``(.|\n)``.
	block_pattern = re.compile(r'<(pre|h2|p)([a-zA-Z=" ]*)>([\s\S]+?)</\1>')

	ended_part = True  # True while we are outside a Syntax/Example section
	output = []
	for match in block_pattern.finditer(html):
		html_tag, content = match.group(1), match.group(3)

		# Code samples are kept verbatim; elsewhere bold becomes emphasis.
		if html_tag == 'pre':
			content_md = content
		else:
			content_md = content.replace('<b>', '*').replace('</b>', '*')
		content_md = html_to_str(content_md)

		if html_tag == 'h2':
			if content_md in {'Syntax', 'Example'}:
				output.append('# ' + content_md)
				ended_part = False
		elif not ended_part:
			if html_tag == 'p':
				output.append(content_md)
			elif html_tag == 'pre':
				output.append('```\n' + content_md + '\n```')
				# The code block closes the current section.
				ended_part = True
	return '\n'.join(output)

async def quora(url):
	"""Fetch *url* and return the contents of its first answer paragraph.

	The text is returned rather than printed so the caller decides what to
	do with it, matching the other site handler in this file.

	Args:
		url: Address of the page to download.

	Returns:
		The inner markup of the first
		``<p class="ui_qtext_para u-ltr u-text-align--start">`` element that
		sits on a single line, or an empty string when the page contains no
		such paragraph.
	"""
	async with aiohttp.ClientSession() as s:
		# Context-manage the response so the connection is released even
		# if reading the body fails.
		async with s.get(url) as r:
			html = await r.text()

	first_paragraph = re.search(
		r'<p class="ui_qtext_para u-ltr u-text-align--start">(.{1,}?)</p>',
		html,
	)
	if first_paragraph is None:
		# Nothing matched; report "no answer" instead of raising IndexError.
		return ''
	return first_paragraph.group(1)

# Dispatch table: host name of a search result -> coroutine that takes the
# result URL and scrapes that site. main() looks each result's host up here.
sites = {
	'www.tutorialspoint.com': tutorialspoint,
	'www.quora.com': quora
}

async def main(post_id=13657, query="why is my pp hard"):
	"""Search for *query* and print the answer from the first supported site.

	The post is fetched through the module-level ``client``. When the post
	exposes a repl language whose name is not already part of the query, the
	language name is appended to narrow the search. Search results are then
	walked in order; the first one whose host has a handler in ``sites`` is
	scraped, its result printed, and the search stops.

	Args:
		post_id: Id of the post to fetch.
		query: Search text to start from.
	"""
	post = await client.get_post(post_id)

	# Best effort: only the attribute chain is guarded.
	# NOTE(review): presumably AttributeError means the post has no repl
	# or language attached - confirm against repltalk.
	try:
		lang = post.repl.language.display_name
	except AttributeError:
		lang = None

	if isinstance(lang, str):
		if lang == 'C++11':
			lang = 'C++'
		if lang.lower() not in query.lower():
			query += ' ' + lang
	print('Query:', query)

	async for _title, _description, url in get_results_ddg(query):
		# urlsplit copes with URLs that have no path after the host, where
		# slicing up to find('/') == -1 would drop the host's last character.
		host = urllib.parse.urlsplit(url).netloc
		handler = sites.get(host)
		if handler is None:
			continue
		response = await handler(url)
		print(response)
		break


# Script entry point: take the event loop returned by
# asyncio.get_event_loop() and block until main() has finished.
# NOTE(review): kept on this loop rather than a fresh one because `client`
# is constructed at import time and may be tied to it - confirm before
# switching to asyncio.run().
loop = asyncio.get_event_loop()
loop.run_until_complete(main())
Fetching token
?