loading
open in
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import pandas as pd
# Read the pipe-separated source file; it has no header row.
quran = pd.read_csv('quran-simple-clean.txt', header=None, sep="|")
# Keep only the first 6236 rows; the rows after them are end-of-file information.
quran = quran.drop(index=quran.index[6236:])
# Give the columns descriptive names.
quran.columns = ['sura_no', 'aya_no', 'text']
# Convert both numbering columns to numeric values, downcast to an integer
# dtype where possible.
quran = quran.assign(
    sura_no=pd.to_numeric(quran['sura_no'], downcast='integer'),
    aya_no=pd.to_numeric(quran['aya_no'], downcast='integer'),
)

# Collect the distinct words of one sura.
# A non-zero 'neg' inverts the selection: words come from every sura except this one.
def unique_words(sura, neg=0):
    """Return the set of distinct whitespace-separated words in the selected rows.

    Reads the module-level ``quran`` DataFrame.

    Args:
        sura: Value compared against the ``sura_no`` column.
        neg: When 0 (the default), rows whose ``sura_no`` equals ``sura`` are
            used; for any other value, rows whose ``sura_no`` differs are used.

    Returns:
        A set containing every word that appears in the ``text`` column of
        the selected rows.
    """
    mask = (quran['sura_no'] == sura) if neg == 0 else (quran['sura_no'] != sura)
    words = set()
    # Each entry is the list of words obtained by splitting one row's text.
    for tokens in quran[mask].text.str.split().tolist():
        words.update(tokens)
    return words

# Uses unique_words() to find the words that occur in this sura and in no other.
def unique(sura):
    """Return a sorted list of the words used only in the given sura.

    Args:
        sura: Sura number passed through to ``unique_words``.

    Returns:
        The words present in this sura's word set but absent from the word
        set of all other suras, in ascending sorted order.
    """
    inside = unique_words(sura)
    elsewhere = unique_words(sura, 1)
    exclusive = inside - elsewhere
    return sorted(exclusive)

# Prompt for a sura number, then print the words found only in that sura,
# joined into a single comma-separated line.
s = input("Enter Sura No.: ")
out = ','.join(unique(int(s)))
print(out)