repl.it
@K0DYungDeku/

Youtube Strip Element Timed Text

Python

No description

fork
loading
Files
  • main.py
  • 1readText.txt
  • 2writeText.txt
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# This project strips the markup from YouTube timed-text captions, leaving timestamps and plain text.
import re
import io


# Accumulators: line2 collects the tag-stripped markup, final the finished transcript.
line2, final = "", ""
text = open("1readText.txt", "r")  # input handle; consumed by the `with` block further down

def humanize_time(secs):
    """Format a duration in seconds as ``MM:SS`` or ``HH:MM:SS``.

    Args:
        secs: Duration in seconds (int or float). Fractional parts are
            truncated by the ``%d`` formatting.

    Returns:
        ``'MM:SS'`` when the duration is under one hour, otherwise
        ``'HH:MM:SS'``.
    """
    mins, secs = divmod(secs, 60)
    hours, mins = divmod(mins, 60)

    # Choose the long form from ``hours`` itself: after the divmod above
    # ``secs`` is always below 60, so comparing it against a large threshold
    # could never select the hour field.
    if hours > 0:
        return '%02d:%02d:%02d' % (hours, mins, secs)
    return '%02d:%02d' % (mins, secs)

# First pass: read the raw caption markup and drop the <s ...>, </s> and </p>
# tags, keeping the <p t=...> opening tags for the timestamp pass that follows.
# Opening <s ...> tags are removed with a regex substitution. Rebuilding the
# tag from the printed form of a match object only worked when the tag started
# the line and its span end had two digits, and the greedy pattern could also
# swallow caption text sitting between two tags on one line.
S_OPEN_TAG = re.compile(r"<s\b[^>]*>")
with text as f:  # closes the input file when the block exits
    stripped_lines = []
    for line in f:
        line = S_OPEN_TAG.sub("", line)
        stripped_lines.append(line.replace("</s>", "").replace("</p>", ""))
line2 += "".join(stripped_lines)


# Second pass over the stripped markup: each caption's <p t="..."> line becomes
# a timestamp header and the text lines after it are joined onto one line.
counter = 0  # 1 right after a header was written, so the blank duplicate <p t...> is skipped
pieces = []
for textLines in io.StringIO(line2):
    if not textLines.startswith("<p t"):
        pieces.append(textLines.replace("\n", ""))
        continue
    if counter >= 1:  # blank double of the <p ...> just handled
        counter = 0
        continue
    # Take the start time from the t attribute itself. Searching for any run of
    # five or more digits failed on captions with t < 10000 and could pick up
    # another attribute's value instead.
    start = re.search(r"""\bt=["']?(\d+)""", textLines)
    if start is None:
        raise ValueError("no numeric t attribute in line: %r" % textLines)
    # t is divided by 1000 before formatting, as in the original conversion.
    pieces.append("\n" + humanize_time(int(start.group(1)) / 1000) + "\n")
    counter = 1
final += "".join(pieces)
      


# Show the cleaned transcript, then save it to the output file.
print(final)


# `with` guarantees the output file is closed even if the write fails.
with open("2writeText.txt", "w") as j:
    j.write(final)


#https://regexr.com/
Fetching token
?