1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
|
import os
import re
# Build `lyrics`: a mapping from track name to the set of normalised words
# found in that track's lyric file.  Each "<name>.txt" entry of the "all"
# directory is treated as one track.
tracks = [
    # Drop only the trailing extension; a ".txt" in the middle of a name
    # must survive so the path rebuilt below still points at the file.
    fn[: -len(".txt")]
    for fn in os.listdir("all")
    # Entries without the extension cannot be reopened as "<name>.txt".
    if fn.endswith(".txt")
]
lyrics = {}
for track in tracks:
    # The context manager closes the file even when reading raises.
    with open(f"all/{track}.txt") as f:
        # Normalise each whitespace-separated token: trim surrounding
        # punctuation, remove "'s" / "'d", then lower-case.
        lyrics[track] = {
            word.strip(",.?!'\"():").replace("'s", "").replace("'d", "").lower()
            for word in f.read().split()
        }
# Check every "<track>\t<word>" line of the "list" file: if the word, or a
# simple inflection of it, also occurs in the lyrics of a different track,
# print the first such match.
with open("list") as rf:  # closed automatically, even on an exception
    for line in rf.read().splitlines():
        # A blank line carries no entry; unpacking it would raise ValueError.
        if not line.strip():
            continue
        track, word = line.split("\t")
        # there might be false positives, e.g. even <-> evening, to <-> toes
        variations = [
            word + "s",
            word + "es",
            word + "d",
            word + "ed",
            word + "ing",
            # A substitution whose suffix does not match returns the word
            # unchanged, so the word itself is always among the candidates.
            re.sub(r"s$", "", word),
            re.sub(r"es$", "", word),
            re.sub(r"d$", "", word),
            re.sub(r"ed$", "", word),
            re.sub(r"ing$", "", word),
        ]
        # First hit wins: tracks are scanned in order and, within a track,
        # variations in list order.  At most one line is printed per entry.
        hit = next(
            (
                (var, other_track)
                for other_track in tracks
                if other_track != track
                for var in variations
                if var in lyrics[other_track]
            ),
            None,
        )
        if hit is not None:
            var, other_track = hit
            print(f"{word} ({track}), {var} ({other_track})")
|