blob: 3b740650146dacdcf3bbec8274ca30a0e7584c9c (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
|
import os
from collections import Counter

# Characters trimmed from both ends of a token before it is compared.
PUNCTUATION = ",.?!'\"():"
LYRICS_SUFFIX = ".txt"


def track_names(lyrics_dir="all"):
    """Return the sorted track names found in *lyrics_dir*.

    A track is any file ending in ``.txt``; its name is the filename with
    only that trailing suffix removed, so a name such as ``a.txt.b.txt``
    maps to ``a.txt.b`` instead of being mangled.
    """
    return sorted(
        fn[: -len(LYRICS_SUFFIX)]
        for fn in os.listdir(lyrics_dir)
        if fn.endswith(LYRICS_SUFFIX)
    )


def normalize_words(text):
    """Return the set of best-effort normalized words in *text*.

    Each whitespace-separated token has surrounding punctuation trimmed,
    is lowercased, and has the ``'s`` / ``'d`` contractions removed.
    Lowercasing happens first so that ``IT'S`` and ``it's`` both become
    ``it``. Tokens that normalize to the empty string (pure punctuation)
    are dropped because they are not words.
    """
    words = set()
    for token in text.split():
        word = token.strip(PUNCTUATION).lower()
        word = word.replace("'s", "").replace("'d", "")
        if word:
            words.add(word)
    return words


def load_lyrics(lyrics_dir="all"):
    """Map every track name in *lyrics_dir* to its set of normalized words."""
    lyrics = {}
    for track in track_names(lyrics_dir):
        path = os.path.join(lyrics_dir, track + LYRICS_SUFFIX)
        # The context manager closes the file even if reading fails.
        with open(path, encoding="utf-8") as f:
            lyrics[track] = normalize_words(f.read())
    return lyrics


def find_unique_words(lyrics):
    """Return ``(track, word)`` pairs for words used by exactly one track.

    *lyrics* maps a track name to the set of words in that track. Pairs
    follow the mapping's track order, with words sorted within a track so
    the result is reproducible from run to run.
    """
    # Each track contributes a word at most once (the values are sets), so
    # a count of 1 means the word appears in no other track.
    track_counts = Counter(word for words in lyrics.values() for word in words)
    unique = []
    for track, words in lyrics.items():
        for word in sorted(words):
            if track_counts[word] == 1:
                unique.append((track, word))
    return unique


def main(lyrics_dir="all", results_path="results"):
    """Write one tab-separated ``track<TAB>word`` line per unique word."""
    lyrics = load_lyrics(lyrics_dir)
    with open(results_path, "w", encoding="utf-8") as rf:
        rf.writelines(
            f"{track}\t{word}\n" for track, word in find_unique_words(lyrics)
        )


if __name__ == "__main__":
    main()
|