Switch to wikipedia words
This commit is contained in:
parent
85180cd926
commit
55b61ad858
680
blacklist.txt
680
blacklist.txt
File diff suppressed because it is too large
Load Diff
@ -1,7 +1,7 @@
|
|||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
set -e
|
set -e
|
||||||
word_file=deu_news_2021_100K-words.txt
|
word_file=deu_wikipedia_2021_1M-words.txt
|
||||||
cat $word_file | head -n15000 | cut -f2 > top10000de_utf8.txt
|
cat $word_file | head -n35000 | cut -f2 > top10000de_utf8.txt
|
||||||
awk '{ if (length($0) == 5) print tolower($0) }' top10000de_utf8.txt > top10000de_utf8_len5.txt
|
awk '{ if (length($0) == 5) print tolower($0) }' top10000de_utf8.txt > top10000de_utf8_len5.txt
|
||||||
cat top10000de_utf8_len5.txt | rg "^([A-Za-z]|ä|ö|ü|Ä|Ö|Ü)+\$" | sort | uniq > top10000de_utf8_len5_filtered.txt
|
cat top10000de_utf8_len5.txt | rg "^([A-Za-z]|ä|ö|ü|Ä|Ö|Ü)+\$" | sort | uniq > top10000de_utf8_len5_filtered.txt
|
||||||
comm -23 top10000de_utf8_len5_filtered.txt blacklist.txt > valid_words.txt
|
comm -23 top10000de_utf8_len5_filtered.txt blacklist.txt > valid_words.txt
|
||||||
|
6404
valid_guesses.txt
6404
valid_guesses.txt
File diff suppressed because it is too large
Load Diff
600
valid_words.txt
600
valid_words.txt
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user