mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-10-22 02:25:05 +00:00
70 lines
2.2 KiB
Plaintext
70 lines
2.2 KiB
Plaintext
|
#!/bin/sh
|
||
|
|
||
|
#
|
||
|
# This script creates a new dictionary by expanding the original,
|
||
|
# Mozilla's, and the upstream dictionary to remove affix flags and
|
||
|
# then doing the wordlist equivalent of diff3 to create a new
|
||
|
# dictionary.
|
||
|
#
|
||
|
# The files 2-mozilla-add and 2-mozilla-rem contain words added and
|
||
|
# removed, respectively, in the Mozilla dictionary.  The final
|
||
|
# dictionary will be in hunspell-en_US-mozilla.zip.
|
||
|
#
|
||
|
|
||
|
# Abort on the first command that fails.
set -e

# Force the C locale so that sort(1) and comm(1) agree on a byte-wise
# ordering of the word lists.
export LANG=C
export LC_ALL=C
export LC_CTYPE=C
export LC_COLLATE=C

# Directory the script was started from; the numbered work files are
# created here.
WKDIR="$(pwd)"

# Exported so that child processes can see it.
export SCOWL="$WKDIR/scowl/"

# ORIG is where the original en_US-custom.dic is read from; SPELLER holds
# munch-list and make-hunspell-dict.
ORIG="$WKDIR/orig/"
SPELLER="$SCOWL/speller"
|
||
|
|
||
|
# expand AFF_FILE
#
# Filter: reads a word list on stdin, drops every line that consists only
# of digits (presumably the entry-count header of a .dic file), runs the
# remainder through "munch-list expand AFF_FILE" to remove affix flags,
# and writes the sorted, de-duplicated result to stdout.
#
# Globals:   SPELLER (read) - directory containing munch-list
# Arguments: $1 - path to the affix (.aff) file handed to munch-list
expand() {
  # '[0-9][0-9]*' is the POSIX basic-regex spelling of "one or more digits".
  # The path and the argument are quoted so whitespace in either survives.
  grep -v '^[0-9][0-9]*$' | "$SPELLER/munch-list" expand "$1" | sort -u
}
|
||
|
|
||
|
# Build the upstream en_US-custom dictionary from inside the speller
# directory, then return to the working directory.
cd "$SPELLER"

# Command line that produces the upstream word list.  It is kept in a
# plain string and expanded unquoted below on purpose, so that it splits
# into the command name and its arguments.
MK_LIST="../mk-list -v1 --accents=both en_US 60"

# Record the generating command in params.txt, which is passed to
# make-hunspell-dict below.
cat <<EOF > params.txt
With Input Command: $MK_LIST
EOF

# note: output of make-hunspell-dict is utf-8
$MK_LIST | ./make-hunspell-dict -one en_US-custom params.txt > ./make-hunspell-dict.log

cd "$WKDIR"
|
||
|
|
||
|
# Note: Input and output of "expand" is always iso-8859-1.
# All expanded word list files are thus in iso-8859-1.

# 0-special: expansion of the supplementary en.dic.supp word list.
expand "$SPELLER/en.aff" < "$SPELLER/en.dic.supp" > 0-special # input: ASCII

# 1-base.txt: expansion of the original dictionary from $ORIG.
# input in utf-8, expand expects iso-8859-1 so use iconv
iconv -f utf-8 -t iso-8859-1 "$ORIG/en_US-custom.dic" | expand "$SPELLER/en_US-custom.aff" > 1-base.txt

# 2-mozilla.txt: expansion of Mozilla's dictionary in the parent directory.
expand ../en-US.aff < ../en-US.dic > 2-mozilla.txt # input: iso-8859-1

# 3-upstream.txt: expansion of the upstream dictionary from $SPELLER.
# input in utf-8, expand expects iso-8859-1 so use iconv
iconv -f utf-8 -t iso-8859-1 "$SPELLER/en_US-custom.dic" | expand "$SPELLER/en_US-custom.aff" > 3-upstream.txt
|
||
|
|
||
|
# Word-list equivalent of diff3.  All inputs are sorted, which comm(1)
# requires.

# Words in the base list but not in Mozilla's: removed by Mozilla.
comm -23 1-base.txt 2-mozilla.txt > 2-mozilla-rem
# Words in Mozilla's list but not in the base: added by Mozilla.
comm -13 1-base.txt 2-mozilla.txt > 2-mozilla-add
# Apply both changes to upstream: drop the removed words, append the added
# ones, then re-sort and de-duplicate.
comm -23 3-upstream.txt 2-mozilla-rem | cat - 2-mozilla-add | sort -u > 4-patched.txt
|
||
|
|
||
|
# Build the en_US-mozilla dictionary from the patched word list, leaving
# out every word that also appears in 0-special.
# note: output of make-hunspell-dict is utf-8
comm -23 4-patched.txt 0-special | "$SPELLER/make-hunspell-dict" -one en_US-mozilla /dev/null

# sanity check should yield identical results
#comm -23 1-base.txt 3-upstream.txt > 3-upstream-rem
#comm -13 1-base.txt 3-upstream.txt > 3-upstream-add
#comm -23 2-mozilla.txt 3-upstream-rem | cat - 3-upstream-add | sort -u > 4-patched-v2.txt
|
||
|
|
||
|
# Expand the hand-maintained mozilla-specific.txt list with Mozilla's
# affix file.
expand ../en-US.aff < mozilla-specific.txt > 5-mozilla-specific

# Words Mozilla removed that are present in the upstream list.
comm -12 3-upstream.txt 2-mozilla-rem > 5-mozilla-removed
# Words Mozilla added that are absent from the upstream list.
comm -13 3-upstream.txt 2-mozilla-add > 5-mozilla-added
|