Spell Corrector in AliceML
It seems that writing a spell correcor is becoming my new post hello world program, so well, after having this in my hard disk for a while I thought I would publish my new solution, wrote using AliceML.
I'm really not good at ML, so this code must not be considered a good example of how to code, but I thing it's a good thing to keep track of my learning :)
import structure MkRedBlackSet from "x-alice:/lib/data/MkRedBlackSet";
import structure MkRedBlackMap from "x-alice:/lib/data/MkRedBlackMap";
structure Set = MkRedBlackSet String;
structure Map = MkRedBlackMap String;
val wordlist = "/home/rff/wordlist.txt";
val alphabet = String.explode "abcdefghijklmnopqrstuvwxyz";
open List;
fun take2(i1,i2) = (fn cl => (take (cl,i1), drop (cl,i2)))
fun delete cl i =
let
val (first, second) = take2 (i,i+1) cl
in
String.implode (first @ second)
end
fun deletions cl = tabulate ((length cl), (delete cl));
fun substitute cl i =
let
val (first,second) = take2 (i, i+1) cl
fun sub_char c = String.implode (first @ [c] @ second)
in
map sub_char alphabet
end
fun substitutions cl = concat( tabulate ((length cl), (substitute cl)));
fun insert cl i =
let
val (first, second) = take2 (i,i) cl
fun ins_char c = String.implode (first @ [c] @ second)
in
map ins_char alphabet
end
fun insertions cl = concat( tabulate ((length cl)+1, (insert cl)));
fun transpose cl i =
let
val (first,second) = take2 (i,i+2) cl
val c1 = nth (cl,i)
val c2 = nth (cl,i+1)
in
String.implode (first @ [c2] @[c1] @ second)
end
fun transpositions cl = tabulate ((length cl)-1, (transpose cl));
fun edits1 word =
let
val w = String.explode word
val ws = deletions w @ transpositions w @ insertions w @ substitutions w
in
foldl (Fn.flip Set.insert) (Set.empty) ws
end
fun insertOrUpdate map key =
case Map.lookup (map, key) of
NONE => Map.insert (map, key, ref 1)
|SOME value => (value := (!value +1) ; map)
(* train *)
fun train io =
let
val chop = fn str => String.substring (str, 0, (String.size str) - 1)
in
case TextIO.inputLine io of
NONE => Map.empty
|SOME line => insertOrUpdate (train io) (chop line)
end
val nwords = train ( TextIO.openIn wordlist);
fun known_edits2 word =
let
fun set_edits set = Set.fold (fn (str,set) => Set.union (set, known (edits1 str))) Set.empty set
in
set_edits (edits1 word)
end
fun look k = !(valOf (Map.lookup (nwords, k)));
fun correct word =
let
val candidates = find (not o Set.isEmpty)
[ known (Set.fromList [word]),
known (edits1 word),
known_edits2 word]
val sort = sort (fn (a,b)=>Int.compare ((look b), (look a))) o Set.toList
in
case candidates of
SOME words => hd(sort words)
|NONE => word
end
correct "ciao" (*ciao match*);
correct "cao" (*ciao edit 1*);
correct "mioo" (*miao edit 1*);
correct "miaoo"(*miao edit 1*);
correct "mooo" (*mao edit 2 max value*);
correct "quux" (*quux no match*);
The worse part is the string manipulation, I think, and maybe the fact that I use references (aka mutable variables) when building the frequency count Map, but in the end I'm quite satisfied. Feel free to point out obvious errors, if you see them.
Sorry, comments are closed for this article.