ljy266987
add lfs
12bfd03
raw
history blame
6.02 kB
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; ;;
;;; Centre for Speech Technology Research ;;
;;; University of Edinburgh, UK ;;
;;; Copyright (c) 1997 ;;
;;; All Rights Reserved. ;;
;;; ;;
;;; Permission is hereby granted, free of charge, to use and distribute ;;
;;; this software and its documentation without restriction, including ;;
;;; without limitation the rights to use, copy, modify, merge, publish, ;;
;;; distribute, sublicense, and/or sell copies of this work, and to ;;
;;; permit persons to whom this work is furnished to do so, subject to ;;
;;; the following conditions: ;;
;;; 1. The code must retain the above copyright notice, this list of ;;
;;; conditions and the following disclaimer. ;;
;;; 2. Any modifications must be clearly marked as such. ;;
;;; 3. Original authors' names are not deleted. ;;
;;; 4. The authors' names are not used to endorse or promote products ;;
;;; derived from this software without specific prior written ;;
;;; permission. ;;
;;; ;;
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;;
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;;
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;;
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;;
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;;
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;;
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;;
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;;
;;; THIS SOFTWARE. ;;
;;; ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Author: Alan W Black
;;; Date: December 1997
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; THIS IS EXPERIMENTAL AND DOES *NOT* WORK
;;;
;;;
;;; An English morpho-syntax finite-state grammar
;;; This is used for morphological decomposition of unknown words
;;; specifically (only) words that are not found in the lexicon.
;;; The idea is that when an unknown word is found an attempt is made
;;; to see if it contains any well known morphological inflections or
;;; derivations; if so, a better use of LTS can be made on the root; if
;;; none are found, the word is left undecomposed.
;;;
;;;
;;; Based on "Analysis of Unknown Words through Morphological
;;; Decomposition", Black, van de Plassche, Williams, European ACL 91.
;;; with the anyword matcher from a question by Lauri Karttunen after
;;; the talk.
;;;
;;; The suffixes and finite-state morph-syntax grammar are based
;;; (very roughly) on the rules in "Computational Morphology"
;;; Ritchie et al. MIT Press 1992.
;;;
;;; Can be compiled with
;;; wfst_build -type rg -o engmorphsyn.wfst -detmin engmorphsyn.scm
;;;
;;; The result can be combined with the morphographemic rules
;;; with
;;; wfst_build -type compose engmorph.wfst engmorphsyn.wfst -detmin -o engstemmer.wfst
;;;
;;; echo "# b o x e/+ s #" | wfst_run -wfst engstemmer.wfst -recog
;;; state 0 #/# -> 1
;;; state 1 b/b -> 3
;;; state 3 o/o -> 17
;;; state 17 x/x -> 14
;;; state 14 e/+ -> 36
;;; state 36 s/s -> 34
;;; state 34 #/# -> 16
;;; OK.
;;; echo "# b o x e s #" | wfst_run -wfst engstemmer.wfst -recog
;;; state 0 #/# -> 1
;;; state 1 b/b -> 3
;;; state 3 o/o -> 17
;;; state 17 x/x -> 14
;;; state 14 e/e -> 22
;;; state 22 s/s -> -1
(RegularGrammar
 engsuffixmorphosyntax
 ;; Sets: named classes of terminal symbols that the rules below refer to.
 ;; Note that `y' is deliberately a member of both V and C.
 (
  (V a e i o u y)
  (C b c d f g h j k l m n p q r s t v w x y z)
 )
 ;; Rules
 (
  ;; Top level.  A word is the boundary symbol `#', a stem (Syls), and then
  ;; either a chain of suffixes or the closing boundary directly.
  ;; NOTE(review): the original comment here read "A word *must* have a
  ;; suffix to be recognized", but the second rule accepts a bare stem with
  ;; no suffix -- confirm which behaviour is intended.
  (Word -> # Syls Suffix )
  (Word -> # Syls End )

  ;; Stem matcher: any string of characters that contains at least one
  ;; vowel.  Each Syl is zero or more consonants, one vowel, then zero or
  ;; more consonants.
  (Syls -> Syl Syls )
  (Syls -> Syl )
  (Syl -> Cs V Cs )
  (Cs -> C Cs )
  (Cs -> )

  ;; A suffix chain may start in any of the three categories.
  (Suffix -> VerbSuffix )
  (Suffix -> NounSuffix )
  (Suffix -> AdjSuffix )

  ;; Verb suffixes: a final inflection, or a category-changing suffix that
  ;; either ends the word or continues in the new category.
  (VerbSuffix -> VerbFinal End )
  (VerbSuffix -> VerbtoNoun NounSuffix )
  (VerbSuffix -> VerbtoNoun End )
  (VerbSuffix -> VerbtoAdj AdjSuffix )
  (VerbSuffix -> VerbtoAdj End )

  ;; Noun suffixes.
  (NounSuffix -> NounFinal End )
  (NounSuffix -> NountoNoun NounSuffix )
  (NounSuffix -> NountoNoun End )
  (NounSuffix -> NountoAdj AdjSuffix )
  (NounSuffix -> NountoAdj End )
  (NounSuffix -> NountoVerb VerbSuffix )
  (NounSuffix -> NountoVerb End )

  ;; Adjective suffixes.
  (AdjSuffix -> AdjFinal End )
  (AdjSuffix -> AdjtoAdj AdjSuffix)
  (AdjSuffix -> AdjtoAdj End)
  ;; AdjtoNoun and AdjtoVerb are defined further down but were previously
  ;; never referenced, so e.g. "+ n e s s" could not be recognised.  These
  ;; four rules mirror the NounSuffix transitions above.
  (AdjSuffix -> AdjtoNoun NounSuffix)
  (AdjSuffix -> AdjtoNoun End)
  (AdjSuffix -> AdjtoVerb VerbSuffix)
  (AdjSuffix -> AdjtoVerb End)
  (AdjSuffix -> AdjtoAdv End)  ;; there isn't any Adv to anything

  (End -> # )  ;; word boundary symbol *always* present

  ;; Suffix inventory.  Every suffix begins with the `+' symbol followed by
  ;; its letters, one symbol per letter.
  (VerbFinal -> + e d)
  (VerbFinal -> + i n g)
  (VerbFinal -> + s)

  (VerbtoNoun -> + e r)
  (VerbtoNoun -> + e s s)
  (VerbtoNoun -> + a t i o n)
  (VerbtoNoun -> + i n g)
  (VerbtoNoun -> + m e n t)

  (VerbtoAdj -> + a b l e)

  (NounFinal -> + s)

  (NountoNoun -> + i s m)
  (NountoNoun -> + i s t)
  (NountoNoun -> + s h i p)

  (NountoAdj -> + l i k e)
  (NountoAdj -> + l e s s)
  (NountoAdj -> + i s h)
  (NountoAdj -> + o u s)

  (NountoVerb -> + i f y)
  (NountoVerb -> + i s e)
  (NountoVerb -> + i z e)

  (AdjFinal -> + e r)
  (AdjFinal -> + e s t)

  (AdjtoAdj -> + i s h)
  (AdjtoAdv -> + l y)
  (AdjtoNoun -> + n e s s)
  (AdjtoVerb -> + i s e)
  (AdjtoVerb -> + i z e)
 )
)