;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
;;; ;; | |
;;; Centre for Speech Technology Research ;; | |
;;; University of Edinburgh, UK ;; | |
;;; Copyright (c) 1997 ;; | |
;;; All Rights Reserved. ;; | |
;;; ;; | |
;;; Permission is hereby granted, free of charge, to use and distribute ;; | |
;;; this software and its documentation without restriction, including ;; | |
;;; without limitation the rights to use, copy, modify, merge, publish, ;; | |
;;; distribute, sublicense, and/or sell copies of this work, and to ;; | |
;;; permit persons to whom this work is furnished to do so, subject to ;; | |
;;; the following conditions: ;; | |
;;; 1. The code must retain the above copyright notice, this list of ;; | |
;;; conditions and the following disclaimer. ;; | |
;;; 2. Any modifications must be clearly marked as such. ;; | |
;;; 3. Original authors' names are not deleted. ;; | |
;;; 4. The authors' names are not used to endorse or promote products ;; | |
;;; derived from this software without specific prior written ;; | |
;;; permission. ;; | |
;;; ;; | |
;;; THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK ;; | |
;;; DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ;; | |
;;; ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT ;; | |
;;; SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE ;; | |
;;; FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;; | |
;;; WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ;; | |
;;; AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ;; | |
;;; ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF ;; | |
;;; THIS SOFTWARE. ;; | |
;;; ;; | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
;;; Author: Alan W Black | |
;;; Date: December 1997 | |
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
;;; | |
;;; THIS IS EXPERIMENTAL AND DOES *NOT* WORK | |
;;; | |
;;; Koskenniemi-style context rewrite rules for English Morphographemics | |
;;; Basically splits words into their (potential) morphemes. | |
;;; | |
;;; Based (roughly) on the rules in "Computational Morphology" | |
;;; Ritchie et al. MIT Press 1992. | |
;;; | |
;;; This is not a Scheme file and can't be loaded and evaluated | |
;;; It is designed for use with the wfst tools in the speech tools | |
;;; e.g. wfst_build -type kk -o engmorph.wfst -detmin engmorph.scm | |
;;; | |
(KKrules | |
engmorph | |
(Alphabets | |
;; Input Alphabet | |
(a b c d e f g h i j k l m n o p q r s t u v w x y z #) | |
;; Output Alphabet | |
(a b c d e f g h i j k l m n o p q r s t u v w x y z + #) | |
) | |
(Sets | |
(LET a b c d e f g h i j k l m n o p q r s t u v w x y z) | |
) | |
(Rules | |
;; The basic rules | |
( a => nil --- nil) | |
( b => nil --- nil) | |
( c => nil --- nil) | |
( d => nil --- nil) | |
( e => nil --- nil) | |
( f => nil --- nil) | |
( g => nil --- nil) | |
( h => nil --- nil) | |
( i => nil --- nil) | |
( j => nil --- nil) | |
( k => nil --- nil) | |
( l => nil --- nil) | |
( m => nil --- nil) | |
( n => nil --- nil) | |
( o => nil --- nil) | |
( p => nil --- nil) | |
( q => nil --- nil) | |
( r => nil --- nil) | |
( s => nil --- nil) | |
( t => nil --- nil) | |
( u => nil --- nil) | |
( v => nil --- nil) | |
( w => nil --- nil) | |
( x => nil --- nil) | |
( y => nil --- nil) | |
( z => nil --- nil) | |
( # => nil --- nil) | |
; ( _epsilon_/+ => (or LET _epsilon_/e ) --- (LET)) | |
( _epsilon_/+ => (or LET _epsilon_/e) --- nil) | |
;; The rules that do interesting things | |
;; Epenthesis | |
;; churches -> church+s | |
;; boxes -> box+s | |
(e/+ <=> (or (s h) (or s x z) (i/y) (c h)) | |
--- | |
(s)) | |
;; Gemination | |
(b/+ <=> ( (or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) b ) | |
--- | |
((or a e i o u))) | |
(d/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) d ) | |
--- | |
((or a e i o u))) | |
(f/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) f ) | |
--- | |
((or a e i o u))) | |
(g/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) g ) | |
--- | |
((or a e i o u))) | |
(m/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) m ) | |
--- | |
((or a e i o u))) | |
(p/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) p ) | |
--- | |
((or a e i o u))) | |
(s/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) s ) | |
--- | |
((or a e i o u))) | |
(t/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) t ) | |
--- | |
((or a e i o u))) | |
(z/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) z ) | |
--- | |
((or a e i o u))) | |
(n/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) n ) | |
--- | |
((or a e i o u))) | |
(l/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) l ) | |
--- | |
((or a e i o u))) | |
(r/+ <=> ((or b c d f g h j k l m n p q r s t v w z) (or a e i o u y) r ) | |
--- | |
((or a e i o u))) | |
;; tries->try+s | |
( i/y <=> ((or b c d f g h j k l m n p q r s t v w x z)) | |
--- | |
((or ( e/+ s ) | |
( _epsilon_/+ (or a d e f h i l m n o p s w y))))) | |
;; Elision | |
;; moved -> move+ed | |
(_epsilon_/e <=> | |
((or a e i o u ) (or b c d f g j k l m n p q r s t v x z)) | |
--- | |
( _epsilon_/+ (or a e i o u ))) | |
) | |
) | |