Spaces:
Sleeping
Sleeping
.. Copyright (C) 2001-2023 NLTK Project | |
.. For license information, see LICENSE.TXT | |
============================== | |
Combinatory Categorial Grammar | |
============================== | |
Relative Clauses | |
---------------- | |
>>> from nltk.ccg import chart, lexicon | |
Construct a lexicon: | |
>>> lex = lexicon.fromstring(''' | |
... :- S, NP, N, VP | |
... | |
... Det :: NP/N | |
... Pro :: NP | |
... Modal :: S\\NP/VP | |
... | |
... TV :: VP/NP | |
... DTV :: TV/NP | |
... | |
... the => Det | |
... | |
... that => Det | |
... that => NP | |
... | |
... I => Pro | |
... you => Pro | |
... we => Pro | |
... | |
... chef => N | |
... cake => N | |
... children => N | |
... dough => N | |
... | |
... will => Modal | |
... should => Modal | |
... might => Modal | |
... must => Modal | |
... | |
... and => var\\.,var/.,var | |
... | |
... to => VP[to]/VP | |
... | |
... without => (VP\\VP)/VP[ing] | |
... | |
... be => TV | |
... cook => TV | |
... eat => TV | |
... | |
... cooking => VP[ing]/NP | |
... | |
... give => DTV | |
... | |
... is => (S\\NP)/NP | |
... prefer => (S\\NP)/NP | |
... | |
... which => (N\\N)/(S/NP) | |
... | |
... persuade => (VP/VP[to])/NP | |
... ''') | |
>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) | |
>>> for parse in parser.parse("you prefer that cake".split()): | |
... chart.printCCGDerivation(parse) | |
... break | |
... | |
you prefer that cake | |
NP ((S\NP)/NP) (NP/N) N | |
--------------> | |
NP | |
---------------------------> | |
(S\NP) | |
--------------------------------< | |
S | |
>>> for parse in parser.parse("that is the cake which you prefer".split()): | |
... chart.printCCGDerivation(parse) | |
... break | |
... | |
that is the cake which you prefer | |
NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/NP) | |
----->T | |
(S/(S\NP)) | |
------------------>B | |
(S/NP) | |
----------------------------------> | |
(N\N) | |
----------------------------------------< | |
N | |
------------------------------------------------> | |
NP | |
-------------------------------------------------------------> | |
(S\NP) | |
-------------------------------------------------------------------< | |
S | |
Some other sentences to try: | |
"that is the cake which we will persuade the chef to cook" | |
"that is the cake which we will persuade the chef to give the children" | |
>>> sent = "that is the dough which you will eat without cooking".split() | |
>>> nosub_parser = chart.CCGChartParser(lex, chart.ApplicationRuleSet + | |
... chart.CompositionRuleSet + chart.TypeRaiseRuleSet) | |
Without Substitution (no output) | |
>>> for parse in nosub_parser.parse(sent): | |
... chart.printCCGDerivation(parse) | |
With Substitution: | |
>>> for parse in parser.parse(sent): | |
... chart.printCCGDerivation(parse) | |
... break | |
... | |
that is the dough which you will eat without cooking | |
NP ((S\NP)/NP) (NP/N) N ((N\N)/(S/NP)) NP ((S\NP)/VP) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP) | |
----->T | |
(S/(S\NP)) | |
------------------------------------->B | |
((VP\VP)/NP) | |
----------------------------------------------<Sx | |
(VP/NP) | |
----------------------------------------------------------->B | |
((S\NP)/NP) | |
---------------------------------------------------------------->B | |
(S/NP) | |
--------------------------------------------------------------------------------> | |
(N\N) | |
---------------------------------------------------------------------------------------< | |
N | |
-----------------------------------------------------------------------------------------------> | |
NP | |
------------------------------------------------------------------------------------------------------------> | |
(S\NP) | |
------------------------------------------------------------------------------------------------------------------< | |
S | |
Conjunction | |
----------- | |
>>> from nltk.ccg.chart import CCGChartParser, ApplicationRuleSet, CompositionRuleSet | |
>>> from nltk.ccg.chart import SubstitutionRuleSet, TypeRaiseRuleSet, printCCGDerivation | |
>>> from nltk.ccg import lexicon | |
Lexicons for the tests: | |
>>> test1_lex = ''' | |
... :- S,N,NP,VP | |
... I => NP | |
... you => NP | |
... will => S\\NP/VP | |
... cook => VP/NP | |
... which => (N\\N)/(S/NP) | |
... and => var\\.,var/.,var | |
... might => S\\NP/VP | |
... eat => VP/NP | |
... the => NP/N | |
... mushrooms => N | |
... parsnips => N''' | |
>>> test2_lex = ''' | |
... :- N, S, NP, VP | |
... articles => N | |
... the => NP/N | |
... and => var\\.,var/.,var | |
... which => (N\\N)/(S/NP) | |
... I => NP | |
... anyone => NP | |
... will => (S/VP)\\NP | |
... file => VP/NP | |
... without => (VP\\VP)/VP[ing] | |
... forget => VP/NP | |
... reading => VP[ing]/NP | |
... ''' | |
Tests handling of conjunctions. | |
Note that while the two derivations are different, they are semantically equivalent. | |
>>> lex = lexicon.fromstring(test1_lex) | |
>>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet) | |
>>> for parse in parser.parse("I will cook and might eat the mushrooms and parsnips".split()): | |
... printCCGDerivation(parse) | |
I will cook and might eat the mushrooms and parsnips | |
NP ((S\NP)/VP) (VP/NP) ((_var0\.,_var0)/.,_var0) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var0\.,_var0)/.,_var0) N | |
---------------------->B | |
((S\NP)/NP) | |
---------------------->B | |
((S\NP)/NP) | |
-------------------------------------------------> | |
(((S\NP)/NP)\.,((S\NP)/NP)) | |
-----------------------------------------------------------------------< | |
((S\NP)/NP) | |
-------------------------------------> | |
(N\.,N) | |
------------------------------------------------< | |
N | |
--------------------------------------------------------> | |
NP | |
-------------------------------------------------------------------------------------------------------------------------------> | |
(S\NP) | |
-----------------------------------------------------------------------------------------------------------------------------------< | |
S | |
I will cook and might eat the mushrooms and parsnips | |
NP ((S\NP)/VP) (VP/NP) ((_var0\.,_var0)/.,_var0) ((S\NP)/VP) (VP/NP) (NP/N) N ((_var0\.,_var0)/.,_var0) N | |
---------------------->B | |
((S\NP)/NP) | |
---------------------->B | |
((S\NP)/NP) | |
-------------------------------------------------> | |
(((S\NP)/NP)\.,((S\NP)/NP)) | |
-----------------------------------------------------------------------< | |
((S\NP)/NP) | |
------------------------------------------------------------------------------->B | |
((S\NP)/N) | |
-------------------------------------> | |
(N\.,N) | |
------------------------------------------------< | |
N | |
-------------------------------------------------------------------------------------------------------------------------------> | |
(S\NP) | |
-----------------------------------------------------------------------------------------------------------------------------------< | |
S | |
Tests handling subject extraction. | |
It is interesting to point out that the two parses are clearly semantically different. | |
>>> lex = lexicon.fromstring(test2_lex) | |
>>> parser = CCGChartParser(lex, ApplicationRuleSet + CompositionRuleSet + SubstitutionRuleSet) | |
>>> for parse in parser.parse("articles which I will file and forget without reading".split()): | |
... printCCGDerivation(parse) | |
articles which I will file and forget without reading | |
N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var0\.,_var0)/.,_var0) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP) | |
-----------------< | |
(S/VP) | |
------------------------------------->B | |
((VP\VP)/NP) | |
----------------------------------------------<Sx | |
(VP/NP) | |
-------------------------------------------------------------------------> | |
((VP/NP)\.,(VP/NP)) | |
----------------------------------------------------------------------------------< | |
(VP/NP) | |
--------------------------------------------------------------------------------------------------->B | |
(S/NP) | |
-------------------------------------------------------------------------------------------------------------------> | |
(N\N) | |
-----------------------------------------------------------------------------------------------------------------------------< | |
N | |
articles which I will file and forget without reading | |
N ((N\N)/(S/NP)) NP ((S/VP)\NP) (VP/NP) ((_var0\.,_var0)/.,_var0) (VP/NP) ((VP\VP)/VP['ing']) (VP['ing']/NP) | |
-----------------< | |
(S/VP) | |
------------------------------------> | |
((VP/NP)\.,(VP/NP)) | |
---------------------------------------------< | |
(VP/NP) | |
------------------------------------->B | |
((VP\VP)/NP) | |
----------------------------------------------------------------------------------<Sx | |
(VP/NP) | |
--------------------------------------------------------------------------------------------------->B | |
(S/NP) | |
-------------------------------------------------------------------------------------------------------------------> | |
(N\N) | |
-----------------------------------------------------------------------------------------------------------------------------< | |
N | |
Unicode support | |
--------------- | |
Unicode words are supported. | |
>>> from nltk.ccg import chart, lexicon | |
Lexicons for the tests: | |
>>> lex = lexicon.fromstring(''' | |
... :- S, N, NP, PP | |
... | |
... AdjI :: N\\N | |
... AdjD :: N/N | |
... AdvD :: S/S | |
... AdvI :: S\\S | |
... Det :: NP/N | |
... PrepNPCompl :: PP/NP | |
... PrepNAdjN :: S\\S/N | |
... PrepNAdjNP :: S\\S/NP | |
... VPNP :: S\\NP/NP | |
... VPPP :: S\\NP/PP | |
... VPser :: S\\NP/AdjI | |
... | |
... auto => N | |
... bebidas => N | |
... cine => N | |
... ley => N | |
... libro => N | |
... ministro => N | |
... panadería => N | |
... presidente => N | |
... super => N | |
... | |
... el => Det | |
... la => Det | |
... las => Det | |
... un => Det | |
... | |
... Ana => NP | |
... Pablo => NP | |
... | |
... y => var\\.,var/.,var | |
... | |
... pero => (S/NP)\\(S/NP)/(S/NP) | |
... | |
... anunció => VPNP | |
... compró => VPNP | |
... cree => S\\NP/S[dep] | |
... desmintió => VPNP | |
... lee => VPNP | |
... fueron => VPPP | |
... | |
... es => VPser | |
... | |
... interesante => AdjD | |
... interesante => AdjI | |
... nueva => AdjD | |
... nueva => AdjI | |
... | |
... a => PrepNPCompl | |
... en => PrepNAdjN | |
... en => PrepNAdjNP | |
... | |
... ayer => AdvI | |
... | |
... que => (NP\\NP)/(S/NP) | |
... que => S[dep]/S | |
... ''') | |
>>> parser = chart.CCGChartParser(lex, chart.DefaultRuleSet) | |
>>> for parse in parser.parse(u"el ministro anunció pero el presidente desmintió la nueva ley".split()): | |
... printCCGDerivation(parse) # doctest: +SKIP | |
... # it fails on python2.7 because of the unicode problem explained in https://github.com/nltk/nltk/pull/1354 | |
... break | |
el ministro anunció pero el presidente desmintió la nueva ley | |
(NP/N) N ((S\NP)/NP) (((S/NP)\(S/NP))/(S/NP)) (NP/N) N ((S\NP)/NP) (NP/N) (N/N) N | |
------------------> | |
NP | |
------------------>T | |
(S/(S\NP)) | |
--------------------> | |
NP | |
-------------------->T | |
(S/(S\NP)) | |
--------------------------------->B | |
(S/NP) | |
-----------------------------------------------------------> | |
((S/NP)\(S/NP)) | |
------------> | |
N | |
--------------------> | |
NP | |
--------------------<T | |
(S\(S/NP)) | |
-------------------------------------------------------------------------------<B | |
(S\(S/NP)) | |
--------------------------------------------------------------------------------------------<B | |
(S/NP) | |
--------------------------------------------------------------------------------------------------------------> | |
S | |