Patch note (free text before the first "diff --git" header; patch tools skip it):
The two .deps/*.twol.hfst recipes delete a partially written target and fail
the build when hfst-twolc fails. The cleanup is grouped in braces,
"|| { rm -f $@; exit 1; }", because the shell gives "||" and "&&" equal
precedence and evaluates them left to right: an ungrouped
"cmd || rm -f $@ && exit 1" parses as "(cmd || rm -f $@) && exit 1" and would
run "exit 1" even when cmd succeeds. The post-image hash on the index line
predates this correction.

diff --git a/Makefile.am b/Makefile.am
index 2bb2864..8b21a4d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -30,27 +30,27 @@ TARGETS_COMMON = \
 ##
 
 $(LANG1).twol.hfst: $(BASENAME).$(LANG1).twol
-	hfst-twolc -r -i $(BASENAME).$(LANG1).twol -o $@
+	hfst-twolc -R -i $(BASENAME).$(LANG1).twol -o $@
 
 $(LANG1).lexc.hfst: $(BASENAME).$(LANG1).lexc
-	hfst-lexc $(BASENAME).$(LANG1).lexc -o $@
+	hfst-lexc -o $@ $(BASENAME).$(LANG1).lexc
 
 $(PREFIX2).autogen.hfst: $(LANG1).twol.hfst $(LANG1).lexc.hfst
-	hfst-compose-intersect -l $(LANG1).lexc.hfst $(LANG1).twol.hfst -o $@
+	hfst-compose-intersect -1 $(LANG1).lexc.hfst -2 $(LANG1).twol.hfst -o $@
 
 .deps/xfst2apertium.useless.twol.hfst: dev/xfst2apertium.useless.twol
 	if [ ! -d .deps ]; then mkdir .deps; fi
-	hfst-twolc -r -i dev/xfst2apertium.useless.twol -o $@
+	hfst-twolc -R -i dev/xfst2apertium.useless.twol -o $@ || { rm -f $@; exit 1; }
 
 .deps/xfst2apertium.hashtags.twol.hfst: dev/xfst2apertium.hashtags.twol
 	if [ ! -d .deps ]; then mkdir .deps; fi
-	hfst-twolc -r -i dev/xfst2apertium.hashtags.twol -o $@
+	hfst-twolc -R -i dev/xfst2apertium.hashtags.twol -o $@ || { rm -f $@; exit 1; }
 
 ## Split this up into several goals while we're still developing, making this takes too long :/
 # $(PREFIX1).automorf.hfst: $(PREFIX2).autogen.hfst dev/xfst2apertium.relabel .deps/xfst2apertium.useless.twol.hfst .deps/xfst2apertium.hashtags.twol.hfst
 # hfst-invert $(PREFIX2).autogen.hfst -o $@.1.tmp
-# hfst-compose-intersect -l $@.1.tmp .deps/xfst2apertium.useless.twol.hfst -o $@.2.tmp
-# hfst-compose-intersect -l $@.2.tmp .deps/xfst2apertium.hashtags.twol.hfst -o $@.3.tmp
+# hfst-compose-intersect -1 $@.1.tmp -2 .deps/xfst2apertium.useless.twol.hfst -o $@.2.tmp
+# hfst-compose-intersect -1 $@.2.tmp -2 .deps/xfst2apertium.hashtags.twol.hfst -o $@.3.tmp
 # hfst-substitute -F dev/xfst2apertium.relabel -i $@.3.tmp -o $@
 # @rm $@.1.tmp $@.2.tmp $@.3.tmp
 
@@ -60,17 +60,17 @@ $(PREFIX2).autogen.hfst: $(LANG1).twol.hfst $(LANG1).lexc.hfst
.deps/$(PREFIX1).automorf.useless.hfst: .deps/$(PREFIX1).automorf.inverted.hfst .deps/xfst2apertium.useless.twol.hfst if [ ! -d .deps ]; then mkdir .deps; fi - hfst-compose-intersect -l .deps/$(PREFIX1).automorf.inverted.hfst .deps/xfst2apertium.useless.twol.hfst -o $@ + hfst-compose-intersect -1 .deps/$(PREFIX1).automorf.inverted.hfst -2 .deps/xfst2apertium.useless.twol.hfst -o $@ .deps/$(PREFIX1).automorf.hashtags.hfst: .deps/$(PREFIX1).automorf.useless.hfst .deps/xfst2apertium.hashtags.twol.hfst if [ ! -d .deps ]; then mkdir .deps; fi - hfst-compose-intersect -l .deps/$(PREFIX1).automorf.useless.hfst .deps/xfst2apertium.hashtags.twol.hfst -o $@ + hfst-compose-intersect -1 .deps/$(PREFIX1).automorf.useless.hfst -2 .deps/xfst2apertium.hashtags.twol.hfst -o $@ $(PREFIX1).automorf.hfst: .deps/$(PREFIX1).automorf.hashtags.hfst dev/xfst2apertium.relabel hfst-substitute -F dev/xfst2apertium.relabel -i .deps/$(PREFIX1).automorf.hashtags.hfst -o $@ $(PREFIX1).automorf.hfst.ol: $(PREFIX1).automorf.hfst - hfst-lookup-optimize -i $(PREFIX1).automorf.hfst -o $@ + hfst-fst2fst -O -i $(PREFIX1).automorf.hfst -o $@ ## ## BILINGUAL DICTIONARY diff --git a/dev/xfst2apertium.hashtags.twol b/dev/xfst2apertium.hashtags.twol index 449a65c..5442cde 100644 --- a/dev/xfst2apertium.hashtags.twol +++ b/dev/xfst2apertium.hashtags.twol @@ -163,14 +163,14 @@ Rules "Delete compound tags (except Cmp/ShCmp) that weren't followed by an N or A tag" -RemCmptag:0 <=> \[ KeepCmpAfter:? | 0:%+G3 ] _ ; +R:0 <=> \[ KeepCmpAfter:? | 0:%+G3 ] _ ; where R in RemCmptag; !! We don't remove +Cmp or +ShCmp tags since they follow lemmas, not +N or +A: ! mielbargin miel+ShCmp#bargat+V+TV+Der2+Actor+N+Ess !! The :? is needed since we may have eg. %+N:%+N%* there, also the !! below rules can insert a +G3 after a +N or a +N* "Delete compound tags at the beginning of words" -RemWICmptag:0 <=> [ # | #: ] _ ; +W:0 <=> [ # | #: ] _ ; where W in RemWICmptag; @@ -194,4 +194,4 @@ RemWICmptag:0 <=> [ # | #: ] _ ; !! 
The above rules may insert a +G3 after a starred N "Never add stars (need to figure out a way to remove them before bidix first)" -Unstarred:Starred /<= _ ; +U:S /<= _ ; where U in Unstarred S in Starred; diff --git a/dev/xfst2apertium.useless.twol b/dev/xfst2apertium.useless.twol index 73cf411..4430b33 100644 --- a/dev/xfst2apertium.useless.twol +++ b/dev/xfst2apertium.useless.twol @@ -33,18 +33,18 @@ Alphabet %+Allegro:0 %+Guess:0 - %+Use/Sub:0 - %+Use/Marg:0 - %+Use/Circ:0 - %+Use/CircN:0 - %+Use/Ped:0 - %+Use/NG:0 - %+Dial/%-KJ:0 - %+Dial/%-GG:0 - %+Dial/%-GS:0 - %+Dial/KJ:0 - %+Dial/GG:0 - %+Dial/GS:0 + %+Use%/Sub:0 + %+Use%/Marg:0 + %+Use%/Circ:0 + %+Use%/CircN:0 + %+Use%/Ped:0 + %+Use%/NG:0 + %+Dial%/%-KJ:0 + %+Dial%/%-GG:0 + %+Dial%/%-GS:0 + %+Dial%/KJ:0 + %+Dial%/GG:0 + %+Dial%/GS:0 %+First:0 %+Last:0 %+None:0 @@ -53,10 +53,10 @@ Alphabet %+AllCmp:0 %+DefSgGenCmp:0 %+DefPlGenCmp:0 %+DefCmp:0 - %+CmpN/Sg:0 %+CmpN/SgN:0 %+CmpN/SgG:0 %+CmpN/PlG:0 - %+CmpN/First:0 %+CmpN/Last:0 %+CmpN/None:0 %+CmpN/Only:0 - %+CmpN/SgLeft:0 %+CmpN/SgNomLeft:0 %+CmpN/SgGenLeft:0 %+CmpN/PlGenLeft:0 - %+CmpN/All:0 %+CmpN/Def:0 %+CmpN/DefSgGen:0 %+CmpN/DefPlGen:0 + %+CmpN%/Sg:0 %+CmpN%/SgN:0 %+CmpN%/SgG:0 %+CmpN%/PlG:0 + %+CmpN%/First:0 %+CmpN%/Last:0 %+CmpN%/None:0 %+CmpN%/Only:0 + %+CmpN%/SgLeft:0 %+CmpN%/SgNomLeft:0 %+CmpN%/SgGenLeft:0 %+CmpN%/PlGenLeft:0 + %+CmpN%/All:0 %+CmpN%/Def:0 %+CmpN%/DefSgGen:0 %+CmpN%/DefPlGen:0 !!! Derivation tags copied in from sme.lex.txt:123..184 ! Der#begin @@ -64,184 +64,184 @@ Alphabet ! 1 2 3 POS type %+Der1 %+Der2 %+Der3 ! Der#1 -%+Der/t ! NN -%+Der/ár !ACRO>N -%+Der/Dimin ! NN (was: Der/aš & Der/š) -%+Der/laš ! NA -%+Der/meahttun ! VA -%+Der/d ! VV -%+Der/h ! VV - -hit/Causative -%+Der/huhtti ! VV -%+Der/j ! VV -%+Der/l ! VV -%+Der/st ! VV -%+Der/las ! VA * %+Der1%+Der2 - can only combine with Der3 -%+Der/heapmi ! NA * %+Der1%+Der2 - can only combine with Der3 -%+Der/lágan ! AA * %+Der1%+Der2 - can only combine with Der3 -%+Der/lágán ! 
AA * %+Der1%+Der2 - can only combine with Der3 -%+Der/halla ! VV * %+Der1%+Der2 - can only combine with Der3 -%+Der/huvva ! VV * %+Der1%+Der2 - can only combine with Der3 -%+Der/stuvva ! VV * %+Der1%+Der2 - can only combine with Der3 +%+Der%/t ! NN +%+Der%/ár !ACRO>N +%+Der%/Dimin ! NN (was: Der/aš & Der/š) +%+Der%/laš ! NA +%+Der%/meahttun ! VA +%+Der%/d ! VV +%+Der%/h ! VV - -hit/Causative +%+Der%/huhtti ! VV +%+Der%/j ! VV +%+Der%/l ! VV +%+Der%/st ! VV +%+Der%/las ! VA * %+Der1%+Der2 - can only combine with Der3 +%+Der%/heapmi ! NA * %+Der1%+Der2 - can only combine with Der3 +%+Der%/lágan ! AA * %+Der1%+Der2 - can only combine with Der3 +%+Der%/lágán ! AA * %+Der1%+Der2 - can only combine with Der3 +%+Der%/halla ! VV * %+Der1%+Der2 - can only combine with Der3 +%+Der%/huvva ! VV * %+Der1%+Der2 - can only combine with Der3 +%+Der%/stuvva ! VV * %+Der1%+Der2 - can only combine with Der3 ! Der#2 %+Actor - %+Der/Dimin ! NN - %+Der/eapmi ! VN - %+Der/adda ! VV - %+Der/ahtti ! VV - %+Der/alla ! VV - %+Der/asti ! VV - %+Der/at ! QA ! check this! - %+Der/easti ! VV - %+Der/gielat ! QA - %+Der/jagáš ! QA - %+Der/lágaš ! QA - %+Der/lágáš ! QA ! - check this! - %+Der/lágan ! AA ! Check these! - %+Der/lágán ! AA ! Check these! - %+Der/PassL ! VV - long passive + %+Der%/Dimin ! NN + %+Der%/eapmi ! VN + %+Der%/adda ! VV + %+Der%/ahtti ! VV + %+Der%/alla ! VV + %+Der%/asti ! VV + %+Der%/at ! QA ! check this! + %+Der%/easti ! VV + %+Der%/gielat ! QA + %+Der%/jagáš ! QA + %+Der%/lágaš ! QA + %+Der%/lágáš ! QA ! - check this! + %+Der%/lágan ! AA ! Check these! + %+Der%/lágán ! AA ! Check these! + %+Der%/PassL ! VV - long passive ! Der#3 - %+Der/amoš ! VN - %+Der/eamoš ! VN - %+Der/geahtes ! VA - %+Der/goahti ! VV - %+Der/muš ! VN - %+Der/supmi ! VN - %+Der/n ! VN - %+Der/vuohta ! AN - %+Der/upmi ! VN + %+Der%/amoš ! VN + %+Der%/eamoš ! VN + %+Der%/geahtes ! VA + %+Der%/goahti ! VV + %+Der%/muš ! VN + %+Der%/supmi ! VN + %+Der%/n ! VN + %+Der%/vuohta ! AN + %+Der%/upmi ! VN ! 
Der#other ! Other/unclassified derivations, can appear in all positions: -%+Der/veara ! NA# -%+Der/viđá ! NA# -!%+Der/viđi ! NA# outcommented, since we have the noun vih0ti, gen:viđi +%+Der%/veara ! NA# +%+Der%/viđá ! NA# +!%+Der%/viđi ! NA# outcommented, since we have the noun vih0ti, gen:viđi -%+Der/PassS ! VV - short passive +%+Der%/PassS ! VV - short passive ! See lexicons NAMAT and SAS for these: -%+Der/agat -%+Der/bealat -%+Der/bealjat -%+Der/borat -%+Der/bánat -%+Der/dábat -%+Der/dáfot -%+Der/dáhtot -%+Der/dásat -%+Der/dávddat -%+Der/dávttat -%+Der/fárddat -%+Der/gaccat -%+Der/garat -%+Der/gearddat -%+Der/geažat -%+Der/gielat -%+Der/gieđat -%+Der/gieškkat -%+Der/gilggat -%+Der/girjjat -%+Der/guvllot -%+Der/heakkat -%+Der/hájat -%+Der/hámat -%+Der/ivnnat -%+Der/jagat -%+Der/jearggat -%+Der/jienat -%+Der/jierpmat -%+Der/joccat -%+Der/juolggat -%+Der/juvllat -%+Der/kultuvrrat -%+Der/asat -%+Der/lahkat -%+Der/lahtot -%+Der/lanjat -%+Der/leakkat -%+Der/liikkat -%+Der/linjjat -%+Der/lunddot -%+Der/luottat -%+Der/láiddat -%+Der/mearkkat -%+Der/mielat -%+Der/mohkat -%+Der/muđot -%+Der/máhtat -%+Der/mállet -%+Der/namat -%+Der/nađat -%+Der/nierat -%+Der/njunat -%+Der/njálmmat -%+Der/nuo0lus -%+Der/náittot -%+Der/sorttat -%+Der/nálat -%+Der/oaivvat -%+Der/oasat -%+Der/áissat -%+Der/olat -%+Der/orddat -%+Der/pláhtat -%+Der/rattat -%+Der/ravddat -%+Der/rávnnjat -%+Der/seagat -%+Der/seaibbat -%+Der/seainnat -%+Der/siiddot -%+Der/soajat -%+Der/sogat -%+Der/soarttat -%+Der/stábat -%+Der/stávval -%+Der/suorat -%+Der/suorggat -%+Der/suorpmat -%+Der/suttat -%+Der/sánat -%+Der/sávnnjat -%+Der/šlájat -%+Der/uvssat -%+Der/uvssot -%+Der/varat -%+Der/vigat -%+Der/vuovttat -%+Der/vuđot -%+Der/váillat -%+Der/váimmot -%+Der/válddat -%+Der/váttot -%+Der/ávjjot -%+Der/čalmmat -%+Der/čeavžžat -%+Der/čiegat -%+Der/čielggat -%+Der/čoalat -%+Der/čoarvvat -%+Der/čuolmmat -%+Der/čuvddat -%+Der/hámat -%+Der/vahkkosaš -%+Der/čiegahas -%+Der/luohkálaš -%+Der/luohkkálaš -%+Der/viidosaš 
-%+Der/kilosaš -%+Der/diibmosaš -%+Der/dássásaš -%+Der/mánnosaš -%+Der/buddásaš -%+Der/geardásaš -%+Der/áigásaš -%+Der/ahkásaš -%+Der/jahkásaš -%+Der/beaivásaš -%+Der/vahkosaš -%+Der/mannosaš -%+Der/siessat +%+Der%/agat +%+Der%/bealat +%+Der%/bealjat +%+Der%/borat +%+Der%/bánat +%+Der%/dábat +%+Der%/dáfot +%+Der%/dáhtot +%+Der%/dásat +%+Der%/dávddat +%+Der%/dávttat +%+Der%/fárddat +%+Der%/gaccat +%+Der%/garat +%+Der%/gearddat +%+Der%/geažat +%+Der%/gielat +%+Der%/gieđat +%+Der%/gieškkat +%+Der%/gilggat +%+Der%/girjjat +%+Der%/guvllot +%+Der%/heakkat +%+Der%/hájat +%+Der%/hámat +%+Der%/ivnnat +%+Der%/jagat +%+Der%/jearggat +%+Der%/jienat +%+Der%/jierpmat +%+Der%/joccat +%+Der%/juolggat +%+Der%/juvllat +%+Der%/kultuvrrat +%+Der%/asat +%+Der%/lahkat +%+Der%/lahtot +%+Der%/lanjat +%+Der%/leakkat +%+Der%/liikkat +%+Der%/linjjat +%+Der%/lunddot +%+Der%/luottat +%+Der%/láiddat +%+Der%/mearkkat +%+Der%/mielat +%+Der%/mohkat +%+Der%/muđot +%+Der%/máhtat +%+Der%/mállet +%+Der%/namat +%+Der%/nađat +%+Der%/nierat +%+Der%/njunat +%+Der%/njálmmat +%+Der%/nuo0lus +%+Der%/náittot +%+Der%/sorttat +%+Der%/nálat +%+Der%/oaivvat +%+Der%/oasat +%+Der%/áissat +%+Der%/olat +%+Der%/orddat +%+Der%/pláhtat +%+Der%/rattat +%+Der%/ravddat +%+Der%/rávnnjat +%+Der%/seagat +%+Der%/seaibbat +%+Der%/seainnat +%+Der%/siiddot +%+Der%/soajat +%+Der%/sogat +%+Der%/soarttat +%+Der%/stábat +%+Der%/stávval +%+Der%/suorat +%+Der%/suorggat +%+Der%/suorpmat +%+Der%/suttat +%+Der%/sánat +%+Der%/sávnnjat +%+Der%/šlájat +%+Der%/uvssat +%+Der%/uvssot +%+Der%/varat +%+Der%/vigat +%+Der%/vuovttat +%+Der%/vuđot +%+Der%/váillat +%+Der%/váimmot +%+Der%/válddat +%+Der%/váttot +%+Der%/ávjjot +%+Der%/čalmmat +%+Der%/čeavžžat +%+Der%/čiegat +%+Der%/čielggat +%+Der%/čoalat +%+Der%/čoarvvat +%+Der%/čuolmmat +%+Der%/čuvddat +%+Der%/hámat +%+Der%/vahkkosaš +%+Der%/čiegahas +%+Der%/luohkálaš +%+Der%/luohkkálaš +%+Der%/viidosaš +%+Der%/kilosaš +%+Der%/diibmosaš +%+Der%/dássásaš +%+Der%/mánnosaš +%+Der%/buddásaš 
+%+Der%/geardásaš +%+Der%/áigásaš +%+Der%/ahkásaš +%+Der%/jahkásaš +%+Der%/beaivásaš +%+Der%/vahkosaš +%+Der%/mannosaš +%+Der%/siessat ! Der#end @@ -264,18 +264,18 @@ Alphabet Sets Useless = ! Keep consistent with tags in sme-lex.txt:36..62 - %+Use/Sub - %+Use/Marg - %+Use/Circ - %+Use/CircN - %+Use/Ped - %+Use/NG - %+Dial/%-KJ - %+Dial/%-GG - %+Dial/%-GS - %+Dial/KJ - %+Dial/GG - %+Dial/GS + %+Use%/Sub + %+Use%/Marg + %+Use%/Circ + %+Use%/CircN + %+Use%/Ped + %+Use%/NG + %+Dial%/%-KJ + %+Dial%/%-GG + %+Dial%/%-GS + %+Dial%/KJ + %+Dial%/GG + %+Dial%/GS %+Guess %+Allegro @@ -286,155 +286,155 @@ Useless = ! Keep consistent with tags in sme-lex.txt:36..62 %+AllCmp %+DefSgGenCmp %+DefPlGenCmp %+DefCmp - %+CmpN/Sg %+CmpN/SgN %+CmpN/SgG %+CmpN/PlG - %+CmpN/First %+CmpN/Last %+CmpN/None %+CmpN/Only - %+CmpN/SgLeft %+CmpN/SgNomLeft %+CmpN/SgGenLeft %+CmpN/PlGenLeft - %+CmpN/All %+CmpN/Def %+CmpN/DefSgGen %+CmpN/DefPlGen + %+CmpN%/Sg %+CmpN%/SgN %+CmpN%/SgG %+CmpN%/PlG + %+CmpN%/First %+CmpN%/Last %+CmpN%/None %+CmpN%/Only + %+CmpN%/SgLeft %+CmpN%/SgNomLeft %+CmpN%/SgGenLeft %+CmpN%/PlGenLeft + %+CmpN%/All %+CmpN%/Def %+CmpN%/DefSgGen %+CmpN%/DefPlGen ; UnhandledDerivation = - %+Der/adda - %+Der/amoš - %+Der/asti - %+Der/eamoš - %+Der/easti - %+Der/geahtes - %+Der/heapmi - %+Der/huhtti - %+Der/huvva - %+Der/l - %+Der/laš - %+Der/lágan - %+Der/meahttun - %+Der/stuvva - %+Der/supmi - %+Der/upmi - %+Der/viđá + %+Der%/adda + %+Der%/amoš + %+Der%/asti + %+Der%/eamoš + %+Der%/easti + %+Der%/geahtes + %+Der%/heapmi + %+Der%/huhtti + %+Der%/huvva + %+Der%/l + %+Der%/laš + %+Der%/lágan + %+Der%/meahttun + %+Der%/stuvva + %+Der%/supmi + %+Der%/upmi + %+Der%/viđá ! 
From lexicons NAMAT and SAS: - %+Der/agat - %+Der/bealat - %+Der/bealjat - %+Der/borat - %+Der/bánat - %+Der/dábat - %+Der/dáfot - %+Der/dáhtot - %+Der/dásat - %+Der/dávddat - %+Der/dávttat - %+Der/fárddat - %+Der/gaccat - %+Der/garat - %+Der/gearddat - %+Der/geažat - %+Der/gielat - %+Der/gieđat - %+Der/gieškkat - %+Der/gilggat - %+Der/girjjat - %+Der/guvllot - %+Der/heakkat - %+Der/hájat - %+Der/hámat - %+Der/ivnnat - %+Der/jagat - %+Der/jearggat - %+Der/jienat - %+Der/jierpmat - %+Der/joccat - %+Der/juolggat - %+Der/juvllat - %+Der/kultuvrrat - %+Der/asat - %+Der/lahkat - %+Der/lahtot - %+Der/lanjat - %+Der/leakkat - %+Der/liikkat - %+Der/linjjat - %+Der/lunddot - %+Der/luottat - %+Der/láiddat - %+Der/mearkkat - %+Der/mielat - %+Der/mohkat - %+Der/muđot - %+Der/máhtat - %+Der/mállet - %+Der/namat - %+Der/nađat - %+Der/nierat - %+Der/njunat - %+Der/njálmmat - %+Der/nuo0lus - %+Der/náittot - %+Der/sorttat - %+Der/nálat - %+Der/oaivvat - %+Der/oasat - %+Der/áissat - %+Der/olat - %+Der/orddat - %+Der/pláhtat - %+Der/rattat - %+Der/ravddat - %+Der/rávnnjat - %+Der/seagat - %+Der/seaibbat - %+Der/seainnat - %+Der/siiddot - %+Der/soajat - %+Der/sogat - %+Der/soarttat - %+Der/stábat - %+Der/stávval - %+Der/suorat - %+Der/suorggat - %+Der/suorpmat - %+Der/suttat - %+Der/sánat - %+Der/sávnnjat - %+Der/šlájat - %+Der/uvssat - %+Der/uvssot - %+Der/varat - %+Der/vigat - %+Der/vuovttat - %+Der/vuđot - %+Der/váillat - %+Der/váimmot - %+Der/válddat - %+Der/váttot - %+Der/ávjjot - %+Der/čalmmat - %+Der/čeavžžat - %+Der/čiegat - %+Der/čielggat - %+Der/čoalat - %+Der/čoarvvat - %+Der/čuolmmat - %+Der/čuvddat - %+Der/hámat - %+Der/vahkkosaš - %+Der/čiegahas - %+Der/luohkálaš - %+Der/luohkkálaš - %+Der/viidosaš - %+Der/kilosaš - %+Der/diibmosaš - %+Der/dássásaš - %+Der/mánnosaš - %+Der/buddásaš - %+Der/geardásaš - %+Der/áigásaš - %+Der/ahkásaš - %+Der/jahkásaš - %+Der/beaivásaš - %+Der/vahkosaš - %+Der/mannosaš - %+Der/siessat - %+Der/veara + %+Der%/agat + %+Der%/bealat + 
%+Der%/bealjat + %+Der%/borat + %+Der%/bánat + %+Der%/dábat + %+Der%/dáfot + %+Der%/dáhtot + %+Der%/dásat + %+Der%/dávddat + %+Der%/dávttat + %+Der%/fárddat + %+Der%/gaccat + %+Der%/garat + %+Der%/gearddat + %+Der%/geažat + %+Der%/gielat + %+Der%/gieđat + %+Der%/gieškkat + %+Der%/gilggat + %+Der%/girjjat + %+Der%/guvllot + %+Der%/heakkat + %+Der%/hájat + %+Der%/hámat + %+Der%/ivnnat + %+Der%/jagat + %+Der%/jearggat + %+Der%/jienat + %+Der%/jierpmat + %+Der%/joccat + %+Der%/juolggat + %+Der%/juvllat + %+Der%/kultuvrrat + %+Der%/asat + %+Der%/lahkat + %+Der%/lahtot + %+Der%/lanjat + %+Der%/leakkat + %+Der%/liikkat + %+Der%/linjjat + %+Der%/lunddot + %+Der%/luottat + %+Der%/láiddat + %+Der%/mearkkat + %+Der%/mielat + %+Der%/mohkat + %+Der%/muđot + %+Der%/máhtat + %+Der%/mállet + %+Der%/namat + %+Der%/nađat + %+Der%/nierat + %+Der%/njunat + %+Der%/njálmmat + %+Der%/nuo0lus + %+Der%/náittot + %+Der%/sorttat + %+Der%/nálat + %+Der%/oaivvat + %+Der%/oasat + %+Der%/áissat + %+Der%/olat + %+Der%/orddat + %+Der%/pláhtat + %+Der%/rattat + %+Der%/ravddat + %+Der%/rávnnjat + %+Der%/seagat + %+Der%/seaibbat + %+Der%/seainnat + %+Der%/siiddot + %+Der%/soajat + %+Der%/sogat + %+Der%/soarttat + %+Der%/stábat + %+Der%/stávval + %+Der%/suorat + %+Der%/suorggat + %+Der%/suorpmat + %+Der%/suttat + %+Der%/sánat + %+Der%/sávnnjat + %+Der%/šlájat + %+Der%/uvssat + %+Der%/uvssot + %+Der%/varat + %+Der%/vigat + %+Der%/vuovttat + %+Der%/vuđot + %+Der%/váillat + %+Der%/váimmot + %+Der%/válddat + %+Der%/váttot + %+Der%/ávjjot + %+Der%/čalmmat + %+Der%/čeavžžat + %+Der%/čiegat + %+Der%/čielggat + %+Der%/čoalat + %+Der%/čoarvvat + %+Der%/čuolmmat + %+Der%/čuvddat + %+Der%/hámat + %+Der%/vahkkosaš + %+Der%/čiegahas + %+Der%/luohkálaš + %+Der%/luohkkálaš + %+Der%/viidosaš + %+Der%/kilosaš + %+Der%/diibmosaš + %+Der%/dássásaš + %+Der%/mánnosaš + %+Der%/buddásaš + %+Der%/geardásaš + %+Der%/áigásaš + %+Der%/ahkásaš + %+Der%/jahkásaš + %+Der%/beaivásaš + %+Der%/vahkosaš + %+Der%/mannosaš + 
%+Der%/siessat + %+Der%/veara ; @@ -455,59 +455,59 @@ Derivation = ! Keep consistent with tags in sme-lex.txt:123..184 ! 1 2 3 POS type %+Der1 %+Der2 %+Der3 ! Der#1 - %+Der/t ! NN - %+Der/ár !ACRO>N - %+Der/Dimin ! NN (was: Der/aš & Der/š) - %+Der/laš ! NA - %+Der/meahttun ! VA - %+Der/d ! VV - %+Der/h ! VV - -hit/Causative - %+Der/huhtti ! VV - %+Der/j ! VV - %+Der/l ! VV - %+Der/st ! VV - %+Der/las ! VA * %+Der1%+Der2 - can only combine with Der3 - %+Der/heapmi ! NA * %+Der1%+Der2 - can only combine with Der3 - %+Der/lágan ! AA * %+Der1%+Der2 - can only combine with Der3 - %+Der/lágán ! AA * %+Der1%+Der2 - can only combine with Der3 - %+Der/halla ! VV * %+Der1%+Der2 - can only combine with Der3 - %+Der/huvva ! VV * %+Der1%+Der2 - can only combine with Der3 - %+Der/stuvva ! VV * %+Der1%+Der2 - can only combine with Der3 + %+Der%/t ! NN + %+Der%/ár !ACRO>N + %+Der%/Dimin ! NN (was: Der/aš & Der/š) + %+Der%/laš ! NA + %+Der%/meahttun ! VA + %+Der%/d ! VV + %+Der%/h ! VV - -hit/Causative + %+Der%/huhtti ! VV + %+Der%/j ! VV + %+Der%/l ! VV + %+Der%/st ! VV + %+Der%/las ! VA * %+Der1%+Der2 - can only combine with Der3 + %+Der%/heapmi ! NA * %+Der1%+Der2 - can only combine with Der3 + %+Der%/lágan ! AA * %+Der1%+Der2 - can only combine with Der3 + %+Der%/lágán ! AA * %+Der1%+Der2 - can only combine with Der3 + %+Der%/halla ! VV * %+Der1%+Der2 - can only combine with Der3 + %+Der%/huvva ! VV * %+Der1%+Der2 - can only combine with Der3 + %+Der%/stuvva ! VV * %+Der1%+Der2 - can only combine with Der3 ! Der#2 %+Actor - %+Der/Dimin ! NN - %+Der/eapmi ! VN - %+Der/adda ! VV - %+Der/ahtti ! VV - %+Der/alla ! VV - %+Der/asti ! VV - %+Der/at ! QA ! check this! - %+Der/easti ! VV - %+Der/gielat ! QA - %+Der/jagáš ! QA - %+Der/lágaš ! QA - %+Der/lágáš ! QA ! - check this! - %+Der/lágan ! AA ! Check these! - %+Der/lágán ! AA ! Check these! - %+Der/PassL ! VV - long passive + %+Der%/Dimin ! NN + %+Der%/eapmi ! VN + %+Der%/adda ! VV + %+Der%/ahtti ! VV + %+Der%/alla ! 
VV + %+Der%/asti ! VV + %+Der%/at ! QA ! check this! + %+Der%/easti ! VV + %+Der%/gielat ! QA + %+Der%/jagáš ! QA + %+Der%/lágaš ! QA + %+Der%/lágáš ! QA ! - check this! + %+Der%/lágan ! AA ! Check these! + %+Der%/lágán ! AA ! Check these! + %+Der%/PassL ! VV - long passive ! Der#3 - %+Der/amoš ! VN - %+Der/eamoš ! VN - %+Der/geahtes ! VA - %+Der/goahti ! VV - %+Der/muš ! VN - %+Der/supmi ! VN - %+Der/n ! VN - %+Der/vuohta ! AN - %+Der/upmi ! VN + %+Der%/amoš ! VN + %+Der%/eamoš ! VN + %+Der%/geahtes ! VA + %+Der%/goahti ! VV + %+Der%/muš ! VN + %+Der%/supmi ! VN + %+Der%/n ! VN + %+Der%/vuohta ! AN + %+Der%/upmi ! VN ! Der#other ! Other/unclassified derivations, can appear in all positions: - %+Der/veara ! NA# - %+Der/viđá ! NA# - !%+Der/viđi ! NA# outcommented, since we have the noun vih0ti, gen:viđi + %+Der%/veara ! NA# + %+Der%/viđá ! NA# + !%+Der%/viđi ! NA# outcommented, since we have the noun vih0ti, gen:viđi - %+Der/PassS ! VV - short passive + %+Der%/PassS ! VV - short passive ; @@ -518,19 +518,19 @@ Rules "Delete Use-tags etc. that we don't use" -Useless:0 <=> _ ; +U:0 <=> _ ; where U in Useless; !! Removing tags with hfst-substitute will give duplicate analyses, so !! we do it here instead. "Only allow those derivations that survive bidix/transfer" -UnhandledDerivation /<= _ ; +U /<= _ ; where U in UnhandledDerivation; !! Fail if analysis contains derivations that are not handled in !! bidix/transfer. "No derivations of derivations" -Derivation /<= Derivation+ PartOfSpeech+ [ %+TV | %+IV | [] ] _ ; +D /<= Derivation+ PartOfSpeech+ [ %+TV | %+IV | [] ] _ ; where D in Derivation; !! Although Der/foo and Der/bar might be handled by bidix/transfer, !! Der/foo N Der/bar isn't necessarily. @@ -543,8 +543,8 @@ Derivation /<= Derivation+ PartOfSpeech+ [ %+TV | %+IV | [] ] _ ; !! PartOfSpeech tag in those contexts where the double derivation has !! 
bidix entries: "Allow +V+Der2+Der/PassL+V+Der3+Der/n+N (removing inner PoS tag)" -%+V:0 <=> %+V Transitivity %+Der2 %+Der/PassL _ %+Der3 %+Der/n %+N ; +%+V:0 <=> %+V Transitivity %+Der2 %+Der%/PassL _ %+Der3 %+Der%/n %+N ; "Allow +V+Der1+Der/j+V+Der2+Der/PassL+V (removing inner PoS tag)" -%+V:0 <=> %+V Transitivity %+Der1 %+Der/j _ %+Der2 %+Der/PassL %+V ; +%+V:0 <=> %+V Transitivity %+Der1 %+Der%/j _ %+Der2 %+Der%/PassL %+V ;