diff --git a/src/fst/Makefile.am b/src/fst/Makefile.am index 20b6481a..fd2302df 100644 --- a/src/fst/Makefile.am +++ b/src/fst/Makefile.am @@ -552,6 +552,7 @@ generator-raw-gt-desc.simple.hfst: generator-raw-gt-desc.simple.weightless.hfst | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/mus' -a 10 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ng' -a 10 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/v' -a 10 -A \ + | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/vus' -a 10 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/tav' -a 10 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/nud' -a 10 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/mata' -a 10 -A \ @@ -625,7 +626,7 @@ generator-raw-gt-desc.simple.hfst: generator-raw-gt-desc.simple.weightless.hfst | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Neg' -a 1 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Sup' -a 0 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Inf' -a 0 -A \ - | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ger' -a 0 -A \ + | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ger' -a 5 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Prc' -a 0 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Foc/gi' -a 0 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Emph' -a 0 -A \ @@ -748,6 +749,7 @@ guesser-raw.weighted.hfst: guesser-raw.simple.hfst | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/mus' -a -10 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/ng' -a -10 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/v' -a -10 -A \ + | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/vus' -a -10 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/tav' -a -10 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/nud' -a -10 -A \ | $(HFST_REWEIGHT) 
$(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Der/mata' -a -10 -A \ @@ -821,7 +823,7 @@ guesser-raw.weighted.hfst: guesser-raw.simple.hfst | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Neg' -a 1 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Sup' -a 0 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Inf' -a 0 -A \ - | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ger' -a 0 -A \ + | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Ger' -a 5 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Prc' -a 0 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Foc/gi' -a 0 -A \ | $(HFST_REWEIGHT) $(MORE_VERBOSITY) $(HFST_FLAGS) -S '+Emph' -a 0 -A \ diff --git a/src/fst/filters/block-derivations.est.xfscript b/src/fst/filters/block-derivations.est.xfscript index 7179e321..7d0a793e 100644 --- a/src/fst/filters/block-derivations.est.xfscript +++ b/src/fst/filters/block-derivations.est.xfscript @@ -29,7 +29,8 @@ define BadDer1 [ [~[?* ("+Guess") [["+N"] | ["+Num" "+Card"]]] "+Der/kond"] | [~[?* [[ ("+Guess") "+A"] | [("+Guess") "+A" "+Comp"] | [("+Guess") "+A" "+Superl"] | [ ZZ {tama} ("+Guess") "+V"] | [ ZZ {dama} ("+Guess") "+V"] | - [ {hooldama} ("+Guess") "+V"] | [ {soendama} ("+Guess") "+V"] | [ {tsema} ("+Guess") "+V"] | [ ? ? ? {lema} ("+Guess") "+V"]]] "+Der/us"] | + [ {hooldama} ("+Guess") "+V"] | [ {soendama} ("+Guess") "+V"] | + [ {tsema} ("+Guess") "+V"] | [ ? ? ? 
{lema} ("+Guess") "+V"]]] "+Der/us"] | [~[?* [[ ZZ {tama} ("+Guess") "+V"] | [ ZZ {dama} ("+Guess") "+V"]]] "+Der/is"] | [~[?* [{uma} | {tellima}] ("+Guess") "+V"] "+Der/mus"] | # devalveeruma - devalveerumus [~[?* {eerima} ("+Guess") "+V"] "+Der/ng"] | # devalveerima - devalveering @@ -65,7 +66,10 @@ define BadDer2 [ # some suffix sequences are actually bad define BadDer3 [ - [[ "+Der/nu" | "+Der/us" | "+Der/lane" ] "+N" "+Der/ti"] + [[ "+Der/nu" | "+Der/us" | "+Der/lane" ] "+N" "+Der/ti"] | + [[ "+Der/v" | "+Der/tav" | "+Der/nud" | "+Der/matu" | "+Der/tamatu" | "+Der/tu" ] "+A" "+Der/sti"] | + [ "+Der/v" "+A" "+Der/us"] | + [[ "+Der/lik" | "+Der/line" ] "+A" "+Der/sti"] ] ; # some words are not suitable for certain suffixes diff --git a/src/fst/morphology/affixes/verbs.lexc b/src/fst/morphology/affixes/verbs.lexc index b082b38e..cc7782cf 100644 --- a/src/fst/morphology/affixes/verbs.lexc +++ b/src/fst/morphology/affixes/verbs.lexc @@ -414,7 +414,8 @@ LEXICON SUPINE_V @R.Part.One@ SUPINE_MA_FORMS ; ! These can be 1st part of a compound : MINE_DERIVATION ; ! elamine : JA_DERIVATION ; ! elaja, õpetaja, õpetajanna, õpetajake, õpetajalik, ... - : V_DERIVATION ; ! õpetav, õpetavalt, õpetavus, õpetavam, ... + : V_DERIVATION ; ! õpetav, õpetavalt, õpetavam, ... + : VUS_DERIVATION ; ! õpetavus : MATU_DERIVATION ; ! segamatu, segamatult, segamatus, segamatum, ... : MATA_DERIVATION ; ! segamata : MUS_DERIVATION ; ! küllastumus, promoveerumus etc @@ -461,11 +462,12 @@ LEXICON INFINITIVE ! infinitive, gerund (des-form) :%>D7 A_INFINITIVE ; -LEXICON A_INFINITIVE ! common part of an infinitive, gerund (des-form); - ! also alone in a subparadigm of a few old words +LEXICON A_INFINITIVE ! common part of an infinitive, gerund (des-form); + ! also alone in a subparadigm of a few old words - @R.Part.One@ INF_FORMS ; ! This can be 1st part of a compound + @R.Part.One@ INF_FORMS ; ! This can be 1st part of a compound +Ger:es NO_COMPOUND ; + GER_DERIVATION ; ! 
pool+magades, rõht+kirjutades; LEXICON INF_FORMS +Inf:a GI ; @@ -508,6 +510,14 @@ LEXICON PL3 LEXICON NO_COMPOUND ! this form cannot participate in a compound word @R.Part.One@@P.Part.Bad@ GI ; +! gerund is an exception in compounding and derivation ... +LEXICON GER_DERIVATION ! 2nd part of a compound + @R.Part.Two@@P.Part.Bad@@R.POS.Pref@ GER_DERIVATION_SUF ; ! only 2 components: prefix + verb + +LEXICON GER_DERIVATION_SUF ! A derived adverb (but still tagged as +Verb+Ger) + +Ger:es GI ; + + ! paradigmatic derivation, i.e. very regular ! @R.Part.One@ means that this is not a latter part of a compound ! @R.Case.Par@ and @R.Case.Gen@ mean that this is a latter part of a compound @@ -516,8 +526,10 @@ LEXICON NO_COMPOUND ! this form cannot participate in a compound word LEXICON NU_DERIVATION @R.Part.One@@P.POS.N@ NU_DERIVATION_SUF ; ! no previous part; no restrictions @R.Case.Par@@P.POS.N@ NU_DERIVATION_SUF ; + @R.Case.Sem@@P.POS.N@ NU_DERIVATION_SUF ; @R.Part.One@@P.POS.A@ NUD_DERIVATION_SUF ; ! no previous part; no restrictions @R.Case.Par@@P.POS.A@ NUD_DERIVATION_SUF ; + @R.Case.Sem@@P.POS.A@ NUD_DERIVATION_SUF ; LEXICON NU_DERIVATION_SUF +Der/nu+N:»nu AASTA ; ! derivation: elanu etc @@ -529,6 +541,7 @@ LEXICON NUD_DERIVATION_SUF LEXICON TU_DERIVATION ! NB! the morpheme border + d/t is assigned before this lexicon @R.Part.One@@P.POS.A@ TU_DERIVATION_SUF ; ! no previous part; no restrictions @R.Case.Gen@@P.POS.A@ TU_DERIVATION_SUF ; + @R.Case.Sem@@P.POS.A@ TU_DERIVATION_SUF ; LEXICON TU_DERIVATION_SUF ! NB! the morpheme border + d/t is assigned before this lexicon +Der/tu+A:u AASTA ; ! derivation: elatu etc @@ -537,6 +550,7 @@ LEXICON TU_DERIVATION_SUF ! NB! the morpheme border + d/t is assigned before LEXICON MATA_DERIVATION @R.Part.One@@P.POS.A@ MATA_DERIVATION_SUF ; ! no previous part; no restrictions @R.Case.Par@@P.POS.A@ MATA_DERIVATION_SUF ; + @R.Case.Sem@@P.POS.A@ MATA_DERIVATION_SUF ; LEXICON MATA_DERIVATION_SUF +Der/mata+A:»mata GI ; ! 
elamata @@ -544,6 +558,7 @@ LEXICON MATA_DERIVATION_SUF LEXICON MATU_DERIVATION @R.Part.One@@P.POS.N@ MATU_DERIVATION_SUF ; ! no previous part; no restrictions @R.Case.Gen@@P.POS.N@ MATU_DERIVATION_SUF ; + @R.Case.Sem@@P.POS.N@ MATU_DERIVATION_SUF ; LEXICON MATU_DERIVATION_SUF +Der/matu+A:»matu AASTA ; ! segamatu etc @@ -551,6 +566,7 @@ LEXICON MATU_DERIVATION_SUF LEXICON TAMATU_DERIVATION ! NB! the morpheme border + d/t is assigned before this lexicon @R.Part.One@@P.POS.A@ TAMATU_DERIVATION_SUF ; ! no previous part; no restrictions @R.Case.Par@@P.POS.A@ TAMATU_DERIVATION_SUF ; + @R.Case.Sem@@P.POS.A@ TAMATU_DERIVATION_SUF ; LEXICON TAMATU_DERIVATION_SUF ! NB! the morpheme border + d/t is assigned before this lexicon +Der/tamatu+A:amatu AASTA ; ! segatamatu etc @@ -558,13 +574,22 @@ LEXICON TAMATU_DERIVATION_SUF ! NB! the morpheme border + d/t is assigned be LEXICON V_DERIVATION @R.Part.One@@P.POS.A@ V_DERIVATION_SUF ; ! no previous part; no restrictions @R.Case.Par@@P.POS.A@ V_DERIVATION_SUF ; + @R.Case.Sem@@P.POS.A@ V_DERIVATION_SUF ; LEXICON V_DERIVATION_SUF +Der/v+A:»v MAGUS ; ! elav, elava etc +LEXICON VUS_DERIVATION + @R.Part.One@@P.POS.N@ VUS_DERIVATION_SUF ; ! no previous part; no restrictions + @R.Case.Gen@@P.POS.N@@P.Der.us@@C.NeedNoun@ VUS_DERIVATION_SUF ; + +LEXICON VUS_DERIVATION_SUF + +Der/vus+N:»vus OLULINE ; ! elavus, ... + LEXICON TAV_DERIVATION ! NB! the morpheme border + d/t is assigned before this lexicon @R.Part.One@@P.POS.A@ TAV_DERIVATION_SUF ; ! no previous part; no restrictions @R.Case.Par@@P.POS.A@ TAV_DERIVATION_SUF ; + @R.Case.Sem@@P.POS.A@ TAV_DERIVATION_SUF ; LEXICON TAV_DERIVATION_SUF ! NB! the morpheme border + d/t is assigned before this lexicon +Der/tav+A:av MAGUS ; ! elatav, elatava etc @@ -572,6 +597,7 @@ LEXICON TAV_DERIVATION_SUF ! NB! the morpheme border + d/t is assigned befo LEXICON MINE_DERIVATION @R.Part.One@@P.POS.N@ MINE_DERIVATION_SUF ; ! 
no previous part; no restrictions @R.Case.Gen@@P.POS.N@ MINE_DERIVATION_SUF ; + @R.Case.Sem@@P.POS.N@ MINE_DERIVATION_SUF ; LEXICON MINE_DERIVATION_SUF +Der/mine+N:»mine OLULINE ; ! elamine @@ -579,6 +605,7 @@ LEXICON MINE_DERIVATION_SUF LEXICON JA_DERIVATION @R.Part.One@@P.POS.N@ JA_DERIVATION_SUF ; ! no previous part; no restrictions @R.Case.Gen@@P.POS.N@ JA_DERIVATION_SUF ; + @R.Case.Sem@@P.POS.N@ JA_DERIVATION_SUF ; LEXICON JA_DERIVATION_SUF +Der/ja+N:»ja AASTA ; ! elaja, elajat etc @@ -593,7 +620,7 @@ LEXICON IS_DERIVATION_SUF LEXICON MUS_DERIVATION @R.Part.One@@P.POS.N@ MUS_DERIVATION_SUF ; ! no previous part; no restrictions - @R.Case.Gen@@P.POS.N@ MUS_DERIVATION_SUF ; + @R.Case.Gen@@P.POS.N@@P.Der.us@@C.NeedNoun@ MUS_DERIVATION_SUF ; LEXICON MUS_DERIVATION_SUF +Der/mus+N:»mus OLULINE ; ! promoveerumus, elamus, ... @@ -601,6 +628,7 @@ LEXICON MUS_DERIVATION_SUF LEXICON US_DERIVATION_VERB ! us also changes A -> N; directing to that continuation class @R.Part.One@ US_DERIVATION ; ! no previous part; no restrictions @R.Case.Gen@ US_DERIVATION ; + @R.Case.Sem@ US_DERIVATION ; LEXICON NG_DERIVATION @R.Part.One@@P.POS.N@ NG_DERIVATION_SUF ; ! no previous part; no restrictions diff --git a/src/fst/morphology/root.lexc b/src/fst/morphology/root.lexc index 0a1c8764..a6af66f0 100644 --- a/src/fst/morphology/root.lexc +++ b/src/fst/morphology/root.lexc @@ -456,6 +456,9 @@ Multichar_Symbols +Der/mus !!= * `@CODE@` !!€ küllastumus: küllastuma+V+Der/mus+N+Sg+Nom ++Der/vus !!= * `@CODE@` +!!€ elavus: elama+V+Der/vus+N+Sg+Nom + +Der/ng !!= * `@CODE@` !!€ devalveering: devalveerima+V+Der/ng+N+Sg+Nom @@ -713,9 +716,9 @@ Multichar_Symbols @D.Stem.topelt@ @P.Stem.Guessed@ ! stem is guessed, it is not from the lexicon @R.Stem.Guessed@ - @D.Stem.Guessed@ ! if a lemma has it, then this lemma is restricted in its ability to be the last part of a compound + @D.Stem.Guessed@ ! if a word has it, then this word cannot follow a guessed stem @C.Stem@ - + !! 
A special condition that is used for filtering derivations and compounds @P.NeedAdj.On@ ! +A or +Der/A @R.NeedAdj@ @@ -1224,16 +1227,17 @@ LEXICON Latter !!= * `@CODE@` the latter part of a compound @R.POS.ACRMinus@ AnyLatterVerb ; @R.POS.Pref@ AnyLatterVerb ; ! ebalugemine + @R.POS.Pref@ Verbs ; ! gerund: poolmagades @R.POS.N@@R.Case.Gen@ LatterVerb ; ! lauajooksja @R.POS.N@@R.Case.Par@ LatterVerb ; ! laudajooksnud @R.POS.N@@R.Case.Nom@@R.Stem.Nom@ AnyLatterVerb ; ! map orig. case flag for derivation needs - @R.POS.N@@R.Case.Sem@ AnyLatterVerb ; ! map orig. case flag for derivation needs + @R.POS.N@@R.Case.Sem@ LatterVerb ; ! @R.POS.N@@R.Case.Short@ AnyLatterVerb ; ! investeerimisnõustamine @R.POS.A@@R.Case.Short@ AnyLatterVerb ; ! map orig. case flag for derivation needs - @R.POS.A@@R.Case.Sem@ AnyLatterVerb ; ! map orig. case flag for derivation needs + @R.POS.A@@R.Case.Sem@ LatterVerb ; ! @R.POS.AComp@@R.Case.Sem@ AnyLatterVerb ; ! map orig. case flag for derivation needs @R.POS.GA@@P.Case.Gen@ LatterVerb ; ! eestivihkaja @@ -1253,7 +1257,7 @@ LEXICON Latter !!= * `@CODE@` the latter part of a compound ! NB only some pronouns ! @R.POS.Pron@@R.Case.Gen@ LatterVerb ; @R.POS.Pron@@R.Case.Par@ LatterVerb ; - @R.POS.Pron@@R.Case.Sem@ AnyLatterVerb ; + @R.POS.Pron@@R.Case.Sem@ LatterVerb ; @R.POS.V@@D.Case@ AnyLatterVerb ; ! infinitive @R.POS.V@@R.Case.Sem@ AnyLatterVerb ; ! -ma, -mas etc diff --git a/src/fst/morphology/stems/adverbs.lexc b/src/fst/morphology/stems/adverbs.lexc index 97e8ac9a..6cb42739 100644 --- a/src/fst/morphology/stems/adverbs.lexc +++ b/src/fst/morphology/stems/adverbs.lexc @@ -2,8 +2,10 @@ ! 
CompoundingAdverbs and NonCompoundingAdverbs ; + LEXICON CompoundingAdverbs + @P.Stem.topelt@järel+Adv:@P.Stem.topelt@järel GI "weight: 5 " ; @D.Stem.Guessed@@P.Stem.topelt@koos+Adv:@D.Stem.Guessed@@P.Stem.topelt@k˘oos GI "weight: 4 " ; @P.Stem.topelt@kõrval+Adv:@P.Stem.topelt@kõrval GI "weight: 5 " ; @@ -449,11 +451,10 @@ pooleli+Adv:pooleli GI "weight: 7 " ; poolt+Adv:p˘oolt GI "weight: 4 " ; praokil+Adv:pr˘aokil GI "weight: 11 " ; praokile+Adv:pr˘aokile GI "weight: 11 " ; -@D.Stem.Guessed@@P.Stem.vähe@puht+Adv:@D.Stem.Guessed@@P.Stem.vähe@p˘uht GI "weight: 9 " ; -@D.Stem.Guessed@@P.Stem.vähe@vähe+Adv:@D.Stem.Guessed@@P.Stem.vähe@vähe GI "weight: 5 " ; puhevil+Adv:puhevil GI "weight: 11 " ; puhevile+Adv:puhevile GI "weight: 11 " ; puhta+Adv:p˘uhta GI "weight: 11 " ; +@D.Stem.Guessed@@P.Stem.vähe@puht+Adv:@D.Stem.Guessed@@P.Stem.vähe@p˘uht GI "weight: 9 " ; puhvi+Adv:p˘uhvi GI "weight: 11 " ; pungi+Adv:p˘ungi GI "weight: 10 " ; pungil+Adv:pungil GI "weight: 9 " ; @@ -714,6 +715,7 @@ võlgu+Adv:v˘õlgu GI "weight: 8 " ; võõriti+Adv:võõriti GI "weight: 11 " ; võõrsil+Adv:v˘õõrsil GI "weight: 8 " ; võõrsile+Adv:v˘õõrsile GI "weight: 11 " ; +@D.Stem.Guessed@@P.Stem.vähe@vähe+Adv:@D.Stem.Guessed@@P.Stem.vähe@vähe GI "weight: 5 " ; vähem+Adv:vähem GI "weight: 5 " ; välja+Adv:v˘älʲja GI "weight: 3 " ; vääri+Adv:vääri GI "weight: 11 " ; @@ -752,8 +754,10 @@ väärt+Adv:v˘äärt GI "weight: 7 " ; @D.Stem.Guessed@ülle+Adv:@D.Stem.Guessed@˘ülle GI "weight: 10 " ; ümber+Adv:˘ümber GI "weight: 5 " ; + LEXICON NonCompoundingAdverbs + aasta-aastalt+Adv:˘aasta-˘aastalt GI "weight: 9 " ; aastaringselt+Adv:˘aasta#r˘ingselt GI "weight: 10 " ; aastates+Adv:˘aastates GI "weight: 11 " ; diff --git a/src/fst/morphology/stems/prefixes.lexc b/src/fst/morphology/stems/prefixes.lexc index f9d60e67..6ca0cd13 100644 --- a/src/fst/morphology/stems/prefixes.lexc +++ b/src/fst/morphology/stems/prefixes.lexc @@ -97,6 +97,7 @@ piko+Pref:piko»- # ; pisi+Pref:pisi»- # ; polaar+Pref:pol˘aar»- # ; 
poliit+Pref:pol˘iit»- # ; +pool+Pref:p˘ool»- # ; pop+Pref:p˘op»- # ; pseudo+Pref:pseudo»- # ; psühho+Pref:psühho»- # ; diff --git a/src/import/DB_EMWV_2008_adverbs.sed b/src/import/DB_EMWV_2008_adverbs.sed new file mode 100644 index 00000000..474cbba0 --- /dev/null +++ b/src/import/DB_EMWV_2008_adverbs.sed @@ -0,0 +1,258 @@ +s/@alal+/__DB&/ +s/@alasti+/__DB&/ +s/@alla+/__DB&/ +s/@alles+/__DB&/ +s/@alt+/__DB&/ +s/@ammuli+/__DB&/ +s/@asemele+/__DB&/ +s/@avali+/__DB&/ +s/@avalikuks+/__DB&/ +s/@edasi+/__DB&/ +s/@eemal+/__DB&/ +s/@eemale+/__DB&/ +s/@eemalt+/__DB&/ +s/@ees+/__DB&/ +s/@eest+/__DB&/ +s/@endamisi+/__DB&/ +s/@eraldi+/__DB&/ +s/@esile+/__DB&/ +s/@ette+/__DB&/ +s/@harali+/__DB&/ +s/@harki+/__DB&/ +s/@hiljaks+/__DB&/ +s/@hukka+/__DB&/ +s/@huupi+/__DB&/ +s/@hästi+/__DB&/ +s/@igavesti+/__DB&/ +s/@ilma+/__DB&/ +s/@ilmsiks+/__DB&/ +s/@istukile+/__DB&/ +s/@istuli+/__DB&/ +s/@jala+/__DB&/ +s/@jalgsi+/__DB&/ +s/@jaole+/__DB&/ +s/@jokki+/__DB&/ +s/@jommi+/__DB&/ +s/@juurde+/__DB&/ +s/@juures+/__DB&/ +s/@jõlli+/__DB&/ +s/@jälil+/__DB&/ +s/@jälile+/__DB&/ +s/@järel+/__DB&/ +s/@järele+/__DB&/ +s/@järgi+/__DB&/ +s/@kaardu+/__DB&/ +s/@kaasa+/__DB&/ +s/@kaasas+/__DB&/ +s/@kahasse+/__DB&/ +s/@kaksiti+/__DB&/ +s/@kaldu+/__DB&/ +s/@kallale+/__DB&/ +s/@kaotsi+/__DB&/ +s/@katki+/__DB&/ +s/@kauaks+/__DB&/ +s/@kaugele+/__DB&/ +s/@kaugemale+/__DB&/ +s/@kiiva+/__DB&/ +s/@kikki+/__DB&/ +s/@kinni+/__DB&/ +s/@kipra+/__DB&/ +s/@kissi+/__DB&/ +s/@kiuste+/__DB&/ +s/@kohal+/__DB&/ +s/@kohevile+/__DB&/ +s/@kokku+/__DB&/ +s/@kooldu+/__DB&/ +s/@koomale+/__DB&/ +s/@koos+/__DB&/ +s/@krussi+/__DB&/ +s/@krässu+/__DB&/ +s/@kukil+/__DB&/ +s/@kukile+/__DB&/ +s/@kummi+/__DB&/ +s/@kummuli+/__DB&/ +s/@kurdu+/__DB&/ +s/@kuuti+/__DB&/ +s/@kõginal+/__DB&/ +s/@kõhuli+/__DB&/ +s/@kõrval+/__DB&/ +s/@kõrvale+/__DB&/ +s/@kõrvalt+/__DB&/ +s/@kõrvu+/__DB&/ +s/@kõverasse+/__DB&/ +s/@kõõrdi+/__DB&/ +s/@kõõriti+/__DB&/ +s/@kähku+/__DB&/ +s/@käibele+/__DB&/ +s/@käkru+/__DB&/ +s/@kämpu+/__DB&/ +s/@kängu+/__DB&/ 
+s/@käpuli+/__DB&/ +s/@könksu+/__DB&/ +s/@könni+/__DB&/ +s/@kössi+/__DB&/ +s/@külge+/__DB&/ +s/@küliti+/__DB&/ +s/@küljes+/__DB&/ +s/@lagedale+/__DB&/ +s/@laginal+/__DB&/ +s/@lahinal+/__DB&/ +s/@lahku+/__DB&/ +s/@lahti+/__DB&/ +s/@lahus+/__DB&/ +s/@laiali+/__DB&/ +s/@laokile+/__DB&/ +s/@ligemale+/__DB&/ +s/@ligi+/__DB&/ +s/@ligidale+/__DB&/ +s/@liiga+/__DB&/ +s/@liikvele+/__DB&/ +s/@lonti+/__DB&/ +s/@looja+/__DB&/ +s/@loppi+/__DB&/ +s/@lõhki+/__DB&/ +s/@läbi+/__DB&/ +s/@lähedale+/__DB&/ +s/@lähemale+/__DB&/ +s/@lähestikku+/__DB&/ +s/@lömmi+/__DB&/ +s/@lörri+/__DB&/ +s/@lössi+/__DB&/ +s/@maha+/__DB&/ +s/@maoli+/__DB&/ +s/@marraskile+/__DB&/ +s/@meelega+/__DB&/ +s/@mikski+/__DB&/ +s/@mitte+/__DB&/ +s/@mossi+/__DB&/ +s/@mujale+/__DB&/ +s/@munele+/__DB&/ +s/@mõistu+/__DB&/ +s/@mõlki+/__DB&/ +s/@mööda+/__DB&/ +s/@müginal+/__DB&/ +s/@mürinal+/__DB&/ +s/@naerukile+/__DB&/ +s/@nihu+/__DB&/ +s/@norgu+/__DB&/ +s/@nurja+/__DB&/ +s/@nähtavale+/__DB&/ +s/@nässu+/__DB&/ +s/@otse+/__DB&/ +s/@pahinal+/__DB&/ +s/@pahuksisse+/__DB&/ +s/@paigale+/__DB&/ +s/@paigalt+/__DB&/ +s/@palju+/__DB&/ +s/@paljuks+/__DB&/ +s/@peale+/__DB&/ +s/@pealt+/__DB&/ +s/@pihta+/__DB&/ +s/@pikali+/__DB&/ +s/@pinevile+/__DB&/ +s/@pingule+/__DB&/ +s/@pladinal+/__DB&/ +s/@plaginal+/__DB&/ +s/@pommi+/__DB&/ +s/@pooleli+/__DB&/ +s/@puhevile+/__DB&/ +s/@purju+/__DB&/ +s/@putku+/__DB&/ +s/@puudu+/__DB&/ +s/@põlvili+/__DB&/ +s/@pärale+/__DB&/ +s/@pärani+/__DB&/ +s/@päriseks+/__DB&/ +s/@püksata+/__DB&/ +s/@püsti+/__DB&/ +s/@raagu+/__DB&/ +s/@ratsa+/__DB&/ +s/@rihti+/__DB&/ +s/@rikki+/__DB&/ +s/@ringi+/__DB&/ +s/@ripakile+/__DB&/ +s/@rippu+/__DB&/ +s/@ristamisi+/__DB&/ +s/@sakri+/__DB&/ +s/@salajas+/__DB&/ +s/@sarvist+/__DB&/ +s/@sassi+/__DB&/ +s/@sees+/__DB&/ +s/@segamini+/__DB&/ +s/@segi+/__DB&/ +s/@selili+/__DB&/ +s/@seliti+/__DB&/ +s/@silmitsi+/__DB&/ +s/@sirgu+/__DB&/ +s/@siruli+/__DB&/ +s/@sisse+/__DB&/ +s/@soiku+/__DB&/ +s/@sosinal+/__DB&/ +s/@sõõna+/__DB&/ +s/@süüdi+/__DB&/ +s/@taas+/__DB&/ +s/@taga+/__DB&/ 
+s/@tagant+/__DB&/ +s/@tagasi+/__DB&/ +s/@taha+/__DB&/ +s/@takka+/__DB&/ +s/@tallel+/__DB&/ +s/@tallele+/__DB&/ +s/@tarvis+/__DB&/ +s/@tasa+/__DB&/ +s/@teisale+/__DB&/ +s/@teisiti+/__DB&/ +s/@tibamisi+/__DB&/ +s/@torti+/__DB&/ +s/@tuksi+/__DB&/ +s/@tuksu+/__DB&/ +s/@turki+/__DB&/ +s/@turri+/__DB&/ +s/@tuttu+/__DB&/ +s/@täis+/__DB&/ +s/@tönkamisi+/__DB&/ +s/@umbes+/__DB&/ +s/@ummuksisse+/__DB&/ +s/@unarule+/__DB&/ +s/@untsu+/__DB&/ +s/@uppi+/__DB&/ +s/@vahel+/__DB&/ +s/@vahele+/__DB&/ +s/@vahelt+/__DB&/ +s/@vaiki+/__DB&/ +s/@vait+/__DB&/ +s/@vaja+/__DB&/ +s/@vajaka+/__DB&/ +s/@vakka+/__DB&/ +s/@valla+/__DB&/ +s/@vallali+/__DB&/ +s/@valmis+/__DB&/ +s/@vargil+/__DB&/ +s/@varinal+/__DB&/ +s/@varjule+/__DB&/ +s/@vastakuti+/__DB&/ +s/@vastas+/__DB&/ +s/@vastastikku+/__DB&/ +s/@vastu+/__DB&/ +s/@vilksti+/__DB&/ +s/@viltu+/__DB&/ +s/@vussi+/__DB&/ +s/@võidu+/__DB&/ +s/@võõrsil+/__DB&/ +s/@võõrsile+/__DB&/ +s/@välja+/__DB&/ +s/@väljas+/__DB&/ +s/@õieli+/__DB&/ +s/@õitsele+/__DB&/ +s/@ära+/__DB&/ +s/@ärevile+/__DB&/ +s/@ühes+/__DB&/ +s/@ühte+/__DB&/ +s/@üksi+/__DB&/ +s/@ülal+/__DB&/ +s/@üle+/__DB&/ +s/@üles+/__DB&/ +s/@üleval+/__DB&/ +s/@ülle+/__DB&/ +s/@ült+/__DB&/ +s/@ümber+/__DB&/ diff --git a/src/import/fsgt2final.sh b/src/import/fsgt2final.sh index eed10c45..e0672d42 100755 --- a/src/import/fsgt2final.sh +++ b/src/import/fsgt2final.sh @@ -201,52 +201,37 @@ cat superlative_adjectives.tmp1 \ cat superlative_adjectives.tmp2 >> superlative_adjectives.protolexc - - # find short adverbs: # grep '^[^aeiouõäöü]*[aeiouõäöü]*[^aeiouõäöü][^aeiouõäöü][aeiouõäöü][aeiouõäöü]*[^aeiouõäöü]*[^aeiouõäöü]i*+[^#=]*$' -# lisaks (1), lisaks (2) +# adverbs need to be grouped according to their potential to participate in compounding cat fs_gt.noninfl.tmp1 | grep '+Adv' \ | grep '\(^järel+\)\|\(^koos+\)\|\(^kõrval+\)\|\(^otse+\)\|\(^piki+\)\|\(^püsti+\)\|\(^ratsa+\)\|\(^taga+\)\|\(^topelt+\)\|\(^vallas+\)\|\(^vastas+\)\|\(^vastu+\)\|\(^üle+\)' \ > tmpadv.0 cat fs_gt.noninfl.tmp1 | grep '+Adv' 
\ | grep -v '\(^järel+\)\|\(^koos+\)\|\(^kõrval+\)\|\(^otse+\)\|\(^piki+\)\|\(^püsti+\)\|\(^ratsa+\)\|\(^taga+\)\|\(^topelt+\)\|\(^vallas+\)\|\(^vastas+\)\|\(^vastu+\)\|\(^üle+\)' \ +| sort -u \ > tmpadv.alg +#-------------- cat tmpadv.alg \ -| grep '\(^all+\)\|\(^alt+\)\|\(^eel+\)\|\(^ees+\)\|\(^ise+\)\|\(^jae+\)\|\(^oma+\)\|\(^pea+\)\|\(^ula+\)\|\(^õue+\)\|\(^ära+\)\|\(^üle+\)\|\(^....+\)\|\(^umbes+\)\|\(^....[^s]+[^#-]*$\)\|\(^...ks+[^#-]*$\)\|\(^...*li+[^#-]*$\)\|\(^...*il+[^#-]*$\)\|\(^...*ile+[^#-]*$\)\|\(^[^aeiouõäöü]*[aeiouõäöü][aeiouõäöü]*[^aeiouõäöü][^aeiouõäöü]*si+\)\|\(^hiljuti+\)\|\(^kaheti+\)\|\(^kolmeti+\)\|\(^kunagi+\)\|\(^mitmeti+\)\|\(^teisiti+\)\|\(^tükati+\)\|\(^võõriti+\)\|\(^uuesti+\)\|\(^valesti+\)' \ -| grep -v '\(^miks+\)\|\(^näos+\)\|\(^egas+\)\|\(^kuis+\)\|\(^siis+\)\|\(^teps+\)\|\(^aina+\)\|\(^aiva+\)\|\(^eele+\)\|\(^eelt+\)\|\(^ikka+\)\|\(^istu+\)\|\(^jalu+\)\|\(^jaol+\)\|\(^jokk+\)\|\(^juba+\)\|\(^just+\)\|\(^jõle+\)\|\(^jönt+\)\|\(^kohe+\)\|\(^kole+\)\|\(^kord+\)\|\(^kuhu+\)\|\(^kuna+\)\|\(^küll+\)\|\(^loga+\)\|\(^loha+\)\|\(^losa+\)\|\(^mant+\)\|\(^manu+\)\|\(^nagu+\)\|\(^nõka+\)\|\(^nõus+\)\|\(^nüüd+\)\|\(^olgu+\)\|\(^puha+\)\|\(^põsi+\)\|\(^päta+\)\|\(^seep+\)\|\(^seni+\)\|\(^siva+\)\|\(^sugu+\)\|\(^tuna+\)\|\(^täna+\)\|\(^töhe+\)\|\(^vaid+\)\|\(^vaja+\)\|\(^veel+\)\|\(^vist+\)\|\(^väga+\)\|\(^õige+\)\|\(^õkva+\)\|\(^ähmi+\)\|\(^äkki+\)\|\(^ängi+\)\|\(^äsja+\)\|\(^+ühti\)\|\(^üsna+\)\|\(^abiga+\)\|\(^eduga+\)\|\(^hulga+\)\|\(^jõuga+\)\|\(^liiga+\)\|\(^lõõga+\)\|\(^punga+\)\|\(^seega+\)\|\(^tõega+\)' \ -> tmpadv.1 - -# lisaks (3) -cat tmpadv.alg \ -| grep 
'\(^alasti+\)\|\(^alles+\)\|\(^edasi+\)\|\(^eemale+\)\|\(^eemalt+\)\|\(^eraldi+\)\|\(^halvasti+\)\|\(^juurde+\)\|\(^järele+\)\|\(^kaotsi+\)\|\(^kaugele+\)\|\(^kaugelt+\)\|\(^kergelt+\)\|\(^kergesti+\)\|\(^kindlaks+\)\|\(^klaariks+\)\|\(^käsitsi+\)\|\(^kõrgelt+\)\|\(^kõrval+\)\|\(^kõrvalt+\)\|\(^kõvaks+\)\|\(^kõrvuti+\)\|\(^külili+\)\|\(^laiali+\)\|\(^raskesti+\)\|\(^seni+\)\|\(^sisse+\)\|\(^tagant+\)\|\(^tagasi+\)\|\(^viimati+\)\|\(^võistu+\)\|\(^vääriti+\)\|\(^äsja+\)' \ ->> tmpadv.1 - -# NB! see loend olgu sama, mis lisaks (2) -cat tmpadv.alg \ -| grep '\(^miks+\)\|\(^näos+\)\|\(^egas+\)\|\(^kuis+\)\|\(^siis+\)\|\(^teps+\)\|\(^aina+\)\|\(^aiva+\)\|\(^eele+\)\|\(^eelt+\)\|\(^ikka+\)\|\(^istu+\)\|\(^jalu+\)\|\(^jaol+\)\|\(^jokk+\)\|\(^juba+\)\|\(^just+\)\|\(^jõle+\)\|\(^jönt+\)\|\(^kohe+\)\|\(^kole+\)\|\(^kord+\)\|\(^kuhu+\)\|\(^kuna+\)\|\(^küll+\)\|\(^loga+\)\|\(^loha+\)\|\(^losa+\)\|\(^mant+\)\|\(^manu+\)\|\(^nagu+\)\|\(^nõka+\)\|\(^nõus+\)\|\(^nüüd+\)\|\(^olgu+\)\|\(^puha+\)\|\(^põsi+\)\|\(^päta+\)\|\(^seep+\)\|\(^seni+\)\|\(^siva+\)\|\(^sugu+\)\|\(^tuna+\)\|\(^täna+\)\|\(^töhe+\)\|\(^vaid+\)\|\(^vaja+\)\|\(^veel+\)\|\(^vist+\)\|\(^väga+\)\|\(^õige+\)\|\(^õkva+\)\|\(^ähmi+\)\|\(^äkki+\)\|\(^ängi+\)\|\(^äsja+\)\|\(^+ühti\)\|\(^üsna+\)\|\(^abiga+\)\|\(^eduga+\)\|\(^hulga+\)\|\(^jõuga+\)\|\(^liiga+\)\|\(^lõõga+\)\|\(^punga+\)\|\(^seega+\)\|\(^tõega+\)' \ -> tmpadv.2 +| grep 
'\(^all+\)\|\(^alt+\)\|\(^eel+\)\|\(^ees+\)\|\(^ise+\)\|\(^jae+\)\|\(^oma+\)\|\(^pea+\)\|\(^ula+\)\|\(^õue+\)\|\(^ära+\)\|\(^üle+\)\|\(^umbes+\)\|\(^hiljuti+\)\|\(^kaheti+\)\|\(^kolmeti+\)\|\(^kunagi+\)\|\(^mitmeti+\)\|\(^teisiti+\)\|\(^tükati+\)\|\(^võõriti+\)\|\(^uuesti+\)\|\(^valesti+\)\|\(^alasti+\)\|\(^alles+\)\|\(^edasi+\)\|\(^eemale+\)\|\(^eemalt+\)\|\(^eraldi+\)\|\(^halvasti+\)\|\(^juurde+\)\|\(^järele+\)\|\(^kaotsi+\)\|\(^kaugele+\)\|\(^kaugelt+\)\|\(^kergelt+\)\|\(^kergesti+\)\|\(^kindlaks+\)\|\(^klaariks+\)\|\(^käsitsi+\)\|\(^kõrgelt+\)\|\(^kõrval+\)\|\(^kõrvalt+\)\|\(^kõvaks+\)\|\(^kõrvuti+\)\|\(^külili+\)\|\(^laiali+\)\|\(^raskesti+\)\|\(^seni+\)\|\(^sisse+\)\|\(^tagant+\)\|\(^tagasi+\)\|\(^viimati+\)\|\(^võistu+\)\|\(^vääriti+\)\|\(^äsja+\)' \ +> uustmpadv.1 -#> adverbs.tmp2 -# NB! see loend olgu sama, mis lisaks (1) cat tmpadv.alg \ -| grep -v '\(^all+\)\|\(^alt+\)\|\(^eel+\)\|\(^ees+\)\|\(^ise+\)\|\(^jae+\)\|\(^oma+\)\|\(^pea+\)\|\(^ula+\)\|\(^õue+\)\|\(^ära+\)\|\(^üle+\)\|\(^....+\)\|\(^umbes+\)\|\(^....[^s]+[^#-]*$\)\|\(^...ks+[^#-]*$\)\|\(^...*li+[^#-]*$\)\|\(^...*il+[^#-]*$\)\|\(^...*ile+[^#-]*$\)\|\(^[^aeiouõäöü]*[aeiouõäöü][aeiouõäöü]*[^aeiouõäöü][^aeiouõäöü]*si+\)\|\(^hiljuti+\)\|\(^kaheti+\)\|\(^kolmeti+\)\|\(^kunagi+\)\|\(^mitmeti+\)\|\(^teisiti+\)\|\(^tükati+\)\|\(^võõriti+\)\|\(^uuesti+\)\|\(^valesti+\)' \ ->> tmpadv.2 -#>> adverbs.tmp2 +| grep '\(^....+\)\|\(^....[^s]+[^#-]*$\)\|\(^...ks+[^#-]*$\)\|\(^...*li+[^#-]*$\)\|\(^...*il+[^#-]*$\)\|\(^...*ile+[^#-]*$\)\|\(^[^aeiouõäöü]*[aeiouõäöü][aeiouõäöü]*[^aeiouõäöü][^aeiouõäöü]*si+\)' \ +| grep -v 
'\(^miks+\)\|\(^näos+\)\|\(^egas+\)\|\(^kuis+\)\|\(^siis+\)\|\(^teps+\)\|\(^aina+\)\|\(^aiva+\)\|\(^eele+\)\|\(^eelt+\)\|\(^ikka+\)\|\(^istu+\)\|\(^jalu+\)\|\(^jaol+\)\|\(^jokk+\)\|\(^juba+\)\|\(^just+\)\|\(^jõle+\)\|\(^jönt+\)\|\(^kohe+\)\|\(^kole+\)\|\(^kord+\)\|\(^kuhu+\)\|\(^kuna+\)\|\(^küll+\)\|\(^loga+\)\|\(^loha+\)\|\(^losa+\)\|\(^mant+\)\|\(^manu+\)\|\(^nagu+\)\|\(^nõka+\)\|\(^nõus+\)\|\(^nüüd+\)\|\(^olgu+\)\|\(^puha+\)\|\(^põsi+\)\|\(^päta+\)\|\(^seep+\)\|\(^seni+\)\|\(^siva+\)\|\(^sugu+\)\|\(^tuna+\)\|\(^täna+\)\|\(^töhe+\)\|\(^vaid+\)\|\(^vaja+\)\|\(^veel+\)\|\(^vist+\)\|\(^väga+\)\|\(^õige+\)\|\(^õkva+\)\|\(^ähmi+\)\|\(^äkki+\)\|\(^ängi+\)\|\(^äsja+\)\|\(^ühti+\)\|\(^üsna+\)\|\(^abiga+\)\|\(^eduga+\)\|\(^hulga+\)\|\(^jõuga+\)\|\(^liiga+\)\|\(^lõõga+\)\|\(^punga+\)\|\(^seega+\)\|\(^tõega+\)' \ +>> uustmpadv.1 -# NB! see loend olgu sama, mis lisaks (3) -cat tmpadv.2 | grep -v '\(^alasti+\)\|\(^alles+\)\|\(^edasi+\)\|\(^eemale+\)\|\(^eemalt+\)\|\(^eraldi+\)\|\(^halvasti+\)\|\(^juurde+\)\|\(^järele+\)\|\(^kaotsi+\)\|\(^kaugele+\)\|\(^kaugelt+\)\|\(^kergelt+\)\|\(^kergesti+\)\|\(^kindlaks+\)\|\(^klaariks+\)\|\(^käsitsi+\)\|\(^kõrgelt+\)\|\(^kõrval+\)\|\(^kõrvalt+\)\|\(^kõvaks+\)\|\(^kõrvuti+\)\|\(^külili+\)\|\(^laiali+\)\|\(^raskesti+\)\|\(^seni+\)\|\(^sisse+\)\|\(^tagant+\)\|\(^tagasi+\)\|\(^viimati+\)\|\(^võistu+\)\|\(^vääriti+\)\|\(^äsja+\)' \ -> tmpadv.3 +cat tmpadv.0 uustmpadv.1 | sort -u > uustmpadv.1.srt -#>> adverbs.protolexc +# create the tool for marking +cat uustmpadv.1.srt | sed 's/^\([^+]*\)+.*$/s\/^\1+\/__CMP__\&\//' > cmptmpadv.sed -#echo 'LEXICON Adverbs\n\n CompoundingAdverbs ;\n @P.Part.Bad@ PlainAdverbs ;\n\n' > adverbs.protolexc echo '!LEXICON Adverbs would be too unspecific; instead we have\n! 
CompoundingAdverbs and NonCompoundingAdverbs ;\n\n' > adverbs.protolexc -echo 'LEXICON CompoundingAdverbs\n' >> adverbs.protolexc +echo '\nLEXICON CompoundingAdverbs\n\n' >> adverbs.protolexc cat tmpadv.0 \ | sed '/^üle+/!s/^\([^:]*\):\([^;]*;\)\(.*\)/@P.Stem.topelt@\1:@P.Stem.topelt@\2\3/' \ | sed '/^üle+/s/^\([^:]*\):\([^;]*;\)\(.*\)/@P.Stem.üle@\1:@P.Stem.üle@\2\3/' \ @@ -257,10 +242,13 @@ cat tmpadv.0 \ | sed '/^....+/s/^\([^:]*+Adv\):\([^;]*;\)\(.*\)/@D.Stem.Guessed@\1:@D.Stem.Guessed@\2\3/' \ >> adverbs.protolexc -cat tmpadv.1 \ +cat tmpadv.alg \ +| sed -f cmptmpadv.sed \ +| grep '__CMP__' \ +| sed 's/__CMP__//' \ +\ | sed '/^vähe+/s/^\([^:]*\):\([^;]*;\)\(.*\)/@P.Stem.vähe@\1:@P.Stem.vähe@\2\3/' \ | sed '/^puht+/s/^\([^:]*\):\([^;]*;\)\(.*\)/@P.Stem.vähe@\1:@P.Stem.vähe@\2\3/' \ -| sort -u \ \ | sed '/@...+/s/^\(@.\.[^@]*@[^:@]*+Adv\):\([^;]*;\)\(.*\)/@D.Stem.Guessed@\1:@D.Stem.Guessed@\2\3/' \ | sed '/^...+/s/^\([^:]*+Adv\):\([^;]*;\)\(.*\)/@D.Stem.Guessed@\1:@D.Stem.Guessed@\2\3/' \ @@ -268,10 +256,21 @@ cat tmpadv.1 \ | sed '/^....+/s/^\([^:]*+Adv\):\([^;]*;\)\(.*\)/@D.Stem.Guessed@\1:@D.Stem.Guessed@\2\3/' \ >> adverbs.protolexc -echo '\nLEXICON NonCompoundingAdverbs\n' >> adverbs.protolexc +echo '\n\nLEXICON NonCompoundingAdverbs\n\n' >> adverbs.protolexc + +cat tmpadv.alg \ +| sed -f cmptmpadv.sed \ +| grep -v '__CMP__' \ +>> adverbs.protolexc -cat tmpadv.3 | sort -u >> adverbs.protolexc -#cat adverbs.tmp2 | sort -u >> adverbs.protolexc +# märgi siia need, mis esinevad mitmesõnaliste verbide andmebaasis lihtmäärsõnana +cat adverbs.protolexc \ +| sed 's/^/@/' \ +| sed -f DB_EMWV_2008_adverbs.sed \ +| sed 's/^@//' \ +| sed 's/^__DB@/__DB__/' \ +| sed 's/^\(.*\)__DB@/__DB__\1@/' \ +> adverbs.protolexc.vers2 echo 'LEXICON Adpositions\n' > adpositions.protolexc cat fs_gt.noninfl.tmp1 | grep '+Adp' >> adpositions.protolexc @@ -379,6 +378,7 @@ LC_COLLATE=C join -t+ -a 1 -a 2 -o 1.1 2.1 2.2 head_esiosad fs_gt.inflecting.tmp | sed '/^[k]*ost+/s/heaesi//' \ | 
sed '/^õpe+/s/heaesi//' \ | sed '/^anne+/s/heaesi//' \ +| sed '/^pool+.*TAUD/s/heaesi//' \ | sed 's/^[^+]*+//' \ | sed 's/+N_Usage/+N+Usage/' > fs_gt.inflecting.tmp1.tagged #----