with TEXT_IO; with LATIN_FILE_NAMES; use LATIN_FILE_NAMES; with STRINGS_PACKAGE; use STRINGS_PACKAGE; with CONFIG; use CONFIG; with UNIQUES_PACKAGE; use UNIQUES_PACKAGE; with ADDONS_PACKAGE; use ADDONS_PACKAGE; with WORD_PARAMETERS; use WORD_PARAMETERS; with PREFACE; with DEVELOPER_PARAMETERS; use DEVELOPER_PARAMETERS; with LINE_STUFF; use LINE_STUFF; with ENGLISH_SUPPORT_PACKAGE; use ENGLISH_SUPPORT_PACKAGE; package body WORD_PACKAGE is INFLECTIONS_SECTIONS_FILE : LEL_SECTION_IO.FILE_TYPE; procedure PAUSE(OUTPUT : TEXT_IO.FILE_TYPE) is use CONFIG; PAUSE_LINE : STRING(1..300); PAUSE_LAST : INTEGER := 0; begin if WORDS_MDEV(PAUSE_IN_SCREEN_OUTPUT) then if METHOD = INTERACTIVE then if TEXT_IO.NAME(OUTPUT) = -- TEXT_IO.NAME(TEXT_IO.STANDARD_OUTPUT) then -- TEXT_IO.PUT_LINE(TEXT_IO.STANDARD_OUTPUT, -- " MORE - hit RETURN/ENTER to continue"); -- TEXT_IO.GET_LINE(TEXT_IO.STANDARD_INPUT, PAUSE_LINE, PAUSE_LAST); TEXT_IO.PUT_LINE(TEXT_IO.STANDARD_OUTPUT, ""); end if; elsif METHOD = COMMAND_LINE_INPUT then TEXT_IO.PUT_LINE(TEXT_IO.STANDARD_OUTPUT, ""); -- TEXT_IO.PUT_LINE(TEXT_IO.STANDARD_OUTPUT, -- " MORE - hit RETURN/ENTER to continue"); -- TEXT_IO.GET_LINE(TEXT_IO.STANDARD_INPUT, PAUSE_LINE, PAUSE_LAST); elsif METHOD = COMMAND_LINE_FILES then null; -- Do not PAUSE end if; end if; exception when others => TEXT_IO.PUT_LINE("Unexpected exception in PAUSE"); end PAUSE; function MIN(A, B : INTEGER) return INTEGER is begin if A <= B then return A; end if; return B; end MIN; function LTU(C, D : CHARACTER) return BOOLEAN is begin if (D = 'v') then if (C < 'u') then return TRUE; else return FALSE; end if; elsif (D = 'j') then if (C < 'i') then return TRUE; else return FALSE; end if; elsif (D = 'V') then if (C < 'U') then return TRUE; else return FALSE; end if; elsif (D = 'J') then if (C < 'I') then return TRUE; else return FALSE; end if; else return C < D; end if; end LTU; function EQU(C, D : CHARACTER) return BOOLEAN is begin if (D = 'u') or (D = 'v') then if (C = 'u') or (C = 'v') then return TRUE; else return FALSE; end if; elsif (D = 'i') or (D = 'j') then if (C = 'i') or (C = 'j') then return TRUE; else return FALSE; end if; elsif (D = 'U') or (D = 'V') then if (C = 'U') or (C = 'V') then return TRUE; else return FALSE; end if; elsif (D = 'I') or (D = 'J') then if (C = 'I') or (C = 'J') then return TRUE; else return FALSE; end if; else return C = D; end if; end EQU; function GTU(C, D : CHARACTER) return BOOLEAN is begin if D = 'u' then if (C > 'v') then return TRUE; else return FALSE; end if; elsif D = 'i' then if (C > 'j') then return TRUE; else return FALSE; end if; elsif D = 'U' then if (C > 'V') then return TRUE; else return FALSE; end if; elsif D = 'I' then if (C > 'J') then return TRUE; else return FALSE; end if; else return C > D; end if; end GTU; function LTU(S, T : STRING) return BOOLEAN is begin for I in 1..S'LENGTH loop -- Not TRIMed, so same length if EQU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then null; elsif GTU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then return FALSE; elsif LTU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then return TRUE; end if; end loop; return FALSE; end LTU; function GTU(S, T : STRING) return BOOLEAN is begin for I in 1..S'LENGTH loop -- Not TRIMed, so same length if EQU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then null; elsif LTU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then return FALSE; elsif GTU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then return TRUE; end if; end loop; return FALSE; end GTU; function EQU(S, T : STRING) return BOOLEAN is begin if S'LENGTH /= T'LENGTH then return FALSE; end if; for I in 1..S'LENGTH loop if not EQU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then return FALSE; end if; end loop; return TRUE; end EQU; procedure RUN_UNIQUES(S : in STRING; UNIQUE_FOUND : out BOOLEAN; PA : in out PARSE_ARRAY; PA_LAST : in out INTEGER) is SL : constant STRING -- BAD NAME!!!!!!!!!!!!!!!!!! := LOWER_CASE(TRIM(S)); ST : constant STEM_TYPE := HEAD(SL, MAX_STEM_SIZE); UNQL : UNIQUE_LIST; -- Unique list for a letter begin UNIQUE_FOUND := FALSE; if SL(SL'FIRST) = 'v' then UNQL := UNQ('u'); -- Unique list for a letter elsif SL(SL'FIRST) = 'j' then UNQL := UNQ('i'); -- Unique list for a letter else UNQL := UNQ(SL(SL'FIRST)); -- Unique list for a letter end if; --TEXT_IO.NEW_LINE; --TEXT_IO.PUT_LINE("Called UNIQUES with =>" & SL & "|"); --TEXT_IO.NEW_LINE; --TEXT_IO.PUT_LINE("UNQL "); while UNQL /= null loop -- If there is a match, add to PA --TEXT_IO.PUT_LINE("UNIQUE =>" & UNQL.PR.STEM); --if ST = LOWER_CASE(UNQL.PR.STEM) then if EQU(ST, LOWER_CASE(UNQL.STEM)) then PA_LAST := PA_LAST + 1; PA(PA_LAST) := (UNQL.STEM, (UNQL.QUAL, 0, NULL_ENDING_RECORD, X, X), UNIQUE, UNQL.MNPC); --TEXT_IO.PUT_LINE("UNIQUE HIT *********" & INTEGER'IMAGE(PA_LAST)); UNIQUE_FOUND := TRUE; end if; UNQL := UNQL.SUCC; end loop; end RUN_UNIQUES; procedure RUN_INFLECTIONS(S : in STRING; SL : in out SAL; RESTRICTION : DICT_RESTRICTION := REGULAR) is -- Trys all possible inflections against the input word in S -- and constructs a STEM_LIST of those that survive SL use LEL_SECTION_IO; use INFLECTION_RECORD_IO; WORD : constant STRING := LOWER_CASE(TRIM(S)); LAST_OF_WORD : constant CHARACTER := WORD(WORD'LAST); LENGTH_OF_WORD : constant INTEGER := WORD'LENGTH; STEM_LENGTH : INTEGER := 0; PR : PARSE_RECORD; M : INTEGER := 1; begin --TEXT_IO.NEW_LINE; --TEXT_IO.PUT_LINE("Called RUN_INFLECTIONS with =>" & WORD & "|"); if WORD'LENGTH = 0 then SL(M) := NULL_PARSE_RECORD; return; end if; SA := NOT_A_STEM_ARRAY; -- Add all of these to list of possible ending records -- since the blank ending agrees with everything -- PACK/PRON have no blank endings if ((RESTRICTION /= PACK_ONLY) and (RESTRICTION /= QU_PRON_ONLY)) and then (WORD'LENGTH <= MAX_STEM_SIZE) then for I in BELF(0, ' ')..BELL(0, ' ') loop PR := (WORD & NULL_STEM_TYPE(LENGTH_OF_WORD+1..STEM_TYPE'LENGTH), BEL(I), DEFAULT_DICTIONARY_KIND, NULL_MNPC); SL(M) := PR; M := M + 1; end loop; SA(LENGTH_OF_WORD) := PR.STEM; -- Is always a possibility (null ending) end if; -- Here we read in the INFLECTIONS_SECTION that is applicable if RESTRICTION = REGULAR then case LAST_OF_WORD is when 'a' | 'c' | 'd' | 'e' | 'i' => READ(INFLECTIONS_SECTIONS_FILE, LEL, 1); when 'm' | 'n' | 'o' | 'r' => READ(INFLECTIONS_SECTIONS_FILE, LEL, 2); when 's' => READ(INFLECTIONS_SECTIONS_FILE, LEL, 3); when 't' | 'u' => READ(INFLECTIONS_SECTIONS_FILE, LEL, 4); when others => --PUT_LINE("Only blank inflections are found"); return; end case; elsif RESTRICTION = PACK_ONLY or RESTRICTION = QU_PRON_ONLY then READ(INFLECTIONS_SECTIONS_FILE, LEL, 4); end if; -- Now do the non-blank endings -- Only go to LENGTH_OF_WORD for Z in reverse 1..MIN(MAX_ENDING_SIZE, LENGTH_OF_WORD) loop -- Check if Z agrees with a PDL SIZE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! -- Maybe make PDL on size, if it has to be a list, or order by size if array if LELL(Z, LAST_OF_WORD) > 0 then -- Any likely inflections at all for I in LELF(Z, LAST_OF_WORD)..LELL(Z, LAST_OF_WORD) loop if EQU(LOWER_CASE(LEL(I).ENDING.SUF(1..Z)), LOWER_CASE(WORD(WORD'LAST-Z+1..WORD'LAST))) then -- Add to list of possible ending records --STEM_LENGTH := WORD'LENGTH - LEL(I).ENDING.SIZE; STEM_LENGTH := WORD'LENGTH - Z; --PUT(STEM_LENGTH); --TEXT_IO.PUT_LINE("#######################################################"); if STEM_LENGTH <= MAX_STEM_SIZE then -- Reject too long words -- Check if LEL IR agrees with PDL IR !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! PR := (WORD(WORD'FIRST..STEM_LENGTH) & NULL_STEM_TYPE(STEM_LENGTH+1..MAX_STEM_SIZE), LEL(I), DEFAULT_DICTIONARY_KIND, NULL_MNPC); SL(M) := PR; M := M + 1; SA(STEM_LENGTH) := PR.STEM; -- Gets set dozens of times -- Could order the endings by length (suffix sort) so length changes slowly --PUT_LINE("LENGTH = " & INTEGER'IMAGE(STEM_LENGTH) --& " SA =>" & PR.STEM & "|"); end if; end if; end loop; end if; end loop; --TEXT_IO.PUT_LINE("RUN_INF FOUND STEMS = " & INTEGER'IMAGE(M-1)); --exception --when others => --TEXT_IO.PUT_LINE("exception RUN_INF FOUND STEMS = " & INTEGER'IMAGE(M-1)); --raise; end RUN_INFLECTIONS; procedure TRY_TO_LOAD_DICTIONARY(D_K : DICTIONARY_KIND) is begin -- PUT_LINE("Trying to load " & DICTIONARY_KIND'IMAGE(D_K) & -- " dictionary from STEMFILE "); STEM_IO.OPEN(STEM_FILE(D_K), STEM_IO.IN_FILE, ADD_FILE_NAME_EXTENSION(STEM_FILE_NAME, DICTIONARY_KIND'IMAGE(D_K))); DICT_IO.OPEN(DICT_FILE(D_K), DICT_IO.IN_FILE, ADD_FILE_NAME_EXTENSION(DICT_FILE_NAME, DICTIONARY_KIND'IMAGE(D_K))); LOAD_INDICES_FROM_INDX_FILE(ADD_FILE_NAME_EXTENSION(INDX_FILE_NAME, DICTIONARY_KIND'IMAGE(D_K)), D_K); DICTIONARY_AVAILABLE(D_K) := TRUE; -- PUT_LINE("Successfully loaded " & DICTIONARY_KIND'IMAGE(D_K) & -- " dictionary from STEMFILE "); exception when others => --PUT_LINE("Failed to load " & DICTIONARY_KIND'IMAGE(D_K) & -- " dictionary from STEMFILE "); DICTIONARY_AVAILABLE(D_K) := FALSE; end TRY_TO_LOAD_DICTIONARY; procedure DICTIONARY_SEARCH(SSA : STEM_ARRAY_TYPE; PREFIX : PREFIX_ITEM; SUFFIX : SUFFIX_ITEM; D_K : DICTIONARY_KIND; RESTRICTION : DICT_RESTRICTION := REGULAR) is -- Prepares a PDL list of possible dictionary hits -- Search a dictionary (D_K) looking for all stems that match -- any of the stems that are physically possible with Latin inflections use STEM_IO; --type NAT_32 is range 0..2**31-1; --############### J, J1, J2, JJ : STEM_IO.COUNT := 0; INDEX_ON : constant STRING := SSA(SSA'LAST); INDEX_FIRST, INDEX_LAST : STEM_IO.COUNT := 0; DS : DICTIONARY_STEM; FIRST_TRY, SECOND_TRY : BOOLEAN := TRUE; function FIRST_TWO(W : STRING) return STRING is -- 'v' could be represented by 'u', like the new Oxford Latin Dictionary -- Fixes the first two letters of a word/stem which can be done right S : constant STRING := LOWER_CASE(W); SS : STRING(W'RANGE) := W; function UI(C : CHARACTER) return CHARACTER is begin if (C = 'v') then return 'u'; elsif (C = 'V') then return 'U'; elsif (C = 'j') then return 'i'; elsif (C = 'J') then return 'I'; else return C; end if; end UI; begin if S'LENGTH = 1 then SS(S'FIRST) := UI(W(S'FIRST)); else SS(S'FIRST) := UI(W(S'FIRST)); SS(S'FIRST+1) := UI(W(S'FIRST+1)); end if; return SS; end FIRST_TWO; procedure LOAD_PDL is begin case RESTRICTION is when REGULAR => if not (DS.PART.POFS = PACK or (DS.PART.POFS = PRON and then (DS.PART.PRON.DECL.WHICH = 1))) then PDL_INDEX := PDL_INDEX + 1; PDL(PDL_INDEX) := PRUNED_DICTIONARY_ITEM'(DS, D_K); end if; when PACK_ONLY => if DS.PART.POFS = PACK then PDL_INDEX := PDL_INDEX + 1; PDL(PDL_INDEX) := PRUNED_DICTIONARY_ITEM'(DS, D_K); end if; when QU_PRON_ONLY => if DS.PART.POFS = PRON and then (DS.PART.PRON.DECL.WHICH = 1) then PDL_INDEX := PDL_INDEX + 1; PDL(PDL_INDEX) := PRUNED_DICTIONARY_ITEM'(DS, D_K); end if; when others => PDL_INDEX := PDL_INDEX + 1; PDL(PDL_INDEX) := PRUNED_DICTIONARY_ITEM'(DS, D_K); end case; end LOAD_PDL; begin -- Now go through the dictionary list DL for the first letters -- and make a reduced dictionary list PDL --TEXT_IO.PUT_LINE("Entering DICTIONARY_SEARCH PDL_INDEX = " & INTEGER'IMAGE(PDL_INDEX)); if D_K = LOCAL then INDEX_FIRST := FIRST_INDEX((FIRST_TWO(INDEX_ON)(1), 'a'), D_K); INDEX_LAST := LAST_INDEX((FIRST_TWO(INDEX_ON)(1), 'a'), D_K); else INDEX_FIRST := FIRST_INDEX(FIRST_TWO(INDEX_ON), D_K); INDEX_LAST := LAST_INDEX(FIRST_TWO(INDEX_ON), D_K); end if; if INDEX_FIRST > 0 and then INDEX_FIRST <= INDEX_LAST then J1 := STEM_IO.COUNT(INDEX_FIRST); --###################### J2 := STEM_IO.COUNT(INDEX_LAST); STEM_ARRAY_LOOP: for K in SSA'RANGE loop if TRIM(SSA(K))'LENGTH > 1 then -- This may be checking for 0 and 1 letter SSAs which are done elsewhere --TEXT_IO.PUT(INTEGER'IMAGE(K) & " SSA(K) =>" ); --TEXT_IO.PUT_LINE(SSA(K)); if D_K = LOCAL then -- Special processing for unordered DICT.LOC for J in J1..J2 loop -- Sweep exaustively through the scope SET_INDEX(STEM_FILE(D_K), STEM_IO.COUNT(J)); READ(STEM_FILE(D_K), DS); if EQU(LOWER_CASE(DS.STEM), SSA(K)) then --TEXT_IO.PUT_LINE("HIT LOC = " & DS.STEM & " - " & SSA(K)); LOAD_PDL; end if; end loop; else -- Regular dictionaries FIRST_TRY := TRUE; SECOND_TRY := TRUE; J := (J1 + J2) / 2; BINARY_SEARCH: loop if (J1 = J2-1) or (J1 = J2) then if FIRST_TRY then --TEXT_IO.PUT_LINE("FIRST_TRY"); J := J1; FIRST_TRY := FALSE; elsif SECOND_TRY then --TEXT_IO.PUT_LINE("SECOND_TRY"); J := J2; SECOND_TRY := FALSE; else --TEXT_IO.PUT_LINE("THIRD_TRY exit BINARY_SEARCH"); JJ := J; exit BINARY_SEARCH; end if; end if; SET_INDEX(STEM_FILE(D_K), STEM_IO.COUNT(J)); READ(STEM_FILE(D_K), DS); if LTU(LOWER_CASE(DS.STEM), SSA(K)) then J1 := J; J := (J1 + J2) / 2; elsif GTU(LOWER_CASE(DS.STEM), SSA(K)) then J2 := J; J := (J1 + J2) / 2; else for I in reverse J1..J loop SET_INDEX(STEM_FILE(D_K), STEM_IO.COUNT(I)); READ(STEM_FILE(D_K), DS); if EQU(LOWER_CASE(DS.STEM), SSA(K)) then JJ := I; --TEXT_IO.PUT_LINE("PDL STEM " & DS.STEM & " " & INTEGER'IMAGE(INTEGER(DS.MNPC))); LOAD_PDL; else exit; end if; end loop; for I in J+1..J2 loop SET_INDEX(STEM_FILE(D_K), STEM_IO.COUNT(I)); READ(STEM_FILE(D_K), DS); if EQU(LOWER_CASE(DS.STEM), SSA(K)) then JJ := I; --TEXT_IO.PUT_LINE("PDL STEM " & DS.STEM & " " & INTEGER'IMAGE(INTEGER(DS.MNPC))); LOAD_PDL; else exit BINARY_SEARCH; end if; end loop; exit BINARY_SEARCH; end if; end loop BINARY_SEARCH; J1 := JJ; J2 := STEM_IO.COUNT(INDEX_LAST); end if; -- On LOCAL check end if; -- On LENGTH > 1 end loop STEM_ARRAY_LOOP; end if; --TEXT_IO.PUT_LINE("Leaving DICTIONARY_SEARCH PDL_INDEX = " & INTEGER'IMAGE(PDL_INDEX)); -- exception -- when others => --TEXT_IO.PUT_LINE("exception DICTIONARY_SEARCH PDL_INDEX = " & INTEGER'IMAGE(PDL_INDEX)); -- raise; end DICTIONARY_SEARCH; procedure SEARCH_DICTIONARIES(SSA : in STEM_ARRAY_TYPE; PREFIX : PREFIX_ITEM; SUFFIX : SUFFIX_ITEM; RESTRICTION : DICT_RESTRICTION := REGULAR) is use STEM_IO; FC : CHARACTER := ' '; begin --PUT_LINE("Entering SEARCH_DICTIONARIES"); PDL := (others => NULL_PRUNED_DICTIONARY_ITEM); PDL_INDEX := 0; --PUT_LINE("Search for blank stems"); -- BDL is always used, so it is loaded initially and not called from disk -- Check all stems of the dictionary entry against the reduced stems -- Determine if there is a pure blank " " stem if LEN(SSA(SSA'FIRST)) = 0 then -- a size would help? --PUT("HIT on blank stem I = ");PUT('1'); --PUT(" STEM = ");PUT_LINE(BDL(1).STEM); --PDL := new PRUNED_DICTIONARY_ITEM'(BDL(1), GENERAL, PDL); PDL_INDEX := PDL_INDEX + 1; PDL(PDL_INDEX) := PRUNED_DICTIONARY_ITEM'(BDL(1), GENERAL); end if; -- Now there is only one blank stem (2 of to_be), but need not always be so -- Determine if there is a blank stem (SC = ' ') -- Prepare for the posibility that one stem is short but there are others FC := ' '; if SSA(SSA'FIRST)(1) = ' ' then if SSA'LENGTH > 1 and then SSA(SSA'FIRST+1)(2) = ' ' then FC := SSA(SSA'FIRST+1)(1); end if; elsif SSA(SSA'FIRST)(2) = ' ' then FC := SSA(SSA'FIRST)(1); end if; -- If there is a single letter stem (FC /= ' ') then if FC /= ' ' then for I in 2..BDL_LAST loop -- Check all stems of the dictionary entry against the reduced stems --if LOWER_CASE(BDL(I).STEM(1)) = FC then if EQU(LOWER_CASE(BDL(I).STEM(1)), FC) then --PUT("HIT on 1 letter stem I = ");PUT(I);PUT(" STEM = ");PUT_LINE(BDL(I).STEM); PDL_INDEX := PDL_INDEX + 1; PDL(PDL_INDEX) := PRUNED_DICTIONARY_ITEM'(BDL(I), GENERAL); -- D_K set to GENERAL, but should not SPE have a chance? !!!!!!!!! end if; end loop; end if; if SSA'LENGTH = 0 then -- PUT_LINE("Empty stem array, don't bother searching"); return; -- elsif LEN(SSA(SSA'LAST)) <= 1 then -- PUT_LINE("No two letter stems, have done searching"); -- else -- PUT_LINE("Searching Dictionaries"); end if; for D_K in DICTIONARY_KIND loop if DICTIONARY_AVAILABLE(D_K) then if not IS_OPEN(STEM_FILE(D_K)) then OPEN(STEM_FILE(D_K), STEM_IO.IN_FILE, ADD_FILE_NAME_EXTENSION(STEM_FILE_NAME, DICTIONARY_KIND'IMAGE(D_K))); end if; DICTIONARY_SEARCH(SSA, PREFIX, SUFFIX, D_K, RESTRICTION); CLOSE(STEM_FILE(D_K)); --?????? end if; end loop; --TEXT_IO.PUT_LINE("Leaving SEARCH_DICTIONARY PDL_INDEX = " & INTEGER'IMAGE(PDL_INDEX)); end SEARCH_DICTIONARIES; procedure CHANGE_LANGUAGE(C : CHARACTER) is begin if UPPER_CASE(C) = 'L' then LANGUAGE := LATIN_TO_ENGLISH; PREFACE.PUT_LINE("Language changed to " & LANGUAGE_TYPE'IMAGE(LANGUAGE)); elsif UPPER_CASE(C) = 'E' then if ENGLISH_DICTIONARY_AVAILABLE(GENERAL) then LANGUAGE:= ENGLISH_TO_LATIN; PREFACE.PUT_LINE("Language changed to " & LANGUAGE_TYPE'IMAGE(LANGUAGE)); PREFACE.PUT_LINE("Input a single English word (+ part of speech - N, ADJ, V, PREP, ...)"); else PREFACE.PUT_LINE("No English dictionary available"); end if; else PREFACE.PUT_LINE("Bad LANGAUGE input - no change, remains " & LANGUAGE_TYPE'IMAGE(LANGUAGE)); end if; exception when others => PREFACE.PUT_LINE("Bad LANGAUGE input - no change, remains " & LANGUAGE_TYPE'IMAGE(LANGUAGE)); end CHANGE_LANGUAGE; procedure WORD(RAW_WORD : in STRING; PA : in out PARSE_ARRAY; PA_LAST : in out INTEGER) is INPUT_WORD : constant STRING := LOWER_CASE(RAW_WORD); PA_SAVE : INTEGER := PA_LAST; UNIQUE_FOUND : BOOLEAN := FALSE; SS, SSS : SAL := (others => NULL_PARSE_RECORD); procedure ORDER_STEMS(SX : in out SAL) is use INFLECTION_RECORD_IO; use DICT_IO; HITS : INTEGER := 0; SL : SAL := SX; SL_LAST : INTEGER := 0; SM : PARSE_RECORD; begin if SX(1) = NULL_PARSE_RECORD then return; end if; --PUT_LINE("ORDERing_STEMS"); for I in SL'RANGE loop exit when SL(I) = NULL_PARSE_RECORD; SL_LAST := SL_LAST + 1; end loop; --PUT_LINE("In ORDER SL_LAST = " & INTEGER'IMAGE(SL_LAST)); -- Bubble sort since this list should usually be very small (1-5) HIT_LOOP: loop HITS := 0; SWITCH: begin -- Need to remove duplicates in ARRAY_STEMS -- This sort is very sloppy -- One problem is that it can mix up some of the order of PREFIX, XXX, LOC -- I ought to do this for every set of results from different approaches -- not just in one fell swoop at the end !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! INNER_LOOP: for I in 1..SL_LAST-1 loop if SL(I+1) /= NULL_PARSE_RECORD then if (SL(I+1).MNPC < SL(I).MNPC) or else (SL(I+1).MNPC = SL(I).MNPC and then SL(I+1).IR.ENDING.SIZE < SL(I).IR.ENDING.SIZE) or else (SL(I+1).MNPC = SL(I).MNPC and then SL(I+1).IR.ENDING.SIZE = SL(I).IR.ENDING.SIZE and then SL(I+1).IR.QUAL < SL(I).IR.QUAL) or else (SL(I+1).MNPC = SL(I).MNPC and then SL(I+1).IR.ENDING.SIZE = SL(I).IR.ENDING.SIZE and then SL(I+1).IR.QUAL = SL(I).IR.QUAL and then SL(I+1).D_K < SL(I).D_K) then --PUT(SL(I+1).IR.QUAL); PUT(" < "); PUT(SL(I).IR.QUAL); NEW_LINE; SM := SL(I); SL(I) := SL(I+1); SL(I+1) := SM; SL(I+1) := SM; HITS := HITS + 1; --else --PUT(SL(I+1).IR.QUAL); PUT(" >= "); PUT(SL(I).IR.QUAL); NEW_LINE; end if; else exit INNER_LOOP; end if; end loop INNER_LOOP; end SWITCH; --NEW_LINE; --PUT_LINE("In ORDER HITS = " & INTEGER'IMAGE(HITS )); --NEW_LINE; --for I in 1..SL_LAST loop --PUT(SL(I)); NEW_LINE; --end loop; --PUT_LINE("--------------------------------------------------------"); exit when HITS = 0; end loop HIT_LOOP; SX := SL; end ORDER_STEMS; procedure ARRAY_STEMS(SX : in SAL; PA : in out PARSE_ARRAY; PA_LAST : in out INTEGER) is SL : SAL := SX; OPR : PARSE_RECORD := NULL_PARSE_RECORD; begin if SL(1) = NULL_PARSE_RECORD then return; else OPR := NULL_PARSE_RECORD; for I in SL'RANGE loop if SL(I) /= NULL_PARSE_RECORD then --PUT('*'); PUT(SL(I)); NEW_LINE; SUPRESS_KEY_CHECK: declare function "<=" (A, B : PARSE_RECORD) return BOOLEAN is use DICT_IO; begin -- !!!!!!!!!!!!!!!!!!!!!!!!!! if A.IR.QUAL = B.IR.QUAL and then A.MNPC = B.MNPC then return TRUE; else return FALSE; end if; end "<="; begin if SL(I) <= OPR then -- Get rid of duplicates, if ORDER is OK --PUT('-'); PUT(SL(I)); NEW_LINE; null; else PA_LAST := PA_LAST + 1; --PUT('+'); PUT(SL(I)); NEW_LINE; --PUT("SAL PR /= OPR and PA_LAST incremented to "); PUT(PA_LAST); NEW_LINE; PA(PA_LAST) := SL(I); OPR := SL(I); end if; end SUPRESS_KEY_CHECK; else exit; end if; end loop; end if; --TEXT_IO.PUT_LINE("At the end of ARRAY_STEMS"); --for I in 1..PA_LAST loop --PUT(PA(I).STEM); PUT(" "); PUT(PA(I).IR); NEW_LINE; --PUT(PA(I).D_K); PUT("> "); PUT(PA(I).AAMNPC.MNPC); NEW_LINE; --end loop; end ARRAY_STEMS; procedure REDUCE_STEM_LIST(SL : in SAL; SXX : in out SAL; -- Need in out if want to print it at the end --procedure REDUCE_STEM_LIST(SL : in SAL; SXX : out SAL; PREFIX : in PREFIX_ITEM := NULL_PREFIX_ITEM; SUFFIX : in SUFFIX_ITEM := NULL_SUFFIX_ITEM) is MNPC_PART : MNPC_TYPE := NULL_MNPC; PDL_PART : PART_ENTRY; COM : COMPARISON_TYPE := X; NUM_SORT : NUMERAL_SORT_TYPE := X; LS : INTEGER := 0; M : INTEGER := 0; PDL_KEY : STEM_KEY_TYPE; PDL_P : PART_OF_SPEECH_TYPE; SL_KEY : STEM_KEY_TYPE; SL_P : PART_OF_SPEECH_TYPE; function "<=" (LEFT, RIGHT : PART_OF_SPEECH_TYPE) return BOOLEAN is begin if RIGHT = LEFT or else (LEFT = PACK and RIGHT = PRON) or else RIGHT = X then return TRUE; else return FALSE; end if; end "<="; function "<=" (LEFT, RIGHT : GENDER_TYPE) return BOOLEAN is begin if (RIGHT = LEFT or else (RIGHT = C and LEFT /= N) or else (RIGHT = X)) then return TRUE; else return FALSE; end if; end "<="; function "<=" (LEFT, RIGHT : STEM_KEY_TYPE) return BOOLEAN is begin if (RIGHT = LEFT or else (RIGHT = 0)) then return TRUE; else return FALSE; end if; end "<="; begin SXX := (others => NULL_PARSE_RECORD); -- Essentially initializing --for J in 1..PDL_INDEX loop --NUMBER_IN_PDL := NUMBER_IN_PDL + 1; --TEXT_IO.PUT(INTEGER'IMAGE(NUMBER_IN_PDL)); --TEXT_IO.PUT(" PDL "); TEXT_IO.PUT(PDL(J).DS.STEM); --PART_ENTRY_IO.PUT(PDL(J).DS.PART); --TEXT_IO.PUT(INTEGER'IMAGE(PDL(J).DS.KEY)); TEXT_IO.NEW_LINE; --end loop; --TEXT_IO.PUT_LINE("****************************"); --if WORDS_MODE(WRITE_STATISTICS_FILE) then --declare -- PSTAT : PRUNED_DICTIONARY_LIST := PDL; --begin -- TEXT_IO.PUT(STATS, "Number of PDL DICT hits"); -- TEXT_IO.SET_COL(STATS, 30); -- INTEGER_IO.PUT(STATS, NUMBER_IN_PDL); -- TEXT_IO.SET_COL(STATS, 40); -- TEXT_IO.PUT(STATS, INPUT_WORD); -- TEXT_IO.NEW_LINE(STATS); --end; --end if; ------------------------------------------------------------- -- For the reduced dictionary list PDL M := 0; ON_PDL: for J in 1..PDL_INDEX loop PDL_PART := PDL(J).DS.PART; PDL_KEY := PDL(J).DS.KEY; MNPC_PART := PDL(J).DS.MNPC; -- Is there any point in going through the process for this PDL PDL_P := PDL(J).DS.PART.POFS; -- Used only for FIX logic below -- If there is no SUFFIX then carry on if (SUFFIX = NULL_SUFFIX_ITEM) then -- No suffix working, fall through --PUT_LINE("No SUFFIX in REDUCE - Fall through to PREFIX check "); null; elsif (PDL_P = N and then PDL_PART.N.DECL = (9, 8)) or -- No suffix for (PDL_P = ADJ and then PDL_PART.ADJ.DECL = (9, 8)) then -- abbreviations -- Can be no suffix on abbreviation"); goto END_OF_PDL_LOOP; else -- There is SUFFIX, see if it agrees with PDL if PDL_P <= SUFFIX.ENTR.ROOT and then -- Does SUFFIX agree in ROOT ((PDL_KEY <= SUFFIX.ENTR.ROOT_KEY) or else ((PDL_KEY = 0) and then ((PDL_P = N) or (PDL_P = ADJ) or (PDL_P = V)) and then ((SUFFIX.ENTR.ROOT_KEY = 1) or (SUFFIX.ENTR.ROOT_KEY = 2)))) then --PUT_LINE("HIT HIT HIT HIT HIT HIT HIT HIT HIT SUFFIX SUFFIX in REDUCE"); case SUFFIX.ENTR.TARGET.POFS is -- Transform PDL_PART to TARGET when N => PDL_PART := (N, SUFFIX.ENTR.TARGET.N); when PRON => PDL_PART := (PRON, SUFFIX.ENTR.TARGET.PRON); when ADJ => PDL_PART := (ADJ, SUFFIX.ENTR.TARGET.ADJ); when NUM => PDL_PART := (NUM, SUFFIX.ENTR.TARGET.NUM); when ADV => PDL_PART := (ADV, SUFFIX.ENTR.TARGET.ADV); when V => PDL_PART := (V, SUFFIX.ENTR.TARGET.V); when others => null; -- No others so far, except X = all end case; PDL_KEY := SUFFIX.ENTR.TARGET_KEY; PDL_P := PDL_PART.POFS; -- Used only for FIX logic below --PUT(" Changed to "); PUT(PDL_PART); PUT(PDL_KEY); NEW_LINE; else --PUT_LINE("In REDUCE_STEM_LIST There is no legal suffix"); -- exit; goto END_OF_PDL_LOOP; end if; end if; if (PREFIX = NULL_PREFIX_ITEM) then -- No PREFIX, drop through --PUT_LINE("No PREFIX in REDUCE - Fall through to MATCHing "); null; elsif (PDL_P = N and then PDL_PART.N.DECL = (9, 8)) or -- No prefix for (PDL_P = ADJ and then PDL_PART.ADJ.DECL = (9, 8)) or -- abbreviations (PDL_P = INTERJ or PDL_P = CONJ) then -- or INTERJ or CONJ --PUT_LINE("In REDUCE_STEM_LIST no prefix on abbreviationi, interj, conj"); goto END_OF_PDL_LOOP; else -- Check if PREFIX agrees --PUT("PREFIX in REDUCE "); --PUT(PDL_P); PUT(" <= "); PUT(PREFIX.ENTR.ROOT); PUT(" OR"); NEW_LINE; --PUT(" "); PUT(PDL_PART); PUT(" <= "); --PUT(PREFIX.ENTR.ROOT); NEW_LINE; if (PDL_P = PREFIX.ENTR.ROOT) or -- = ROOT (PDL_PART.POFS = PREFIX.ENTR.ROOT) then -- or part mod by suf --PUT_LINE("PREFIX in REDUCE PART HIT"); null; elsif (PREFIX.ENTR.ROOT = X) then -- or ROOT = X if PDL_P = N or PDL_P = PRON or PDL_P = ADJ or PDL_P = ADV or PDL_P = V then -- Dont prefix PREP, CONJ, ... -- PUT_LINE("PREFIX in REDUCE X HIT"); null; end if; else goto END_OF_PDL_LOOP; --PUT_LINE("In REDUCE_STEM_LIST There is no legal prefix"); -- exit; end if; end if; -- SUFFIX and PREFIX either agree or don't exist (agrees with everything) --PUT("ON_PDL:+ "); PUT(PDL(J).DS.STEM); PUT(PDL(J).DS.PART); --PUT(PDL(J).DS.KEY); NEW_LINE; --PUT_LINE(ADD_PREFIX(PDL(J).DS.STEM, PREFIX) & "|"); --PUT_LINE(ADD_SUFFIX(ADD_PREFIX(PDL(J).DS.STEM, PREFIX), SUFFIX) & "|"); LS := LEN(ADD_SUFFIX(ADD_PREFIX(PDL(J).DS.STEM, PREFIX), SUFFIX)); --PUT("LS = LEN of "); PUT(ADD_SUFFIX(ADD_PREFIX(PDL(J).DS.STEM, PREFIX), SUFFIX --PUT(" = "); PUT(LS); NEW_LINE; --TEXT_IO.PUT_LINE("Entering ON_SL loop"); ON_SL: for I in SL'RANGE loop exit when SL(I) = NULL_PARSE_RECORD; --TEXT_IO.PUT("SL(I) "); PARSE_RECORD_IO.PUT(SL(I)); TEXT_IO.NEW_LINE; --TEXT_IO.PUT(" PDL "); TEXT_IO.PUT(PDL(J).DS.STEM); --PART_ENTRY_IO.PUT(PDL(J).DS.PART); --TEXT_IO.PUT(INTEGER'IMAGE(PDL(J).DS.KEY)); TEXT_IO.NEW_LINE; --TEXT_IO.PUT("LS "); TEXT_IO.PUT(INTEGER'IMAGE(LS)); --TEXT_IO.PUT(" LEN SL(I).STEM "); TEXT_IO.PUT_LINE(INTEGER'IMAGE(LEN(SL(I).STEM))); if LS = LEN(SL(I).STEM) then -- Scan through the whole unreduced stem list -- Single out those stems that match (pruned) dictionary entries --^^^^^^^^^^^^^^^^^should be able to do this better with new arrangement SL_KEY := SL(I).IR.KEY; SL_P := SL(I).IR.QUAL.POFS; if ( ((PDL_KEY <= SL(I).IR.KEY) ) or else ((PDL_KEY = 0) and then (((PDL_P = N) or (PDL_P = ADJ) or (PDL_P = V)) and then ((SL(I).IR.KEY = 1) or (SL(I).IR.KEY = 2)) )) ) and then -- and KEY ( PDL_PART.POFS = EFF_PART(SL(I).IR.QUAL.POFS) ) then --TEXT_IO.PUT_LINE("####################### PDL - SL MATCH ############"); if (PDL_PART.POFS = N and then PDL_PART.N.DECL <= SL(I).IR.QUAL.N.DECL and then PDL_PART.N.GENDER <= SL(I).IR.QUAL.N.GENDER) then --TEXT_IO.PUT_LINE(" HIT N "); -- Need to transfer the gender of the noun dictionary item M := M + 1; SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX), IR => ( QUAL => ( POFS => N, N => ( PDL_PART.N.DECL, SL(I).IR.QUAL.N.CS, SL(I).IR.QUAL.N.NUMBER, PDL_PART.N.GENDER ) ), KEY => SL(I).IR.KEY, ENDING => SL(I).IR.ENDING, AGE => SL(I).IR.AGE, FREQ => SL(I).IR.FREQ), D_K => PDL(J).D_K, MNPC => MNPC_PART); elsif (PDL_PART.POFS = PRON and then PDL_PART.PRON.DECL <= SL(I).IR.QUAL.PRON.DECL) then --PUT(" HIT PRON "); -- Need to transfer the kind of the pronoun dictionary item M := M + 1; SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX), IR => ( QUAL => ( POFS => PRON, PRON => ( PDL_PART.PRON.DECL, SL(I).IR.QUAL.PRON.CS, SL(I).IR.QUAL.PRON.NUMBER, SL(I).IR.QUAL.PRON.GENDER ) ), KEY => SL(I).IR.KEY, ENDING => SL(I).IR.ENDING, AGE => SL(I).IR.AGE, FREQ => SL(I).IR.FREQ), D_K => PDL(J).D_K, MNPC => MNPC_PART); elsif (PDL_PART.POFS = ADJ) and then (PDL_PART.ADJ.DECL <= SL(I).IR.QUAL.ADJ.DECL) and then ((SL(I).IR.QUAL.ADJ.CO <= PDL_PART.ADJ.CO ) or ((SL(I).IR.QUAL.ADJ.CO = X) or (PDL_PART.ADJ.CO = X))) then -- Note the reversal on comparisom --PUT(" HIT ADJ "); -- Need to transfer the gender of the dictionary item -- Need to transfer the CO of the ADJ dictionary item if PDL_PART.ADJ.CO in POS..SUPER then -- If the dictionary entry has a unique CO, use it COM := PDL_PART.ADJ.CO; else -- Otherwise, the entry is X, generate a CO from KEY COM := ADJ_COMP_FROM_KEY(PDL_KEY); end if; M := M + 1; SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX), IR => ( QUAL => ( POFS => ADJ, ADJ => ( PDL_PART.ADJ.DECL, SL(I).IR.QUAL.ADJ.CS, SL(I).IR.QUAL.ADJ.NUMBER, SL(I).IR.QUAL.ADJ.GENDER, COM ) ), KEY => SL(I).IR.KEY, ENDING => SL(I).IR.ENDING, AGE => SL(I).IR.AGE, FREQ => SL(I).IR.FREQ), D_K => PDL(J).D_K, MNPC => MNPC_PART); elsif (PDL_PART.POFS = NUM) and then (PDL_PART.NUM.DECL <= SL(I).IR.QUAL.NUM.DECL) and then (PDL_KEY = SL(I).IR.KEY) then --PUT(" HIT NUM "); if PDL_PART.NUM.SORT = X then -- If the entry is X, generate a CO from KEY NUM_SORT:= NUM_SORT_FROM_KEY(PDL_KEY); else -- Otherwise, the dictionary entry has a unique CO, use it NUM_SORT := PDL_PART.NUM.SORT; end if; M := M + 1; SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX), IR => ( QUAL => ( POFS => NUM, NUM => ( PDL_PART.NUM.DECL, SL(I).IR.QUAL.NUM.CS, SL(I).IR.QUAL.NUM.NUMBER, SL(I).IR.QUAL.NUM.GENDER, NUM_SORT) ), KEY => SL(I).IR.KEY, ENDING => SL(I).IR.ENDING, AGE => SL(I).IR.AGE, FREQ => SL(I).IR.FREQ), D_K => PDL(J).D_K, MNPC => MNPC_PART); elsif (PDL_PART.POFS = ADV) and then ((PDL_PART.ADV.CO <= SL(I).IR.QUAL.ADV.CO ) or ((SL(I).IR.QUAL.ADV.CO = X) or (PDL_PART.ADV.CO = X))) then --PUT(" HIT ADV "); -- Need to transfer the CO of the ADV dictionary item if PDL_PART.ADV.CO in POS..SUPER then -- If the dictionary entry has a unique CO, use it COM := PDL_PART.ADV.CO; else -- The entry is X and we need to generate a COMP from the KEY COM := ADV_COMP_FROM_KEY(PDL_KEY); end if; M := M + 1; SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX), IR => ( QUAL => ( POFS => ADV, ADV => ( CO => COM) ), KEY => SL(I).IR.KEY, ENDING => SL(I).IR.ENDING, AGE => SL(I).IR.AGE, FREQ => SL(I).IR.FREQ), D_K => PDL(J).D_K, MNPC => MNPC_PART); elsif (PDL_PART.POFS = V) then --TEXT_IO.PUT_LINE("V found, now check CON"); if SL(I).IR.QUAL.POFS = V and then (PDL_PART.V.CON <= SL(I).IR.QUAL.V.CON) then --TEXT_IO.PUT(" HIT V "); M := M + 1; SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX), IR => ( QUAL => ( POFS => V, V => ( PDL_PART.V.CON, SL(I).IR.QUAL.V.TENSE_VOICE_MOOD, SL(I).IR.QUAL.V.PERSON, SL(I).IR.QUAL.V.NUMBER ) ), KEY => SL(I).IR.KEY, ENDING => SL(I).IR.ENDING, AGE => SL(I).IR.AGE, FREQ => SL(I).IR.FREQ), D_K => PDL(J).D_K, MNPC => MNPC_PART); elsif SL(I).IR.QUAL.POFS = VPAR and then (PDL_PART.V.CON <= SL(I).IR.QUAL.VPAR.CON) then --PUT(" HIT VPAR "); M := M + 1; SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX), IR => ( QUAL => ( POFS => VPAR, VPAR => ( PDL_PART.V.CON, SL(I).IR.QUAL.VPAR.CS, SL(I).IR.QUAL.VPAR.NUMBER, SL(I).IR.QUAL.VPAR.GENDER, SL(I).IR.QUAL.VPAR.TENSE_VOICE_MOOD ) ), KEY => SL(I).IR.KEY, ENDING => SL(I).IR.ENDING, AGE => SL(I).IR.AGE, FREQ => SL(I).IR.FREQ), D_K => PDL(J).D_K, MNPC => MNPC_PART); elsif SL(I).IR.QUAL.POFS = SUPINE and then (PDL_PART.V.CON <= SL(I).IR.QUAL.SUPINE.CON) then --PUT(" HIT SUPINE"); M := M + 1; SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX), IR => ( QUAL => ( POFS => SUPINE, SUPINE => ( PDL_PART.V.CON, SL(I).IR.QUAL.SUPINE.CS, SL(I).IR.QUAL.SUPINE.NUMBER, SL(I).IR.QUAL.SUPINE.GENDER) ), KEY => SL(I).IR.KEY, ENDING => SL(I).IR.ENDING, AGE => SL(I).IR.AGE, FREQ => SL(I).IR.FREQ), D_K => PDL(J).D_K, MNPC => MNPC_PART); end if; elsif PDL_PART.POFS = PREP and then PDL_PART.PREP.OBJ = SL(I).IR.QUAL.PREP.OBJ then --PUT(" HIT PREP "); M := M + 1; SXX(M) := (SUBTRACT_PREFIX(SL(I).STEM, PREFIX), SL(I).IR, PDL(J).D_K, MNPC_PART); elsif PDL_PART.POFS = CONJ then --PUT(" HIT CONJ "); M := M + 1; SXX(M) := (SUBTRACT_PREFIX(SL(I).STEM, PREFIX), SL(I).IR, PDL(J).D_K, MNPC_PART); elsif PDL_PART.POFS = INTERJ then --PUT(" HIT INTERJ "); M := M + 1; SXX(M) := (SUBTRACT_PREFIX(SL(I).STEM, PREFIX), SL(I).IR, PDL(J).D_K, MNPC_PART); end if; --TEXT_IO.NEW_LINE; PUT(SL(I).IR.QUAL); TEXT_IO.PUT(" -- "); --TEXT_IO.PUT(PDL(J).DS.STEM); PUT(PDL_PART); TEXT_IO.NEW_LINE; end if; end if; <> null; end loop ON_SL; --TEXT_IO.PUT("In RED_ST_L after loop ON_SL M = "); --TEXT_IO.PUT(INTEGER'IMAGE(M)); TEXT_IO.NEW_LINE; <> null; end loop ON_PDL; --for I in 1..M loop --TEXT_IO.PUT(INTEGER'IMAGE(I)); TEXT_IO.PUT(" "); --PARSE_RECORD_IO.PUT(SXX(I)); TEXT_IO.NEW_LINE; --end loop; end REDUCE_STEM_LIST; procedure APPLY_PREFIX(SA : in STEM_ARRAY_TYPE; SUFFIX : in SUFFIX_ITEM; SX : in SAL; SXX : in out SAL; PA : in out PARSE_ARRAY; PA_LAST : in out INTEGER) is -- Worry about the stem changing re-cipio from capio -- Correspondence of parts, need EFF for VPAR -- The prefixes should be ordered with the longest/most likely first SSA : STEM_ARRAY; L : INTEGER := 0; --use TEXT_IO; --use INFLECTIONS_PACKAGE.INTEGER_IO; begin --PUT_LINE("Entering APPLY_PREFIX"); SXX := (others => NULL_PARSE_RECORD); -- !!!!!!!!!!!!!!!!!!!!!!! if WORDS_MDEV(USE_PREFIXES) then --PUT(NUMBER_OF_PREFIXES); PUT(INTEGER(SA'LENGTH)); PUT(SA'LAST); NEW_LINE; for I in 1..NUMBER_OF_PREFIXES loop -- Loop through PREFIXES L := 0; for J in SA'RANGE loop -- Loop through stem array --PUT("J = "); PUT(J); PUT(" SA(J) = "); PUT(SA(J)); NEW_LINE; if (SA(J)(1) = PREFIXES(I).FIX(1)) then -- Cuts down a little -- do better if SUBTRACT_PREFIX(SA(J), PREFIXES(I)) /= HEAD(SA(J), MAX_STEM_SIZE) then --PUT_LINE("Hit on prefix " & PREFIXES(I).FIX); --PUT("I = "); PUT(I); PUT(" "); PUT(PREFIXES(I).FIX); PUT(" "); --PUT("J = "); PUT(J); PUT(" "); PUT(SA(J)); NEW_LINE; L := L + 1; -- We have a hit, make new stem array item SSA(L) := HEAD(SUBTRACT_PREFIX(SA(J), PREFIXES(I)), MAX_STEM_SIZE); -- And that has prefix subtracted to match dict --PUT("L = "); PUT(L); PUT(" "); PUT_LINE(SUBTRACT_PREFIX(SA(J), PREFIXES(I))); end if; -- with prefix subtracted stems end if; end loop; if L > 0 then -- There has been a prefix hit SEARCH_DICTIONARIES(SSA(1..L), -- So run new dictionary search PREFIXES(I), SUFFIX); if PDL_INDEX /= 0 then -- Dict search was successful --PUT_LINE("IN APPLY_PREFIX - PDL_INDEX not 0 after prefix " & PREFIXES(I).FIX); --PUT_LINE("REDUCE_STEM_LIST being called from APPLY_PREFIX ---- SUFFIX = " --& SUFFIX.FIX); REDUCE_STEM_LIST(SX, SXX, PREFIXES(I), SUFFIX); if SXX(1) /= NULL_PARSE_RECORD then -- There is reduced stem result PA_LAST := PA_LAST + 1; -- So add prefix line to parse array PA(PA_LAST).IR := ((PREFIX, NULL_PREFIX_RECORD), 0, NULL_ENDING_RECORD, X, X); PA(PA_LAST).STEM := HEAD(PREFIXES(I).FIX, MAX_STEM_SIZE); PA(PA_LAST).MNPC := DICT_IO.COUNT(PREFIXES(I).MNPC); PA(PA_LAST).D_K := ADDONS; exit; -- Because we accept only one prefix end if; end if; end if; end loop; -- Loop on I for PREFIXES end if; -- On USE_PREFIXES end APPLY_PREFIX; procedure APPLY_SUFFIX(SA : in STEM_ARRAY_TYPE; SX : in SAL; SXX : in out SAL; PA : in out PARSE_ARRAY; PA_LAST : in out INTEGER) is SSA : STEM_ARRAY; L : INTEGER := 0; SUFFIX_HIT : INTEGER := 0; -- use TEXT_IO; -- use INFLECTIONS_PACKAGE.INTEGER_IO; begin --PUT_LINE("Entering APPLY_SUFFIX"); --PUT(NUMBER_OF_SUFFIXES); PUT(INTEGER(SA'LENGTH)); PUT(SA'LAST); NEW_LINE; for I in 1..NUMBER_OF_SUFFIXES loop -- Loop through SUFFIXES L := 0; -- Take as many as fit for J in SA'RANGE loop -- Loop through stem array if SUBTRACT_SUFFIX(SA(J), SUFFIXES(I)) /= HEAD(SA(J), MAX_STEM_SIZE) then --PUT("Hit on suffix " & SUFFIXES(I).FIX & " " & SUFFIXES(I).CONNECT & " "); --PUT(SUFFIXES(I).ENTR); NEW_LINE; --PUT("I = "); PUT(I); PUT(" "); PUT(SUFFIXES(I).FIX); PUT(" "); --PUT("J = "); PUT(J); PUT(" "); PUT(SA(J)); NEW_LINE; L := L + 1; -- We have a hit, make new stem array item SSA(L) := HEAD(SUBTRACT_SUFFIX(SA(J), SUFFIXES(I)), MAX_STEM_SIZE); -- And that has prefix subtracted to match dict --PUT("L = "); PUT(L); PUT(" "); PUT_LINE(SUBTRACT_SUFFIX(SA(J), SUFFIXES(I))); end if; end loop; -- Loop on J through SA if L > 0 then -- There has been a suffix hit SEARCH_DICTIONARIES(SSA(1..L), NULL_PREFIX_ITEM, SUFFIXES(I)); -- So run new dictionary search -- For suffixes we allow as many as match if PDL_INDEX /= 0 then -- Dict search was successful --PUT_LINE("IN APPLY_SUFFIX - PDL_INDEX not 0 after suffix " & SUFFIXES(I).FIX); --PUT_LINE("REDUCE_STEM_LIST called from APPLY_SUFFIX"); SUFFIX_HIT := I; REDUCE_STEM_LIST(SX, SXX, NULL_PREFIX_ITEM, SUFFIXES(I)); if SXX(1) /= NULL_PARSE_RECORD then -- There is reduced stem result PA_LAST := PA_LAST + 1; -- So add suffix line to parse array --PUT_LINE("REDUCE_STEM_LIST is not null so add suffix to parse array"); PA(PA_LAST).IR := ((SUFFIX, NULL_SUFFIX_RECORD), 0, NULL_ENDING_RECORD, X, X); PA(PA_LAST).STEM := HEAD( SUFFIXES(SUFFIX_HIT).FIX, MAX_STEM_SIZE); -- Maybe it would better if suffix.fix was of stem size PA(PA_LAST).MNPC := DICT_IO.COUNT(SUFFIXES(SUFFIX_HIT).MNPC); --PUT("SUFFIX MNPC "); PUT(SUFFIXES(SUFFIX_HIT).MNPC); NEW_LINE; PA(PA_LAST).D_K := ADDONS; --- for I in SXX'RANGE loop exit when SXX(I) = NULL_PARSE_RECORD; PA_LAST := PA_LAST + 1; PA(PA_LAST) := SXX(I); end loop; --- end if; else -- there is suffix (L /= 0) but no dictionary hit SUFFIX_HIT := I; --PUT_LINE(" -- there is suffix (L /= 0) but no dictionary hit"); --PUT("L = "); PUT(L); PUT(" "); --PUT("SUFFIX_HIT = "); PUT(SUFFIXES(I).FIX); NEW_LINE; APPLY_PREFIX(SSA(1..L), SUFFIXES(I), SX, SXX, PA, PA_LAST); --PUT_LINE("PREFIXES applied from APPLY_SUFFIXES"); if SXX(1) /= NULL_PARSE_RECORD then -- There is reduced stem result PA_LAST := PA_LAST + 1; -- So add suffix line to parse array --PUT_LINE("REDUCE_STEM_LIST is not null so add suffix to parse array"); PA(PA_LAST).IR := ((SUFFIX, NULL_SUFFIX_RECORD), 0, NULL_ENDING_RECORD, X, X); PA(PA_LAST).STEM := HEAD( SUFFIXES(SUFFIX_HIT).FIX, MAX_STEM_SIZE); PA(PA_LAST).MNPC := DICT_IO.COUNT(SUFFIXES(SUFFIX_HIT).MNPC); --PUT("SUFFIX MNPC "); PUT(SUFFIXES(SUFFIX_HIT).MNPC); NEW_LINE; PA(PA_LAST).D_K := ADDONS; for I in SXX'RANGE loop -- Set this set of results exit when SXX(I) = NULL_PARSE_RECORD; PA_LAST := PA_LAST + 1; PA(PA_LAST) := SXX(I); end loop; end if; end if; end if; -- with suffix subtracted stems end loop; -- Loop on I for SUFFIXES end APPLY_SUFFIX; procedure PRUNE_STEMS(INPUT_WORD : STRING; SX : in SAL; SXX : in out SAL) is J : INTEGER := 0; --SXX : SAL; begin --TEXT_IO.PUT_LINE("Entering PRUNE_STEMS INPUT_WORD = " & INPUT_WORD ); --TEXT_IO.PUT_LINE("In PRUNE PA_LAST = " & INTEGER'IMAGE(PA_LAST)); if SX(1) = NULL_PARSE_RECORD then return; end if; ----------------------------------------------------------------- GENERATE_REDUCED_STEM_ARRAY: begin --PUT_LINE("List of stems by size"); --NEW_LINE; J := 1; for Z in 0..MIN(MAX_STEM_SIZE, LEN(INPUT_WORD)) loop if SA(Z) /= NOT_A_STEM then --PUT(Z); PUT(J); PUT(" "); PUT_LINE(SA(Z)); SSA(J) := SA(Z); SSA_MAX := J; J := J + 1; end if; end loop; --PUT_LINE("SSA_MAX = " & INTEGER'IMAGE(SSA_MAX)); --NEW_LINE(2); end GENERATE_REDUCED_STEM_ARRAY; --TEXT_IO.PUT_LINE("PRUNE_STEMS checking (not) DO_ONLY_FIXES = " & BOOLEAN'IMAGE(WORDS_MDEV(DO_ONLY_FIXES))); if not WORDS_MDEV(DO_ONLY_FIXES) then -- Just bypass main dictionary search --TEXT_IO.PUT_LINE("Calling SEARCH_DICTIONARIES from PRUNE_STEMS --- General case"); SEARCH_DICTIONARIES(SSA(1..SSA_MAX), NULL_PREFIX_ITEM, NULL_SUFFIX_ITEM); --TEXT_IO.PUT_LINE("Finished SEARCH_DICTIONARIES from PRUNE_STEMS --- General case"); end if; --TEXT_IO.PUT_LINE("PRUNE_STEMS passing over because of NOT DO_ONLY_FIXES"); -- --------------------------------------------------------------- --TEXT_IO.PUT_LINE("PRUNE_STEMS below NOT DO_ONLY_FIXES PA_LAST = " --& INTEGER'IMAGE(PA_LAST)); if (((PA_LAST = 0) and -- No Uniques or Syncope (PDL_INDEX = 0)) --) and then -- No dictionary match or WORDS_MDEV(DO_FIXES_ANYWAY)) and then WORDS_MODE(DO_FIXES) then ----So try prefixes and suffixes, Generate a new SAA array, search again --TEXT_IO.PUT_LINE(" PDL_INDEX = 0 after straight search ------ So APPLY_SUFFIX PA_LAST = " & INTEGER'IMAGE(PA_LAST)); if SXX(1) = NULL_PARSE_RECORD then -- We could not find a match with suffix APPLY_PREFIX(SSA(1..SSA_MAX), NULL_SUFFIX_ITEM, SX, SXX, PA, PA_LAST); end if; -------------- if SXX(1) = NULL_PARSE_RECORD then -- We could not find a match with suffix APPLY_SUFFIX(SSA(1..SSA_MAX), SX, SXX, PA, PA_LAST); if SXX(1) = NULL_PARSE_RECORD then -- We could not find a match with suffix ----So try prefixes, Generate a new SAA array, search again --TEXT_IO.PUT_LINE(" PDL_INDEX = 0 after suffix search ----- So APPLY_PREFIX by itself PA_LAST = " & INTEGER'IMAGE(PA_LAST)); ----Need to use the new SSA, modified to include suffixes APPLY_PREFIX(SSA(1..SSA_MAX), NULL_SUFFIX_ITEM, SX, SXX, PA, PA_LAST); --TEXT_IO.PUT_LINE("PREFIXES applied PA_LAST = " & INTEGER'IMAGE(PA_LAST)); -------------- end if; -- Suffix failed end if; -- Suffix failed else --TEXT_IO.PUT_LINE(" PDL_INDEX not 0 after straight search ------ So REDUCE_STEMS PA_LAST = " & INTEGER'IMAGE(PA_LAST)); REDUCE_STEM_LIST(SX, SXX, NULL_PREFIX_ITEM, NULL_SUFFIX_ITEM); if PA_LAST = 0 and then SXX(1) = NULL_PARSE_RECORD then --TEXT_IO.PUT_LINE("Although PDL_INDEX not 0 after straight search , SXX fails PA_LAST = " & INTEGER'IMAGE(PA_LAST)); -------------- if WORDS_MODE(DO_FIXES) then APPLY_SUFFIX(SSA(1..SSA_MAX), SX, SXX, PA, PA_LAST); --TEXT_IO.PUT_LINE("SUFFIXES applied PA_LAST = " & INTEGER'IMAGE(PA_LAST)); if SXX(1) = NULL_PARSE_RECORD then -- We could not find a match with suffix ----So try prefixes, Generate a new SAA array, search again --TEXT_IO.PUT_LINE(" PDL_INDEX = 0 after suffix search ----- So APPLY_PREFIX by itself PA_LAST = " & INTEGER'IMAGE(PA_LAST)); ----Need to use the new SSA, modified to include suffixes APPLY_PREFIX(SSA(1..SSA_MAX), NULL_SUFFIX_ITEM, SX, SXX, PA, PA_LAST); --TEXT_IO.PUT_LINE("PREFIXES applied PA_LAST = " & INTEGER'IMAGE(PA_LAST)); -------------- end if; -- Suffix failed end if; -- If DO_FIXES then do end if; -- First search passed but SXX null end if; -- First search failed --TEXT_IO.PUT_LINE("End of PRUNE_STEMS PA_LAST = " & INTEGER'IMAGE(PA_LAST)); end PRUNE_STEMS; procedure PROCESS_PACKONS(INPUT_WORD : STRING; KEY : STEM_KEY_TYPE := 0) is STEM_LENGTH : INTEGER := 0; PR : PARSE_RECORD; M : INTEGER := 1; DE : DICTIONARY_ENTRY; MEAN : MEANING_TYPE; PACKON_FIRST_HIT : BOOLEAN := FALSE; SL, SL_NULLS : SAL := (others => NULL_PARSE_RECORD); function "<=" (LEFT, RIGHT : PRONOUN_KIND_TYPE) return BOOLEAN is begin if (RIGHT = LEFT or else RIGHT = X) then return TRUE; elsif (RIGHT = ADJECT and -- Just for PACK LEFT = INDEF) then return TRUE; else return FALSE; end if; end "<="; begin OVER_PACKONS: for K in PACKONS'RANGE loop -- Do whole set, more than one may apply --TEXT_IO.PUT_LINE("OVER_PACKONS K = "& INTEGER'IMAGE(K) & " PACKON = " & PACKONS(K).TACK); -- PACKON if the TACKON ENTRY is PRON FOR_EACH_PACKON: declare XWORD : constant STRING := SUBTRACT_TACKON(INPUT_WORD, PACKONS(K)); WORD : STRING(1..XWORD'LENGTH) := XWORD; PACKON_LENGTH : constant INTEGER := TRIM(PACKONS(K).TACK)'LENGTH; LAST_OF_WORD : CHARACTER := WORD(WORD'LAST); LENGTH_OF_WORD : constant INTEGER := WORD'LENGTH; begin --PUT_LINE("FOR_EACH_PACKON WORD = |"& WORD & "|"); --PUT_LINE("FOR_EACH_PACKON PACKON_LENGTH = "& INTEGER'IMAGE(PACKON_LENGTH)); --PUT_LINE("FOR_EACH_PACKON LENGTH_OF_WORD = "& INTEGER'IMAGE(LENGTH_OF_WORD)); SL := SL_NULLS; -- Initialize SL to nulls if WORD /= INPUT_WORD then --PUT_LINE("PROCESS_PACKONS Hit on PACKON " & PACKONS(K).TACK); PACKON_FIRST_HIT := TRUE; if PACKONS(K).TACK(1..3) = "dam" and LAST_OF_WORD = 'n' then WORD(WORD'LAST) := 'm'; -- Takes care of the m - > n shift with dam LAST_OF_WORD := 'm'; --PUT_LINE("PACKON = dam and LAST_OF_WORD = n => " & WORD); end if; -- No blank endings in these pronouns LEL_SECTION_IO.READ(INFLECTIONS_SECTIONS_FILE, LEL, 4); M := 0; ON_INFLECTS: for Z in reverse 1..MIN(6, LENGTH_OF_WORD) loop -- optimum for qu-pronouns --PUT("ON_INFLECTS Z = "); PUT(Z); PUT(" "); PUT(WORD(1..Z)); NEW_LINE; if PELL(Z, LAST_OF_WORD) > 0 then -- Any possible inflections at all for I in PELF(Z, LAST_OF_WORD)..PELL(Z, LAST_OF_WORD) loop --PUT("+");PUT(LEL(I)); PUT(WORD'LAST); PUT(WORD(WORD'LAST-Z+1..WORD'LAST)); --PUT(" "); PUT((LEL(I).ENDING.SUF(1..Z))); NEW_LINE; if (Z <= LENGTH_OF_WORD) and then ((EQU(LEL(I).ENDING.SUF(1..Z), WORD(WORD'LAST-Z+1..WORD'LAST))) and (LEL(I).QUAL.PRON.DECL <= PACKONS(K).ENTR.BASE.PACK.DECL)) then -- Have found an ending that is a possible match -- And INFLECT agrees with PACKON.BASE --PUT_LINE("INFLECTS HIT ------------------------------------------------------"); -- Add to list of possible ending records STEM_LENGTH := WORD'LENGTH - Z; PR := (HEAD(WORD(WORD'FIRST..STEM_LENGTH), MAX_STEM_SIZE), LEL(I), DEFAULT_DICTIONARY_KIND, NULL_MNPC); M := M + 1; SL(M) := PR; SSA(1) := HEAD(WORD(WORD'FIRST.. WORD'FIRST+STEM_LENGTH-1), MAX_STEM_SIZE); --PUT_LINE("STEM_LENGTH = " & INTEGER'IMAGE(STEM_LENGTH)); --PUT_LINE("SSA(1) in PACKONS from real INFLECTS ->" & SSA(1) & '|'); -- may get set several times end if; end loop; end if; end loop ON_INFLECTS; -- Only one stem will emerge PDL_INDEX := 0; SEARCH_DICTIONARIES(SSA(1..1), NULL_PREFIX_ITEM, NULL_SUFFIX_ITEM, PACK_ONLY); -- Now have a PDL, scan for agreement PDL_LOOP: for J in 1..PDL_INDEX loop -- Go through all dictionary hits to see --PUT_LINE("PACKON PDL_INDEX "); PUT(PDL(J).DS.STEM); PUT(PDL(J).DS.PART); NEW_LINE; -- M used here wher I is used in REDUCE, maybe make consistent M := 1; SL_LOOP: while SL(M) /= NULL_PARSE_RECORD loop -- Over all inflection hits -- if this stem is possible -- call up the meaning to check for "(w/-" DICT_IO.SET_INDEX(DICT_FILE(PDL(J).D_K), PDL(J).DS.MNPC); DICT_IO.READ(DICT_FILE(PDL(J).D_K), DE); MEAN := DE.MEAN; if (TRIM(MEAN)(1..4) = "(w/-" and then -- Does attached PACKON agree TRIM(MEAN)(5..4+PACKON_LENGTH) = TRIM(PACKONS(K).TACK)) then --PUT_LINE("Mean = PACK Hit Hit "); --PUT("MEAN|" & MEAN(1..4) & '|'); --PUT_LINE("PACK|" & TRIM(PACKONS(K).TACK) & '|'); --PUT("DECL PDL_INDEX "); PUT(PDL(J).DS.PART.PACK.DECL); --PUT(" <= ? SL "); --PUT(SL(M).IR.QUAL.PRON.DECL); --PUT(" <= ? PACKON "); --PUT(PACKONS(K).ENTR.BASE.PACK.DECL); NEW_LINE; if (PDL(J).DS.PART.PACK.DECL = SL(M).IR.QUAL.PRON.DECL) then -- or --PUT_LINE("DECL Hit Hit Hit Hit Hit Hit Hit Hit "); --PUT("KINDS PACKON "); --PUT(PACKONS(K).ENTR.BASE.PACK.KIND); --PUT(" <= ? PDL_KIND "); --PUT(PDL(J).DS.PART.PACK.KIND); --NEW_LINE; --if (PACKONS(K).ENTR.BASE.PACK.KIND <= -- PDL(J).DS.PART.PACK.KIND) then -- Then we have a hit and make a PA --PUT_LINE("KIND Hit Hit Hit Hit Hit Hit Hit Hit Hit Hit Hit Hit Hit Hit Hit "); if PACKON_FIRST_HIT then PA_LAST := PA_LAST + 1; PA(PA_LAST) := (PACKONS(K).TACK, ((TACKON, NULL_TACKON_RECORD), 0, NULL_ENDING_RECORD, X, X), ADDONS, DICT_IO.COUNT((PACKONS(K).MNPC))); PACKON_FIRST_HIT := FALSE; end if; PA_LAST := PA_LAST + 1; --PUT_LINE("PACKON PDL HIT PA_LAST = " & INTEGER'IMAGE(PA_LAST)); --PUT(PDL(J).DS.STEM); PUT(PDL(J).DS.PART); NEW_LINE; --PUT_LINE(MEAN); --PUT(PA(PA_LAST)); NEW_LINE; PA(PA_LAST) := (STEM => SL(M).STEM, IR => ( QUAL => ( POFS => PRON, PRON => ( PDL(J).DS.PART.PACK.DECL, SL(M).IR.QUAL.PRON.CS, SL(M).IR.QUAL.PRON.NUMBER, SL(M).IR.QUAL.PRON.GENDER )), KEY => SL(M).IR.KEY, ENDING => SL(M).IR.ENDING, AGE => SL(M).IR.AGE, FREQ => SL(M).IR.FREQ), D_K => PDL(J).D_K, MNPC => PDL(J).DS.MNPC); --end if; end if; end if; M := M + 1; end loop SL_LOOP; end loop PDL_LOOP; end if; end FOR_EACH_PACKON; PACKON_FIRST_HIT := FALSE; end loop OVER_PACKONS; end PROCESS_PACKONS; procedure PROCESS_QU_PRONOUNS(INPUT_WORD : STRING; QKEY : STEM_KEY_TYPE := 0) is WORD : constant STRING := LOWER_CASE(TRIM(INPUT_WORD)); LAST_OF_WORD : constant CHARACTER := WORD(WORD'LAST); LENGTH_OF_WORD : constant INTEGER := WORD'LENGTH; STEM_LENGTH : INTEGER := 0; M : INTEGER := 0; PR : PARSE_RECORD; SL : SAL := (others => NULL_PARSE_RECORD); begin --TEXT_IO.PUT_LINE("PROCESS_QU_PRONOUNS " & INPUT_WORD); -- No blank endings in these pronouns LEL_SECTION_IO.READ(INFLECTIONS_SECTIONS_FILE, LEL, 4); -- M used here while I is used in REDUCE, maybe make consistent M := 0; ON_INFLECTS: for Z in reverse 1..MIN(4, LENGTH_OF_WORD) loop -- optimized for qu-pronouns --PUT("ON_INFLECTS "); PUT(Z); PUT(" "); PUT(LAST_OF_WORD); NEW_LINE; if PELL(Z, LAST_OF_WORD) > 0 then -- Any possible inflections at all for I in PELF(Z, LAST_OF_WORD)..PELL(Z, LAST_OF_WORD) loop --PUT(LEL(I)); PUT(WORD'LAST); PUT(WORD'LAST-Z+1); NEW_LINE; if (Z <= LENGTH_OF_WORD) and then LEL(I).KEY = QKEY and then EQU(LEL(I).ENDING.SUF(1..Z), WORD(WORD'LAST-Z+1..WORD'LAST)) then -- Have found an ending that is a possible match --PUT_LINE("INFLECTS HIT --------------------------------------------"); -- Add to list of possible ending records STEM_LENGTH := WORD'LENGTH - Z; PR := (HEAD(WORD(WORD'FIRST..STEM_LENGTH), MAX_STEM_SIZE), LEL(I), DEFAULT_DICTIONARY_KIND, NULL_MNPC); M := M + 1; SL(M) := PR; --PUT("M = "); PUT(M); PUT(" "); PUT(SL(M)); NEW_LINE; SSA(1) := HEAD(WORD(WORD'FIRST.. WORD'FIRST+STEM_LENGTH-1), MAX_STEM_SIZE); -- may get set several times end if; end loop; end if; end loop ON_INFLECTS; -- Only one stem will emerge PDL_INDEX := 0; SEARCH_DICTIONARIES(SSA(1..1), NULL_PREFIX_ITEM, NULL_SUFFIX_ITEM, QU_PRON_ONLY); -- Now have a PDL, scan for agreement PDL_LOOP: for J in 1..PDL_INDEX loop -- Go through all dictionary hits to see M := 1; SL_LOOP: while SL(M) /= NULL_PARSE_RECORD loop -- Over all inflection hits --PUT("SL_LOOP M = "); PUT(M); PUT(" SL => "); PUT(SL(M)); NEW_LINE; NEW_LINE; --PUT("DECL PDL "); PUT(PDL(J).DS.PART.PRON.DECL); --PUT(" <= ? SL "); --PUT(SL(M).IR.QUAL.PRON.DECL); --NEW_LINE; if (PDL(J).DS.PART.PRON.DECL = SL(M).IR.QUAL.PRON.DECL) then --PUT_LINE("DECL Hit Hit Hit Hit Hit Hit Hit Hit "); PA_LAST := PA_LAST + 1; --PUT_LINE("QU_PRON PDL HIT PA_LAST = " & INTEGER'IMAGE(PA_LAST)); --PUT(PDL(J).DS.STEM); PUT(PDL(J).DS.PART); NEW_LINE; --PUT(PA(PA_LAST)); NEW_LINE; PA(PA_LAST) := (STEM => SL(M).STEM, IR => ( QUAL => ( POFS => PRON, PRON => ( PDL(J).DS.PART.PRON.DECL, SL(M).IR.QUAL.PRON.CS, SL(M).IR.QUAL.PRON.NUMBER, SL(M).IR.QUAL.PRON.GENDER )), KEY => SL(M).IR.KEY, ENDING => SL(M).IR.ENDING, AGE => SL(M).IR.AGE, FREQ => SL(M).IR.FREQ), D_K => PDL(J).D_K, MNPC => PDL(J).DS.MNPC); end if; M := M + 1; end loop SL_LOOP; -- PDL:= PDL.SUCC; end loop PDL_LOOP; end PROCESS_QU_PRONOUNS; procedure TRY_TACKONS(INPUT_WORD : STRING) is TACKON_HIT : BOOLEAN := FALSE; TACKON_ON : BOOLEAN := FALSE; TACKON_LENGTH : constant INTEGER := 0; J : INTEGER := 0; DE : DICTIONARY_ENTRY := NULL_DICTIONARY_ENTRY; MEAN : MEANING_TYPE := NULL_MEANING_TYPE; ENTERING_PA_LAST : INTEGER := PA_LAST; START_OF_LOOP : INTEGER := 5; -- 4 enclitics -- Hard number !!!!!!!!!!!!!!! END_OF_LOOP : INTEGER := NUMBER_OF_TACKONS; begin --TEXT_IO.PUT_LINE("TRYing TACKONS *************** INPUT_WORD = " & INPUT_WORD); LOOP_OVER_TACKONS: for I in START_OF_LOOP..END_OF_LOOP loop --PUT_LINE("TACKON #" & INTEGER'IMAGE(I) & " " & --SUBTRACT_TACKON(INPUT_WORD, TACKONS(I)) & " + " & TACKONS(I).TACK); REMOVE_A_TACKON: declare LESS : constant STRING := SUBTRACT_TACKON(INPUT_WORD, TACKONS(I)); begin --TEXT_IO.PUT_LINE("LESS = " & LESS); if LESS /= INPUT_WORD then -- LESS is less --========================================================== --RUN_UNIQUES(INPUT_WORD, UNIQUE_FOUND, PA, PA_LAST); --RUN_INFLECTIONS(LESS, SS); --PRUNE_STEMS(LESS, SS, SSS); --if SSS(1) /= NULL_PARSE_RECORD then --ORDER_STEMS(SSS); --ARRAY_STEMS(SSS, PA, PA_LAST); --SSS(1) := NULL_PARSE_RECORD; --end if; --========================================================== WORD(LESS, PA, PA_LAST); -- TEXT_IO.PUT("In TRY_TACKONS Left WORD "); -- TEXT_IO.PUT("PA_LAST = "); TEXT_IO.PUT(INTEGER'IMAGE(PA_LAST)); TEXT_IO.PUT(" "); -- TEXT_IO.PUT(TACKONS(I).TACK); -- TEXT_IO.NEW_LINE; ----------------------------------------- if PA_LAST > ENTERING_PA_LAST then -- we have a possible word --TEXT_IO.PUT("I = " & INTEGER'IMAGE(I) & " " & TACKONS(I).TACK & " TACKONS(I).ENTR.BASE.PART = "); --PART_OF_SPEECH_TYPE_IO.PUT(TACKONS(I).ENTR.BASE.PART); TEXT_IO.NEW_LINE; if TACKONS(I).ENTR.BASE.POFS = X then -- on PART (= X?) --PUT("TACKON X found "); PUT( TACKONS(I).TACK); NEW_LINE; --PUT("PA_LAST = "); PUT(PA_LAST); PUT(" "); --PUT("TACKON MNPC "); PUT( TACKONS(I).MNPC); NEW_LINE; TACKON_HIT := TRUE; --PUT("TACKON_HIT = "); PUT(TACKON_HIT); NEW_LINE; TACKON_ON := FALSE; else J := PA_LAST; while J >= ENTERING_PA_LAST+1 loop -- Sweep backwards over PA -- Sweeping up inapplicable fixes, -- although we only have TACKONs for X or PRON or ADJ - so far -- and there are no fixes for PRON - so far --TEXT_IO.PUT("J = " & INTEGER'IMAGE(J) & " PA(J).IR.QUAL = "); --QUALITY_RECORD_IO.PUT(PA(J).IR.QUAL); --TEXT_IO.NEW_LINE; if ((PA(J).IR.QUAL.POFS = PREFIX) and then (TACKON_ON)) then null; -- check PART TACKON_ON := FALSE; elsif ((PA(J).IR.QUAL.POFS = SUFFIX) and then (TACKON_ON)) then -- check PART null; TACKON_ON := FALSE; elsif PA(J).IR.QUAL.POFS = TACKONS(I).ENTR.BASE.POFS then DICT_IO.SET_INDEX(DICT_FILE(PA(J).D_K), PA(J).MNPC); DICT_IO.READ(DICT_FILE(PA(J).D_K), DE); MEAN := DE.MEAN; --TEXT_IO.PUT("J = " & INTEGER'IMAGE(J) & " PA(J).IR.QUAL = "); --QUALITY_RECORD_IO.PUT(PA(J).IR.QUAL); --TEXT_IO.NEW_LINE; -- check PART case TACKONS(I).ENTR.BASE.POFS is when N => if (PA(J).IR.QUAL.N.DECL <= TACKONS(I).ENTR.BASE.N.DECL) then -- Ignore GEN and KIND TACKON_HIT := TRUE; TACKON_ON := TRUE; end if; when PRON => -- Only one we have other than X --PUT("TACK/PA DECL "); PUT(PA(J).IR.QUAL.PRON.DECL); PUT(" - "); --PUT(TACKONS(I).ENTR.BASE.PRON.DECL); NEW_LINE; --PUT("TACK/PA KIND "); PUT(PA(J).IR.QUAL.PRON.KIND); PUT(" - "); --PUT(TACKONS(I).ENTR.BASE.PRON.KIND); NEW_LINE; if PA(J).IR.QUAL.PRON.DECL <= TACKONS(I).ENTR.BASE.PRON.DECL --and then --PA(J).IR.QUAL.PRON.KIND <= --TACKONS(I).ENTR.BASE.PRON.KIND then --PUT("TACKON PRON found HIT "); PUT( TACKONS(I).TACK); NEW_LINE; TACKON_HIT := TRUE; TACKON_ON := TRUE; else PA(J..PA_LAST-1) := PA(J+1..PA_LAST); PA_LAST := PA_LAST - 1; end if; when ADJ => -- Forego all checks, even on DECL of ADJ -- -cumque is the only one I have now -- if ....... TACKON_HIT := TRUE; TACKON_ON := TRUE; -- else -- PA(J..PA_LAST-1) := PA(J+1..PA_LAST); -- PA_LAST := PA_LAST - 1; -- end if; --when ADV => --when V => when others => PA(J..PA_LAST-1) := PA(J+1..PA_LAST); PA_LAST := PA_LAST - 1; end case; else -- check PART PA(J..PA_LAST-1) := PA(J+1..PA_LAST); PA_LAST := PA_LAST - 1; --PUT("J failed J & PA_LAST = "); PUT(J); PUT(" "); PUT(PA_LAST); NEW_LINE; end if; -- check PART J := J - 1; end loop; -- loop sweep over PA end if; -- on PART (= X?) --PUT_LINE("End if on PART = X ?"); ----------------------------------------- if TACKON_HIT then --PUT("Where it counts TACKON_HIT = "); PUT(TACKON_HIT); NEW_LINE; -- Put on TACKON PA_LAST := PA_LAST + 1; PA(ENTERING_PA_LAST+2..PA_LAST) := PA(ENTERING_PA_LAST+1..PA_LAST-1); PA(ENTERING_PA_LAST+1) := (TACKONS(I).TACK, ((TACKON, NULL_TACKON_RECORD), 0, NULL_ENDING_RECORD, X, X), ADDONS, DICT_IO.COUNT((TACKONS(I).MNPC))); --PUT("PA_LAST = "); PUT(PA_LAST); PUT(" "); --PUT("I = "); PUT(I); PUT(" TACKONS(I).TACK = "); PUT(TACKONS(I).TACK); --PUT_LINE("TACKON added"); --PUT_LINE("Now list the PA array after adding the found TACKON"); --for K in 1..PA_LAST loop --PUT("K = "); PUT(K); PUT(" PA(K) "); PUT(PA(K).D_K); PUT(PA(K).IR); NEW_LINE; --end loop; return; -- Be happy with one ??????? else null; --PUT("No TACKON_HIT, so no punitive TACKON PA_LAST is "); --PUT(PA_LAST); NEW_LINE; end if; -- TACKON_HIT end if; -- we have a possible word ----------------------------------------- end if; -- LESS is less end REMOVE_A_TACKON; end loop LOOP_OVER_TACKONS; --PUT_LINE("LEAVING TACKONS ******************************************* "); end TRY_TACKONS; begin -- WORD --TEXT_IO.PUT_LINE("Starting WORD INPUT = " & INPUT_WORD & " PA_LAST = " & INTEGER'IMAGE(PA_LAST)); if TRIM(INPUT_WORD) = "" then return; end if; RUN_UNIQUES(INPUT_WORD, UNIQUE_FOUND, PA, PA_LAST); --if INPUT_WORD(INPUT_WORD'FIRST) in 'a'..'z' then -- CONSTRUCT_STEMS(INPUT_WORD, -- SEARCH_DICTIONARIES(); -- TRY_STEMS_AGAINST_INFLECTIONS; -- end if; --if INPUT_WORD(INPUT_WORD'FIRST) in 'a'..'z' then --TEXT_IO.PUT_LINE("After UNIQUES INPUT = " & INPUT_WORD & " PA_LAST = " & INTEGER'IMAGE(PA_LAST)); QU: declare PA_QSTART : INTEGER := PA_LAST; PA_START : INTEGER := PA_LAST; SAVED_MODE_ARRAY : MODE_ARRAY := WORDS_MODE; QKEY : STEM_KEY_TYPE := 0; begin -- QU TICKONS(NUMBER_OF_TICKONS+1) := NULL_PREFIX_ITEM; WORDS_MODE := (others => FALSE); for I in 1..NUMBER_OF_TICKONS+1 loop declare Q_WORD : constant STRING := TRIM(SUBTRACT_TICKON(INPUT_WORD, TICKONS(I))); begin PA_LAST := PA_QSTART; PA(PA_LAST+1) := NULL_PARSE_RECORD; if (I = NUMBER_OF_TICKONS + 1) or else -- The prefix is a TICKON (Q_WORD /= INPUT_WORD) then -- and it matches the start of INPUT_WORD if I <= NUMBER_OF_TICKONS then -- Add to PA if --TEXT_IO.PUT_LINE("ADDING TICKON PA " & TICKONS(I).FIX); PA_LAST := PA_LAST + 1; -- So add prefix line to parse array PA(PA_LAST).STEM := HEAD(TICKONS(I).FIX, MAX_STEM_SIZE); PA(PA_LAST).IR := ((PREFIX, NULL_PREFIX_RECORD), 0, NULL_ENDING_RECORD, X, X); PA(PA_LAST).D_K := ADDONS; PA(PA_LAST).MNPC := DICT_IO.COUNT(TICKONS(I).MNPC); end if; if Q_WORD'LENGTH >= 3 and then -- qui is shortest QU_PRON ((Q_WORD(Q_WORD'FIRST..Q_WORD'FIRST+1) = "qu") or (Q_WORD(Q_WORD'FIRST..Q_WORD'FIRST+1) = "cu")) then if Q_WORD(Q_WORD'FIRST..Q_WORD'FIRST+1) = "qu" then QKEY := 1; PROCESS_QU_PRONOUNS(Q_WORD, QKEY); elsif Q_WORD(Q_WORD'FIRST..Q_WORD'FIRST+1) = "cu" then QKEY := 2; PROCESS_QU_PRONOUNS(Q_WORD, QKEY); end if; if PA_LAST <= PA_QSTART + 1 and then QKEY > 0 then -- If did not find a PACKON if Q_WORD(Q_WORD'FIRST..Q_WORD'FIRST+1) = "qu" then PROCESS_PACKONS(Q_WORD, QKEY); elsif Q_WORD(Q_WORD'FIRST..Q_WORD'FIRST+1) = "cu" then PROCESS_PACKONS(Q_WORD, QKEY); end if; else exit; end if; if PA_LAST > PA_QSTART + 1 then exit; end if; elsif INPUT_WORD'LENGTH >= 6 then -- aliqui as aliQU_PRON if INPUT_WORD(INPUT_WORD'FIRST..INPUT_WORD'FIRST+4) = "aliqu" then PROCESS_QU_PRONOUNS(INPUT_WORD, 1); elsif INPUT_WORD(INPUT_WORD'FIRST..INPUT_WORD'FIRST+4) = "alicu" then PROCESS_QU_PRONOUNS(INPUT_WORD, 2); end if; end if; if PA_LAST = PA_START + 1 then -- Nothing found PA_LAST := PA_START; -- Reset PA_LAST else exit; end if; end if; end; end loop; WORDS_MODE := SAVED_MODE_ARRAY; exception when others => WORDS_MODE := SAVED_MODE_ARRAY; end QU; --========================================================== RUN_INFLECTIONS(INPUT_WORD, SS); PRUNE_STEMS(INPUT_WORD, SS, SSS); --TEXT_IO.PUT_LINE("After PRUNE INPUT = " & INPUT_WORD & " PA_LAST = " & INTEGER'IMAGE(PA_LAST)); if SSS(1) /= NULL_PARSE_RECORD then ORDER_STEMS(SSS); ARRAY_STEMS(SSS, PA, PA_LAST); SSS(1) := NULL_PARSE_RECORD; end if; --TEXT_IO.PUT_LINE("After ARRAY INPUT = " & INPUT_WORD & " PA_LAST = " & INTEGER'IMAGE(PA_LAST)); --========================================================== if PA_LAST = PA_SAVE then TRY_TACKONS(INPUT_WORD); end if; --TEXT_IO.PUT_LINE("Out WORD INPUT = " & INPUT_WORD & " PA_LAST = " & INTEGER'IMAGE(PA_LAST)); --TEXT_IO.SET_OUTPUT(TEXT_IO.STANDARD_OUTPUT); exception when STORAGE_ERROR => TEXT_IO.PUT_LINE(TEXT_IO.STANDARD_OUTPUT, "STORAGE_ERROR exception in WORD while processing =>" & RAW_WORD); PA_LAST := PA_SAVE; if WORDS_MODE(WRITE_UNKNOWNS_TO_FILE) then TEXT_IO.PUT(UNKNOWNS, RAW_WORD); TEXT_IO.SET_COL(UNKNOWNS, 21); TEXT_IO.PUT_LINE(UNKNOWNS, "======== STORAGE_ERROR "); end if; when others => if WORDS_MODE(WRITE_UNKNOWNS_TO_FILE) then TEXT_IO.PUT(UNKNOWNS, RAW_WORD); TEXT_IO.SET_COL(UNKNOWNS, 21); TEXT_IO.PUT_LINE(UNKNOWNS, "======== ERROR "); end if; PA_LAST := PA_SAVE; end WORD; procedure INITIALIZE_WORD_PACKAGE is begin -- Initializing WORD_PACKAGE ESTABLISH_INFLECTIONS_SECTION; LEL_SECTION_IO.OPEN(INFLECTIONS_SECTIONS_FILE, LEL_SECTION_IO.IN_FILE, INFLECTIONS_SECTIONS_NAME); TRY_TO_LOAD_DICTIONARY(GENERAL); TRY_TO_LOAD_DICTIONARY(SPECIAL); LOAD_LOCAL: begin -- First check if there is a LOC dictionary CHECK_FOR_LOCAL_DICTIONARY: declare DUMMY : TEXT_IO.FILE_TYPE; begin TEXT_IO.OPEN(DUMMY, TEXT_IO.IN_FILE, ADD_FILE_NAME_EXTENSION(DICTIONARY_FILE_NAME, "LOCAL")); -- Failure to OPEN will raise an exception, to be handled below TEXT_IO.CLOSE(DUMMY); end CHECK_FOR_LOCAL_DICTIONARY; -- If the above does not exception out, we can load LOC PREFACE.PUT("LOCAL "); DICT_LOC := NULL_DICTIONARY; LOAD_DICTIONARY(DICT_LOC, ADD_FILE_NAME_EXTENSION(DICTIONARY_FILE_NAME, "LOCAL")); -- Need to carry LOC through consistently on LOAD_D and LOAD_D_FILE LOAD_STEM_FILE(LOCAL); DICTIONARY_AVAILABLE(LOCAL) := TRUE; exception when others => DICTIONARY_AVAILABLE(LOCAL) := FALSE; end LOAD_LOCAL; LOAD_UNIQUES(UNQ, UNIQUES_FULL_NAME); LOAD_ADDONS(ADDONS_FULL_NAME); --TEXT_IO.PUT_LINE("Loaded ADDONS"); LOAD_BDL_FROM_DISK; --TEXT_IO.PUT_LINE("BDL loaded"); if not (DICTIONARY_AVAILABLE(GENERAL) or DICTIONARY_AVAILABLE(SPECIAL) or DICTIONARY_AVAILABLE(LOCAL)) then PREFACE.PUT_LINE("There are no main dictionaries - program will not do much"); PREFACE.PUT_LINE("Check that there are dictionary files in this subdirectory"); PREFACE.PUT_LINE("Except DICT.LOC that means DICTFILE, INDXFILE, STEMFILE"); end if; --TEXT_IO.PUT_LINE("Ready to load English"); TRY_TO_LOAD_ENGLISH_WORDS: begin ENGLISH_DICTIONARY_AVAILABLE(GENERAL) := FALSE; EWDS_DIRECT_IO.OPEN(EWDS_FILE, EWDS_DIRECT_IO.IN_FILE, "EWDSFILE.GEN"); ENGLISH_DICTIONARY_AVAILABLE(GENERAL) := TRUE; exception when others => PREFACE.PUT_LINE("No English available"); ENGLISH_DICTIONARY_AVAILABLE(GENERAL) := FALSE; end TRY_TO_LOAD_ENGLISH_WORDS; --put_line("WORD_PACKAGE INITIALIZED"); end INITIALIZE_WORD_PACKAGE; end WORD_PACKAGE;