2227 lines
101 KiB
Ada
2227 lines
101 KiB
Ada
with TEXT_IO;
|
|
with LATIN_FILE_NAMES; use LATIN_FILE_NAMES;
|
|
with STRINGS_PACKAGE; use STRINGS_PACKAGE;
|
|
with CONFIG; use CONFIG;
|
|
with UNIQUES_PACKAGE; use UNIQUES_PACKAGE;
|
|
with ADDONS_PACKAGE; use ADDONS_PACKAGE;
|
|
with WORD_PARAMETERS; use WORD_PARAMETERS;
|
|
with PREFACE;
|
|
with DEVELOPER_PARAMETERS; use DEVELOPER_PARAMETERS;
|
|
with LINE_STUFF; use LINE_STUFF;
|
|
with ENGLISH_SUPPORT_PACKAGE; use ENGLISH_SUPPORT_PACKAGE;
|
|
package body WORD_PACKAGE is
|
|
|
|
INFLECTIONS_SECTIONS_FILE : LEL_SECTION_IO.FILE_TYPE;
|
|
|
|
procedure PAUSE(OUTPUT : TEXT_IO.FILE_TYPE) is
|
|
use CONFIG;
|
|
PAUSE_LINE : STRING(1..300);
|
|
PAUSE_LAST : INTEGER := 0;
|
|
begin
|
|
if WORDS_MDEV(PAUSE_IN_SCREEN_OUTPUT) then
|
|
if METHOD = INTERACTIVE then
|
|
if TEXT_IO.NAME(OUTPUT) =
|
|
-- TEXT_IO.NAME(TEXT_IO.STANDARD_OUTPUT) then
|
|
-- TEXT_IO.PUT_LINE(TEXT_IO.STANDARD_OUTPUT,
|
|
-- " MORE - hit RETURN/ENTER to continue");
|
|
-- TEXT_IO.GET_LINE(TEXT_IO.STANDARD_INPUT, PAUSE_LINE, PAUSE_LAST);
|
|
TEXT_IO.PUT_LINE(TEXT_IO.STANDARD_OUTPUT, "");
|
|
end if;
|
|
elsif METHOD = COMMAND_LINE_INPUT then
|
|
TEXT_IO.PUT_LINE(TEXT_IO.STANDARD_OUTPUT, "");
|
|
-- TEXT_IO.PUT_LINE(TEXT_IO.STANDARD_OUTPUT,
|
|
-- " MORE - hit RETURN/ENTER to continue");
|
|
-- TEXT_IO.GET_LINE(TEXT_IO.STANDARD_INPUT, PAUSE_LINE, PAUSE_LAST);
|
|
elsif METHOD = COMMAND_LINE_FILES then
|
|
null; -- Do not PAUSE
|
|
end if;
|
|
end if;
|
|
exception
|
|
when others =>
|
|
TEXT_IO.PUT_LINE("Unexpected exception in PAUSE");
|
|
end PAUSE;
|
|
|
|
function MIN(A, B : INTEGER) return INTEGER is
|
|
begin
|
|
if A <= B then
|
|
return A; end if;
|
|
return B;
|
|
end MIN;
|
|
|
|
|
|
function LTU(C, D : CHARACTER) return BOOLEAN is
|
|
begin
|
|
if (D = 'v') then
|
|
if (C < 'u') then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
elsif (D = 'j') then
|
|
if (C < 'i') then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
elsif (D = 'V') then
|
|
if (C < 'U') then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
elsif (D = 'J') then
|
|
if (C < 'I') then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
else
|
|
return C < D;
|
|
end if;
|
|
end LTU;
|
|
|
|
function EQU(C, D : CHARACTER) return BOOLEAN is
|
|
begin
|
|
if (D = 'u') or (D = 'v') then
|
|
if (C = 'u') or (C = 'v') then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
elsif (D = 'i') or (D = 'j') then
|
|
if (C = 'i') or (C = 'j') then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
elsif (D = 'U') or (D = 'V') then
|
|
if (C = 'U') or (C = 'V') then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
elsif (D = 'I') or (D = 'J') then
|
|
if (C = 'I') or (C = 'J') then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
else
|
|
return C = D;
|
|
end if;
|
|
end EQU;
|
|
|
|
function GTU(C, D : CHARACTER) return BOOLEAN is
|
|
begin
|
|
if D = 'u' then
|
|
if (C > 'v') then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
elsif D = 'i' then
|
|
if (C > 'j') then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
elsif D = 'U' then
|
|
if (C > 'V') then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
elsif D = 'I' then
|
|
if (C > 'J') then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
else
|
|
return C > D;
|
|
end if;
|
|
end GTU;
|
|
|
|
function LTU(S, T : STRING) return BOOLEAN is
|
|
begin
|
|
for I in 1..S'LENGTH loop -- Not TRIMed, so same length
|
|
if EQU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then
|
|
null;
|
|
elsif GTU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then
|
|
return FALSE;
|
|
elsif LTU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then
|
|
return TRUE;
|
|
end if;
|
|
end loop;
|
|
return FALSE;
|
|
end LTU;
|
|
|
|
function GTU(S, T : STRING) return BOOLEAN is
|
|
begin
|
|
for I in 1..S'LENGTH loop -- Not TRIMed, so same length
|
|
if EQU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then
|
|
null;
|
|
elsif LTU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then
|
|
return FALSE;
|
|
elsif GTU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then
|
|
return TRUE;
|
|
end if;
|
|
end loop;
|
|
return FALSE;
|
|
end GTU;
|
|
|
|
|
|
function EQU(S, T : STRING) return BOOLEAN is
|
|
begin
|
|
if S'LENGTH /= T'LENGTH then
|
|
return FALSE;
|
|
end if;
|
|
|
|
for I in 1..S'LENGTH loop
|
|
if not EQU(S(S'FIRST+I-1), T(T'FIRST+I-1)) then
|
|
return FALSE;
|
|
end if;
|
|
end loop;
|
|
|
|
return TRUE;
|
|
end EQU;
|
|
|
|
|
|
procedure RUN_UNIQUES(S : in STRING; UNIQUE_FOUND : out BOOLEAN;
|
|
PA : in out PARSE_ARRAY; PA_LAST : in out INTEGER) is
|
|
SL : constant STRING -- BAD NAME!!!!!!!!!!!!!!!!!!
|
|
:= LOWER_CASE(TRIM(S));
|
|
ST : constant STEM_TYPE := HEAD(SL, MAX_STEM_SIZE);
|
|
UNQL : UNIQUE_LIST; -- Unique list for a letter
|
|
begin
|
|
UNIQUE_FOUND := FALSE;
|
|
if SL(SL'FIRST) = 'v' then
|
|
UNQL := UNQ('u'); -- Unique list for a letter
|
|
elsif SL(SL'FIRST) = 'j' then
|
|
UNQL := UNQ('i'); -- Unique list for a letter
|
|
else
|
|
UNQL := UNQ(SL(SL'FIRST)); -- Unique list for a letter
|
|
end if;
|
|
|
|
--TEXT_IO.NEW_LINE;
|
|
--TEXT_IO.PUT_LINE("Called UNIQUES with =>" & SL & "|");
|
|
|
|
--TEXT_IO.NEW_LINE;
|
|
--TEXT_IO.PUT_LINE("UNQL ");
|
|
|
|
while UNQL /= null loop
|
|
-- If there is a match, add to PA
|
|
--TEXT_IO.PUT_LINE("UNIQUE =>" & UNQL.PR.STEM);
|
|
--if ST = LOWER_CASE(UNQL.PR.STEM) then
|
|
if EQU(ST, LOWER_CASE(UNQL.STEM)) then
|
|
PA_LAST := PA_LAST + 1;
|
|
PA(PA_LAST) := (UNQL.STEM,
|
|
(UNQL.QUAL,
|
|
0,
|
|
NULL_ENDING_RECORD,
|
|
X,
|
|
X),
|
|
UNIQUE,
|
|
UNQL.MNPC);
|
|
|
|
--TEXT_IO.PUT_LINE("UNIQUE HIT *********" & INTEGER'IMAGE(PA_LAST));
|
|
UNIQUE_FOUND := TRUE;
|
|
end if;
|
|
UNQL := UNQL.SUCC;
|
|
end loop;
|
|
|
|
end RUN_UNIQUES;
|
|
|
|
|
|
procedure RUN_INFLECTIONS(S : in STRING; SL : in out SAL;
|
|
RESTRICTION : DICT_RESTRICTION := REGULAR) is
|
|
-- Trys all possible inflections against the input word in S
|
|
-- and constructs a STEM_LIST of those that survive SL
|
|
use LEL_SECTION_IO;
|
|
use INFLECTION_RECORD_IO;
|
|
WORD : constant STRING := LOWER_CASE(TRIM(S));
|
|
LAST_OF_WORD : constant CHARACTER := WORD(WORD'LAST);
|
|
LENGTH_OF_WORD : constant INTEGER := WORD'LENGTH;
|
|
STEM_LENGTH : INTEGER := 0;
|
|
PR : PARSE_RECORD;
|
|
M : INTEGER := 1;
|
|
|
|
begin
|
|
--TEXT_IO.NEW_LINE;
|
|
--TEXT_IO.PUT_LINE("Called RUN_INFLECTIONS with =>" & WORD & "|");
|
|
if WORD'LENGTH = 0 then
|
|
SL(M) := NULL_PARSE_RECORD;
|
|
return;
|
|
end if;
|
|
|
|
SA := NOT_A_STEM_ARRAY;
|
|
|
|
-- Add all of these to list of possible ending records
|
|
-- since the blank ending agrees with everything
|
|
-- PACK/PRON have no blank endings
|
|
if ((RESTRICTION /= PACK_ONLY) and (RESTRICTION /= QU_PRON_ONLY)) and then
|
|
(WORD'LENGTH <= MAX_STEM_SIZE) then
|
|
for I in BELF(0, ' ')..BELL(0, ' ') loop
|
|
PR := (WORD & NULL_STEM_TYPE(LENGTH_OF_WORD+1..STEM_TYPE'LENGTH),
|
|
BEL(I), DEFAULT_DICTIONARY_KIND, NULL_MNPC);
|
|
SL(M) := PR;
|
|
M := M + 1;
|
|
|
|
end loop;
|
|
SA(LENGTH_OF_WORD) := PR.STEM; -- Is always a possibility (null ending)
|
|
|
|
end if;
|
|
|
|
-- Here we read in the INFLECTIONS_SECTION that is applicable
|
|
if RESTRICTION = REGULAR then
|
|
case LAST_OF_WORD is
|
|
when 'a' | 'c' | 'd' | 'e' | 'i' =>
|
|
READ(INFLECTIONS_SECTIONS_FILE, LEL, 1);
|
|
when 'm' | 'n' | 'o' | 'r' =>
|
|
READ(INFLECTIONS_SECTIONS_FILE, LEL, 2);
|
|
when 's' =>
|
|
READ(INFLECTIONS_SECTIONS_FILE, LEL, 3);
|
|
when 't' | 'u' =>
|
|
READ(INFLECTIONS_SECTIONS_FILE, LEL, 4);
|
|
when others =>
|
|
--PUT_LINE("Only blank inflections are found");
|
|
return;
|
|
end case;
|
|
elsif RESTRICTION = PACK_ONLY or RESTRICTION = QU_PRON_ONLY then
|
|
READ(INFLECTIONS_SECTIONS_FILE, LEL, 4);
|
|
end if;
|
|
|
|
|
|
-- Now do the non-blank endings -- Only go to LENGTH_OF_WORD
|
|
for Z in reverse 1..MIN(MAX_ENDING_SIZE, LENGTH_OF_WORD) loop
|
|
|
|
-- Check if Z agrees with a PDL SIZE !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
-- Maybe make PDL on size, if it has to be a list, or order by size if array
|
|
if LELL(Z, LAST_OF_WORD) > 0 then -- Any likely inflections at all
|
|
|
|
for I in LELF(Z, LAST_OF_WORD)..LELL(Z, LAST_OF_WORD) loop
|
|
if EQU(LOWER_CASE(LEL(I).ENDING.SUF(1..Z)),
|
|
LOWER_CASE(WORD(WORD'LAST-Z+1..WORD'LAST))) then
|
|
-- Add to list of possible ending records
|
|
--STEM_LENGTH := WORD'LENGTH - LEL(I).ENDING.SIZE;
|
|
STEM_LENGTH := WORD'LENGTH - Z;
|
|
--PUT(STEM_LENGTH);
|
|
--TEXT_IO.PUT_LINE("#######################################################");
|
|
|
|
if STEM_LENGTH <= MAX_STEM_SIZE then -- Reject too long words
|
|
-- Check if LEL IR agrees with PDL IR !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
PR := (WORD(WORD'FIRST..STEM_LENGTH) &
|
|
NULL_STEM_TYPE(STEM_LENGTH+1..MAX_STEM_SIZE),
|
|
LEL(I), DEFAULT_DICTIONARY_KIND, NULL_MNPC);
|
|
SL(M) := PR;
|
|
M := M + 1;
|
|
|
|
SA(STEM_LENGTH) := PR.STEM; -- Gets set dozens of times
|
|
-- Could order the endings by length (suffix sort) so length changes slowly
|
|
|
|
--PUT_LINE("LENGTH = " & INTEGER'IMAGE(STEM_LENGTH)
|
|
--& " SA =>" & PR.STEM & "|");
|
|
|
|
end if;
|
|
end if;
|
|
end loop;
|
|
|
|
end if;
|
|
end loop;
|
|
--TEXT_IO.PUT_LINE("RUN_INF FOUND STEMS = " & INTEGER'IMAGE(M-1));
|
|
--exception
|
|
--when others =>
|
|
--TEXT_IO.PUT_LINE("exception RUN_INF FOUND STEMS = " & INTEGER'IMAGE(M-1));
|
|
--raise;
|
|
end RUN_INFLECTIONS;
|
|
|
|
|
|
procedure TRY_TO_LOAD_DICTIONARY(D_K : DICTIONARY_KIND) is
|
|
begin
|
|
-- PUT_LINE("Trying to load " & DICTIONARY_KIND'IMAGE(D_K) &
|
|
-- " dictionary from STEMFILE ");
|
|
|
|
STEM_IO.OPEN(STEM_FILE(D_K), STEM_IO.IN_FILE,
|
|
ADD_FILE_NAME_EXTENSION(STEM_FILE_NAME,
|
|
DICTIONARY_KIND'IMAGE(D_K)));
|
|
DICT_IO.OPEN(DICT_FILE(D_K), DICT_IO.IN_FILE,
|
|
ADD_FILE_NAME_EXTENSION(DICT_FILE_NAME,
|
|
DICTIONARY_KIND'IMAGE(D_K)));
|
|
LOAD_INDICES_FROM_INDX_FILE(ADD_FILE_NAME_EXTENSION(INDX_FILE_NAME,
|
|
DICTIONARY_KIND'IMAGE(D_K)), D_K);
|
|
DICTIONARY_AVAILABLE(D_K) := TRUE;
|
|
-- PUT_LINE("Successfully loaded " & DICTIONARY_KIND'IMAGE(D_K) &
|
|
-- " dictionary from STEMFILE ");
|
|
|
|
exception
|
|
when others =>
|
|
--PUT_LINE("Failed to load " & DICTIONARY_KIND'IMAGE(D_K) &
|
|
-- " dictionary from STEMFILE ");
|
|
DICTIONARY_AVAILABLE(D_K) := FALSE;
|
|
end TRY_TO_LOAD_DICTIONARY;
|
|
|
|
|
|
|
|
procedure DICTIONARY_SEARCH(SSA : STEM_ARRAY_TYPE;
|
|
PREFIX : PREFIX_ITEM;
|
|
SUFFIX : SUFFIX_ITEM;
|
|
D_K : DICTIONARY_KIND;
|
|
RESTRICTION : DICT_RESTRICTION := REGULAR) is
|
|
-- Prepares a PDL list of possible dictionary hits
|
|
-- Search a dictionary (D_K) looking for all stems that match
|
|
-- any of the stems that are physically possible with Latin inflections
|
|
use STEM_IO;
|
|
|
|
--type NAT_32 is range 0..2**31-1; --###############
|
|
J, J1, J2, JJ : STEM_IO.COUNT := 0;
|
|
|
|
INDEX_ON : constant STRING := SSA(SSA'LAST);
|
|
INDEX_FIRST, INDEX_LAST : STEM_IO.COUNT := 0;
|
|
DS : DICTIONARY_STEM;
|
|
FIRST_TRY, SECOND_TRY : BOOLEAN := TRUE;
|
|
|
|
|
|
function FIRST_TWO(W : STRING) return STRING is
|
|
-- 'v' could be represented by 'u', like the new Oxford Latin Dictionary
|
|
-- Fixes the first two letters of a word/stem which can be done right
|
|
S : constant STRING := LOWER_CASE(W);
|
|
SS : STRING(W'RANGE) := W;
|
|
|
|
function UI(C : CHARACTER) return CHARACTER is
|
|
begin
|
|
if (C = 'v') then
|
|
return 'u';
|
|
elsif (C = 'V') then
|
|
return 'U';
|
|
elsif (C = 'j') then
|
|
return 'i';
|
|
elsif (C = 'J') then
|
|
return 'I';
|
|
else
|
|
return C;
|
|
end if;
|
|
end UI;
|
|
|
|
begin
|
|
|
|
if S'LENGTH = 1 then
|
|
SS(S'FIRST) := UI(W(S'FIRST));
|
|
else
|
|
SS(S'FIRST) := UI(W(S'FIRST));
|
|
SS(S'FIRST+1) := UI(W(S'FIRST+1));
|
|
end if;
|
|
|
|
return SS;
|
|
end FIRST_TWO;
|
|
|
|
|
|
procedure LOAD_PDL is
|
|
begin
|
|
case RESTRICTION is
|
|
when REGULAR =>
|
|
if not (DS.PART.POFS = PACK or
|
|
(DS.PART.POFS = PRON and then
|
|
(DS.PART.PRON.DECL.WHICH = 1))) then
|
|
PDL_INDEX := PDL_INDEX + 1;
|
|
PDL(PDL_INDEX) := PRUNED_DICTIONARY_ITEM'(DS, D_K);
|
|
end if;
|
|
|
|
when PACK_ONLY =>
|
|
if DS.PART.POFS = PACK then
|
|
PDL_INDEX := PDL_INDEX + 1;
|
|
PDL(PDL_INDEX) := PRUNED_DICTIONARY_ITEM'(DS, D_K);
|
|
end if;
|
|
|
|
when QU_PRON_ONLY =>
|
|
if DS.PART.POFS = PRON and then
|
|
(DS.PART.PRON.DECL.WHICH = 1) then
|
|
PDL_INDEX := PDL_INDEX + 1;
|
|
PDL(PDL_INDEX) := PRUNED_DICTIONARY_ITEM'(DS, D_K);
|
|
end if;
|
|
|
|
when others =>
|
|
PDL_INDEX := PDL_INDEX + 1;
|
|
PDL(PDL_INDEX) := PRUNED_DICTIONARY_ITEM'(DS, D_K);
|
|
end case;
|
|
|
|
end LOAD_PDL;
|
|
|
|
|
|
begin
|
|
-- Now go through the dictionary list DL for the first letters
|
|
-- and make a reduced dictionary list PDL
|
|
--TEXT_IO.PUT_LINE("Entering DICTIONARY_SEARCH PDL_INDEX = " & INTEGER'IMAGE(PDL_INDEX));
|
|
|
|
|
|
if D_K = LOCAL then
|
|
INDEX_FIRST := FIRST_INDEX((FIRST_TWO(INDEX_ON)(1), 'a'), D_K);
|
|
INDEX_LAST := LAST_INDEX((FIRST_TWO(INDEX_ON)(1), 'a'), D_K);
|
|
else
|
|
INDEX_FIRST := FIRST_INDEX(FIRST_TWO(INDEX_ON), D_K);
|
|
INDEX_LAST := LAST_INDEX(FIRST_TWO(INDEX_ON), D_K);
|
|
end if;
|
|
|
|
if INDEX_FIRST > 0 and then INDEX_FIRST <= INDEX_LAST then
|
|
|
|
|
|
J1 := STEM_IO.COUNT(INDEX_FIRST); --######################
|
|
J2 := STEM_IO.COUNT(INDEX_LAST);
|
|
|
|
STEM_ARRAY_LOOP:
|
|
for K in SSA'RANGE loop
|
|
if TRIM(SSA(K))'LENGTH > 1 then
|
|
-- This may be checking for 0 and 1 letter SSAs which are done elsewhere
|
|
--TEXT_IO.PUT(INTEGER'IMAGE(K) & " SSA(K) =>" );
|
|
--TEXT_IO.PUT_LINE(SSA(K));
|
|
|
|
if D_K = LOCAL then -- Special processing for unordered DICT.LOC
|
|
for J in J1..J2 loop -- Sweep exaustively through the scope
|
|
SET_INDEX(STEM_FILE(D_K), STEM_IO.COUNT(J));
|
|
READ(STEM_FILE(D_K), DS);
|
|
|
|
if EQU(LOWER_CASE(DS.STEM), SSA(K)) then
|
|
--TEXT_IO.PUT_LINE("HIT LOC = " & DS.STEM & " - " & SSA(K));
|
|
LOAD_PDL;
|
|
end if;
|
|
end loop;
|
|
|
|
else -- Regular dictionaries
|
|
|
|
FIRST_TRY := TRUE;
|
|
|
|
SECOND_TRY := TRUE;
|
|
|
|
J := (J1 + J2) / 2;
|
|
|
|
BINARY_SEARCH:
|
|
loop
|
|
|
|
if (J1 = J2-1) or (J1 = J2) then
|
|
if FIRST_TRY then
|
|
--TEXT_IO.PUT_LINE("FIRST_TRY");
|
|
J := J1;
|
|
FIRST_TRY := FALSE;
|
|
elsif SECOND_TRY then
|
|
--TEXT_IO.PUT_LINE("SECOND_TRY");
|
|
J := J2;
|
|
SECOND_TRY := FALSE;
|
|
else
|
|
--TEXT_IO.PUT_LINE("THIRD_TRY exit BINARY_SEARCH");
|
|
JJ := J;
|
|
exit BINARY_SEARCH;
|
|
end if;
|
|
end if;
|
|
|
|
SET_INDEX(STEM_FILE(D_K), STEM_IO.COUNT(J));
|
|
READ(STEM_FILE(D_K), DS);
|
|
|
|
if LTU(LOWER_CASE(DS.STEM), SSA(K)) then
|
|
J1 := J;
|
|
J := (J1 + J2) / 2;
|
|
elsif GTU(LOWER_CASE(DS.STEM), SSA(K)) then
|
|
J2 := J;
|
|
J := (J1 + J2) / 2;
|
|
else
|
|
for I in reverse J1..J loop
|
|
SET_INDEX(STEM_FILE(D_K), STEM_IO.COUNT(I));
|
|
READ(STEM_FILE(D_K), DS);
|
|
|
|
if EQU(LOWER_CASE(DS.STEM), SSA(K)) then
|
|
JJ := I;
|
|
--TEXT_IO.PUT_LINE("PDL STEM " & DS.STEM & " " & INTEGER'IMAGE(INTEGER(DS.MNPC)));
|
|
|
|
LOAD_PDL;
|
|
|
|
else
|
|
exit;
|
|
end if;
|
|
end loop;
|
|
|
|
for I in J+1..J2 loop
|
|
SET_INDEX(STEM_FILE(D_K), STEM_IO.COUNT(I));
|
|
READ(STEM_FILE(D_K), DS);
|
|
|
|
if EQU(LOWER_CASE(DS.STEM), SSA(K)) then
|
|
JJ := I;
|
|
--TEXT_IO.PUT_LINE("PDL STEM " & DS.STEM & " " & INTEGER'IMAGE(INTEGER(DS.MNPC)));
|
|
|
|
LOAD_PDL;
|
|
|
|
else
|
|
exit BINARY_SEARCH;
|
|
end if;
|
|
end loop;
|
|
exit BINARY_SEARCH;
|
|
|
|
end if;
|
|
end loop BINARY_SEARCH;
|
|
J1 := JJ;
|
|
J2 := STEM_IO.COUNT(INDEX_LAST);
|
|
|
|
end if; -- On LOCAL check
|
|
end if; -- On LENGTH > 1
|
|
end loop STEM_ARRAY_LOOP;
|
|
end if;
|
|
--TEXT_IO.PUT_LINE("Leaving DICTIONARY_SEARCH PDL_INDEX = " & INTEGER'IMAGE(PDL_INDEX));
|
|
-- exception
|
|
-- when others =>
|
|
--TEXT_IO.PUT_LINE("exception DICTIONARY_SEARCH PDL_INDEX = " & INTEGER'IMAGE(PDL_INDEX));
|
|
-- raise;
|
|
end DICTIONARY_SEARCH;
|
|
|
|
|
|
|
|
|
|
|
|
procedure SEARCH_DICTIONARIES(SSA : in STEM_ARRAY_TYPE;
|
|
PREFIX : PREFIX_ITEM; SUFFIX : SUFFIX_ITEM;
|
|
RESTRICTION : DICT_RESTRICTION := REGULAR) is
|
|
use STEM_IO;
|
|
FC : CHARACTER := ' ';
|
|
begin
|
|
--PUT_LINE("Entering SEARCH_DICTIONARIES");
|
|
PDL := (others => NULL_PRUNED_DICTIONARY_ITEM);
|
|
PDL_INDEX := 0;
|
|
--PUT_LINE("Search for blank stems");
|
|
-- BDL is always used, so it is loaded initially and not called from disk
|
|
-- Check all stems of the dictionary entry against the reduced stems
|
|
|
|
-- Determine if there is a pure blank " " stem
|
|
if LEN(SSA(SSA'FIRST)) = 0 then -- a size would help?
|
|
--PUT("HIT on blank stem I = ");PUT('1');
|
|
--PUT(" STEM = ");PUT_LINE(BDL(1).STEM);
|
|
--PDL := new PRUNED_DICTIONARY_ITEM'(BDL(1), GENERAL, PDL);
|
|
PDL_INDEX := PDL_INDEX + 1;
|
|
PDL(PDL_INDEX) := PRUNED_DICTIONARY_ITEM'(BDL(1), GENERAL);
|
|
end if;
|
|
-- Now there is only one blank stem (2 of to_be), but need not always be so
|
|
|
|
|
|
-- Determine if there is a blank stem (SC = ' ')
|
|
-- Prepare for the posibility that one stem is short but there are others
|
|
FC := ' ';
|
|
if SSA(SSA'FIRST)(1) = ' ' then
|
|
if SSA'LENGTH > 1 and then SSA(SSA'FIRST+1)(2) = ' ' then
|
|
FC := SSA(SSA'FIRST+1)(1);
|
|
end if;
|
|
elsif SSA(SSA'FIRST)(2) = ' ' then
|
|
FC := SSA(SSA'FIRST)(1);
|
|
end if;
|
|
|
|
-- If there is a single letter stem (FC /= ' ') then
|
|
if FC /= ' ' then
|
|
for I in 2..BDL_LAST loop
|
|
-- Check all stems of the dictionary entry against the reduced stems
|
|
--if LOWER_CASE(BDL(I).STEM(1)) = FC then
|
|
if EQU(LOWER_CASE(BDL(I).STEM(1)), FC) then
|
|
--PUT("HIT on 1 letter stem I = ");PUT(I);PUT(" STEM = ");PUT_LINE(BDL(I).STEM);
|
|
PDL_INDEX := PDL_INDEX + 1;
|
|
PDL(PDL_INDEX) := PRUNED_DICTIONARY_ITEM'(BDL(I), GENERAL);
|
|
-- D_K set to GENERAL, but should not SPE have a chance? !!!!!!!!!
|
|
end if;
|
|
end loop;
|
|
end if;
|
|
|
|
if SSA'LENGTH = 0 then
|
|
-- PUT_LINE("Empty stem array, don't bother searching");
|
|
return;
|
|
-- elsif LEN(SSA(SSA'LAST)) <= 1 then
|
|
-- PUT_LINE("No two letter stems, have done searching");
|
|
-- else
|
|
-- PUT_LINE("Searching Dictionaries");
|
|
end if;
|
|
|
|
for D_K in DICTIONARY_KIND loop
|
|
if DICTIONARY_AVAILABLE(D_K) then
|
|
if not IS_OPEN(STEM_FILE(D_K)) then
|
|
OPEN(STEM_FILE(D_K), STEM_IO.IN_FILE,
|
|
ADD_FILE_NAME_EXTENSION(STEM_FILE_NAME,
|
|
DICTIONARY_KIND'IMAGE(D_K)));
|
|
end if;
|
|
DICTIONARY_SEARCH(SSA, PREFIX, SUFFIX, D_K, RESTRICTION);
|
|
CLOSE(STEM_FILE(D_K)); --??????
|
|
end if;
|
|
end loop;
|
|
|
|
--TEXT_IO.PUT_LINE("Leaving SEARCH_DICTIONARY PDL_INDEX = " & INTEGER'IMAGE(PDL_INDEX));
|
|
|
|
end SEARCH_DICTIONARIES;
|
|
|
|
|
|
procedure CHANGE_LANGUAGE(C : CHARACTER) is
|
|
begin if UPPER_CASE(C) = 'L' then
|
|
LANGUAGE := LATIN_TO_ENGLISH;
|
|
PREFACE.PUT_LINE("Language changed to " & LANGUAGE_TYPE'IMAGE(LANGUAGE));
|
|
elsif UPPER_CASE(C) = 'E' then
|
|
if ENGLISH_DICTIONARY_AVAILABLE(GENERAL) then
|
|
LANGUAGE:= ENGLISH_TO_LATIN;
|
|
PREFACE.PUT_LINE("Language changed to " & LANGUAGE_TYPE'IMAGE(LANGUAGE));
|
|
PREFACE.PUT_LINE("Input a single English word (+ part of speech - N, ADJ, V, PREP, ...)");
|
|
else
|
|
PREFACE.PUT_LINE("No English dictionary available");
|
|
end if;
|
|
else
|
|
PREFACE.PUT_LINE("Bad LANGAUGE input - no change, remains " & LANGUAGE_TYPE'IMAGE(LANGUAGE));
|
|
end if;
|
|
exception
|
|
when others =>
|
|
PREFACE.PUT_LINE("Bad LANGAUGE input - no change, remains " & LANGUAGE_TYPE'IMAGE(LANGUAGE));
|
|
end CHANGE_LANGUAGE;
|
|
|
|
|
|
|
|
|
|
procedure WORD(RAW_WORD : in STRING;
|
|
PA : in out PARSE_ARRAY; PA_LAST : in out INTEGER) is
|
|
|
|
INPUT_WORD : constant STRING := LOWER_CASE(RAW_WORD);
|
|
PA_SAVE : INTEGER := PA_LAST;
|
|
|
|
UNIQUE_FOUND : BOOLEAN := FALSE;
|
|
|
|
SS, SSS : SAL := (others => NULL_PARSE_RECORD);
|
|
|
|
|
|
procedure ORDER_STEMS(SX : in out SAL) is
|
|
use INFLECTION_RECORD_IO;
|
|
use DICT_IO;
|
|
HITS : INTEGER := 0;
|
|
SL : SAL := SX;
|
|
SL_LAST : INTEGER := 0;
|
|
SM : PARSE_RECORD;
|
|
begin
|
|
if SX(1) = NULL_PARSE_RECORD then
|
|
return; end if;
|
|
--PUT_LINE("ORDERing_STEMS");
|
|
|
|
for I in SL'RANGE loop
|
|
exit when SL(I) = NULL_PARSE_RECORD;
|
|
SL_LAST := SL_LAST + 1;
|
|
end loop;
|
|
--PUT_LINE("In ORDER SL_LAST = " & INTEGER'IMAGE(SL_LAST));
|
|
|
|
-- Bubble sort since this list should usually be very small (1-5)
|
|
HIT_LOOP:
|
|
loop
|
|
HITS := 0;
|
|
|
|
SWITCH:
|
|
begin
|
|
-- Need to remove duplicates in ARRAY_STEMS
|
|
-- This sort is very sloppy
|
|
-- One problem is that it can mix up some of the order of PREFIX, XXX, LOC
|
|
-- I ought to do this for every set of results from different approaches
|
|
-- not just in one fell swoop at the end !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
INNER_LOOP:
|
|
for I in 1..SL_LAST-1 loop
|
|
if SL(I+1) /= NULL_PARSE_RECORD then
|
|
if (SL(I+1).MNPC < SL(I).MNPC) or else
|
|
(SL(I+1).MNPC = SL(I).MNPC and then
|
|
SL(I+1).IR.ENDING.SIZE < SL(I).IR.ENDING.SIZE) or else
|
|
(SL(I+1).MNPC = SL(I).MNPC and then
|
|
SL(I+1).IR.ENDING.SIZE = SL(I).IR.ENDING.SIZE and then
|
|
SL(I+1).IR.QUAL < SL(I).IR.QUAL) or else
|
|
(SL(I+1).MNPC = SL(I).MNPC and then
|
|
SL(I+1).IR.ENDING.SIZE = SL(I).IR.ENDING.SIZE and then
|
|
SL(I+1).IR.QUAL = SL(I).IR.QUAL and then
|
|
SL(I+1).D_K < SL(I).D_K) then
|
|
--PUT(SL(I+1).IR.QUAL); PUT(" < "); PUT(SL(I).IR.QUAL); NEW_LINE;
|
|
SM := SL(I);
|
|
SL(I) := SL(I+1); SL(I+1) := SM;
|
|
SL(I+1) := SM;
|
|
HITS := HITS + 1;
|
|
--else
|
|
--PUT(SL(I+1).IR.QUAL); PUT(" >= "); PUT(SL(I).IR.QUAL); NEW_LINE;
|
|
end if;
|
|
else
|
|
exit INNER_LOOP;
|
|
end if;
|
|
end loop INNER_LOOP;
|
|
|
|
end SWITCH;
|
|
--NEW_LINE;
|
|
--PUT_LINE("In ORDER HITS = " & INTEGER'IMAGE(HITS ));
|
|
--NEW_LINE;
|
|
--for I in 1..SL_LAST loop
|
|
--PUT(SL(I)); NEW_LINE;
|
|
--end loop;
|
|
--PUT_LINE("--------------------------------------------------------");
|
|
|
|
exit when HITS = 0;
|
|
end loop HIT_LOOP;
|
|
SX := SL;
|
|
end ORDER_STEMS;
|
|
|
|
|
|
procedure ARRAY_STEMS(SX : in SAL;
|
|
PA : in out PARSE_ARRAY; PA_LAST : in out INTEGER) is
|
|
SL : SAL := SX;
|
|
OPR : PARSE_RECORD := NULL_PARSE_RECORD;
|
|
begin
|
|
|
|
if SL(1) = NULL_PARSE_RECORD then
|
|
return;
|
|
else
|
|
|
|
OPR := NULL_PARSE_RECORD;
|
|
for I in SL'RANGE loop
|
|
if SL(I) /= NULL_PARSE_RECORD then
|
|
--PUT('*'); PUT(SL(I)); NEW_LINE;
|
|
|
|
SUPRESS_KEY_CHECK:
|
|
declare
|
|
|
|
function "<=" (A, B : PARSE_RECORD) return BOOLEAN is
|
|
use DICT_IO;
|
|
begin -- !!!!!!!!!!!!!!!!!!!!!!!!!!
|
|
if A.IR.QUAL = B.IR.QUAL and then
|
|
A.MNPC = B.MNPC then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
end "<=";
|
|
|
|
begin
|
|
if SL(I) <= OPR then -- Get rid of duplicates, if ORDER is OK
|
|
--PUT('-'); PUT(SL(I)); NEW_LINE;
|
|
null;
|
|
else
|
|
PA_LAST := PA_LAST + 1;
|
|
--PUT('+'); PUT(SL(I)); NEW_LINE;
|
|
--PUT("SAL PR /= OPR and PA_LAST incremented to "); PUT(PA_LAST); NEW_LINE;
|
|
PA(PA_LAST) := SL(I);
|
|
OPR := SL(I);
|
|
end if;
|
|
end SUPRESS_KEY_CHECK;
|
|
|
|
else
|
|
exit;
|
|
end if;
|
|
end loop;
|
|
|
|
end if;
|
|
|
|
--TEXT_IO.PUT_LINE("At the end of ARRAY_STEMS");
|
|
--for I in 1..PA_LAST loop
|
|
--PUT(PA(I).STEM); PUT(" "); PUT(PA(I).IR); NEW_LINE;
|
|
--PUT(PA(I).D_K); PUT("> "); PUT(PA(I).AAMNPC.MNPC); NEW_LINE;
|
|
--end loop;
|
|
|
|
end ARRAY_STEMS;
|
|
|
|
|
|
procedure REDUCE_STEM_LIST(SL : in SAL; SXX : in out SAL;
|
|
-- Need in out if want to print it at the end
|
|
--procedure REDUCE_STEM_LIST(SL : in SAL; SXX : out SAL;
|
|
PREFIX : in PREFIX_ITEM := NULL_PREFIX_ITEM;
|
|
SUFFIX : in SUFFIX_ITEM := NULL_SUFFIX_ITEM) is
|
|
MNPC_PART : MNPC_TYPE := NULL_MNPC;
|
|
PDL_PART : PART_ENTRY;
|
|
COM : COMPARISON_TYPE := X;
|
|
NUM_SORT : NUMERAL_SORT_TYPE := X;
|
|
LS : INTEGER := 0;
|
|
M : INTEGER := 0;
|
|
|
|
|
|
PDL_KEY : STEM_KEY_TYPE;
|
|
PDL_P : PART_OF_SPEECH_TYPE;
|
|
SL_KEY : STEM_KEY_TYPE;
|
|
SL_P : PART_OF_SPEECH_TYPE;
|
|
|
|
function "<=" (LEFT, RIGHT : PART_OF_SPEECH_TYPE) return BOOLEAN is
|
|
begin
|
|
if RIGHT = LEFT or else
|
|
(LEFT = PACK and RIGHT = PRON) or else
|
|
RIGHT = X then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
end "<=";
|
|
|
|
function "<=" (LEFT, RIGHT : GENDER_TYPE) return BOOLEAN is
|
|
begin
|
|
if (RIGHT = LEFT or else
|
|
(RIGHT = C and LEFT /= N) or else
|
|
(RIGHT = X)) then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
end "<=";
|
|
|
|
function "<=" (LEFT, RIGHT : STEM_KEY_TYPE) return BOOLEAN is
|
|
begin
|
|
if (RIGHT = LEFT or else
|
|
(RIGHT = 0)) then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
end "<=";
|
|
|
|
begin
|
|
SXX := (others => NULL_PARSE_RECORD); -- Essentially initializing
|
|
--for J in 1..PDL_INDEX loop
|
|
--NUMBER_IN_PDL := NUMBER_IN_PDL + 1;
|
|
--TEXT_IO.PUT(INTEGER'IMAGE(NUMBER_IN_PDL));
|
|
--TEXT_IO.PUT(" PDL "); TEXT_IO.PUT(PDL(J).DS.STEM);
|
|
--PART_ENTRY_IO.PUT(PDL(J).DS.PART);
|
|
--TEXT_IO.PUT(INTEGER'IMAGE(PDL(J).DS.KEY)); TEXT_IO.NEW_LINE;
|
|
--end loop;
|
|
--TEXT_IO.PUT_LINE("****************************");
|
|
|
|
--if WORDS_MODE(WRITE_STATISTICS_FILE) then
|
|
--declare
|
|
-- PSTAT : PRUNED_DICTIONARY_LIST := PDL;
|
|
--begin
|
|
-- TEXT_IO.PUT(STATS, "Number of PDL DICT hits");
|
|
-- TEXT_IO.SET_COL(STATS, 30);
|
|
-- INTEGER_IO.PUT(STATS, NUMBER_IN_PDL);
|
|
-- TEXT_IO.SET_COL(STATS, 40);
|
|
-- TEXT_IO.PUT(STATS, INPUT_WORD);
|
|
-- TEXT_IO.NEW_LINE(STATS);
|
|
--end;
|
|
--end if;
|
|
-------------------------------------------------------------
|
|
|
|
-- For the reduced dictionary list PDL
|
|
M := 0;
|
|
ON_PDL:
|
|
for J in 1..PDL_INDEX loop
|
|
|
|
|
|
|
|
PDL_PART := PDL(J).DS.PART;
|
|
PDL_KEY := PDL(J).DS.KEY;
|
|
MNPC_PART := PDL(J).DS.MNPC;
|
|
|
|
|
|
-- Is there any point in going through the process for this PDL
|
|
PDL_P := PDL(J).DS.PART.POFS; -- Used only for FIX logic below
|
|
|
|
-- If there is no SUFFIX then carry on
|
|
if (SUFFIX = NULL_SUFFIX_ITEM) then -- No suffix working, fall through
|
|
--PUT_LINE("No SUFFIX in REDUCE - Fall through to PREFIX check ");
|
|
null;
|
|
elsif
|
|
(PDL_P = N and then PDL_PART.N.DECL = (9, 8)) or -- No suffix for
|
|
(PDL_P = ADJ and then PDL_PART.ADJ.DECL = (9, 8)) then -- abbreviations
|
|
-- Can be no suffix on abbreviation");
|
|
goto END_OF_PDL_LOOP;
|
|
else -- There is SUFFIX, see if it agrees with PDL
|
|
if PDL_P <= SUFFIX.ENTR.ROOT and then -- Does SUFFIX agree in ROOT
|
|
((PDL_KEY <= SUFFIX.ENTR.ROOT_KEY) or else
|
|
((PDL_KEY = 0) and then
|
|
((PDL_P = N) or (PDL_P = ADJ) or (PDL_P = V)) and then
|
|
((SUFFIX.ENTR.ROOT_KEY = 1) or (SUFFIX.ENTR.ROOT_KEY = 2)))) then
|
|
--PUT_LINE("HIT HIT HIT HIT HIT HIT HIT HIT HIT SUFFIX SUFFIX in REDUCE");
|
|
case SUFFIX.ENTR.TARGET.POFS is -- Transform PDL_PART to TARGET
|
|
when N =>
|
|
PDL_PART := (N, SUFFIX.ENTR.TARGET.N);
|
|
when PRON =>
|
|
PDL_PART := (PRON, SUFFIX.ENTR.TARGET.PRON);
|
|
when ADJ =>
|
|
PDL_PART := (ADJ, SUFFIX.ENTR.TARGET.ADJ);
|
|
when NUM =>
|
|
PDL_PART := (NUM, SUFFIX.ENTR.TARGET.NUM);
|
|
when ADV =>
|
|
PDL_PART := (ADV, SUFFIX.ENTR.TARGET.ADV);
|
|
when V =>
|
|
PDL_PART := (V, SUFFIX.ENTR.TARGET.V);
|
|
when others =>
|
|
null; -- No others so far, except X = all
|
|
end case;
|
|
PDL_KEY := SUFFIX.ENTR.TARGET_KEY;
|
|
PDL_P := PDL_PART.POFS; -- Used only for FIX logic below
|
|
--PUT(" Changed to "); PUT(PDL_PART); PUT(PDL_KEY); NEW_LINE;
|
|
|
|
else
|
|
--PUT_LINE("In REDUCE_STEM_LIST There is no legal suffix");
|
|
-- exit;
|
|
goto END_OF_PDL_LOOP;
|
|
end if;
|
|
end if;
|
|
|
|
|
|
if (PREFIX = NULL_PREFIX_ITEM) then -- No PREFIX, drop through
|
|
--PUT_LINE("No PREFIX in REDUCE - Fall through to MATCHing ");
|
|
null;
|
|
elsif
|
|
(PDL_P = N and then PDL_PART.N.DECL = (9, 8)) or -- No prefix for
|
|
(PDL_P = ADJ and then PDL_PART.ADJ.DECL = (9, 8)) or -- abbreviations
|
|
(PDL_P = INTERJ or PDL_P = CONJ) then -- or INTERJ or CONJ
|
|
--PUT_LINE("In REDUCE_STEM_LIST no prefix on abbreviationi, interj, conj");
|
|
goto END_OF_PDL_LOOP;
|
|
else -- Check if PREFIX agrees
|
|
--PUT("PREFIX in REDUCE ");
|
|
--PUT(PDL_P); PUT(" <= "); PUT(PREFIX.ENTR.ROOT); PUT(" OR"); NEW_LINE;
|
|
--PUT(" "); PUT(PDL_PART); PUT(" <= ");
|
|
--PUT(PREFIX.ENTR.ROOT); NEW_LINE;
|
|
if (PDL_P = PREFIX.ENTR.ROOT) or -- = ROOT
|
|
(PDL_PART.POFS = PREFIX.ENTR.ROOT) then -- or part mod by suf
|
|
--PUT_LINE("PREFIX in REDUCE PART HIT");
|
|
null;
|
|
elsif (PREFIX.ENTR.ROOT = X) then -- or ROOT = X
|
|
if PDL_P = N or PDL_P = PRON or
|
|
PDL_P = ADJ or PDL_P = ADV or PDL_P = V then
|
|
-- Dont prefix PREP, CONJ, ...
|
|
-- PUT_LINE("PREFIX in REDUCE X HIT");
|
|
null;
|
|
end if;
|
|
else
|
|
goto END_OF_PDL_LOOP;
|
|
--PUT_LINE("In REDUCE_STEM_LIST There is no legal prefix");
|
|
-- exit;
|
|
end if;
|
|
end if;
|
|
|
|
-- SUFFIX and PREFIX either agree or don't exist (agrees with everything)
|
|
--PUT("ON_PDL:+ "); PUT(PDL(J).DS.STEM); PUT(PDL(J).DS.PART);
|
|
--PUT(PDL(J).DS.KEY); NEW_LINE;
|
|
--PUT_LINE(ADD_PREFIX(PDL(J).DS.STEM, PREFIX) & "|");
|
|
--PUT_LINE(ADD_SUFFIX(ADD_PREFIX(PDL(J).DS.STEM, PREFIX), SUFFIX) & "|");
|
|
|
|
|
|
LS := LEN(ADD_SUFFIX(ADD_PREFIX(PDL(J).DS.STEM, PREFIX), SUFFIX));
|
|
--PUT("LS = LEN of "); PUT(ADD_SUFFIX(ADD_PREFIX(PDL(J).DS.STEM, PREFIX), SUFFIX
|
|
--PUT(" = "); PUT(LS); NEW_LINE;
|
|
|
|
--TEXT_IO.PUT_LINE("Entering ON_SL loop");
|
|
|
|
ON_SL:
|
|
for I in SL'RANGE loop
|
|
exit when SL(I) = NULL_PARSE_RECORD;
|
|
|
|
--TEXT_IO.PUT("SL(I) "); PARSE_RECORD_IO.PUT(SL(I)); TEXT_IO.NEW_LINE;
|
|
--TEXT_IO.PUT(" PDL "); TEXT_IO.PUT(PDL(J).DS.STEM);
|
|
--PART_ENTRY_IO.PUT(PDL(J).DS.PART);
|
|
--TEXT_IO.PUT(INTEGER'IMAGE(PDL(J).DS.KEY)); TEXT_IO.NEW_LINE;
|
|
--TEXT_IO.PUT("LS "); TEXT_IO.PUT(INTEGER'IMAGE(LS));
|
|
--TEXT_IO.PUT(" LEN SL(I).STEM "); TEXT_IO.PUT_LINE(INTEGER'IMAGE(LEN(SL(I).STEM)));
|
|
|
|
if LS = LEN(SL(I).STEM) then
|
|
|
|
|
|
|
|
|
|
-- Scan through the whole unreduced stem list
|
|
-- Single out those stems that match (pruned) dictionary entries
|
|
--^^^^^^^^^^^^^^^^^should be able to do this better with new arrangement
|
|
|
|
|
|
SL_KEY := SL(I).IR.KEY;
|
|
SL_P := SL(I).IR.QUAL.POFS;
|
|
|
|
|
|
if (
|
|
((PDL_KEY <= SL(I).IR.KEY) ) or else
|
|
((PDL_KEY = 0) and then
|
|
(((PDL_P = N) or (PDL_P = ADJ) or (PDL_P = V)) and then
|
|
((SL(I).IR.KEY = 1) or (SL(I).IR.KEY = 2)) ))
|
|
) and then -- and KEY
|
|
( PDL_PART.POFS = EFF_PART(SL(I).IR.QUAL.POFS) ) then
|
|
|
|
--TEXT_IO.PUT_LINE("####################### PDL - SL MATCH ############");
|
|
|
|
if
|
|
(PDL_PART.POFS = N and then
|
|
PDL_PART.N.DECL <= SL(I).IR.QUAL.N.DECL and then
|
|
PDL_PART.N.GENDER <= SL(I).IR.QUAL.N.GENDER) then
|
|
--TEXT_IO.PUT_LINE(" HIT N ");
|
|
-- Need to transfer the gender of the noun dictionary item
|
|
M := M + 1;
|
|
SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX),
|
|
IR => (
|
|
QUAL => (
|
|
POFS => N,
|
|
N => (
|
|
PDL_PART.N.DECL,
|
|
SL(I).IR.QUAL.N.CS,
|
|
SL(I).IR.QUAL.N.NUMBER,
|
|
PDL_PART.N.GENDER ) ),
|
|
KEY => SL(I).IR.KEY,
|
|
ENDING => SL(I).IR.ENDING,
|
|
AGE => SL(I).IR.AGE,
|
|
FREQ => SL(I).IR.FREQ),
|
|
D_K => PDL(J).D_K,
|
|
MNPC => MNPC_PART);
|
|
|
|
elsif
|
|
(PDL_PART.POFS = PRON and then
|
|
PDL_PART.PRON.DECL <= SL(I).IR.QUAL.PRON.DECL) then
|
|
--PUT(" HIT PRON ");
|
|
-- Need to transfer the kind of the pronoun dictionary item
|
|
M := M + 1;
|
|
SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX),
|
|
IR => (
|
|
QUAL => (
|
|
POFS => PRON,
|
|
PRON => (
|
|
PDL_PART.PRON.DECL,
|
|
SL(I).IR.QUAL.PRON.CS,
|
|
SL(I).IR.QUAL.PRON.NUMBER,
|
|
SL(I).IR.QUAL.PRON.GENDER ) ),
|
|
KEY => SL(I).IR.KEY,
|
|
ENDING => SL(I).IR.ENDING,
|
|
AGE => SL(I).IR.AGE,
|
|
FREQ => SL(I).IR.FREQ),
|
|
D_K => PDL(J).D_K,
|
|
MNPC => MNPC_PART);
|
|
|
|
elsif (PDL_PART.POFS = ADJ) and then
|
|
(PDL_PART.ADJ.DECL <= SL(I).IR.QUAL.ADJ.DECL) and then
|
|
((SL(I).IR.QUAL.ADJ.CO <= PDL_PART.ADJ.CO ) or
|
|
((SL(I).IR.QUAL.ADJ.CO = X) or (PDL_PART.ADJ.CO = X))) then
|
|
-- Note the reversal on comparisom
|
|
--PUT(" HIT ADJ ");
|
|
-- Need to transfer the gender of the dictionary item
|
|
-- Need to transfer the CO of the ADJ dictionary item
|
|
if PDL_PART.ADJ.CO in POS..SUPER then
|
|
-- If the dictionary entry has a unique CO, use it
|
|
COM := PDL_PART.ADJ.CO;
|
|
else
|
|
-- Otherwise, the entry is X, generate a CO from KEY
|
|
COM := ADJ_COMP_FROM_KEY(PDL_KEY);
|
|
end if;
|
|
M := M + 1;
|
|
SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX),
|
|
IR => (
|
|
QUAL => (
|
|
POFS => ADJ,
|
|
ADJ => (
|
|
PDL_PART.ADJ.DECL,
|
|
SL(I).IR.QUAL.ADJ.CS,
|
|
SL(I).IR.QUAL.ADJ.NUMBER,
|
|
SL(I).IR.QUAL.ADJ.GENDER,
|
|
COM ) ),
|
|
KEY => SL(I).IR.KEY,
|
|
ENDING => SL(I).IR.ENDING,
|
|
AGE => SL(I).IR.AGE,
|
|
FREQ => SL(I).IR.FREQ),
|
|
D_K => PDL(J).D_K,
|
|
MNPC => MNPC_PART);
|
|
|
|
elsif (PDL_PART.POFS = NUM) and then
|
|
(PDL_PART.NUM.DECL <= SL(I).IR.QUAL.NUM.DECL) and then
|
|
(PDL_KEY = SL(I).IR.KEY) then
|
|
--PUT(" HIT NUM ");
|
|
if PDL_PART.NUM.SORT = X then
|
|
-- If the entry is X, generate a CO from KEY
|
|
NUM_SORT:= NUM_SORT_FROM_KEY(PDL_KEY);
|
|
else
|
|
-- Otherwise, the dictionary entry has a unique CO, use it
|
|
NUM_SORT := PDL_PART.NUM.SORT;
|
|
end if;
|
|
M := M + 1;
|
|
SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX),
|
|
IR => (
|
|
QUAL => (
|
|
POFS => NUM,
|
|
NUM => (
|
|
PDL_PART.NUM.DECL,
|
|
SL(I).IR.QUAL.NUM.CS,
|
|
SL(I).IR.QUAL.NUM.NUMBER,
|
|
SL(I).IR.QUAL.NUM.GENDER,
|
|
NUM_SORT) ),
|
|
KEY => SL(I).IR.KEY,
|
|
ENDING => SL(I).IR.ENDING,
|
|
AGE => SL(I).IR.AGE,
|
|
FREQ => SL(I).IR.FREQ),
|
|
D_K => PDL(J).D_K,
|
|
MNPC => MNPC_PART);
|
|
|
|
|
|
elsif (PDL_PART.POFS = ADV) and then
|
|
((PDL_PART.ADV.CO <= SL(I).IR.QUAL.ADV.CO ) or
|
|
((SL(I).IR.QUAL.ADV.CO = X) or (PDL_PART.ADV.CO = X))) then
|
|
--PUT(" HIT ADV ");
|
|
-- Need to transfer the CO of the ADV dictionary item
|
|
if PDL_PART.ADV.CO in POS..SUPER then
|
|
-- If the dictionary entry has a unique CO, use it
|
|
COM := PDL_PART.ADV.CO;
|
|
else
|
|
-- The entry is X and we need to generate a COMP from the KEY
|
|
COM := ADV_COMP_FROM_KEY(PDL_KEY);
|
|
end if;
|
|
M := M + 1;
|
|
SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX),
|
|
IR => (
|
|
QUAL => (
|
|
POFS => ADV,
|
|
ADV => (
|
|
CO => COM) ),
|
|
KEY => SL(I).IR.KEY,
|
|
ENDING => SL(I).IR.ENDING,
|
|
AGE => SL(I).IR.AGE,
|
|
FREQ => SL(I).IR.FREQ),
|
|
D_K => PDL(J).D_K,
|
|
MNPC => MNPC_PART);
|
|
|
|
elsif (PDL_PART.POFS = V) then
|
|
--TEXT_IO.PUT_LINE("V found, now check CON");
|
|
if SL(I).IR.QUAL.POFS = V and then
|
|
(PDL_PART.V.CON <= SL(I).IR.QUAL.V.CON) then
|
|
--TEXT_IO.PUT(" HIT V ");
|
|
M := M + 1;
|
|
SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX),
|
|
IR => (
|
|
QUAL => (
|
|
POFS => V,
|
|
V => (
|
|
PDL_PART.V.CON,
|
|
SL(I).IR.QUAL.V.TENSE_VOICE_MOOD,
|
|
SL(I).IR.QUAL.V.PERSON,
|
|
SL(I).IR.QUAL.V.NUMBER ) ),
|
|
KEY => SL(I).IR.KEY,
|
|
ENDING => SL(I).IR.ENDING,
|
|
AGE => SL(I).IR.AGE,
|
|
FREQ => SL(I).IR.FREQ),
|
|
D_K => PDL(J).D_K,
|
|
MNPC => MNPC_PART);
|
|
|
|
elsif SL(I).IR.QUAL.POFS = VPAR and then
|
|
(PDL_PART.V.CON <= SL(I).IR.QUAL.VPAR.CON) then
|
|
--PUT(" HIT VPAR ");
|
|
M := M + 1;
|
|
SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX),
|
|
IR => (
|
|
QUAL => (
|
|
POFS => VPAR,
|
|
VPAR => (
|
|
PDL_PART.V.CON,
|
|
SL(I).IR.QUAL.VPAR.CS,
|
|
SL(I).IR.QUAL.VPAR.NUMBER,
|
|
SL(I).IR.QUAL.VPAR.GENDER,
|
|
SL(I).IR.QUAL.VPAR.TENSE_VOICE_MOOD ) ),
|
|
KEY => SL(I).IR.KEY,
|
|
ENDING => SL(I).IR.ENDING,
|
|
AGE => SL(I).IR.AGE,
|
|
FREQ => SL(I).IR.FREQ),
|
|
D_K => PDL(J).D_K,
|
|
MNPC => MNPC_PART);
|
|
|
|
|
|
elsif SL(I).IR.QUAL.POFS = SUPINE and then
|
|
(PDL_PART.V.CON <= SL(I).IR.QUAL.SUPINE.CON) then
|
|
--PUT(" HIT SUPINE");
|
|
M := M + 1;
|
|
SXX(M) := (STEM => SUBTRACT_PREFIX(SL(I).STEM, PREFIX),
|
|
IR => (
|
|
QUAL => (
|
|
POFS => SUPINE,
|
|
SUPINE => (
|
|
PDL_PART.V.CON,
|
|
SL(I).IR.QUAL.SUPINE.CS,
|
|
SL(I).IR.QUAL.SUPINE.NUMBER,
|
|
SL(I).IR.QUAL.SUPINE.GENDER) ),
|
|
KEY => SL(I).IR.KEY,
|
|
ENDING => SL(I).IR.ENDING,
|
|
AGE => SL(I).IR.AGE,
|
|
FREQ => SL(I).IR.FREQ),
|
|
D_K => PDL(J).D_K,
|
|
MNPC => MNPC_PART);
|
|
|
|
|
|
end if;
|
|
|
|
elsif PDL_PART.POFS = PREP and then
|
|
PDL_PART.PREP.OBJ = SL(I).IR.QUAL.PREP.OBJ then
|
|
--PUT(" HIT PREP ");
|
|
M := M + 1;
|
|
SXX(M) := (SUBTRACT_PREFIX(SL(I).STEM, PREFIX), SL(I).IR,
|
|
PDL(J).D_K, MNPC_PART);
|
|
|
|
elsif PDL_PART.POFS = CONJ then
|
|
--PUT(" HIT CONJ ");
|
|
M := M + 1;
|
|
SXX(M) := (SUBTRACT_PREFIX(SL(I).STEM, PREFIX), SL(I).IR,
|
|
PDL(J).D_K, MNPC_PART);
|
|
|
|
elsif PDL_PART.POFS = INTERJ then
|
|
--PUT(" HIT INTERJ ");
|
|
M := M + 1;
|
|
SXX(M) := (SUBTRACT_PREFIX(SL(I).STEM, PREFIX), SL(I).IR,
|
|
PDL(J).D_K, MNPC_PART);
|
|
|
|
|
|
end if;
|
|
|
|
--TEXT_IO.NEW_LINE; PUT(SL(I).IR.QUAL); TEXT_IO.PUT(" -- ");
|
|
--TEXT_IO.PUT(PDL(J).DS.STEM); PUT(PDL_PART); TEXT_IO.NEW_LINE;
|
|
|
|
end if;
|
|
end if;
|
|
<<END_OF_SL_LOOP>> null;
|
|
end loop ON_SL;
|
|
--TEXT_IO.PUT("In RED_ST_L after loop ON_SL M = ");
|
|
--TEXT_IO.PUT(INTEGER'IMAGE(M)); TEXT_IO.NEW_LINE;
|
|
|
|
|
|
<<END_OF_PDL_LOOP>> null;
|
|
end loop ON_PDL;
|
|
--for I in 1..M loop
|
|
--TEXT_IO.PUT(INTEGER'IMAGE(I)); TEXT_IO.PUT(" ");
|
|
--PARSE_RECORD_IO.PUT(SXX(I)); TEXT_IO.NEW_LINE;
|
|
--end loop;
|
|
end REDUCE_STEM_LIST;
|
|
|
|
|
|
|
|
procedure APPLY_PREFIX(SA : in STEM_ARRAY_TYPE; SUFFIX : in SUFFIX_ITEM;
|
|
SX : in SAL; SXX : in out SAL;
|
|
PA : in out PARSE_ARRAY; PA_LAST : in out INTEGER) is
|
|
-- Worry about the stem changing re-cipio from capio
|
|
-- Correspondence of parts, need EFF for VPAR
|
|
-- The prefixes should be ordered with the longest/most likely first
|
|
SSA : STEM_ARRAY;
|
|
L : INTEGER := 0;
|
|
--use TEXT_IO;
|
|
--use INFLECTIONS_PACKAGE.INTEGER_IO;
|
|
|
|
begin
|
|
--PUT_LINE("Entering APPLY_PREFIX");
|
|
SXX := (others => NULL_PARSE_RECORD); -- !!!!!!!!!!!!!!!!!!!!!!!
|
|
|
|
if WORDS_MDEV(USE_PREFIXES) then
|
|
|
|
|
|
|
|
--PUT(NUMBER_OF_PREFIXES); PUT(INTEGER(SA'LENGTH)); PUT(SA'LAST); NEW_LINE;
|
|
for I in 1..NUMBER_OF_PREFIXES loop -- Loop through PREFIXES
|
|
L := 0;
|
|
for J in SA'RANGE loop -- Loop through stem array
|
|
--PUT("J = "); PUT(J); PUT(" SA(J) = "); PUT(SA(J)); NEW_LINE;
|
|
if (SA(J)(1) = PREFIXES(I).FIX(1)) then -- Cuts down a little -- do better
|
|
if SUBTRACT_PREFIX(SA(J), PREFIXES(I)) /=
|
|
HEAD(SA(J), MAX_STEM_SIZE) then
|
|
--PUT_LINE("Hit on prefix " & PREFIXES(I).FIX);
|
|
--PUT("I = "); PUT(I); PUT(" "); PUT(PREFIXES(I).FIX); PUT(" ");
|
|
--PUT("J = "); PUT(J); PUT(" "); PUT(SA(J)); NEW_LINE;
|
|
L := L + 1; -- We have a hit, make new stem array item
|
|
SSA(L) := HEAD(SUBTRACT_PREFIX(SA(J), PREFIXES(I)),
|
|
MAX_STEM_SIZE); -- And that has prefix subtracted to match dict
|
|
--PUT("L = "); PUT(L); PUT(" "); PUT_LINE(SUBTRACT_PREFIX(SA(J), PREFIXES(I)));
|
|
end if; -- with prefix subtracted stems
|
|
end if;
|
|
end loop;
|
|
|
|
if L > 0 then -- There has been a prefix hit
|
|
SEARCH_DICTIONARIES(SSA(1..L), -- So run new dictionary search
|
|
PREFIXES(I), SUFFIX);
|
|
|
|
if PDL_INDEX /= 0 then -- Dict search was successful
|
|
--PUT_LINE("IN APPLY_PREFIX - PDL_INDEX not 0 after prefix " & PREFIXES(I).FIX);
|
|
|
|
--PUT_LINE("REDUCE_STEM_LIST being called from APPLY_PREFIX ---- SUFFIX = "
|
|
--& SUFFIX.FIX);
|
|
REDUCE_STEM_LIST(SX, SXX, PREFIXES(I), SUFFIX);
|
|
|
|
if SXX(1) /= NULL_PARSE_RECORD then -- There is reduced stem result
|
|
PA_LAST := PA_LAST + 1; -- So add prefix line to parse array
|
|
PA(PA_LAST).IR :=
|
|
((PREFIX, NULL_PREFIX_RECORD), 0, NULL_ENDING_RECORD, X, X);
|
|
PA(PA_LAST).STEM := HEAD(PREFIXES(I).FIX, MAX_STEM_SIZE);
|
|
PA(PA_LAST).MNPC := DICT_IO.COUNT(PREFIXES(I).MNPC);
|
|
PA(PA_LAST).D_K := ADDONS;
|
|
exit; -- Because we accept only one prefix
|
|
end if;
|
|
|
|
end if;
|
|
end if;
|
|
end loop; -- Loop on I for PREFIXES
|
|
end if; -- On USE_PREFIXES
|
|
end APPLY_PREFIX;
|
|
|
|
|
|
procedure APPLY_SUFFIX(SA : in STEM_ARRAY_TYPE;
|
|
SX : in SAL; SXX : in out SAL;
|
|
PA : in out PARSE_ARRAY; PA_LAST : in out INTEGER) is
|
|
SSA : STEM_ARRAY;
|
|
L : INTEGER := 0;
|
|
SUFFIX_HIT : INTEGER := 0;
|
|
-- use TEXT_IO;
|
|
-- use INFLECTIONS_PACKAGE.INTEGER_IO;
|
|
|
|
begin
|
|
--PUT_LINE("Entering APPLY_SUFFIX");
|
|
--PUT(NUMBER_OF_SUFFIXES); PUT(INTEGER(SA'LENGTH)); PUT(SA'LAST); NEW_LINE;
|
|
for I in 1..NUMBER_OF_SUFFIXES loop -- Loop through SUFFIXES
|
|
L := 0; -- Take as many as fit
|
|
|
|
for J in SA'RANGE loop -- Loop through stem array
|
|
if SUBTRACT_SUFFIX(SA(J), SUFFIXES(I)) /=
|
|
HEAD(SA(J), MAX_STEM_SIZE) then
|
|
--PUT("Hit on suffix " & SUFFIXES(I).FIX & " " & SUFFIXES(I).CONNECT & " ");
|
|
--PUT(SUFFIXES(I).ENTR); NEW_LINE;
|
|
--PUT("I = "); PUT(I); PUT(" "); PUT(SUFFIXES(I).FIX); PUT(" ");
|
|
--PUT("J = "); PUT(J); PUT(" "); PUT(SA(J)); NEW_LINE;
|
|
L := L + 1; -- We have a hit, make new stem array item
|
|
SSA(L) := HEAD(SUBTRACT_SUFFIX(SA(J), SUFFIXES(I)),
|
|
MAX_STEM_SIZE); -- And that has prefix subtracted to match dict
|
|
--PUT("L = "); PUT(L); PUT(" "); PUT_LINE(SUBTRACT_SUFFIX(SA(J), SUFFIXES(I)));
|
|
end if;
|
|
end loop; -- Loop on J through SA
|
|
|
|
if L > 0 then -- There has been a suffix hit
|
|
SEARCH_DICTIONARIES(SSA(1..L),
|
|
NULL_PREFIX_ITEM, SUFFIXES(I)); -- So run new dictionary search
|
|
-- For suffixes we allow as many as match
|
|
|
|
if PDL_INDEX /= 0 then -- Dict search was successful
|
|
--PUT_LINE("IN APPLY_SUFFIX - PDL_INDEX not 0 after suffix " & SUFFIXES(I).FIX);
|
|
|
|
--PUT_LINE("REDUCE_STEM_LIST called from APPLY_SUFFIX");
|
|
SUFFIX_HIT := I;
|
|
|
|
REDUCE_STEM_LIST(SX, SXX, NULL_PREFIX_ITEM, SUFFIXES(I));
|
|
|
|
if SXX(1) /= NULL_PARSE_RECORD then -- There is reduced stem result
|
|
PA_LAST := PA_LAST + 1; -- So add suffix line to parse array
|
|
--PUT_LINE("REDUCE_STEM_LIST is not null so add suffix to parse array");
|
|
PA(PA_LAST).IR :=
|
|
((SUFFIX, NULL_SUFFIX_RECORD), 0, NULL_ENDING_RECORD, X, X);
|
|
PA(PA_LAST).STEM := HEAD(
|
|
SUFFIXES(SUFFIX_HIT).FIX, MAX_STEM_SIZE);
|
|
-- Maybe it would better if suffix.fix was of stem size
|
|
PA(PA_LAST).MNPC := DICT_IO.COUNT(SUFFIXES(SUFFIX_HIT).MNPC);
|
|
--PUT("SUFFIX MNPC "); PUT(SUFFIXES(SUFFIX_HIT).MNPC); NEW_LINE;
|
|
PA(PA_LAST).D_K := ADDONS;
|
|
---
|
|
for I in SXX'RANGE loop
|
|
exit when SXX(I) = NULL_PARSE_RECORD;
|
|
PA_LAST := PA_LAST + 1;
|
|
PA(PA_LAST) := SXX(I);
|
|
end loop;
|
|
---
|
|
end if;
|
|
|
|
|
|
else -- there is suffix (L /= 0) but no dictionary hit
|
|
SUFFIX_HIT := I;
|
|
--PUT_LINE(" -- there is suffix (L /= 0) but no dictionary hit");
|
|
--PUT("L = "); PUT(L); PUT(" ");
|
|
--PUT("SUFFIX_HIT = "); PUT(SUFFIXES(I).FIX); NEW_LINE;
|
|
APPLY_PREFIX(SSA(1..L), SUFFIXES(I), SX, SXX, PA, PA_LAST);
|
|
--PUT_LINE("PREFIXES applied from APPLY_SUFFIXES");
|
|
if SXX(1) /= NULL_PARSE_RECORD then -- There is reduced stem result
|
|
PA_LAST := PA_LAST + 1; -- So add suffix line to parse array
|
|
--PUT_LINE("REDUCE_STEM_LIST is not null so add suffix to parse array");
|
|
PA(PA_LAST).IR :=
|
|
((SUFFIX, NULL_SUFFIX_RECORD), 0, NULL_ENDING_RECORD, X, X);
|
|
PA(PA_LAST).STEM := HEAD(
|
|
SUFFIXES(SUFFIX_HIT).FIX, MAX_STEM_SIZE);
|
|
PA(PA_LAST).MNPC := DICT_IO.COUNT(SUFFIXES(SUFFIX_HIT).MNPC);
|
|
--PUT("SUFFIX MNPC "); PUT(SUFFIXES(SUFFIX_HIT).MNPC); NEW_LINE;
|
|
PA(PA_LAST).D_K := ADDONS;
|
|
|
|
for I in SXX'RANGE loop -- Set this set of results
|
|
exit when SXX(I) = NULL_PARSE_RECORD;
|
|
PA_LAST := PA_LAST + 1;
|
|
PA(PA_LAST) := SXX(I);
|
|
end loop;
|
|
|
|
end if;
|
|
|
|
end if;
|
|
end if; -- with suffix subtracted stems
|
|
end loop; -- Loop on I for SUFFIXES
|
|
|
|
end APPLY_SUFFIX;
|
|
|
|
|
|
procedure PRUNE_STEMS(INPUT_WORD : STRING; SX : in SAL; SXX : in out SAL) is
|
|
J : INTEGER := 0;
|
|
--SXX : SAL;
|
|
|
|
begin
|
|
--TEXT_IO.PUT_LINE("Entering PRUNE_STEMS INPUT_WORD = " & INPUT_WORD );
|
|
--TEXT_IO.PUT_LINE("In PRUNE PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
if SX(1) = NULL_PARSE_RECORD then
|
|
return; end if;
|
|
|
|
-----------------------------------------------------------------
|
|
|
|
GENERATE_REDUCED_STEM_ARRAY:
|
|
begin
|
|
--PUT_LINE("List of stems by size");
|
|
--NEW_LINE;
|
|
J := 1;
|
|
for Z in 0..MIN(MAX_STEM_SIZE, LEN(INPUT_WORD)) loop
|
|
if SA(Z) /= NOT_A_STEM then
|
|
--PUT(Z); PUT(J); PUT(" "); PUT_LINE(SA(Z));
|
|
SSA(J) := SA(Z);
|
|
SSA_MAX := J;
|
|
J := J + 1;
|
|
end if;
|
|
end loop;
|
|
--PUT_LINE("SSA_MAX = " & INTEGER'IMAGE(SSA_MAX));
|
|
--NEW_LINE(2);
|
|
end GENERATE_REDUCED_STEM_ARRAY;
|
|
|
|
|
|
|
|
--TEXT_IO.PUT_LINE("PRUNE_STEMS checking (not) DO_ONLY_FIXES = " & BOOLEAN'IMAGE(WORDS_MDEV(DO_ONLY_FIXES)));
|
|
if not WORDS_MDEV(DO_ONLY_FIXES) then -- Just bypass main dictionary search
|
|
--TEXT_IO.PUT_LINE("Calling SEARCH_DICTIONARIES from PRUNE_STEMS --- General case");
|
|
|
|
SEARCH_DICTIONARIES(SSA(1..SSA_MAX), NULL_PREFIX_ITEM, NULL_SUFFIX_ITEM);
|
|
--TEXT_IO.PUT_LINE("Finished SEARCH_DICTIONARIES from PRUNE_STEMS --- General case");
|
|
end if;
|
|
--TEXT_IO.PUT_LINE("PRUNE_STEMS passing over because of NOT DO_ONLY_FIXES");
|
|
|
|
|
|
-- ---------------------------------------------------------------
|
|
--TEXT_IO.PUT_LINE("PRUNE_STEMS below NOT DO_ONLY_FIXES PA_LAST = "
|
|
--& INTEGER'IMAGE(PA_LAST));
|
|
|
|
if (((PA_LAST = 0) and -- No Uniques or Syncope
|
|
(PDL_INDEX = 0)) --) and then -- No dictionary match
|
|
or WORDS_MDEV(DO_FIXES_ANYWAY)) and then
|
|
WORDS_MODE(DO_FIXES) then
|
|
|
|
----So try prefixes and suffixes, Generate a new SAA array, search again
|
|
--TEXT_IO.PUT_LINE(" PDL_INDEX = 0 after straight search ------ So APPLY_SUFFIX PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
|
|
if SXX(1) = NULL_PARSE_RECORD then -- We could not find a match with suffix
|
|
APPLY_PREFIX(SSA(1..SSA_MAX), NULL_SUFFIX_ITEM, SX, SXX, PA, PA_LAST);
|
|
end if;
|
|
--------------
|
|
if SXX(1) = NULL_PARSE_RECORD then -- We could not find a match with suffix
|
|
APPLY_SUFFIX(SSA(1..SSA_MAX), SX, SXX, PA, PA_LAST);
|
|
if SXX(1) = NULL_PARSE_RECORD then -- We could not find a match with suffix
|
|
----So try prefixes, Generate a new SAA array, search again
|
|
--TEXT_IO.PUT_LINE(" PDL_INDEX = 0 after suffix search ----- So APPLY_PREFIX by itself PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
----Need to use the new SSA, modified to include suffixes
|
|
APPLY_PREFIX(SSA(1..SSA_MAX), NULL_SUFFIX_ITEM, SX, SXX, PA, PA_LAST);
|
|
--TEXT_IO.PUT_LINE("PREFIXES applied PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
--------------
|
|
end if; -- Suffix failed
|
|
end if; -- Suffix failed
|
|
else
|
|
--TEXT_IO.PUT_LINE(" PDL_INDEX not 0 after straight search ------ So REDUCE_STEMS PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
REDUCE_STEM_LIST(SX, SXX, NULL_PREFIX_ITEM, NULL_SUFFIX_ITEM);
|
|
if PA_LAST = 0 and then SXX(1) = NULL_PARSE_RECORD then
|
|
--TEXT_IO.PUT_LINE("Although PDL_INDEX not 0 after straight search , SXX fails PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
--------------
|
|
if WORDS_MODE(DO_FIXES) then
|
|
APPLY_SUFFIX(SSA(1..SSA_MAX), SX, SXX, PA, PA_LAST);
|
|
--TEXT_IO.PUT_LINE("SUFFIXES applied PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
if SXX(1) = NULL_PARSE_RECORD then -- We could not find a match with suffix
|
|
----So try prefixes, Generate a new SAA array, search again
|
|
--TEXT_IO.PUT_LINE(" PDL_INDEX = 0 after suffix search ----- So APPLY_PREFIX by itself PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
----Need to use the new SSA, modified to include suffixes
|
|
APPLY_PREFIX(SSA(1..SSA_MAX), NULL_SUFFIX_ITEM,
|
|
SX, SXX, PA, PA_LAST);
|
|
--TEXT_IO.PUT_LINE("PREFIXES applied PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
--------------
|
|
end if; -- Suffix failed
|
|
end if; -- If DO_FIXES then do
|
|
end if; -- First search passed but SXX null
|
|
end if; -- First search failed
|
|
|
|
--TEXT_IO.PUT_LINE("End of PRUNE_STEMS PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
|
|
|
|
end PRUNE_STEMS;
|
|
|
|
|
|
procedure PROCESS_PACKONS(INPUT_WORD : STRING; KEY : STEM_KEY_TYPE := 0) is
|
|
|
|
STEM_LENGTH : INTEGER := 0;
|
|
PR : PARSE_RECORD;
|
|
M : INTEGER := 1;
|
|
DE : DICTIONARY_ENTRY;
|
|
MEAN : MEANING_TYPE;
|
|
PACKON_FIRST_HIT : BOOLEAN := FALSE;
|
|
SL, SL_NULLS : SAL := (others => NULL_PARSE_RECORD);
|
|
|
|
function "<=" (LEFT, RIGHT : PRONOUN_KIND_TYPE) return BOOLEAN is
|
|
begin
|
|
if (RIGHT = LEFT or else
|
|
RIGHT = X) then
|
|
return TRUE;
|
|
elsif
|
|
(RIGHT = ADJECT and -- Just for PACK
|
|
LEFT = INDEF) then
|
|
return TRUE;
|
|
else
|
|
return FALSE;
|
|
end if;
|
|
end "<=";
|
|
|
|
begin
|
|
|
|
OVER_PACKONS:
|
|
for K in PACKONS'RANGE loop -- Do whole set, more than one may apply
|
|
--TEXT_IO.PUT_LINE("OVER_PACKONS K = "& INTEGER'IMAGE(K) & " PACKON = " & PACKONS(K).TACK);
|
|
-- PACKON if the TACKON ENTRY is PRON
|
|
FOR_EACH_PACKON:
|
|
declare
|
|
XWORD : constant STRING := SUBTRACT_TACKON(INPUT_WORD, PACKONS(K));
|
|
WORD : STRING(1..XWORD'LENGTH) := XWORD;
|
|
PACKON_LENGTH : constant INTEGER := TRIM(PACKONS(K).TACK)'LENGTH;
|
|
LAST_OF_WORD : CHARACTER := WORD(WORD'LAST);
|
|
LENGTH_OF_WORD : constant INTEGER := WORD'LENGTH;
|
|
begin
|
|
--PUT_LINE("FOR_EACH_PACKON WORD = |"& WORD & "|");
|
|
--PUT_LINE("FOR_EACH_PACKON PACKON_LENGTH = "& INTEGER'IMAGE(PACKON_LENGTH));
|
|
--PUT_LINE("FOR_EACH_PACKON LENGTH_OF_WORD = "& INTEGER'IMAGE(LENGTH_OF_WORD));
|
|
SL := SL_NULLS; -- Initialize SL to nulls
|
|
if WORD /= INPUT_WORD then
|
|
--PUT_LINE("PROCESS_PACKONS Hit on PACKON " & PACKONS(K).TACK);
|
|
PACKON_FIRST_HIT := TRUE;
|
|
|
|
if PACKONS(K).TACK(1..3) = "dam" and LAST_OF_WORD = 'n' then
|
|
WORD(WORD'LAST) := 'm'; -- Takes care of the m - > n shift with dam
|
|
LAST_OF_WORD := 'm';
|
|
--PUT_LINE("PACKON = dam and LAST_OF_WORD = n => " & WORD);
|
|
end if;
|
|
|
|
-- No blank endings in these pronouns
|
|
LEL_SECTION_IO.READ(INFLECTIONS_SECTIONS_FILE, LEL, 4);
|
|
|
|
M := 0;
|
|
ON_INFLECTS:
|
|
for Z in reverse 1..MIN(6, LENGTH_OF_WORD) loop -- optimum for qu-pronouns
|
|
--PUT("ON_INFLECTS Z = "); PUT(Z); PUT(" "); PUT(WORD(1..Z)); NEW_LINE;
|
|
if PELL(Z, LAST_OF_WORD) > 0 then -- Any possible inflections at all
|
|
for I in PELF(Z, LAST_OF_WORD)..PELL(Z, LAST_OF_WORD) loop
|
|
--PUT("+");PUT(LEL(I)); PUT(WORD'LAST); PUT(WORD(WORD'LAST-Z+1..WORD'LAST));
|
|
--PUT(" "); PUT((LEL(I).ENDING.SUF(1..Z))); NEW_LINE;
|
|
|
|
if (Z <= LENGTH_OF_WORD) and then
|
|
((EQU(LEL(I).ENDING.SUF(1..Z),
|
|
WORD(WORD'LAST-Z+1..WORD'LAST))) and
|
|
(LEL(I).QUAL.PRON.DECL <= PACKONS(K).ENTR.BASE.PACK.DECL)) then
|
|
-- Have found an ending that is a possible match
|
|
-- And INFLECT agrees with PACKON.BASE
|
|
--PUT_LINE("INFLECTS HIT ------------------------------------------------------");
|
|
|
|
-- Add to list of possible ending records
|
|
STEM_LENGTH := WORD'LENGTH - Z;
|
|
PR := (HEAD(WORD(WORD'FIRST..STEM_LENGTH), MAX_STEM_SIZE),
|
|
LEL(I), DEFAULT_DICTIONARY_KIND, NULL_MNPC);
|
|
M := M + 1;
|
|
SL(M) := PR;
|
|
SSA(1) := HEAD(WORD(WORD'FIRST.. WORD'FIRST+STEM_LENGTH-1),
|
|
MAX_STEM_SIZE);
|
|
--PUT_LINE("STEM_LENGTH = " & INTEGER'IMAGE(STEM_LENGTH));
|
|
--PUT_LINE("SSA(1) in PACKONS from real INFLECTS ->" & SSA(1) & '|');
|
|
-- may get set several times
|
|
end if;
|
|
end loop;
|
|
end if;
|
|
end loop ON_INFLECTS;
|
|
|
|
|
|
|
|
-- Only one stem will emerge
|
|
PDL_INDEX := 0;
|
|
SEARCH_DICTIONARIES(SSA(1..1), NULL_PREFIX_ITEM, NULL_SUFFIX_ITEM,
|
|
PACK_ONLY);
|
|
-- Now have a PDL, scan for agreement
|
|
|
|
PDL_LOOP:
|
|
for J in 1..PDL_INDEX loop -- Go through all dictionary hits to see
|
|
--PUT_LINE("PACKON PDL_INDEX "); PUT(PDL(J).DS.STEM); PUT(PDL(J).DS.PART); NEW_LINE;
|
|
-- M used here wher I is used in REDUCE, maybe make consistent
|
|
M := 1;
|
|
SL_LOOP:
|
|
while SL(M) /= NULL_PARSE_RECORD loop -- Over all inflection hits
|
|
-- if this stem is possible
|
|
-- call up the meaning to check for "(w/-"
|
|
DICT_IO.SET_INDEX(DICT_FILE(PDL(J).D_K), PDL(J).DS.MNPC);
|
|
DICT_IO.READ(DICT_FILE(PDL(J).D_K), DE);
|
|
MEAN := DE.MEAN;
|
|
|
|
if (TRIM(MEAN)(1..4) = "(w/-" and then -- Does attached PACKON agree
|
|
TRIM(MEAN)(5..4+PACKON_LENGTH) = TRIM(PACKONS(K).TACK)) then
|
|
--PUT_LINE("Mean = PACK Hit Hit ");
|
|
--PUT("MEAN|" & MEAN(1..4) & '|');
|
|
--PUT_LINE("PACK|" & TRIM(PACKONS(K).TACK) & '|');
|
|
|
|
--PUT("DECL PDL_INDEX "); PUT(PDL(J).DS.PART.PACK.DECL);
|
|
--PUT(" <= ? SL ");
|
|
--PUT(SL(M).IR.QUAL.PRON.DECL);
|
|
--PUT(" <= ? PACKON ");
|
|
--PUT(PACKONS(K).ENTR.BASE.PACK.DECL); NEW_LINE;
|
|
if (PDL(J).DS.PART.PACK.DECL = SL(M).IR.QUAL.PRON.DECL) then -- or
|
|
|
|
--PUT_LINE("DECL Hit Hit Hit Hit Hit Hit Hit Hit ");
|
|
--PUT("KINDS PACKON ");
|
|
--PUT(PACKONS(K).ENTR.BASE.PACK.KIND);
|
|
--PUT(" <= ? PDL_KIND ");
|
|
--PUT(PDL(J).DS.PART.PACK.KIND);
|
|
--NEW_LINE;
|
|
--if (PACKONS(K).ENTR.BASE.PACK.KIND <=
|
|
-- PDL(J).DS.PART.PACK.KIND) then
|
|
-- Then we have a hit and make a PA
|
|
--PUT_LINE("KIND Hit Hit Hit Hit Hit Hit Hit Hit Hit Hit Hit Hit Hit Hit Hit ");
|
|
|
|
if PACKON_FIRST_HIT then
|
|
PA_LAST := PA_LAST + 1;
|
|
PA(PA_LAST) := (PACKONS(K).TACK,
|
|
((TACKON, NULL_TACKON_RECORD), 0,
|
|
NULL_ENDING_RECORD, X, X),
|
|
ADDONS,
|
|
DICT_IO.COUNT((PACKONS(K).MNPC)));
|
|
PACKON_FIRST_HIT := FALSE;
|
|
|
|
end if;
|
|
PA_LAST := PA_LAST + 1;
|
|
--PUT_LINE("PACKON PDL HIT PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
--PUT(PDL(J).DS.STEM); PUT(PDL(J).DS.PART); NEW_LINE;
|
|
--PUT_LINE(MEAN);
|
|
--PUT(PA(PA_LAST)); NEW_LINE;
|
|
PA(PA_LAST) := (STEM => SL(M).STEM,
|
|
IR => (
|
|
QUAL => (
|
|
POFS => PRON,
|
|
PRON => (
|
|
PDL(J).DS.PART.PACK.DECL,
|
|
SL(M).IR.QUAL.PRON.CS,
|
|
SL(M).IR.QUAL.PRON.NUMBER,
|
|
SL(M).IR.QUAL.PRON.GENDER )),
|
|
KEY => SL(M).IR.KEY,
|
|
ENDING => SL(M).IR.ENDING,
|
|
AGE => SL(M).IR.AGE,
|
|
FREQ => SL(M).IR.FREQ),
|
|
D_K => PDL(J).D_K,
|
|
MNPC => PDL(J).DS.MNPC);
|
|
--end if;
|
|
end if;
|
|
end if;
|
|
M := M + 1;
|
|
|
|
end loop SL_LOOP;
|
|
|
|
end loop PDL_LOOP;
|
|
|
|
end if;
|
|
end FOR_EACH_PACKON;
|
|
PACKON_FIRST_HIT := FALSE;
|
|
|
|
end loop OVER_PACKONS;
|
|
|
|
end PROCESS_PACKONS;
|
|
|
|
|
|
procedure PROCESS_QU_PRONOUNS(INPUT_WORD : STRING; QKEY : STEM_KEY_TYPE := 0) is
|
|
|
|
WORD : constant STRING := LOWER_CASE(TRIM(INPUT_WORD));
|
|
LAST_OF_WORD : constant CHARACTER := WORD(WORD'LAST);
|
|
LENGTH_OF_WORD : constant INTEGER := WORD'LENGTH;
|
|
STEM_LENGTH : INTEGER := 0;
|
|
M : INTEGER := 0;
|
|
PR : PARSE_RECORD;
|
|
SL : SAL := (others => NULL_PARSE_RECORD);
|
|
|
|
begin
|
|
--TEXT_IO.PUT_LINE("PROCESS_QU_PRONOUNS " & INPUT_WORD);
|
|
|
|
-- No blank endings in these pronouns
|
|
LEL_SECTION_IO.READ(INFLECTIONS_SECTIONS_FILE, LEL, 4);
|
|
|
|
-- M used here while I is used in REDUCE, maybe make consistent
|
|
M := 0;
|
|
ON_INFLECTS:
|
|
for Z in reverse 1..MIN(4, LENGTH_OF_WORD) loop -- optimized for qu-pronouns
|
|
--PUT("ON_INFLECTS "); PUT(Z); PUT(" "); PUT(LAST_OF_WORD); NEW_LINE;
|
|
if PELL(Z, LAST_OF_WORD) > 0 then -- Any possible inflections at all
|
|
for I in PELF(Z, LAST_OF_WORD)..PELL(Z, LAST_OF_WORD) loop
|
|
--PUT(LEL(I)); PUT(WORD'LAST); PUT(WORD'LAST-Z+1); NEW_LINE;
|
|
if (Z <= LENGTH_OF_WORD) and then
|
|
LEL(I).KEY = QKEY and then
|
|
EQU(LEL(I).ENDING.SUF(1..Z),
|
|
WORD(WORD'LAST-Z+1..WORD'LAST)) then
|
|
-- Have found an ending that is a possible match
|
|
--PUT_LINE("INFLECTS HIT --------------------------------------------");
|
|
|
|
-- Add to list of possible ending records
|
|
STEM_LENGTH := WORD'LENGTH - Z;
|
|
PR := (HEAD(WORD(WORD'FIRST..STEM_LENGTH), MAX_STEM_SIZE),
|
|
LEL(I), DEFAULT_DICTIONARY_KIND, NULL_MNPC);
|
|
M := M + 1;
|
|
SL(M) := PR;
|
|
--PUT("M = "); PUT(M); PUT(" "); PUT(SL(M)); NEW_LINE;
|
|
SSA(1) := HEAD(WORD(WORD'FIRST.. WORD'FIRST+STEM_LENGTH-1),
|
|
MAX_STEM_SIZE);
|
|
-- may get set several times
|
|
end if;
|
|
end loop;
|
|
end if;
|
|
end loop ON_INFLECTS;
|
|
|
|
|
|
|
|
-- Only one stem will emerge
|
|
PDL_INDEX := 0;
|
|
SEARCH_DICTIONARIES(SSA(1..1), NULL_PREFIX_ITEM, NULL_SUFFIX_ITEM,
|
|
QU_PRON_ONLY);
|
|
-- Now have a PDL, scan for agreement
|
|
|
|
PDL_LOOP:
|
|
for J in 1..PDL_INDEX loop -- Go through all dictionary hits to see
|
|
M := 1;
|
|
SL_LOOP:
|
|
while SL(M) /= NULL_PARSE_RECORD loop -- Over all inflection hits
|
|
--PUT("SL_LOOP M = "); PUT(M); PUT(" SL => "); PUT(SL(M)); NEW_LINE; NEW_LINE;
|
|
|
|
--PUT("DECL PDL "); PUT(PDL(J).DS.PART.PRON.DECL);
|
|
--PUT(" <= ? SL ");
|
|
--PUT(SL(M).IR.QUAL.PRON.DECL);
|
|
--NEW_LINE;
|
|
if (PDL(J).DS.PART.PRON.DECL = SL(M).IR.QUAL.PRON.DECL) then
|
|
--PUT_LINE("DECL Hit Hit Hit Hit Hit Hit Hit Hit ");
|
|
PA_LAST := PA_LAST + 1;
|
|
--PUT_LINE("QU_PRON PDL HIT PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
--PUT(PDL(J).DS.STEM); PUT(PDL(J).DS.PART); NEW_LINE;
|
|
--PUT(PA(PA_LAST)); NEW_LINE;
|
|
PA(PA_LAST) := (STEM => SL(M).STEM,
|
|
IR => (
|
|
QUAL => (
|
|
POFS => PRON,
|
|
PRON => (
|
|
PDL(J).DS.PART.PRON.DECL,
|
|
SL(M).IR.QUAL.PRON.CS,
|
|
SL(M).IR.QUAL.PRON.NUMBER,
|
|
SL(M).IR.QUAL.PRON.GENDER )),
|
|
KEY => SL(M).IR.KEY,
|
|
ENDING => SL(M).IR.ENDING,
|
|
AGE => SL(M).IR.AGE,
|
|
FREQ => SL(M).IR.FREQ),
|
|
D_K => PDL(J).D_K,
|
|
MNPC => PDL(J).DS.MNPC);
|
|
end if;
|
|
M := M + 1;
|
|
|
|
end loop SL_LOOP;
|
|
-- PDL:= PDL.SUCC;
|
|
end loop PDL_LOOP;
|
|
|
|
end PROCESS_QU_PRONOUNS;
|
|
|
|
|
|
|
|
procedure TRY_TACKONS(INPUT_WORD : STRING) is
|
|
TACKON_HIT : BOOLEAN := FALSE;
|
|
TACKON_ON : BOOLEAN := FALSE;
|
|
TACKON_LENGTH : constant INTEGER := 0;
|
|
J : INTEGER := 0;
|
|
DE : DICTIONARY_ENTRY := NULL_DICTIONARY_ENTRY;
|
|
MEAN : MEANING_TYPE := NULL_MEANING_TYPE;
|
|
ENTERING_PA_LAST : INTEGER := PA_LAST;
|
|
START_OF_LOOP : INTEGER := 5; -- 4 enclitics -- Hard number !!!!!!!!!!!!!!!
|
|
END_OF_LOOP : INTEGER := NUMBER_OF_TACKONS;
|
|
begin
|
|
--TEXT_IO.PUT_LINE("TRYing TACKONS *************** INPUT_WORD = " & INPUT_WORD);
|
|
|
|
LOOP_OVER_TACKONS:
|
|
for I in START_OF_LOOP..END_OF_LOOP loop
|
|
--PUT_LINE("TACKON #" & INTEGER'IMAGE(I) & " " &
|
|
--SUBTRACT_TACKON(INPUT_WORD, TACKONS(I)) & " + " & TACKONS(I).TACK);
|
|
|
|
REMOVE_A_TACKON:
|
|
declare
|
|
LESS : constant STRING :=
|
|
SUBTRACT_TACKON(INPUT_WORD, TACKONS(I));
|
|
begin
|
|
--TEXT_IO.PUT_LINE("LESS = " & LESS);
|
|
if LESS /= INPUT_WORD then -- LESS is less
|
|
|
|
--==========================================================
|
|
--RUN_UNIQUES(INPUT_WORD, UNIQUE_FOUND, PA, PA_LAST);
|
|
--RUN_INFLECTIONS(LESS, SS);
|
|
--PRUNE_STEMS(LESS, SS, SSS);
|
|
--if SSS(1) /= NULL_PARSE_RECORD then
|
|
--ORDER_STEMS(SSS);
|
|
--ARRAY_STEMS(SSS, PA, PA_LAST);
|
|
--SSS(1) := NULL_PARSE_RECORD;
|
|
--end if;
|
|
--==========================================================
|
|
WORD(LESS, PA, PA_LAST);
|
|
|
|
|
|
-- TEXT_IO.PUT("In TRY_TACKONS Left WORD ");
|
|
-- TEXT_IO.PUT("PA_LAST = "); TEXT_IO.PUT(INTEGER'IMAGE(PA_LAST)); TEXT_IO.PUT(" ");
|
|
-- TEXT_IO.PUT(TACKONS(I).TACK);
|
|
-- TEXT_IO.NEW_LINE;
|
|
|
|
-----------------------------------------
|
|
|
|
|
|
if PA_LAST > ENTERING_PA_LAST then -- we have a possible word
|
|
--TEXT_IO.PUT("I = " & INTEGER'IMAGE(I) & " " & TACKONS(I).TACK & " TACKONS(I).ENTR.BASE.PART = ");
|
|
--PART_OF_SPEECH_TYPE_IO.PUT(TACKONS(I).ENTR.BASE.PART); TEXT_IO.NEW_LINE;
|
|
|
|
if TACKONS(I).ENTR.BASE.POFS = X then -- on PART (= X?)
|
|
--PUT("TACKON X found "); PUT( TACKONS(I).TACK); NEW_LINE;
|
|
--PUT("PA_LAST = "); PUT(PA_LAST); PUT(" ");
|
|
--PUT("TACKON MNPC "); PUT( TACKONS(I).MNPC); NEW_LINE;
|
|
TACKON_HIT := TRUE;
|
|
--PUT("TACKON_HIT = "); PUT(TACKON_HIT); NEW_LINE;
|
|
TACKON_ON := FALSE;
|
|
|
|
else
|
|
|
|
J := PA_LAST;
|
|
|
|
while J >= ENTERING_PA_LAST+1 loop -- Sweep backwards over PA
|
|
-- Sweeping up inapplicable fixes,
|
|
-- although we only have TACKONs for X or PRON or ADJ - so far
|
|
-- and there are no fixes for PRON - so far
|
|
--TEXT_IO.PUT("J = " & INTEGER'IMAGE(J) & " PA(J).IR.QUAL = ");
|
|
--QUALITY_RECORD_IO.PUT(PA(J).IR.QUAL);
|
|
--TEXT_IO.NEW_LINE;
|
|
|
|
if ((PA(J).IR.QUAL.POFS = PREFIX) and then (TACKON_ON)) then
|
|
null; -- check PART
|
|
TACKON_ON := FALSE;
|
|
elsif ((PA(J).IR.QUAL.POFS = SUFFIX) and then (TACKON_ON)) then
|
|
-- check PART
|
|
|
|
null;
|
|
TACKON_ON := FALSE;
|
|
|
|
|
|
elsif PA(J).IR.QUAL.POFS = TACKONS(I).ENTR.BASE.POFS then
|
|
DICT_IO.SET_INDEX(DICT_FILE(PA(J).D_K), PA(J).MNPC);
|
|
DICT_IO.READ(DICT_FILE(PA(J).D_K), DE);
|
|
MEAN := DE.MEAN;
|
|
|
|
--TEXT_IO.PUT("J = " & INTEGER'IMAGE(J) & " PA(J).IR.QUAL = ");
|
|
--QUALITY_RECORD_IO.PUT(PA(J).IR.QUAL);
|
|
--TEXT_IO.NEW_LINE;
|
|
-- check PART
|
|
case TACKONS(I).ENTR.BASE.POFS is
|
|
when N =>
|
|
if (PA(J).IR.QUAL.N.DECL <=
|
|
TACKONS(I).ENTR.BASE.N.DECL) then
|
|
-- Ignore GEN and KIND
|
|
TACKON_HIT := TRUE;
|
|
TACKON_ON := TRUE;
|
|
end if;
|
|
|
|
when PRON => -- Only one we have other than X
|
|
--PUT("TACK/PA DECL "); PUT(PA(J).IR.QUAL.PRON.DECL); PUT(" - ");
|
|
--PUT(TACKONS(I).ENTR.BASE.PRON.DECL); NEW_LINE;
|
|
--PUT("TACK/PA KIND "); PUT(PA(J).IR.QUAL.PRON.KIND); PUT(" - ");
|
|
--PUT(TACKONS(I).ENTR.BASE.PRON.KIND); NEW_LINE;
|
|
if PA(J).IR.QUAL.PRON.DECL <=
|
|
TACKONS(I).ENTR.BASE.PRON.DECL --and then
|
|
--PA(J).IR.QUAL.PRON.KIND <=
|
|
--TACKONS(I).ENTR.BASE.PRON.KIND
|
|
then
|
|
--PUT("TACKON PRON found HIT "); PUT( TACKONS(I).TACK); NEW_LINE;
|
|
TACKON_HIT := TRUE;
|
|
TACKON_ON := TRUE;
|
|
else
|
|
PA(J..PA_LAST-1) := PA(J+1..PA_LAST);
|
|
PA_LAST := PA_LAST - 1;
|
|
|
|
end if;
|
|
|
|
when ADJ =>
|
|
-- Forego all checks, even on DECL of ADJ
|
|
-- -cumque is the only one I have now
|
|
-- if .......
|
|
TACKON_HIT := TRUE;
|
|
TACKON_ON := TRUE;
|
|
-- else
|
|
-- PA(J..PA_LAST-1) := PA(J+1..PA_LAST);
|
|
-- PA_LAST := PA_LAST - 1;
|
|
-- end if;
|
|
|
|
--when ADV =>
|
|
--when V =>
|
|
when others =>
|
|
PA(J..PA_LAST-1) := PA(J+1..PA_LAST);
|
|
PA_LAST := PA_LAST - 1;
|
|
|
|
end case;
|
|
|
|
else -- check PART
|
|
PA(J..PA_LAST-1) := PA(J+1..PA_LAST);
|
|
PA_LAST := PA_LAST - 1;
|
|
--PUT("J failed J & PA_LAST = "); PUT(J); PUT(" "); PUT(PA_LAST); NEW_LINE;
|
|
end if; -- check PART
|
|
J := J - 1;
|
|
end loop; -- loop sweep over PA
|
|
|
|
end if; -- on PART (= X?)
|
|
--PUT_LINE("End if on PART = X ?");
|
|
|
|
|
|
-----------------------------------------
|
|
if TACKON_HIT then
|
|
--PUT("Where it counts TACKON_HIT = "); PUT(TACKON_HIT); NEW_LINE;
|
|
-- Put on TACKON
|
|
|
|
|
|
|
|
|
|
|
|
PA_LAST := PA_LAST + 1;
|
|
PA(ENTERING_PA_LAST+2..PA_LAST) :=
|
|
PA(ENTERING_PA_LAST+1..PA_LAST-1);
|
|
PA(ENTERING_PA_LAST+1) := (TACKONS(I).TACK,
|
|
((TACKON, NULL_TACKON_RECORD), 0,
|
|
NULL_ENDING_RECORD, X, X),
|
|
ADDONS,
|
|
DICT_IO.COUNT((TACKONS(I).MNPC)));
|
|
--PUT("PA_LAST = "); PUT(PA_LAST); PUT(" ");
|
|
--PUT("I = "); PUT(I); PUT(" TACKONS(I).TACK = "); PUT(TACKONS(I).TACK);
|
|
--PUT_LINE("TACKON added");
|
|
--PUT_LINE("Now list the PA array after adding the found TACKON");
|
|
--for K in 1..PA_LAST loop
|
|
--PUT("K = "); PUT(K); PUT(" PA(K) "); PUT(PA(K).D_K); PUT(PA(K).IR); NEW_LINE;
|
|
--end loop;
|
|
return; -- Be happy with one ???????
|
|
else
|
|
null;
|
|
--PUT("No TACKON_HIT, so no punitive TACKON PA_LAST is ");
|
|
--PUT(PA_LAST); NEW_LINE;
|
|
end if; -- TACKON_HIT
|
|
|
|
end if; -- we have a possible word
|
|
-----------------------------------------
|
|
end if; -- LESS is less
|
|
end REMOVE_A_TACKON;
|
|
end loop LOOP_OVER_TACKONS;
|
|
--PUT_LINE("LEAVING TACKONS ******************************************* ");
|
|
end TRY_TACKONS;
|
|
|
|
|
|
begin -- WORD
|
|
--TEXT_IO.PUT_LINE("Starting WORD INPUT = " & INPUT_WORD & " PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
if TRIM(INPUT_WORD) = "" then
|
|
return;
|
|
end if;
|
|
|
|
|
|
RUN_UNIQUES(INPUT_WORD, UNIQUE_FOUND, PA, PA_LAST);
|
|
|
|
|
|
--if INPUT_WORD(INPUT_WORD'FIRST) in 'a'..'z' then
|
|
-- CONSTRUCT_STEMS(INPUT_WORD,
|
|
-- SEARCH_DICTIONARIES();
|
|
-- TRY_STEMS_AGAINST_INFLECTIONS;
|
|
-- end if;
|
|
|
|
--if INPUT_WORD(INPUT_WORD'FIRST) in 'a'..'z' then
|
|
--TEXT_IO.PUT_LINE("After UNIQUES INPUT = " & INPUT_WORD & " PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
|
|
|
|
QU:
|
|
declare
|
|
PA_QSTART : INTEGER := PA_LAST;
|
|
PA_START : INTEGER := PA_LAST;
|
|
SAVED_MODE_ARRAY : MODE_ARRAY := WORDS_MODE;
|
|
QKEY : STEM_KEY_TYPE := 0;
|
|
|
|
|
|
begin -- QU
|
|
TICKONS(NUMBER_OF_TICKONS+1) := NULL_PREFIX_ITEM;
|
|
WORDS_MODE := (others => FALSE);
|
|
|
|
for I in 1..NUMBER_OF_TICKONS+1 loop
|
|
declare
|
|
Q_WORD : constant STRING := TRIM(SUBTRACT_TICKON(INPUT_WORD, TICKONS(I)));
|
|
begin
|
|
PA_LAST := PA_QSTART;
|
|
PA(PA_LAST+1) := NULL_PARSE_RECORD;
|
|
if (I = NUMBER_OF_TICKONS + 1) or else -- The prefix is a TICKON
|
|
(Q_WORD /= INPUT_WORD) then -- and it matches the start of INPUT_WORD
|
|
|
|
if I <= NUMBER_OF_TICKONS then -- Add to PA if
|
|
--TEXT_IO.PUT_LINE("ADDING TICKON PA " & TICKONS(I).FIX);
|
|
PA_LAST := PA_LAST + 1; -- So add prefix line to parse array
|
|
PA(PA_LAST).STEM := HEAD(TICKONS(I).FIX, MAX_STEM_SIZE);
|
|
PA(PA_LAST).IR := ((PREFIX, NULL_PREFIX_RECORD), 0, NULL_ENDING_RECORD, X, X);
|
|
PA(PA_LAST).D_K := ADDONS;
|
|
PA(PA_LAST).MNPC := DICT_IO.COUNT(TICKONS(I).MNPC);
|
|
end if;
|
|
|
|
|
|
|
|
if Q_WORD'LENGTH >= 3 and then -- qui is shortest QU_PRON
|
|
((Q_WORD(Q_WORD'FIRST..Q_WORD'FIRST+1) = "qu") or
|
|
(Q_WORD(Q_WORD'FIRST..Q_WORD'FIRST+1) = "cu")) then
|
|
if Q_WORD(Q_WORD'FIRST..Q_WORD'FIRST+1) = "qu" then
|
|
QKEY := 1;
|
|
PROCESS_QU_PRONOUNS(Q_WORD, QKEY);
|
|
elsif Q_WORD(Q_WORD'FIRST..Q_WORD'FIRST+1) = "cu" then
|
|
QKEY := 2;
|
|
PROCESS_QU_PRONOUNS(Q_WORD, QKEY);
|
|
end if;
|
|
if PA_LAST <= PA_QSTART + 1 and then
|
|
QKEY > 0 then -- If did not find a PACKON
|
|
if Q_WORD(Q_WORD'FIRST..Q_WORD'FIRST+1) = "qu" then
|
|
PROCESS_PACKONS(Q_WORD, QKEY);
|
|
elsif Q_WORD(Q_WORD'FIRST..Q_WORD'FIRST+1) = "cu" then
|
|
PROCESS_PACKONS(Q_WORD, QKEY);
|
|
end if;
|
|
else
|
|
exit;
|
|
end if;
|
|
if PA_LAST > PA_QSTART + 1 then
|
|
exit;
|
|
end if;
|
|
|
|
|
|
elsif INPUT_WORD'LENGTH >= 6 then -- aliqui as aliQU_PRON
|
|
if INPUT_WORD(INPUT_WORD'FIRST..INPUT_WORD'FIRST+4) = "aliqu" then
|
|
PROCESS_QU_PRONOUNS(INPUT_WORD, 1);
|
|
elsif INPUT_WORD(INPUT_WORD'FIRST..INPUT_WORD'FIRST+4) = "alicu" then
|
|
PROCESS_QU_PRONOUNS(INPUT_WORD, 2);
|
|
|
|
end if;
|
|
|
|
end if;
|
|
|
|
|
|
|
|
if PA_LAST = PA_START + 1 then -- Nothing found
|
|
PA_LAST := PA_START; -- Reset PA_LAST
|
|
else
|
|
exit;
|
|
end if;
|
|
|
|
|
|
end if;
|
|
end;
|
|
end loop;
|
|
|
|
WORDS_MODE := SAVED_MODE_ARRAY;
|
|
|
|
|
|
|
|
exception
|
|
when others =>
|
|
WORDS_MODE := SAVED_MODE_ARRAY;
|
|
end QU;
|
|
|
|
--==========================================================
|
|
RUN_INFLECTIONS(INPUT_WORD, SS);
|
|
PRUNE_STEMS(INPUT_WORD, SS, SSS);
|
|
--TEXT_IO.PUT_LINE("After PRUNE INPUT = " & INPUT_WORD & " PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
if SSS(1) /= NULL_PARSE_RECORD then
|
|
ORDER_STEMS(SSS);
|
|
ARRAY_STEMS(SSS, PA, PA_LAST);
|
|
SSS(1) := NULL_PARSE_RECORD;
|
|
end if;
|
|
--TEXT_IO.PUT_LINE("After ARRAY INPUT = " & INPUT_WORD & " PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
--==========================================================
|
|
|
|
|
|
|
|
|
|
if PA_LAST = PA_SAVE then
|
|
TRY_TACKONS(INPUT_WORD);
|
|
end if;
|
|
|
|
|
|
--TEXT_IO.PUT_LINE("Out WORD INPUT = " & INPUT_WORD & " PA_LAST = " & INTEGER'IMAGE(PA_LAST));
|
|
|
|
--TEXT_IO.SET_OUTPUT(TEXT_IO.STANDARD_OUTPUT);
|
|
|
|
exception
|
|
when STORAGE_ERROR =>
|
|
TEXT_IO.PUT_LINE(TEXT_IO.STANDARD_OUTPUT,
|
|
"STORAGE_ERROR exception in WORD while processing =>"
|
|
& RAW_WORD);
|
|
PA_LAST := PA_SAVE;
|
|
if WORDS_MODE(WRITE_UNKNOWNS_TO_FILE) then
|
|
TEXT_IO.PUT(UNKNOWNS, RAW_WORD);
|
|
TEXT_IO.SET_COL(UNKNOWNS, 21);
|
|
TEXT_IO.PUT_LINE(UNKNOWNS, "======== STORAGE_ERROR ");
|
|
end if;
|
|
when others =>
|
|
if WORDS_MODE(WRITE_UNKNOWNS_TO_FILE) then
|
|
TEXT_IO.PUT(UNKNOWNS, RAW_WORD);
|
|
TEXT_IO.SET_COL(UNKNOWNS, 21);
|
|
TEXT_IO.PUT_LINE(UNKNOWNS, "======== ERROR ");
|
|
end if;
|
|
PA_LAST := PA_SAVE;
|
|
end WORD;
|
|
|
|
|
|
procedure INITIALIZE_WORD_PACKAGE is
|
|
begin -- Initializing WORD_PACKAGE
|
|
|
|
|
|
ESTABLISH_INFLECTIONS_SECTION;
|
|
|
|
LEL_SECTION_IO.OPEN(INFLECTIONS_SECTIONS_FILE, LEL_SECTION_IO.IN_FILE,
|
|
INFLECTIONS_SECTIONS_NAME);
|
|
|
|
|
|
TRY_TO_LOAD_DICTIONARY(GENERAL);
|
|
|
|
TRY_TO_LOAD_DICTIONARY(SPECIAL);
|
|
|
|
|
|
LOAD_LOCAL:
|
|
begin
|
|
-- First check if there is a LOC dictionary
|
|
CHECK_FOR_LOCAL_DICTIONARY:
|
|
declare
|
|
DUMMY : TEXT_IO.FILE_TYPE;
|
|
begin
|
|
TEXT_IO.OPEN(DUMMY, TEXT_IO.IN_FILE,
|
|
ADD_FILE_NAME_EXTENSION(DICTIONARY_FILE_NAME,
|
|
"LOCAL"));
|
|
-- Failure to OPEN will raise an exception, to be handled below
|
|
TEXT_IO.CLOSE(DUMMY);
|
|
end CHECK_FOR_LOCAL_DICTIONARY;
|
|
-- If the above does not exception out, we can load LOC
|
|
PREFACE.PUT("LOCAL ");
|
|
DICT_LOC := NULL_DICTIONARY;
|
|
LOAD_DICTIONARY(DICT_LOC,
|
|
ADD_FILE_NAME_EXTENSION(DICTIONARY_FILE_NAME, "LOCAL"));
|
|
-- Need to carry LOC through consistently on LOAD_D and LOAD_D_FILE
|
|
LOAD_STEM_FILE(LOCAL);
|
|
DICTIONARY_AVAILABLE(LOCAL) := TRUE;
|
|
exception
|
|
when others =>
|
|
DICTIONARY_AVAILABLE(LOCAL) := FALSE;
|
|
end LOAD_LOCAL;
|
|
|
|
LOAD_UNIQUES(UNQ, UNIQUES_FULL_NAME);
|
|
|
|
LOAD_ADDONS(ADDONS_FULL_NAME);
|
|
--TEXT_IO.PUT_LINE("Loaded ADDONS");
|
|
LOAD_BDL_FROM_DISK;
|
|
--TEXT_IO.PUT_LINE("BDL loaded");
|
|
if not (DICTIONARY_AVAILABLE(GENERAL) or
|
|
DICTIONARY_AVAILABLE(SPECIAL) or
|
|
DICTIONARY_AVAILABLE(LOCAL)) then
|
|
PREFACE.PUT_LINE("There are no main dictionaries - program will not do much");
|
|
PREFACE.PUT_LINE("Check that there are dictionary files in this subdirectory");
|
|
PREFACE.PUT_LINE("Except DICT.LOC that means DICTFILE, INDXFILE, STEMFILE");
|
|
end if;
|
|
|
|
--TEXT_IO.PUT_LINE("Ready to load English");
|
|
|
|
TRY_TO_LOAD_ENGLISH_WORDS:
|
|
begin
|
|
ENGLISH_DICTIONARY_AVAILABLE(GENERAL) := FALSE;
|
|
EWDS_DIRECT_IO.OPEN(EWDS_FILE, EWDS_DIRECT_IO.IN_FILE, "EWDSFILE.GEN");
|
|
|
|
ENGLISH_DICTIONARY_AVAILABLE(GENERAL) := TRUE;
|
|
|
|
exception
|
|
when others =>
|
|
PREFACE.PUT_LINE("No English available");
|
|
ENGLISH_DICTIONARY_AVAILABLE(GENERAL) := FALSE;
|
|
end TRY_TO_LOAD_ENGLISH_WORDS;
|
|
|
|
|
|
|
|
--put_line("WORD_PACKAGE INITIALIZED");
|
|
end INITIALIZE_WORD_PACKAGE;
|
|
|
|
end WORD_PACKAGE;
|