-- state_extract(addressStringLessZipCode) -- Extracts the state from end of the given string. -- -- This function uses the state_lookup table to determine which state -- the input string is indicating. First, an exact match is pursued, -- and in the event of failure, a word-by-word fuzzy match is attempted. -- -- The result is the state as given in the input string, and the approved -- state abbreviation, seperated by a colon. CREATE OR REPLACE FUNCTION state_extract(rawInput VARCHAR) RETURNS VARCHAR AS $_$ DECLARE tempInt INTEGER; tempString VARCHAR; state VARCHAR; stateAbbrev VARCHAR; result VARCHAR; rec RECORD; test BOOLEAN; ws VARCHAR; BEGIN ws := E'[ ,.\t\n\f\r]'; -- Separate out the last word of the state, and use it to compare to -- the state lookup table to determine the entire name, as well as the -- abbreviation associated with it. The zip code may or may not have -- been found. tempString := substring(rawInput from ws || E'+([^ ,.\t\n\f\r0-9]*?)$'); SELECT INTO tempInt count(*) FROM (select distinct abbrev from state_lookup WHERE upper(abbrev) = upper(tempString)) as blah; IF tempInt = 1 THEN state := tempString; SELECT INTO stateAbbrev abbrev FROM (select distinct abbrev from state_lookup WHERE upper(abbrev) = upper(tempString)) as blah; ELSE SELECT INTO tempInt count(*) FROM state_lookup WHERE upper(name) like upper('%' || tempString); IF tempInt >= 1 THEN FOR rec IN SELECT name from state_lookup WHERE upper(name) like upper('%' || tempString) LOOP SELECT INTO test texticregexeq(rawInput, name) FROM state_lookup WHERE rec.name = name; IF test THEN SELECT INTO stateAbbrev abbrev FROM state_lookup WHERE rec.name = name; state := substring(rawInput, '(?i)' || rec.name); EXIT; END IF; END LOOP; ELSE -- No direct match for state, so perform fuzzy match. SELECT INTO tempInt count(*) FROM state_lookup WHERE soundex(tempString) = end_soundex(name); IF tempInt >= 1 THEN FOR rec IN SELECT name, abbrev FROM state_lookup WHERE soundex(tempString) = end_soundex(name) LOOP tempInt := count_words(rec.name); tempString := get_last_words(rawInput, tempInt); test := TRUE; FOR i IN 1..tempInt LOOP IF soundex(split_part(tempString, ' ', i)) != soundex(split_part(rec.name, ' ', i)) THEN test := FALSE; END IF; END LOOP; IF test THEN state := tempString; stateAbbrev := rec.abbrev; EXIT; END IF; END LOOP; END IF; END IF; END IF; IF state IS NOT NULL AND stateAbbrev IS NOT NULL THEN result := state || ':' || stateAbbrev; END IF; RETURN result; END; $_$ LANGUAGE plpgsql;