_Database Developer_
by Ken North

/****************************************************************************
* FILE NAME:  metphone.java     TITLE: Metaphone phonetic text encoding
*
* AUTHOR:  K. E. North II, Ken North Computing
*
*    from Database Magic with Java (Prentice-Hall PTR, 1997)
****************************************************************************
*
* Copyright (c) Kendall E. North II, 1997. All rights reserved. Reproduction
* or translation of this work beyond that permitted in Section 117 of the
* United States Copyright Act without express written permission of the
* copyright owner is unlawful. The purchaser may make backup copies for
* his/her own use only and not for distribution or resale. The Author and
* Publisher assume no responsibility for errors, omissions or damages caused by
* the use of these programs or from use of the information contained herein.
****************************************************************************
*   Synopsis:
*
*   Java application executed by the Java Virtual Machine. Executes a method that
*   implements a modified Philips Metaphone algorithm. To encode phonetically,
*   this version checks the character following "sch" to decide whether
*   to use a hard or soft consonant. 
*
*   Encodes input strings to four characters. To generate a longer phonetic
*   string, change encodeLimit. To run, use this command line:
*
*           java -classpath <your path> MetaphoneEncoder
*
*           Type in a last name or other string to encode.
*
****************************************************************************/

import java.io.DataInputStream;

/**
 * Holds a string and produces its phonetic key using a modified Metaphone
 * encoding.
 *
 * The key consists of upper-case letters plus the digit '0', which stands
 * for the "TH" sound. Vowels are kept only when they start the string,
 * adjacent repeated characters are encoded once, and characters that are
 * not upper-case A-Z after conversion contribute nothing to the key.
 */
class PhoneticString {

    /** The text supplied to the constructor, stored unchanged. */
    String          inputString;

    /** Not referenced by any code in this class; retained for compatibility. */
    StringBuffer    mString;

    /**
     * Creates an encoder for the given text.
     *
     * @param inString text to encode; must not be null
     * @throws NullPointerException if inString is null
     */
    PhoneticString (String  inString)  {
        if (inString == null) {
            throw new NullPointerException("inString must not be null");
        }
        inputString = inString;
    }

    /**
     * Encodes the string passed to the constructor.
     *
     * @return a new buffer holding the phonetic key (possibly empty)
     */
    public StringBuffer encodedString()    {
        return Metafon4(inputString);
    }

    /**
     * Tests whether a character is one of the upper-case vowels A, E, I, O, U.
     *
     * @param charToTest character to test
     * @return true for 'A', 'E', 'I', 'O' or 'U'; false for anything else,
     *         including 'Y' and lower-case letters
     */
    public static boolean isVowel (char charToTest)    {
        return "AEIOU".indexOf(charToTest) >= 0;
    }

    /**
     * Computes the phonetic key of a string.
     *
     * The input is upper-cased with a fixed locale so that the result does
     * not depend on the platform's default locale. The key is never longer
     * than encodeLimit characters; to generate a longer key, change
     * encodeLimit.
     *
     * @param InputString text to encode; must not be null
     * @return a new buffer holding at most encodeLimit characters
     * @throws NullPointerException if InputString is null
     */
    public StringBuffer Metafon4(String InputString)    {
        int encodeLimit = 4;
        StringBuffer code = new StringBuffer();
        String text = InputString.toUpperCase(java.util.Locale.ENGLISH);
        int last = text.length() - 1;

        for (int pos = 0; pos <= last && code.length() < encodeLimit; pos++) {
            // A character equal to its immediate predecessor is encoded once.
            if (pos > 0 && text.charAt(pos) == text.charAt(pos - 1)) {
                continue;
            }
            // The rule reports how many following characters it consumed.
            pos += encodeAt(text, pos, last, code);
        }

        // "SK" and "KS" add two characters at once and can overshoot the limit.
        if (code.length() > encodeLimit) {
            code.setLength(encodeLimit);
        }
        return code;
    }

    /**
     * Applies the rule for the character at pos and appends its code, if any.
     *
     * @return number of characters after pos that the rule consumed
     */
    private static int encodeAt(String text, int pos, int last, StringBuffer out) {
        char current = text.charAt(pos);
        switch (current) {
            case 'A':
                // "AE" is encoded as E wherever it occurs.
                if (pos < last && text.charAt(pos + 1) == 'E') {
                    out.append('E');
                    return 1;
                }
                if (pos == 0) {
                    out.append('A');
                }
                return 0;

            case 'E':
            case 'I':
            case 'O':
            case 'U':
                // Other vowels are kept only as the first character.
                if (pos == 0) {
                    out.append(current);
                }
                return 0;

            case 'F':
            case 'J':
            case 'L':
            case 'M':
            case 'N':
            case 'R':
                out.append(current);
                return 0;

            case 'B':
                return encodeB(text, pos, last, out);
            case 'C':
                return encodeC(text, pos, last, out);
            case 'D':
                return encodeD(text, pos, out);
            case 'G':
                return encodeG(text, pos, last, out);
            case 'H':
                return encodeH(text, pos, last, out);
            case 'K':
                return encodeK(text, pos, last, out);
            case 'P':
                return encodeP(text, pos, last, out);
            case 'S':
                return encodeS(text, pos, last, out);
            case 'T':
                return encodeT(text, pos, last, out);
            case 'W':
                return encodeW(text, pos, last, out);

            case 'Q':
                out.append('K');
                return 0;

            case 'V':
                out.append('F');
                return 0;

            case 'X':
                if (pos == 0) {
                    out.append('S');
                } else {
                    out.append("KS");
                }
                return 0;

            case 'Y':
                if (pos < last && isVowel(text.charAt(pos + 1))) {
                    out.append('Y');
                }
                return 0;

            case 'Z':
                out.append('S');
                return 0;

            default:
                // Anything else contributes nothing to the key.
                return 0;
        }
    }

    /** B: silent only as the final character directly after M. */
    private static int encodeB(String text, int pos, int last, StringBuffer out) {
        // The pos > 0 guard keeps a lone "B" from reading before the string.
        boolean silent = pos == last && pos > 0 && text.charAt(pos - 1) == 'M';
        if (!silent) {
            out.append('B');
        }
        return 0;
    }

    /** C: silent in SCE/SCI/SCY, X for CIA and CH, S before E/I/Y, else K. */
    private static int encodeC(String text, int pos, int last, StringBuffer out) {
        if (pos == last) {
            out.append('K');
            return 0;
        }
        char next = text.charAt(pos + 1);
        boolean soft = next == 'E' || next == 'I' || next == 'Y';
        if (soft && pos > 0 && text.charAt(pos - 1) == 'S') {
            return 1;
        }
        // CIA must be tested before the plain "C before I" rule.
        if (next == 'H' || text.startsWith("IA", pos + 1)) {
            out.append('X');
            return 1;
        }
        if (soft) {
            out.append('S');
            return 1;
        }
        out.append('K');
        return 0;
    }

    /** D: J for DGE/DGI/DGY (consuming the G and the vowel), else T. */
    private static int encodeD(String text, int pos, StringBuffer out) {
        if (text.startsWith("GE", pos + 1)
                || text.startsWith("GI", pos + 1)
                || text.startsWith("GY", pos + 1)) {
            out.append('J');
            return 2;
        }
        out.append('T');
        return 0;
    }

    /** G: handles non-final GH, GN, and G before I/E/Y; otherwise K. */
    private static int encodeG(String text, int pos, int last, StringBuffer out) {
        if (pos < last) {
            char next = text.charAt(pos + 1);
            if (next == 'H' && pos + 1 < last) {
                // GH is K before a vowel and silent before anything else.
                if (isVowel(text.charAt(pos + 2))) {
                    out.append('K');
                }
                return 1;
            }
            if (next == 'N') {
                out.append('N');
                return 1;
            }
            if (next == 'I' || next == 'E' || next == 'Y') {
                out.append('J');
                return 0;
            }
        }
        out.append('K');
        return 0;
    }

    /** H: kept before a vowel, or whenever it follows a non-vowel. */
    private static int encodeH(String text, int pos, int last, StringBuffer out) {
        // The pos < last guard keeps a lone or final "H" from reading past the end.
        boolean vowelFollows = pos < last && isVowel(text.charAt(pos + 1));
        boolean afterNonVowel = pos > 0 && !isVowel(text.charAt(pos - 1));
        if (vowelFollows || afterNonVowel) {
            out.append('H');
        }
        return 0;
    }

    /** K: N for KN, silent after C, else K. */
    private static int encodeK(String text, int pos, int last, StringBuffer out) {
        if (pos < last && text.charAt(pos + 1) == 'N') {
            out.append('N');
            return 1;
        }
        if (pos > 0 && text.charAt(pos - 1) == 'C') {
            return 0;
        }
        out.append('K');
        return 0;
    }

    /** P: F for PH, N for PN, else P. */
    private static int encodeP(String text, int pos, int last, StringBuffer out) {
        if (pos < last) {
            char next = text.charAt(pos + 1);
            if (next == 'H') {
                out.append('F');
                return 1;
            }
            if (next == 'N') {
                out.append('N');
                return 1;
            }
        }
        out.append('P');
        return 0;
    }

    /** S: SK for SCHE/SCHI/SCHO, X for other SCH, SIA, SIO and SH, else S. */
    private static int encodeS(String text, int pos, int last, StringBuffer out) {
        // Hard "sch" when followed by E, I or O.
        if (text.startsWith("SCHE", pos)
                || text.startsWith("SCHI", pos)
                || text.startsWith("SCHO", pos)) {
            out.append("SK");
            return 2;
        }
        // Any other "sch" is soft.
        if (text.startsWith("SCH", pos)) {
            out.append('X');
            return 2;
        }
        if (text.startsWith("IA", pos + 1) || text.startsWith("IO", pos + 1)) {
            out.append('X');
            return 1;
        }
        if (pos < last && text.charAt(pos + 1) == 'H') {
            out.append('X');
            return 1;
        }
        out.append('S');
        return 0;
    }

    /** T: X for non-initial TIA/TIO, silent in TCH, '0' for TH, else T. */
    private static int encodeT(String text, int pos, int last, StringBuffer out) {
        if (pos > 0
                && (text.startsWith("IA", pos + 1) || text.startsWith("IO", pos + 1))) {
            out.append('X');
            return 1;
        }
        if (text.startsWith("CH", pos + 1)) {
            // The following CH supplies the code.
            return 0;
        }
        if (pos < last && text.charAt(pos + 1) == 'H') {
            out.append('0');
            return 1;
        }
        out.append('T');
        return 0;
    }

    /** W: R for WR, W for WH or before a vowel, otherwise silent. */
    private static int encodeW(String text, int pos, int last, StringBuffer out) {
        if (pos < last) {
            char next = text.charAt(pos + 1);
            if (next == 'R') {
                out.append('R');
                return 1;
            }
            if (next == 'H') {
                out.append('W');
                return 1;
            }
            if (isVowel(next)) {
                out.append('W');
            }
        }
        return 0;
    }
}

/********************************************************************/
/*					Metaphone Encoder								*/
/********************************************************************/


/**
 * Console driver: repeatedly prompts for a string on standard input and
 * prints its Metaphone value until input ends or a read error occurs.
 */
class MetaphoneEncoder    {

    /** The most recently encoded string. */
    PhoneticString    phonString;

    /** Cleared when standard input is exhausted or cannot be read. */
    boolean getMore = true;

    /**
     * Reader over standard input, created on first use and then reused:
     * a buffered reader may read ahead, so a new one per prompt could
     * lose input that an earlier one had already buffered.
     */
    private java.io.BufferedReader console;

    /**
     * Runs the prompt/encode/print loop until getMore is cleared.
     */
    void encodeString () {
        while (getMore) {
            String input = getStringInput();
            if (!getMore) {
                // End of input or read failure: there is nothing to encode.
                break;
            }
            phonString = new PhoneticString(input);
            showMetaphoneValue(phonString.encodedString());
        }
    }

    /**
     * Prompts for and reads one line from standard input.
     *
     * @return the line read; an empty string at end of input, or
     *         "Error Getting Input" if reading fails. In both of those
     *         cases getMore is set to false. Never returns null.
     */
    String getStringInput () {
        String stringToEncode;

        if (console == null) {
            console = new java.io.BufferedReader(
                    new java.io.InputStreamReader(System.in));
        }

        System.out.println("Enter string to phonetically encode:");
        try {
            stringToEncode = console.readLine();
            if (stringToEncode == null) {
                // readLine() returns null once the stream is exhausted.
                stringToEncode = "";
                getMore = false;
            }
        } catch (java.io.IOException ex) {
            stringToEncode = "Error Getting Input";
            getMore = false;
        }
        return stringToEncode;
    }

    /**
     * Prints an encoded value to standard output.
     *
     * @param MetaphonString the encoded value to print
     */
    void showMetaphoneValue (StringBuffer MetaphonString) {
        System.out.print (" Metaphone value is = "+ MetaphonString);
        System.out.print(" \n");
    }

    /**
     * Program entry point; command-line arguments are ignored.
     */
    public static void main(String argv[]) {
        MetaphoneEncoder mf = new MetaphoneEncoder();
        mf.encodeString();
    }
}

