Improving Digital Library Search Precision Using Google Desktop
by Lawrence Reeve


Listing One: 

<record> 
   <header> 
      <identifier>oai:CiteSeerPSU:1</identifier>
      <datestamp>1993-08-11</datestamp> 
      <setSpec>CiteSeerPSUset</setSpec> 
   </header> 
   <metadata> 
      <oai_citeseer:oai_citeseer 
         xmlns:oai_citeseer="http://copper.ist.psu.edu/oai/oai_citeseer/" 
         xmlns:dc ="http://purl.org/dc/elements/1.1/"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
         xsi:schemaLocation="http://copper.ist.psu.edu/oai/oai_citeseer/ 
         http://copper.ist.psu.edu/oai/oai_citeseer.xsd">
         <dc:title>36 Problems for Semantic Interpretation</dc:title>
         <oai_citeseer:author name="Gabriele Scheler"> 
            <address>80290 Munchen I Germany</address> 
            <affiliation>Institut fur Informatik; 
                              Technische Universitat Munchen</affiliation> 
         </oai_citeseer:author> 
         <dc:subject>Gabriele Scheler 36 Problems 
                                  for Semantic Interpretation</dc:subject> 
         <dc:description>This paper presents...</dc:description> 
         <dc:contributor>The Pennsylvania State University CiteSeer 
                                                 Archives</dc:contributor>
         <dc:publisher>unknown</dc:publisher> 
         <dc:date>1993-08-11</dc:date> 
         <dc:format>ps</dc:format> 
         <dc:identifier>http://citeseer.ist.psu.edu/1.html</dc:identifier>
         <dc:source>ftp://flop.informatik.tu-muenchen.de/
                                      pub/fki/fki-179-93.ps.gz</dc:source> 
         <dc:language>en</dc:language>
         <dc:rights>unrestricted</dc:rights> 
         </oai_citeseer:oai_citeseer> 
   </metadata> 
</record> 


Listing Two: 


// Read the entire file and parse it as one XML document. The content is
// wrapped in a synthetic <records> element so the parser sees a single root
// element (presumably the file holds several sibling <record> elements --
// confirm against the actual data file).
XmlDocument  xmlDoc = new XmlDocument();
using (StreamReader xmlStream = new StreamReader(filename))
{
    // The using block closes the file even if ReadToEnd or LoadXml throws.
    xmlDoc.LoadXml("<records>" + xmlStream.ReadToEnd() + "</records>");
}


Listing Three: 

// Map each prefix to its namespace URI. Every URI is kept in a single string
// literal: a regular C# string literal cannot span lines, and any stray
// whitespace inside the URI would stop it from matching the xmlns
// declarations used in the records.
XmlNamespaceManager nsmgr = new XmlNamespaceManager(xmlDoc.NameTable);
nsmgr.AddNamespace("oai_citeseer",
                   "http://copper.ist.psu.edu/oai/oai_citeseer/");
nsmgr.AddNamespace("dc",
                   "http://purl.org/dc/elements/1.1/");


Listing Four: 

using System;
using System.Security.Cryptography;
using System.Text;

namespace gdsCiteSeer
{
    /// <summary>
    /// Static helpers that turn raw metadata values into normalized
    /// "prefix:value" field strings and into SHA-1 hex digests.
    /// </summary>
    public class FieldBuilder
    {
        /// <summary>
        /// Selects which kind of field <see cref="BuildField"/> produces.
        /// </summary>
        public enum EFieldNames 
        {
            FirstName,
            LastName,
            FullName,
            PubMonth,
            PubYear
        }

        /// <summary>
        /// Hashes a field string with SHA-1 and returns the digest as
        /// upper-case hexadecimal text without separators.
        /// </summary>
        /// <param name="field">Text to hash; it is encoded as UTF-8 first.</param>
        /// <returns>
        /// The 40-character hex digest, or <see cref="String.Empty"/> when
        /// <paramref name="field"/> is null or empty.
        /// </returns>
        public static string EncodeField(string field)
        {
            if (field == null || field.Length == 0)
            {
                return String.Empty;
            }

            byte[] dataToEncode = Encoding.UTF8.GetBytes(field);
            byte[] hashResult;

            // Hash algorithms are disposable; release the instance
            // deterministically instead of leaving it to the finalizer.
            using (SHA1 hasher = SHA1.Create())
            {
                hashResult = hasher.ComputeHash(dataToEncode);
            }

            // Note: BitConverter.ToString separates hex values with dashes,
            // so each byte is formatted individually instead.
            StringBuilder encodedField = new StringBuilder(hashResult.Length * 2);
            for (int idx = 0; idx < hashResult.Length; idx++)
            {
                encodedField.Append(hashResult[idx].ToString("X2"));
            }
            return encodedField.ToString();
        }

        /// <summary>
        /// Builds a "prefix:value" field string for the given field kind.
        /// </summary>
        /// <param name="fieldName">Kind of field to build.</param>
        /// <param name="fieldValue">
        /// Raw value; surrounding whitespace is trimmed. A null value is
        /// treated like an empty string.
        /// </param>
        /// <returns>
        /// "first:", "last:" or "full:" followed by the lower-cased value;
        /// "pubmonth:" followed by exactly 2 characters; "pubyear:" followed
        /// by exactly 4 characters (longer values are cut, shorter ones are
        /// left-padded with '0'); or <see cref="String.Empty"/> for an
        /// unrecognized <paramref name="fieldName"/>.
        /// </returns>
        public static string BuildField(
            EFieldNames fieldName, 
            string fieldValue)
        {
            // Tolerate null the same way EncodeField does instead of failing
            // with a NullReferenceException.
            string trimmedValue =
                (fieldValue == null) ? String.Empty : fieldValue.Trim();

            switch (fieldName)
            {
                case EFieldNames.FirstName:
                    return "first:" + LowerInvariant(trimmedValue);

                case EFieldNames.LastName:
                    return "last:" + LowerInvariant(trimmedValue);

                case EFieldNames.FullName:
                    return "full:" + LowerInvariant(trimmedValue);

                case EFieldNames.PubMonth:
                    return "pubmonth:" + FitToWidth(trimmedValue, 2);

                case EFieldNames.PubYear:
                    return "pubyear:" + FitToWidth(trimmedValue, 4);

                default:
                    return String.Empty;
            }
        }

        /// <summary>
        /// Lower-cases text with the invariant culture so the result does not
        /// depend on the machine's regional settings.
        /// </summary>
        private static string LowerInvariant(string value)
        {
            return value.ToLower(
                System.Globalization.CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Forces text to an exact width: keeps the leading characters when it
        /// is too long, left-pads with '0' when it is too short.
        /// </summary>
        private static string FitToWidth(string value, int width)
        {
            if (value.Length > width)
            {
                return value.Substring(0, width);
            }
            // PadLeft returns the value unchanged when it is already wide enough.
            return value.PadLeft(width, '0');
        }
    }
}


3


