XML Programming in Python
by Sean McGrath

Listing One
<!-- A snippet of an HTML document containing "car for sale" information -->
<h1>Toyota</h1>
<li>
<ul>Price:10000 Dollars
<ul>Condition:Good
<ul>Color:Red
</li>

Listing Two
<!-- A snippet of an XML document containing "cars for sale" information -->
<CarsForSale>
  <Car Price = "10000" Units = "Dollars">
    <Maker>Toyota</Maker>
    <Condition Type = "Good"/>
    <Color>Red</Color>
  </Car>
  <Car Price = "20000" Units = "Irish Punts">
    <Maker>Ford</Maker>
    <Condition Type = "Good"/>
    <Color>White</Color>
  </Car>
</CarsForSale>

Listing Three
<!-- This is a snippet of an XML Document Type Definition (DTD) -->
<!-- Define an element type CarsForSale. Contains one or more Car elements-->
<!ELEMENT CarsForSale (Car)+>

<!-- A Car consists of a Maker element , an optional Condition element and
a color element -->
<!ELEMENT Car (Maker,Condition?,Color)>

<!-- A Car has two associated attributes - price and units. They contain 
character data and both are required - i.e. a document must supply them for
each Car element -->
<!ATTLIST Car
   Price CDATA #REQUIRED
    Units CDATA #REQUIRED>

<!-- Maker and Color elements consist of text -->
<!ELEMENT Maker (#PCDATA)>
<!ELEMENT Color (#PCDATA)>

<!-- Condition element does not have any content it is an EMPTY element -->
<!-- It has a "Type" attribute which can be one of Excellent, Good or Bad -->
<!ELEMENT Condition EMPTY>
<!ATTLIST Condition
    Type (Excellent|Good|Bad) #REQUIRED>

Listing Four
# Base class. Instances start with no variables of their own; one is
# created the first time a method assigns through self.
class foo:
    def bar(self):
        # Creates (or resets) the instance variable baz.
        self.baz = 1


# An instance of the base class.
f = foo()


# Derived class: inherits bar() from foo and adds bar1().
class foo1(foo):
    def bar1(self):
        # Explicitly invoke the superclass method on this instance ...
        foo.bar(self)
        # ... then create a second instance variable.
        self.baz1 = 2


# An instance of the derived class.
f1 = foo1()

Listing Five
>>>f1 = foo1()       # Declare f1
>>>print f1.__dict__    # print instance variables - Empty
{}
>>>f1.bar()         # Call the bar method - baz variable created
>>>print f1.__dict__
{'baz': 1}
>>>f1.bar1()     # Call bar1 method. Calls foo.bar thus baz1 variable created
>>>print f1.__dict__
{'baz': 1, 'baz1': 2}

Listing Six
>>>x = "Hello World"
>>>print x[-4]   # print 4th character from the end
o
>>>print x[2:4]  # print substring starting at offset 2 ending before offset 4
ll
>>>print x[:-1]  # All except last character
Hello Worl

>>>x = ["Hello World", 42, ['foo','bar']]
>>>len(x)
3
>>>x[:-1]       # Slicing works with lists too
['Hello World',42]
>>>x = {"Hello":"World", "World":[1,2,[2.1,2.2]]} # An associative array
>>>y = x["World"]
>>>print y
[1,2,[2.1,2.2]]
>>>y.reverse()      # Reverse list y in situ
>>>print y
[[2.1, 2.2], 2, 1]

Listing Seven
>>>import sys,types
>>>x = [1,2,3,4]        # x is a flat list of 4 numbers
>>>y = map (lambda e:e*e,x)     # y contains the squares of each element of x
>>>print y
[1,4,9,16]

>>>x = [1,"Hello",[2,3]]    # x is a more complex list
>>>y = filter (lambda e:type(e)==type(''),x) # y is x, filtered to string 
                                             #               elements only
>>>print y
['Hello']

Listing Eight
C>type foo.py
# Demonstrates three of Python's special methods: the constructor
# (__init__), the destructor (__del__) and string conversion (__repr__).
class foo:
        "This is some documentation on foo"
        def __init__(self):
                # Constructor: runs when foo() is called
                print "foo constructor called"
                self.x = 1
        def __del__(self):
                # foo class destructor
                print "foo destructor called"
                pass                # Do nothing
        def __repr__(self):         # Return a string representation
                return "A foo object"

f = foo()   # Causes the foo constructor to be called
print f     # Causes the __repr__ method to be called
del f       # Causes the destructor to be called (f was the only reference)

C>python foo.py
foo constructor called
A foo object
foo destructor called
Listing Nine
# Block structure is expressed by indentation alone: the else below lines
# up with the first if, so it belongs to that if and not to the nested one.
if x == y:
    if y == z:
        print y
else:
    print x # Associated with outermost if by virtue of indentation

Listing Ten
<!DOCTYPE CarsForSale [
<!ELEMENT CarsForSale (Car)+>
<!ELEMENT Car (Maker,Condition?,Color)>
<!ATTLIST Car
    Price NUMBER #REQUIRED
    Units CDATA "DOLLARS">
<!ELEMENT Maker (#PCDATA)>
<!ELEMENT Condition EMPTY>
<!ATTLIST Condition
   Type (Excellent|Good|Bad) #REQUIRED>
<!ELEMENT Color (#PCDATA)>
]>
<CarsForSale>
<Car Price = "10000" Units = "Dollars">
<Maker>Toyota</Maker>
<Condition Type = "Good"/>
<Color>Red</Color>
</Car>
</CarsForSale>


Listing Eleven
(CarsForSale
APrice CDATA 10000
AUnits CDATA Dollars
(Car
(Maker
-Toyota
)Maker
AType TOKEN Good
e
(Condition
)Condition
(Color
-Red
)Color
)Car
)CarsForSale
C


Listing Twelve
# Virtual base class for nodes in an XML tree (XMLTree)
class XMLNode:
    # Constructor
    def __init__(self):
        # A new node is not linked to anything yet. The four links are:
        self.Top = None       # parent node
        self.Bottom = None    # first child node
        self.Left = None      # previous sibling
        self.Right = None     # next sibling

# An XMLElementNode represents an XML element in an XML Tree
class XMLElementNode(XMLNode):
    def __init__(self,gi,EmptyElement=0):
        """Create an element node.

        gi           -- the element (tag) name
        EmptyElement -- 1 for elements written as "<foo/>", 0 otherwise
        """
        # Call superclass constructor
        XMLNode.__init__(self)
        self.gi = gi                      # gi = Element (tag) name
        self.attributes = {}              # Empty associative array
        self.EmptyElement = EmptyElement  # Boolean. 1 for elements like "<foo/>"

    def AddAttribute (self,name,value):
        """Set attribute name to value, replacing any previous value."""
        self.attributes [name] = value

    def __repr__(self):
        """Return this element as XML text.

        The result includes the element's children and - because the walk
        is recursive - every sibling to its right as well.
        """
        res = "<" + self.gi                 # Start of start-tag
        for (name,value) in self.attributes.items():    # Attributes
            res = res + ' %s = "%s"' % (name,value)
        # The start-tag is closed once, after all attributes are written.
        if self.EmptyElement == 1:          # End of start-tag
            res = res + "/>"
        else:
            res = res + ">"
        if self.Bottom:                     # traverse children
            res = res + repr(self.Bottom)
        if self.EmptyElement == 0:          # End-tag if required
            res = res + "</%s>" % self.gi
        if self.Right:                      # traverse right siblings
            res = res + repr(self.Right)
        return res
# An XMLDataNode represents data content in an XML Tree
class XMLDataNode(XMLNode):
    def __init__(self,datastr):
        """Create a data node holding the character data datastr."""
        # Call superclass constructor so the four node links exist
        XMLNode.__init__(self)
        self.datastr = datastr

    def __repr__(self):
        """Return the character data unchanged."""
        return self.datastr
# An XMLTree contains a root which is a reference to first node in tree
# It maintains a current position in tree in Position instance variable
class XMLTree:
    """A tree of nodes plus a movable "current position".

    root     -- dummy XMLElementNode named "?ROOT?"; the document hangs below it
    Position -- the node on which the Add*, Move*, At* and Get* methods act
    """

    def __init__(self):
        # Tree starts out with a Dummy node
        self.root = XMLElementNode("?ROOT?")
        self.Position = self.root

    def __repr__(self):
        """Return the text of everything below the dummy root."""
        return repr(self.root.Bottom)

    # Add the specified node below current position
    def AddBelow(self,Node):
        self.Position.Bottom = Node
        Node.Top = self.Position

    # Add the specified node to the right of current position
    def AddRight(self,Node):
        self.Position.Right = Node
        Node.Top = self.Position.Top    # siblings share the same parent
        Node.Left = self.Position

    # Move current position up to parent node
    def MoveUp(self):
        self.Position = self.Position.Top

    # Move current position to the right sibling
    def MoveRight(self):
        self.Position = self.Position.Right

    # Move current position down to the child node
    def MoveBelow(self):
        self.Position = self.Position.Bottom

    # Move current position back to the dummy root node
    def MoveToRoot(self):
        self.Position = self.root

    # Predicate - return true if positioned at an XMLDataNode
    def AtData(self):
        if self.Position.__class__.__name__ == "XMLDataNode":
            return 1
        return None

    # Predicate - return true if positioned at an XMLElementNode
    # If ElementName specified, ensure positioned at that element type
    def AtElement(self,ElementName=None):
        if self.Position.__class__.__name__ != "XMLElementNode":
            return 0
        if ElementName is None:
            return 1
        return self.Position.gi == ElementName

    # Utility function to navigate to next position in Tree
    # Traversal is depth first, left to right
    # Returns 1 if the position moved, 0 when the traversal is finished
    def MoveNext(self):
        if self.Position.Bottom:
            self.MoveBelow()
            return 1
        # No child: take the nearest right sibling, climbing towards the
        # root until one is found. Only the root has no Top, so arriving
        # there means every node has been visited.
        while self.Position.Top:
            if self.Position.Right:
                self.MoveRight()
                return 1
            else:
                self.MoveUp()
        return 0

    # Return data content of current node
    # Returns None (after a message on stderr) if not at an XMLDataNode
    def GetData(self):
        if self.Position.__class__.__name__ == "XMLDataNode":
            return self.Position.datastr
        # Imported here because this module has no top-level "import sys"
        import sys
        sys.stderr.write ("GetData - Current Position is not a Data node")
        return None

    # Add an attribute to the current node
    def AddAttribute(self,name,value):
        self.Position.AddAttribute(name,value)

    # Override of Python's subscripting for XMLTree objects.
    # Allows use of for loop for "linear" iteration of the tree
    # The for loop asks for items 0, 1, 2, ... until IndexError is raised.
    # Item 0 rewinds to the root; every later item advances one step. The
    # tree itself is returned each time, so the loop body reads the node
    # through Position.
    def __getitem__(self, key):
        if key == 0:
            self.MoveToRoot()
            return self
        else:
            if self.MoveNext():
                return self
            else:
                raise IndexError
if __name__ == "__main__":
        import string
    x = XMLTree()
    x.AddBelow (XMLElementNode("Car"))
    x.MoveBelow()
    x.AddAttribute("Price","10000")
    x.AddAttribute("Units","Dollars")
    x.AddBelow (XMLElementNode("Maker"))
    x.MoveBelow()
    x.AddBelow (XMLDataNode("Toyota"))
    x.AddRight (XMLElementNode("Condition",1))
    x.MoveRight()
    x.AddAttribute("Type","Good")
    x.AddRight (XMLElementNode("Color"))
    x.MoveRight()
    x.AddBelow (XMLDataNode("Red"))
    x.MoveToRoot()
#       Print the entire tree
    print x
        
#       Print only the cars worth > 500 Dollars
        for n in x:
                if n.AtElement("Car"):
                        Price = n.Position.attributes["Price"]
                        Units = n.Position.attributes["Units"]
                        if Price > 500 and Units == "Dollars":
                                print n.Position


Listing Thirteen
# Build the tree for a single <Car> element by hand. Every call acts on the
# tree's current position, so the order of the statements matters.
from XMLTree import *
# A new tree is positioned at its dummy root node
x = XMLTree()
# Car goes below the root; move down so that Car is the current node
x.AddBelow (XMLElementNode("Car"))
x.MoveBelow()
# Both attributes are stored on Car
x.AddAttribute("Price","10000")
x.AddAttribute("Units","Dollars")
# Maker goes below Car; move down so that Maker is the current node
x.AddBelow (XMLElementNode("Maker"))
x.MoveBelow()
# The text "Toyota" goes below Maker (the position stays on Maker)
x.AddBelow (XMLDataNode("Toyota"))
# Condition is the right sibling of Maker; the 1 marks it as an empty element
x.AddRight (XMLElementNode("Condition",1))
x.MoveRight()
# The Type attribute is stored on Condition
x.AddAttribute("Type","Good")
# Color is the right sibling of Condition and has the text "Red" below it
x.AddRight (XMLElementNode("Color"))
x.MoveRight()
x.AddBelow (XMLDataNode("Red"))
# Return to the root now that the tree is complete
x.MoveToRoot()


Listing Fourteen
# In class XMLTree
# The tree's text is the text of the node below the dummy root
def __repr__(self):
    return `self.root.Bottom`
# In class XMLDataNode
# A data node's text is simply its character data
def __repr__(self):
    return self.datastr
#In class XMLElementNode
# An element's text is its tags plus, recursively, its children and its
# right siblings
def __repr__(self):
        res = "<" + self.gi             # Start of start-tag
        for (name,value) in self.attributes.items():    # Attributes
                res = res + ' %s = "%s"' % (name,value)
        if self.EmptyElement == 1:          # Close the start-tag
                res = res + "/>"
        else:
                res = res + ">"
        if self.Bottom:                 # Children if any
                res = res + `self.Bottom`
        if self.EmptyElement == 0:          # End-tag if required
                res = res + "</%s>" % self.gi
        if self.Right:                  # Right siblings if any
                res = res + `self.Right`
        return res


Listing Fifteen
<Car Units = "Dollars" Price = "10000">
 <Maker>
  Toyota
 </Maker>
 <Condition Type = "Good"/>
 <Color>
  Red
 </Color>
</Car>


Listing Sixteen
#   Assuming x is an XMLTree object
#       Print only the cars worth > 500 Dollars
        for n in x:
                if n.AtElement("Car"):
                   Price = n.Position.attributes["Price"]
                   Units = n.Position.attributes["Units"]
                   if Price > 500 and Units == "Dollars":
                      print n.Position # Output sub-tree as an XML fragment

8


