[DBP] XML parser - GameCreators Forum

Author

Message

Phaelax

DBPro Master

22

Years of Service

User Offline

Joined: 16th Apr 2003

Location: Metropia

Posted: 17th Dec 2009 04:37 Edited at: 17th Dec 2009 04:40

Link

Little project I started tonight, still a work in progress obviously. This little demo will parse the xml tags then display each tag name along with its parent's name. My next step will probably be to retrieve the inner content and after that the attributes.

`tags which have both matching open and close tags
#CONSTANT NODE_TYPE_CONTAINER = 1
`a single tag which has no closing tag (empty element tag)
#CONSTANT NODE_TYPE_EMPTY = 2

Type Element
	tagName as string
	parentElementId as integer
	attributes as string
	content as string
EndType

dim tags() as Element
dim parseStack() as integer

xmlFile$ = "c:\test.xml"
xmlFileNo = 1

`open to read xmlFileNo, xmlFile$

restore XML_SAMPLE

` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
` \\ Read each line of the XML file until it reaches the end
` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
for k = 1 to 6
`while file end(xmlFileNo) = 0
	`read string xmlFileNo, L$
	read L$
	
	tagName$ = ""	
	matchOpenBracket = 0
	tagType = 0
	strLength = len(L$)

for i = 1 to strLength
	
		c$ = mid$(L$, i)
		
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ Opening bracket found for a new tag: remember where it is
		` \\ and assume an opening container tag (type 0) until a slash
		` \\ or question mark says otherwise
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		if c$ = "<"
			matchOpenBracket = i
			tagType = 0
		endif
		
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ A forward slash can either be part of a closing container
		` \\ tag, or close an empty tag
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		if c$ = "/"
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ If part of a closing tag, the slash is immediately
			` \\ preceded by the opening bracket (less-than sign).
			` \\ Type 1 marks a closing tag.
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if oldChar$ = "<"
				tagType = 1
			endif
		endif
		
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ Closing bracket for a tag
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\		
		if c$ = ">"
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ if character before closing bracket was a slash, 
			` \\ then this bracket closed off an empty tag
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if oldChar$ = "/"
				tagType = 2
			else
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				` \\ "<? ?>" is part of the XML declaration
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				if oldChar$ = "?"
					tagType = 2
				else
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
					` Normal close bracket, standard container element
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				endif
			endif
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ If we closed off (completed) the opening tag's bracket,
			` \\ then it's open as the current container. Add this tag
			` \\ to the container stack for tracking the hierarchy and 
			` \\ store a new tag element in the array
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if tagType = 0
				e as Element
				e.tagName = substr$(L$,matchOpenBracket+1,i-1)
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				` \\ A parent ID of -1 means it is the root node; no parents
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				if array count(parseStack()) < 0
					e.parentElementId = -1
				else
					e.parentElementId = parseStack()
				endif
				array insert at bottom tags()
				tags() = e
			
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				` \\ Add the index of the last tag element added to the tags
				` \\ array to the stack. This keeps track of what container
				` \\ we're in.
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				add to stack parseStack()
				parseStack() = array count(tags())
			endif
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ Closing tag was found, remove last container from stack
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if tagType = 1
				remove from stack parseStack()
			endif
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ This was an empty tag element. As they are not containers
			` \\ nothing is added to the stack and nothing needs removed.
			` \\ Create a new element and add it to the tag array
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if tagType = 2
				e as Element
				e.tagName = substr$(L$,matchOpenBracket+1,i-2)
				if array count(parseStack()) < 0
					e.parentElementId = -1
				else
					e.parentElementId = parseStack()
				endif
				array insert at bottom tags()
				tags() = e
			endif
			
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ Start the whole process over, the block segment has been closed
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			matchOpenBracket = 0
		endif
	
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ Remember the character just processed so the next iteration
		` \\ can check for slashes, which are used to determine the type
		` \\ of tag. oldChar$ is not reset when a new line is read, so it
		` \\ carries over from the last character of the previous line.
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		oldChar$ = c$

next i
`endwhile
next k

` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
` \\ Close the file, we are done with it
` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
`close file xmlFileNo

` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
` \\ 
` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

for i = 1 to array count(tags())
	parent$ = "root"
	if tags(i).parentElementId > -1 then parent$ = tags(tags(i).parentElementId).tagName
	print tags(i).tagName," is a child of ",parent$
next i

wait key
end

` Returns the characters of strSource$ from position startInc through
` position endInc, both 1-based and inclusive.
function substr$(strSource$, startInc, endInc)
	` number of characters in the requested range
	segLen = endInc - startInc + 1
	` drop everything after endInc, then keep the last segLen characters
	head$ = left$(strSource$, endInc)
	result$ = right$(head$, segLen)
endfunction result$

XML_SAMPLE:
DATA "<?xml version="1.0" encoding='UTF-8'?>"
DATA "<painting>"
DATA "	<img src="madonna.jpg" alt='Foligno Madonna, by Raphael'/>"
DATA "	<caption>This is Raphael's "Foligno" Madonna, painted in"
DATA "	<date>1511</date>-<date>1512</date>.</caption>"
DATA "</painting>"

+ Code Snippet

`tags which have both matching open and close tags
#CONSTANT NODE_TYPE_CONTAINER = 1
`a single tag which has no closing tag (empty element tag)
#CONSTANT NODE_TYPE_EMPTY = 2


Type Element
	tagName as string
	parentElementId as integer
	attributes as string
	content as string
EndType


dim tags() as Element
dim parseStack() as integer



xmlFile$ = "c:\test.xml"
xmlFileNo = 1

`open to read xmlFileNo, xmlFile$

restore XML_SAMPLE


` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
` \\ Read each line of the XML file until it reaches the end
` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
for k = 1 to 6
`while file end(xmlFileNo) = 0
	`read string xmlFileNo, L$
	read L$
	
	tagName$ = ""	
	matchOpenBracket = 0
	tagType = 0
	strLength = len(L$)

	
	for i = 1 to strLength
	
		c$ = mid$(L$, i)
		
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ Opening bracket found for a new tag: remember where it is
		` \\ and assume an opening container tag (type 0) until a slash
		` \\ or question mark says otherwise
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		if c$ = "<"
			matchOpenBracket = i
			tagType = 0
		endif
		
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ A forward slash can either be part of a closing container
		` \\ tag, or close an empty tag
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		if c$ = "/"
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ If part of a closing tag, the slash is immediately
			` \\ preceded by the opening bracket (less-than sign).
			` \\ Type 1 marks a closing tag.
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if oldChar$ = "<"
				tagType = 1
			endif
		endif
		
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ Closing bracket for a tag
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\		
		if c$ = ">"
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ if character before closing bracket was a slash, 
			` \\ then this bracket closed off an empty tag
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if oldChar$ = "/"
				tagType = 2
			else
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				` \\ "<? ?>" is part of the XML declaration
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				if oldChar$ = "?"
					tagType = 2
				else
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
					` Normal close bracket, standard container element
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				endif
			endif
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ If we closed off (completed) the opening tag's bracket,
			` \\ then it's open as the current container. Add this tag
			` \\ to the container stack for tracking the hierarchy and 
			` \\ store a new tag element in the array
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if tagType = 0
				e as Element
				e.tagName = substr$(L$,matchOpenBracket+1,i-1)
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				` \\ A parent ID of -1 means it is the root node; no parents
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				if array count(parseStack()) < 0
					e.parentElementId = -1
				else
					e.parentElementId = parseStack()
				endif
				array insert at bottom tags()
				tags() = e
			
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				` \\ Add the index of the last tag element added to the tags
				` \\ array to the stack. This keeps track of what container
				` \\ we're in.
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				add to stack parseStack()
				parseStack() = array count(tags())
			endif
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ Closing tag was found, remove last container from stack
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if tagType = 1
				remove from stack parseStack()
			endif
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ This was an empty tag element. As they are not containers
			` \\ nothing is added to the stack and nothing needs removed.
			` \\ Create a new element and add it to the tag array
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if tagType = 2
				e as Element
				e.tagName = substr$(L$,matchOpenBracket+1,i-2)
				if array count(parseStack()) < 0
					e.parentElementId = -1
				else
					e.parentElementId = parseStack()
				endif
				array insert at bottom tags()
				tags() = e
			endif
			
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ Start the whole process over, the block segment has been closed
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			matchOpenBracket = 0
		endif
	
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ Remember the character just processed so the next iteration
		` \\ can check for slashes, which are used to determine the type
		` \\ of tag. oldChar$ is not reset when a new line is read, so it
		` \\ carries over from the last character of the previous line.
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		oldChar$ = c$

	next i
`endwhile
next k

` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
` \\ Close the file, we are done with it
` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
`close file xmlFileNo



` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
` \\ 
` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

for i = 1 to array count(tags())
	parent$ = "root"
	if tags(i).parentElementId > -1 then parent$ = tags(tags(i).parentElementId).tagName
	print tags(i).tagName," is a child of ",parent$
next i



wait key
end





` Returns the characters of strSource$ from position startInc through
` position endInc, both 1-based and inclusive.
function substr$(strSource$, startInc, endInc)
	` number of characters in the requested range
	segLen = endInc - startInc + 1
	` drop everything after endInc, then keep the last segLen characters
	head$ = left$(strSource$, endInc)
	result$ = right$(head$, segLen)
endfunction result$


XML_SAMPLE:
DATA "<?xml version="1.0" encoding='UTF-8'?>"
DATA "<painting>"
DATA "	<img src="madonna.jpg" alt='Foligno Madonna, by Raphael'/>"
DATA "	<caption>This is Raphael's "Foligno" Madonna, painted in"
DATA "	<date>1511</date>-<date>1512</date>.</caption>"
DATA "</painting>"

Looking at the sample xml code, would the inner text of the caption tag be "This is Raphael's "Foligno" Madonna, painted in"? Or would it include the inner text of all child nodes as well, i.e. "This is Raphael's "Foligno" Madonna, painted in 1511-1512."?

p.s. Yes I'm aware there's a plugin for all this, but where's the fun in that?

> SELECT * FROM users WHERE clue > 0
> 0 rows returned

Back to top

Profile PM Email Website

Zotoaster

20

Years of Service

User Offline

Joined: 20th Dec 2004

Location: Scotland

Posted: 17th Dec 2009 14:41

Link

Wow, that's some crazy commenting you got going there.

Anyway, I saw "XML Parser" as the title and before I managed to look at the author I instantly thought "Phaelax". Is that weird or what?

Pretty cool though.

"everyone forgets a semi-colon sometimes." - Phaelax

Back to top

Profile PM Email

Phaelax

DBPro Master

22

Years of Service

User Offline

Joined: 16th Apr 2003

Location: Metropia

Posted: 17th Dec 2009 17:55

Link

I just lurk in the background and pop up with something new every now and then, just so I don't feel like a forum troll.

> SELECT * FROM users WHERE clue > 0
> 0 rows returned

Back to top

Profile PM Email Website

Phaelax

DBPro Master

22

Years of Service

User Offline

Joined: 16th Apr 2003

Location: Metropia

Posted: 30th Dec 2009 18:26 Edited at: 30th Dec 2009 18:27

Link

Here's an update: it now retrieves the inner content from the tags. It's not 100% yet. While a tag's content does include the content from its child tags, it doesn't include the text of the children's children, if that makes sense. I'll most likely need to build a recursive method to handle this. I didn't touch this for a week, until a convenient method for extracting inner text popped into my head. I just now got around to implementing it.

But now that I think about it, I have a better way now. I'll upload that after I get the attributes extracted.

`tags which have both matching open and close tags
#CONSTANT NODE_TYPE_CONTAINER = 1
`a single tag which has no closing tag (empty element tag)
#CONSTANT NODE_TYPE_EMPTY = 2

Type Element
	tagName as string
	parentElementId as integer
	attributes as string
	content as string
	pos as integer
	parentPos as integer
EndType

dim tags() as Element
dim parseStack() as integer

xmlFile$ = "c:\test.xml"
xmlFileNo = 1

`open to read xmlFileNo, xmlFile$

restore XML_SAMPLE

` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
` \\ Read each line of the XML file until it reaches the end
` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
for k = 1 to 6
`while file end(xmlFileNo) = 0
	`read string xmlFileNo, L$
	read L$
	
	tagName$ = ""	
	matchOpenBracket = 0
	tagType = 0
	strLength = len(L$)

for i = 1 to strLength
	
		c$ = mid$(L$, i)
		
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ Opening bracket found for a new tag: remember where it is
		` \\ and assume an opening container tag (type 0) until a slash
		` \\ or question mark says otherwise
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		if c$ = "<"
			matchOpenBracket = i
			tagType = 0
		endif
		
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ A forward slash can either be part of a closing container
		` \\ tag, or close an empty tag
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		if c$ = "/"
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ If part of a closing tag, the slash is immediately
			` \\ preceded by the opening bracket (less-than sign).
			` \\ Type 1 marks a closing tag.
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if oldChar$ = "<"
				tagType = 1
			endif
		endif
		
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ Closing bracket for a tag
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\		
		if c$ = ">"
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ if character before closing bracket was a slash, 
			` \\ then this bracket closed off an empty tag
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if oldChar$ = "/"
				tagType = 2
			else
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				` \\ "<? ?>" is part of the XML declaration
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				if oldChar$ = "?"
					tagType = 2
				else
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
					` Normal close bracket, standard container element
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				endif
			endif
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ If we closed off (completed) the opening tag's bracket,
			` \\ then it's open as the current container. Add this tag
			` \\ to the container stack for tracking the hierarchy and 
			` \\ store a new tag element in the array
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if tagType = 0
				e as Element
				e.pos = matchOpenBracket
				e.tagName = substr$(L$,matchOpenBracket+1,i-1)
				e.content = ""
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				` \\ A parent ID of -1 means it is the root node; no parents
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				if array count(parseStack()) < 0
					e.parentElementId = -1
				else
					e.parentElementId = parseStack()
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
					` \\ The position within the parent tag's content where this
					` \\ tag's data shows up
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
					e.parentPos = len(tags(e.parentElementId).content)
				endif
				array insert at bottom tags()
				tags() = e
			
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				` \\ Add the index of the last tag element added to the tags
				` \\ array to the stack. This keeps track of what container
				` \\ we're in.
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				add to stack parseStack()
				parseStack() = array count(tags())
			endif
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ Closing tag was found, remove last container from stack
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if tagType = 1
				remove from stack parseStack()
			endif
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ This was an empty tag element. As they are not containers
			` \\ nothing is added to the stack and nothing needs removed.
			` \\ Create a new element and add it to the tag array
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if tagType = 2
				e as Element
				e.tagName = substr$(L$,matchOpenBracket+1,i-2)
				e.content = ""
				if array count(parseStack()) < 0
					e.parentElementId = -1
				else
					e.parentElementId = parseStack()
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
					` \\ The position within the parent tag's content where this
					` \\ tag's data shows up
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
					e.parentPos = len(tags(e.parentElementId).content)
				endif
				array insert at bottom tags()
				tags() = e
			endif
			
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ Start the whole process over, the block segment has been closed
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			matchOpenBracket = 0
		else
		
			if matchOpenBracket = 0
				currentTag = parseStack()
				if currentTag > 0 and currentTag <= array count(tags())
					tags(currentTag).content = tags(currentTag).content + c$
				endif
			endif
		
		endif

` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ Remember the character just processed so the next iteration
		` \\ can check for slashes, which are used to determine the type
		` \\ of tag. oldChar$ is not reset when a new line is read, so it
		` \\ carries over from the last character of the previous line.
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		oldChar$ = c$

next i
`endwhile
next k

` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
` \\ Close the file, we are done with it
` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
`close file xmlFileNo

` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
` \\ 
` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

for i = 1 to array count(tags())
	parent$ = "root"
	if tags(i).parentElementId > -1 then parent$ = tags(tags(i).parentElementId).tagName
	print tags(i).tagName," is a child of ",parent$
	print "    Content: ", getElementContent$(i)
	print
next i

wait key
end

` Returns the text content of the element stored at index elementId in
` tags(), with the content of all of its descendants spliced in.
`   elementId - index into the tags() array
` Each child records parentPos, the length of its parent's own text at
` the moment the child's tag was parsed. Children are visited in array
` order; extendedLength counts the characters already inserted so that
` later insert positions are shifted by the same amount.
function getElementContent$(elementId)
	content$ = tags(elementId).content
	extendedLength = 0
	for i = 1 to array count(tags())
		if tags(i).parentElementId = elementId
			` Recurse so the child's own children (and deeper levels)
			` are included, not only the child's direct text
			childContent$ = getElementContent$(i)
			content$ = insertStr$(content$, childContent$, tags(i).parentPos + extendedLength)
			` Advance by the full inserted length, nested text included
			extendedLength = extendedLength + len(childContent$)
		endif
	next i
endfunction content$

` Returns the characters of strSource$ from position startInc through
` position endInc, both 1-based and inclusive.
function substr$(strSource$, startInc, endInc)
	` number of characters in the requested range
	segLen = endInc - startInc + 1
	` drop everything after endInc, then keep the last segLen characters
	head$ = left$(strSource$, endInc)
	result$ = right$(head$, segLen)
endfunction result$

` Returns strSource$ with seg$ inserted after its first pos characters.
function insertStr$(strSource$, seg$, pos)
	` text that stays in front of the inserted segment
	head$ = left$(strSource$, pos)
	` whatever is left of the source after the head
	tailLen = len(strSource$) - len(head$)
	tail$ = right$(strSource$, tailLen)
	joined$ = head$ + seg$ + tail$
endfunction joined$

XML_SAMPLE:
DATA "<?xml version="1.0" encoding='UTF-8'?>"
DATA "<painting>"
DATA "	<img src="madonna.jpg" alt='Foligno Madonna, by Raphael'/>"
DATA "	<caption>This is Raphael's "Foligno" Madonna, painted in "
DATA "	<date>1511</date>-<date>1512</date>.</caption>"
DATA "</painting>"

+ Code Snippet

`tags which have both matching open and close tags
#CONSTANT NODE_TYPE_CONTAINER = 1
`a single tag which has no closing tag (empty element tag)
#CONSTANT NODE_TYPE_EMPTY = 2


Type Element
	tagName as string
	parentElementId as integer
	attributes as string
	content as string
	pos as integer
	parentPos as integer
EndType


dim tags() as Element
dim parseStack() as integer



xmlFile$ = "c:\test.xml"
xmlFileNo = 1

`open to read xmlFileNo, xmlFile$

restore XML_SAMPLE


` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
` \\ Read each line of the XML file until it reaches the end
` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
for k = 1 to 6
`while file end(xmlFileNo) = 0
	`read string xmlFileNo, L$
	read L$
	
	tagName$ = ""	
	matchOpenBracket = 0
	tagType = 0
	strLength = len(L$)

	
	for i = 1 to strLength
	
		c$ = mid$(L$, i)
		
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ Opening bracket found for a new tag: remember where it is
		` \\ and assume an opening container tag (type 0) until a slash
		` \\ or question mark says otherwise
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		if c$ = "<"
			matchOpenBracket = i
			tagType = 0
		endif
		
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ A forward slash can either be part of a closing container
		` \\ tag, or close an empty tag
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		if c$ = "/"
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ If part of a closing tag, the slash is immediately
			` \\ preceded by the opening bracket (less-than sign).
			` \\ Type 1 marks a closing tag.
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if oldChar$ = "<"
				tagType = 1
			endif
		endif
		
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ Closing bracket for a tag
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\		
		if c$ = ">"
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ if character before closing bracket was a slash, 
			` \\ then this bracket closed off an empty tag
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if oldChar$ = "/"
				tagType = 2
			else
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				` \\ "<? ?>" is part of the XML declaration
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				if oldChar$ = "?"
					tagType = 2
				else
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
					` Normal close bracket, standard container element
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				endif
			endif
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ If we closed off (completed) the opening tag's bracket,
			` \\ then it's open as the current container. Add this tag
			` \\ to the container stack for tracking the hierarchy and 
			` \\ store a new tag element in the array
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if tagType = 0
				e as Element
				e.pos = matchOpenBracket
				e.tagName = substr$(L$,matchOpenBracket+1,i-1)
				e.content = ""
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				` \\ A parent ID of -1 means it is the root node; no parents
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				if array count(parseStack()) < 0
					e.parentElementId = -1
				else
					e.parentElementId = parseStack()
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
					` \\ The position within the parent tag's content where this
					` \\ tag's data shows up
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
					e.parentPos = len(tags(e.parentElementId).content)
				endif
				array insert at bottom tags()
				tags() = e
			
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				` \\ Add the index of the last tag element added to the tags
				` \\ array to the stack. This keeps track of what container
				` \\ we're in.
				` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
				add to stack parseStack()
				parseStack() = array count(tags())
			endif
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ Closing tag was found, remove last container from stack
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if tagType = 1
				remove from stack parseStack()
			endif
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ This was an empty tag element. As they are not containers
			` \\ nothing is added to the stack and nothing needs removed.
			` \\ Create a new element and add it to the tag array
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			if tagType = 2
				e as Element
				e.tagName = substr$(L$,matchOpenBracket+1,i-2)
				e.content = ""
				if array count(parseStack()) < 0
					e.parentElementId = -1
				else
					e.parentElementId = parseStack()
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
					` \\ The position within the parent tag's content where this
					` \\ tag's data shows up
					` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
					e.parentPos = len(tags(e.parentElementId).content)
				endif
				array insert at bottom tags()
				tags() = e
			endif
			
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			` \\ Start the whole process over, the block segment has been closed
			` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
			matchOpenBracket = 0
		else
		
			if matchOpenBracket = 0
				currentTag = parseStack()
				if currentTag > 0 and currentTag <= array count(tags())
					tags(currentTag).content = tags(currentTag).content + c$
				endif
			endif
		
		endif
		

	
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		` \\ Remember the character just processed so the next iteration
		` \\ can check for slashes, which are used to determine the type
		` \\ of tag. oldChar$ is not reset when a new line is read, so it
		` \\ carries over from the last character of the previous line.
		` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
		oldChar$ = c$

	next i
`endwhile
next k

` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
` \\ Close the file, we are done with it
` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
`close file xmlFileNo



` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
` \\ 
` \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\

for i = 1 to array count(tags())
	parent$ = "root"
	if tags(i).parentElementId > -1 then parent$ = tags(tags(i).parentElementId).tagName
	print tags(i).tagName," is a child of ",parent$
	print "    Content: ", getElementContent$(i)
	print
next i


wait key
end





` Returns the text content of the element stored at index elementId in
` tags(), with the content of all of its descendants spliced in.
`   elementId - index into the tags() array
` Each child records parentPos, the length of its parent's own text at
` the moment the child's tag was parsed. Children are visited in array
` order; extendedLength counts the characters already inserted so that
` later insert positions are shifted by the same amount.
function getElementContent$(elementId)
	content$ = tags(elementId).content
	extendedLength = 0
	for i = 1 to array count(tags())
		if tags(i).parentElementId = elementId
			` Recurse so the child's own children (and deeper levels)
			` are included, not only the child's direct text
			childContent$ = getElementContent$(i)
			content$ = insertStr$(content$, childContent$, tags(i).parentPos + extendedLength)
			` Advance by the full inserted length, nested text included
			extendedLength = extendedLength + len(childContent$)
		endif
	next i
endfunction content$


` Returns the characters of strSource$ from position startInc through
` position endInc, both 1-based and inclusive.
function substr$(strSource$, startInc, endInc)
	` number of characters in the requested range
	segLen = endInc - startInc + 1
	` drop everything after endInc, then keep the last segLen characters
	head$ = left$(strSource$, endInc)
	result$ = right$(head$, segLen)
endfunction result$


` Returns strSource$ with seg$ inserted after its first pos characters.
function insertStr$(strSource$, seg$, pos)
	` text that stays in front of the inserted segment
	head$ = left$(strSource$, pos)
	` whatever is left of the source after the head
	tailLen = len(strSource$) - len(head$)
	tail$ = right$(strSource$, tailLen)
	joined$ = head$ + seg$ + tail$
endfunction joined$



XML_SAMPLE:
DATA "<?xml version="1.0" encoding='UTF-8'?>"
DATA "<painting>"
DATA "	<img src="madonna.jpg" alt='Foligno Madonna, by Raphael'/>"
DATA "	<caption>This is Raphael's "Foligno" Madonna, painted in "
DATA "	<date>1511</date>-<date>1512</date>.</caption>"
DATA "</painting>"

> SELECT * FROM users WHERE clue > 0
> 0 rows returned

Back to top

Profile PM Email Website

jeffhuys

19

Years of Service

User Offline

Joined: 24th May 2006

Location: No cheesy line here.

Posted: 31st Dec 2009 23:15

Link

Impressive.
I like to program these kinds of things. When you're done you feel so good; it's like an oiled machine

.

You're the

'th to view this signature!

Back to top

Profile PM Email

Sorry your browser is not supported!

Code Snippets / [DBP] XML parser