#xmlFile#:
Pseudocode for xml2struct:
Premise: XML is a collection of zero or more "branch" tags
that contain one or more "leaf" tags, which may contain data.
find a valid tag
find the next end tag
is the next tag the end tag for this tag?
--yes: the data you want is between the tags. (It's a "leaf" tag.)
--no: push the current tag onto the stack and find the next tag. (It's a "branch" tag.)
The stack is implemented as the array "aStructPtr".
Note that if you have tags without end tags, then
(a) this function will fail, and
(b) that's not well-formed XML.
Version History:
v1.00 01/10/2002 dsb:
- initial release
v1.00a 01/10/2002 dsb:
- fix issue with identically-named tags nested within each other -- Thanks, Iver!
v1.10 01/18/2002 dsb:
- a couple of fixes for tags with attributes.
- if a tag is self-ending, a structure of its attributes will be created.
To Do:
- find a way to elegantly represent tags that have both attributes and a value.
-- put all values in a .value key, and all attributes in a .attributes struct? Hmm...
License:
This code is free for use/abuse to any person or entity with
a valid ColdFusion license, so long as you agree to hold neither the author
nor Macromedia, Inc. liable for any damages resulting from its use and/or failure.
That being said: though you are not required to, please leave this notice intact.
--->
/*
 * xmlDecode: converts XML entity references in a string back to literal characters.
 *
 * Handles the five predefined entities (&lt; &gt; &quot; &apos; &amp;) and short
 * numeric character references in hex ("&#xHH;") or decimal ("&#DDD;") form.
 * A reference is only considered when its terminating ";" is at most 5
 * characters after the "&" (so at most 2 hex digits or 3 decimal digits).
 *
 * xml     - the encoded text
 * returns - the decoded text; malformed references are left untouched
 */
function xmlDecode(xml) {
	var semiPos = 0;
	var escString = "";
	var digits = "";
	var ascii = -1;
	var chrPos = 0;

	// replace usual suspects
	// (If your dtd defines other entity references, add them here.)
	// (note that &amp; is done last -- see the end of the function.)
	xml = replace(xml, "&lt;", "<", "ALL");
	xml = replace(xml, "&gt;", ">", "ALL");
	xml = replace(xml, "&quot;", """", "ALL");
	xml = replace(xml, "&apos;", "'", "ALL");

	// Locate the first numeric reference only AFTER the replacements above:
	// they shorten the string, so a position taken earlier would be stale.
	// (A literal pound sign inside a CFML string must be doubled.)
	chrPos = find("&##", xml, 1);

	// replace extended chars in either "&#xHH;" (hex) or "&#DDD;" (decimal) format
	while (chrPos) {
		semiPos = find(";", xml, chrPos);
		ascii = -1; // -1 means "nothing valid to decode at this position"
		if (semiPos and (semiPos - chrPos lt 6)) {
			escString = mid(xml, chrPos, semiPos - chrPos + 1);
			if (mid(escString, 3, 1) is "x") {
				// hex form: digits sit between "&#x" and ";"
				if (len(escString) gt 4) {
					digits = mid(escString, 4, len(escString) - 4);
					if (reFindNoCase("^[0-9a-f]+$", digits)) {
						ascii = inputBaseN(digits, 16);
					}
				}
			} else if (len(escString) gt 3) {
				// decimal form: digits sit between "&#" and ";"
				digits = mid(escString, 3, len(escString) - 3);
				if (reFind("^[0-9]+$", digits)) {
					ascii = val(digits);
				}
			}
			if (ascii gte 0) {
				xml = removeChars(xml, chrPos, len(escString));
				// insert() places the substring AFTER the given position, so use
				// chrPos-1 to land exactly where the reference started
				// (position 0 prefixes the string).
				xml = insert(chr(ascii), xml, chrPos - 1);
			}
		}
		// resume after the current position so a decoded "&" is never re-scanned
		chrPos = find("&##", xml, chrPos + 1);
	}

	// always replace ampersand last:
	xml = replace(xml, "&amp;", "&", "ALL");
	// note that there might be problems if someone encodes ampersand as &#38; then follows it with "amp;".
	// but if you're doing that, you get what you deserve ;-P
	return xml;
}
/*
 * decodeXMLattributes: turns the attribute portion of a tag (name="value" pairs)
 * into a struct keyed by attribute name.
 *
 * tagAttributesString - text following the tag name, e.g. a="1" b='two'
 * returns             - struct of attribute values; quoted values are passed
 *                       through xmlDecode(), unquoted ones are stored as-is.
 *                       An empty input yields an empty struct.
 */
function decodeXMLattributes(tagAttributesString) {
	var attrName = "";
	var eqAt = 0;
	var quoteChar = "";
	var closeAt = 0;
	var attrs = structNew();

	if (len(tagAttributesString)) {
		eqAt = find("=", tagAttributesString);
		// an "=" in the very first position has no name in front of it, so stop there
		while (eqAt gt 1) {
			// everything left of the equals sign is the attribute name
			attrName = trim(left(tagAttributesString, eqAt - 1));
			// drop the name and the "=", keep the remainder
			tagAttributesString = trim(mid(tagAttributesString, eqAt + 1, len(tagAttributesString)));

			// a well-formed value opens with a single or double quote
			quoteChar = left(tagAttributesString, 1);
			closeAt = find(quoteChar, tagAttributesString, 2);

			if (("""'" contains quoteChar) and closeAt) {
				// quoted value: decode the text between the matching quotes
				attrs[attrName] = xmlDecode(mid(tagAttributesString, 2, closeAt - 2));
				tagAttributesString = mid(tagAttributesString, closeAt + 1, len(tagAttributesString));
			} else {
				// not valid XML, but make a best effort: take the first token as the value
				attrs[attrName] = getToken(tagAttributesString, 1);
				tagAttributesString = mid(tagAttributesString, len(attrs[attrName]) + 1, len(tagAttributesString));
			}

			eqAt = find("=", tagAttributesString);
		}
	}

	return attrs;
}
/*
 * xml2struct: "reads" an XML string and returns a structure that mirrors it.
 *
 * - A tag whose next tag is its own end tag is a "leaf": its decoded, trimmed
 *   text is stored under the tag name.
 * - A self-ending tag is stored as a struct of its attributes.
 * - Any other start tag is a "branch": a new struct is created and becomes the
 *   current container until its end tag is reached.
 * - Repeated names within one container are collected into an array.
 * - Attributes on non-self-ending tags are ignored.
 *
 * It does not seek begin-end tag pairs, but rather assumes that all begin tags
 * have matching end tags (or are self-ending); it only works well for
 * well-formed XML. Parsing stops quietly at a tag that has no closing ">".
 *
 * xml     - the XML text
 * returns - struct representing the document
 */
function xml2struct(xml) {
	// the structure we'll build:
	var xmlStruct = structNew();
	// stack to keep track of what depth we're at in nested tags:
	var aStructPtr = arrayNew(1);
	// string positions:
	var xmlStartTagPos = findNoCase("<", xml, 1);
	var xmlStartTagEndPos = 0;
	var spacePos = 0;
	var nextTagPos = 0;
	// current start tag, its name and its raw attribute text:
	var xmlStartTag = "";
	var xmlTagName = "";
	var tagAttributesString = "";
	// per-tag working values:
	var temp = "";
	var newValue = "";
	var isLeaf = false;
	var isBranch = false;
	// pointer to the "current" structure in xmlStruct
	// remember, struct assignments in CF are done by reference, not by value
	var curStruct = xmlStruct;

	while (xmlStartTagPos) {
		// find end of this tag
		xmlStartTagEndPos = findNoCase(">", xml, xmlStartTagPos);
		if (not xmlStartTagEndPos) {
			// unterminated tag: nothing sensible left to parse
			break;
		}
		// get entire start tag
		xmlStartTag = mid(xml, xmlStartTagPos, xmlStartTagEndPos - xmlStartTagPos + 1);

		// ignore comments, etc; only bother with this tag if it looks like a
		// "normal" tag (second char is alpha or "/"):
		if ("ABCDEFGHIJKLMNOPQRSTUVWXYZ/" contains mid(xmlStartTag, 2, 1)) {
			// are there spaces within the start tag?
			spacePos = refindNoCase("[[:space:]]", xmlStartTag);
			if (spacePos) {
				xmlTagName = mid(xmlStartTag, 2, spacePos - 2);
			} else {
				xmlTagName = mid(xmlStartTag, 2, len(xmlStartTag) - 2);
				if (right(xmlTagName, 1) is "/") {
					// self-ending tag without attributes: strip the slash from the name
					xmlTagName = left(xmlTagName, len(xmlTagName) - 1);
				}
			}

			if (left(xmlTagName, 1) is "/") {
				// non-leaf end tag. Pop a level off the stack.
				// The stack is never empty here for well-formed XML; silently ignore otherwise.
				if (arrayLen(aStructPtr)) {
					arrayDeleteAt(aStructPtr, arrayLen(aStructPtr));
					if (arrayLen(aStructPtr)) {
						curStruct = aStructPtr[arrayLen(aStructPtr)];
					} else {
						curStruct = xmlStruct;
					}
				}
			} else {
				// start tag: first work out WHAT to store under xmlTagName ...
				isBranch = false;
				if (mid(xml, xmlStartTagEndPos - 1, 1) is "/") {
					// self-ending tag: value is a struct of its attributes
					if (spacePos) {
						tagAttributesString = trim(mid(xmlStartTag, spacePos + 1, len(xmlStartTag)));
					} else {
						tagAttributesString = "";
					}
					newValue = decodeXMLattributes(tagAttributesString);
				} else {
					// has an end tag somewhere. If the very next tag is this tag's
					// end tag, the data in between is the value (leaf).
					nextTagPos = findNoCase("<", xml, xmlStartTagPos + len(xmlStartTag));
					isLeaf = false;
					if (nextTagPos) {
						isLeaf = (mid(xml, nextTagPos, len(xmlTagName) + 2) is ("</" & xmlTagName));
					}
					if (isLeaf) {
						if (nextTagPos gt xmlStartTagEndPos + 1) {
							newValue = xmlDecode(trim(mid(xml, xmlStartTagEndPos + 1, nextTagPos - xmlStartTagEndPos - 1)));
						} else {
							// <tag></tag>: nothing between the tags
							newValue = "";
						}
						// skip past the end tag so it is not mistaken for a branch end
						xmlStartTagEndPos = nextTagPos + len(xmlTagName) + 2;
					} else {
						// not at leaf: a new struct that becomes the current container
						newValue = structNew();
						isBranch = true;
					}
				}

				// ... then store it, promoting to an array when the name repeats.
				if (structKeyExists(curStruct, xmlTagName)) {
					if (not isArray(curStruct[xmlTagName])) {
						temp = curStruct[xmlTagName];
						curStruct[xmlTagName] = arrayNew(1);
						arrayAppend(curStruct[xmlTagName], temp);
					}
					arrayAppend(curStruct[xmlTagName], newValue);
					if (isBranch) {
						arrayAppend(aStructPtr, curStruct[xmlTagName][arrayLen(curStruct[xmlTagName])]);
					}
				} else {
					// first child with this name
					curStruct[xmlTagName] = newValue;
					if (isBranch) {
						arrayAppend(aStructPtr, curStruct[xmlTagName]);
					}
				}
				if (isBranch) {
					// descend: the struct just pushed is now the current container
					curStruct = aStructPtr[arrayLen(aStructPtr)];
				}
			}
		}
		// find next tag
		xmlStartTagPos = findNoCase("<", xml, xmlStartTagEndPos);
	}
	return xmlStruct;
}
// Copy the three UDFs defined above into the "caller" scope so the invoking
// template can call xmlDecode(), decodeXMLattributes() and xml2struct() directly.
// All three are copied because xml2struct() calls the other two by name.
// NOTE(review): presumably this file is run as a custom tag (where the "caller" scope exists) -- confirm.
caller.xmlDecode = xmlDecode;
caller.decodeXMLattributes = decodeXMLattributes;
caller.xml2struct = xml2struct;