#xmlFile#:

Pseudocode for xml2struct:

Premise: XML is a collection of zero or more "branch" tags that contain
one or more "leaf" tags, which may contain data.

    find a valid tag
    find the next end tag
    is the next tag the end tag for this tag?
      --yes: the data you want is between the tags. (It's a "leaf" tag.)
      --no:  push the current tag onto the stack and find the next tag. (It's a "branch" tag.)

The stack is implemented as the array "aStructPtr".

Note that if you have tags without end tags, then (a) this function will fail,
and (b) that's not well-formed XML.

Version History:
    v1.00  01/10/2002 dsb:
        - initial release
    v1.00a 01/10/2002 dsb:
        - fix issue with identically-named tags nested within each other -- Thanks, Iver!
    v1.10  01/18/2002 dsb:
        - a couple of fixes for tags with attributes.
        - if a tag is self-ending, a structure of its attributes will be created.

To Do:
    - find a way to elegantly represent tags that have both attributes and a value.
      -- put all values in a .value key, and all attributes in a .attributes struct? Hmm...

License:
    This code is free for use/abuse to any person or entity with a valid ColdFusion license,
    so long as you agree to hold neither the author nor Macromedia, Inc. liable for any
    damages resulting from its use and/or failure.
    That being said: though you are not required to, please leave this notice intact.
--->

function xmlDecode(xml) {
    // Decodes XML character data: the five predefined entity references
    // (&lt; &gt; &quot; &apos; &amp;) and numeric character references in
    // decimal (&#128;) or hexadecimal (&#x80;) form.
    //
    // xml:     string that may contain entity / character references.
    // returns: the string with every recognised reference replaced by the
    //          character it stands for. Anything that is not a recognised
    //          reference (unknown entity names, a lone "&", malformed or
    //          out-of-range numeric references) is copied through unchanged.
    //
    // The input is scanned once, left to right, and decoded text is never
    // re-scanned. That is what keeps "&amp;lt;" decoding to "&lt;" and
    // "&#38;amp;" decoding to "&amp;" instead of being decoded twice.
    // (If your dtd defines other entity references, add them to the chain below.)
    var decoded = "";                 // output built so far
    var xmlLen = len(xml);
    var cursor = 1;                   // first char of xml not yet copied to decoded
    var ampPos = find("&", xml, 1);   // position of the "&" being examined
    var semiPos = 0;                  // position of the ";" closing that reference
    var entityBody = "";              // text strictly between "&" and ";"
    var digits = "";                  // digit run of a numeric reference
    var codePoint = 0;                // numeric value of a numeric reference
    var replacement = "";             // decoded character; "" means "not a reference"

    while (ampPos) {
        // copy the literal run that precedes this ampersand
        if (ampPos gt cursor) {
            decoded = decoded & mid(xml, cursor, ampPos - cursor);
        }

        replacement = "";
        semiPos = 0;
        if (ampPos lt xmlLen) {
            semiPos = find(";", xml, ampPos + 1);
        }

        // A reference has at least one character between "&" and ";".
        // The upper bound only avoids slicing out long runs of ordinary text
        // when a stray "&" happens to be followed by a distant ";".
        if (semiPos gt ampPos + 1 and semiPos - ampPos lte 10) {
            entityBody = mid(xml, ampPos + 1, semiPos - ampPos - 1);

            // compare() is used because entity names are case-sensitive
            // and the "is" operator is not.
            if (compare(entityBody, "lt") eq 0) {
                replacement = "<";
            } else if (compare(entityBody, "gt") eq 0) {
                replacement = ">";
            } else if (compare(entityBody, "quot") eq 0) {
                replacement = """";
            } else if (compare(entityBody, "apos") eq 0) {
                replacement = "'";
            } else if (compare(entityBody, "amp") eq 0) {
                replacement = "&";
            } else if (len(entityBody) gt 1 and left(entityBody, 1) is "##") {
                // numeric character reference
                codePoint = 0;
                digits = "";
                if (mid(entityBody, 2, 1) is "x") {
                    // hexadecimal: everything after the "#x"
                    if (len(entityBody) gt 2) {
                        digits = right(entityBody, len(entityBody) - 2);
                    }
                    if (reFind("^[0-9A-Fa-f]+$", digits)) {
                        codePoint = inputBaseN(digits, 16);
                    }
                } else {
                    // decimal: everything after the "#"
                    digits = right(entityBody, len(entityBody) - 1);
                    if (reFind("^[0-9]+$", digits)) {
                        codePoint = val(digits);
                    }
                }
                // Only values chr() can represent are decoded; zero and
                // anything above 65535 is left in the text as written.
                if (codePoint gte 1 and codePoint lte 65535) {
                    replacement = chr(codePoint);
                }
            }
        }

        if (len(replacement)) {
            decoded = decoded & replacement;
            cursor = semiPos + 1;
        } else {
            // not a reference we understand: keep the ampersand literally
            decoded = decoded & "&";
            cursor = ampPos + 1;
        }

        if (cursor lte xmlLen) {
            ampPos = find("&", xml, cursor);
        } else {
            ampPos = 0;
        }
    }

    // copy whatever follows the last ampersand (or the whole string if there was none)
    if (cursor lte xmlLen) {
        decoded = decoded & mid(xml, cursor, xmlLen - cursor + 1);
    }

    return decoded;
}

function decodeXMLattributes(tagAttributesString) {
    // Parses the attribute portion of a start tag into a struct.
    //
    // tagAttributesString: text such as  id="1" name='a &amp; b' /  (a trailing
    //                      "/", ">" or "/>" left over from the tag is tolerated).
    // returns:             a struct with one key per attribute name; values are
    //                      passed through xmlDecode(). Empty input gives an
    //                      empty struct.
    var tagAttributes = structNew();
    var attributeName = "";
    var attributeValue = "";
    var rawToken = "";
    var equalsPos = 0;
    var delim = "";
    var endPos = 0;

    // short circuit on empty string
    if (not len(tagAttributesString)) {
        return tagAttributes;
    }

    equalsPos = find("=", tagAttributesString);
    while (equalsPos gt 1) {
        // stuff before equals is attribute name
        attributeName = trim(left(tagAttributesString, equalsPos - 1));

        // keep only what follows the equals sign
        if (equalsPos lt len(tagAttributesString)) {
            tagAttributesString = trim(right(tagAttributesString, len(tagAttributesString) - equalsPos));
        } else {
            tagAttributesString = "";
        }

        // first nonspace after equals should be single or doublequote.
        delim = left(tagAttributesString, 1);
        endPos = 0;
        if (len(tagAttributesString) gt 1 and (delim is """" or delim is "'")) {
            endPos = find(delim, tagAttributesString, 2);
        }

        if (endPos) {
            // properly quoted value: decode what lies between the quotes
            if (endPos gt 2) {
                attributeValue = xmlDecode(mid(tagAttributesString, 2, endPos - 2));
            } else {
                attributeValue = "";
            }
            if (endPos lt len(tagAttributesString)) {
                tagAttributesString = right(tagAttributesString, len(tagAttributesString) - endPos);
            } else {
                tagAttributesString = "";
            }
        } else {
            // strictly speaking, we're working with bad XML here. But we'll try, anyway:
            // take everything up to the next whitespace as the value.
            rawToken = getToken(tagAttributesString, 1);
            // The tag's own closing "/>" or ">" ends up glued to the last
            // unquoted value; it is not part of the value, so drop it.
            attributeValue = xmlDecode(reReplace(rawToken, "/?>$", ""));
            if (len(rawToken) lt len(tagAttributesString)) {
                tagAttributesString = right(tagAttributesString, len(tagAttributesString) - len(rawToken));
            } else {
                tagAttributesString = "";
            }
        }

        // an empty name (e.g. whitespace before "=") cannot be used as a struct key
        if (len(attributeName)) {
            tagAttributes[attributeName] = attributeValue;
        }

        equalsPos = find("=", tagAttributesString);
    }

    return tagAttributes;
}

function xml2struct(xml) {
    // This function will "read" an xml file, and return a structure that
    // represents the xml data structure.
    // It does not seek begin-end tag pairs, but rather assumes ["requires"]
    // that all begin tags have matching end tags (or are self-ending.)
    // In other words, it only works well for well-formed xml. What did you expect?

    // the structure we'll build:
    var xmlStruct=structNew();
    // stack to keep track of what depth we're at in nested tags:
    var aStructPtr=arrayNew(1);
    // string positions:
    var xmlStartTagPos=findNoCase("<",xml,1);
    var xmlStartTagEndPos=0;
    // current start tag:
    var xmlStartTag="";
    // current tag name:
    var xmlTagName="";
    // temp:
    var temp="";
    // pointer to "current" structure in xmlStruct
    // remember, struct assignments in CF are done by reference, not by value
    var curStruct = xmlStruct;

    while(xmlStartTagPos) {
        // find end of this tag
        xmlStartTagEndPos = findNoCase(">",xml,xmlStartTagPos);
        // get entire start tag
        xmlStartTag = mid(xml, xmlStartTagPos, xmlStartTagEndPos - xmlStartTagPos + 1);
        // ignore comments, etc; only bother with this tag if it looks like a "normal" tag (second char is alpha or "/"):
        if("ABCDEFGHIJKLMNOPQRSTUVWXYZ/" contains mid(xmlStartTag,2,1)) {
            // are there spaces within the start tag?
spacePos=refindNoCase("[[:space:]]",xmlStartTag); if(spacePos) { xmlTagName = mid(xmlStartTag, 2, spacePos-2); // if you wanted to do something with the attributes, such as place them // into a key called "_attributes", here's where you'd do it. // if you're reading this comment, then xml tag attributes are simply being ignored. } else { xmlTagName = mid(xmlStartTag, 2, len(xmlStartTag)-2); if( right(xmlTagName,1) is "/" ) { // self-ending tag. we'll handle that later. // for now, remove the slash xmlTagName=left(xmlTagName,len(xmlTagName)-1); } } if(left(xmlTagName,1) is "/") { // non-leaf end tag. Pop a level off the stack. // this will always be true, for well-formed XML. Silently ignore, otherwise. if(arrayLen(aStructPtr)) { arrayDeleteAt(aStructPtr,arrayLen(aStructPtr)); if(arrayLen(aStructPtr)) { curStruct = aStructPtr[arrayLen(aStructPtr)]; } else { curStruct = xmlStruct; } } } else { // start tag // Is this a self-ending tag? if(mid(xml, xmlStartTagEndPos-1, 1) IS "/") { // is self-ending // create struct with that tag's attributes tagAttributesString = trim(mid(xmlStartTag, spacePos+1, len(xmlStartTag))); if(structKeyExists(curStruct, xmlTagName)) { if(not isArray(curStruct[xmlTagName])) { // create array temp=curStruct[xmlTagName]; curStruct[xmlTagName]=arrayNew(1); arrayAppend(curStruct[xmlTagName],temp); } arrayAppend( curStruct[xmlTagName], decodeXMLattributes( tagAttributesString ) ); } else { // first child with this name curStruct[xmlTagName] = decodeXMLattributes( tagAttributesString ); } } else { // has end tag ...somewhere. // if the next tag is the this tag's end tag, then place the // data in between the tags into the structure. nextTagPos = findNoCase("<", xml, xmlStartTagPos+len(xmlStartTag)); if( mid(xml, nextTagPos, len(xmlTagName)+2) is "