Turn XML with dynamic N children elements into N dynamic columns

One Star

Turn XML with dynamic N children elements into N dynamic columns

I have XML inbound that looks like the below, note there are some standard elements, but then N dynamic "property" children for N unknown properties. I'd like to get a new row for each species with its 3 standard properties + all dynamic properties appended after it (comma separated). See below.
<root>
<species>
<type>BEAR</type>
<identifier>2203A</identifier>
<label>Animal</label>
<properties>
  <property>
 <name>description</name>
 <value>desc1</name>
  </property>
  <property>
 <name>region</name>
 <value>abc001</value>
  </property>
  </properties>
</species>
<species>
<type>DOG</type>
<identifier>8777</identifier>
<label>gear</label>
<properties>
  <property>
 <name>description</name>
 <value>apple</name>
  </property>
  <property>
 <name>region</name>
 <value>cccc</value>
  </property>
  <property>
 <name>prop9</name>
 <value>dddd</value>
  </property>
  <property>
 <name>prop10</name>
 <value>dlldld</value>
  </property>
  </properties>
</species>
</root>


I want rows like this, (i.e. note the dynamic N properties are appended to end)
bear,2203a,animal,desc1,abc001
dog,8777,gear,apple,cccc,dddd,dlldld


How can I do this in Talend when the <property> elements are unknown and can vary?
Four Stars

Re: Turn XML with dynamic N children elements into N dynamic columns

Selecting Unknown Nodes
XPath wildcards can be used to select unknown XML nodes.
Wildcard
Description
*Matches any element node
@*Matches any attribute node
node()Matches any node of any kind

SOURCE: www.w3schools.com/xsl/xpath_syntax.asp
One Star

Re: Turn XML with dynamic N children elements into N dynamic columns

Aware of that, but how does essentially iterate over all subnodes of properties and create additional column values for each one encountered, like what component should I be using in studio. tExtractXmlField or tXmlMap don't seem to be able to do this
This properties block could contain 0-N properties
Four Stars

Re: Turn XML with dynamic N children elements into N dynamic columns

Is this waht you want :
Starting job textxml at 15:38 25/04/2016.
connecting to socket on port 3952
connected
.------+-----.
| tLogRow_2 |
|=-----+----=|
|key |id |
|=-----+----=|
|desc1 |2203A|
|abc001|2203A|
|apple |8777 |
|cccc |8777 |
|dddd |8777 |
|dlldld|8777 |
'------+-----'
.----------------------+-----.
| tLogRow_3 |
|=---------------------+----=|
|key |id |
|=---------------------+----=|
|apple,cccc,dddd,dlldld|8777 |
|desc1,abc001 |2203A|
'----------------------+-----'
.----+-----+------.
| tLogRow_1 |
|=---+-----+-----=|
|type|id |label |
|=---+-----+-----=|
|BEAR|2203A|Animal|
|DOG |8777 |gear |
'----+-----+------'
.----+-----+------+----------------------.
| tLogRow_4 |
|=---+-----+------+---------------------=|
|type|id |label |key |
|=---+-----+------+---------------------=|
|BEAR|2203A|Animal|desc1,abc001 |
|DOG |8777 |gear |apple,cccc,dddd,dlldld|
'----+-----+------+----------------------'
disconnected
Job textxml ended at 15:38 25/04/2016.
Four Stars

Re: Turn XML with dynamic N children elements into N dynamic columns

Here the script, i deleted the tlogrow components to save space
SCRIPT_VERSION=4.2,
DEFAULT_CONTEXT: Default,
ContextType {
 NAME: Default
}
addParameters {
 addElementParameters {
  JOB_RUN_VM_ARGUMENTS : " -Xms256M -Xmx1024M",
  JOB_RUN_VM_ARGUMENTS_OPTION : "false",
  SCREEN_OFFSET_X : "0",
  SCREEN_OFFSET_Y : "0",
  SCHEMA_OPTIONS : "none",,,
  IMPLICITCONTEXT_USE_PROJECT_SETTINGS : "true",
  STATANDLOG_USE_PROJECT_SETTINGS : "true",
  MULTI_THREAD_EXECATION : "false",
  IMPLICIT_TCONTEXTLOAD_FILE : "\"\"",
  FIELDSEPARATOR : "\"\"",,,
  DISABLE_WARNINGS : "false",
  DISABLE_INFO : "false",
  FILE_PATH : "\"E:/X163842/Workspace/.metadata\"",
  ENCODING:ENCODING_TYPE : "ISO-8859-15",
  DB_VERSION : "ORACLE_10"
 }
}
addComponent {
 setComponentDefinition {
  TYPE: "tFileInputXML",
  NAME: "tFileInputXML_1",
  POSITION: 128, 160,
  SIZE: 32, 32,
  OFFSETLABEL: 0, 0
 }
 setSettings {
  START : "true",
  END_OF_FLOW : "false",
  FILENAME : "\"C:/Users/X163842/Documents/xmltest.xml\"",
  LOOP_QUERY : "\"/root/*\"",
  MAPPING {
   SCHEMA_COLUMN : "type",
   QUERY : "\"./type\"",
   NODECHECK : "false",
   SCHEMA_COLUMN : "id",
   QUERY : "\"./identifier\"",
   NODECHECK : "false",
   SCHEMA_COLUMN : "label",
   QUERY : "\"./label\"",
   NODECHECK : "false"
  },,
  DIE_ON_ERROR : "false",
  ADVANCED_SEPARATOR : "false",
  THOUSANDS_SEPARATOR : "\",\"",
  DECIMAL_SEPARATOR : "\".\"",
  IGNORE_NS : "false",
  IGNORE_DTD : "false",
  TMP_FILENAME : "\"E:/x163842/Workspace/temp.xml\"",
  USE_SEPARATOR : "false",
  FIELD_SEPARATOR : "\",\"",
  GENERATION_MODE : "Dom4j",
  CHECK_DATE : "false",
  ENCODING : "\"ISO-8859-15\"",
  ENCODING:ENCODING_TYPE : "ISO-8859-15",
  CONNECTION_FORMAT : "row",
  INFORMATION : "false",,
  VALIDATION_RULES : "false"
 }
 addSchema {
  NAME: "tFileInputXML_1",
  TYPE: "FLOW"
  addColumn {
   NAME: "type",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "id",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "label",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
 }
 addSchema {
  NAME: "REJECT",
  TYPE: "REJECT"
  addColumn {
   NAME: "type",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "id",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "label",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "errorCode",
   TYPE: "id_String",
   NULLABLE: true,
   DEFAULTVALUE: "",
   LENGTH: 255,
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "errorMessage",
   TYPE: "id_String",
   NULLABLE: true,
   DEFAULTVALUE: "",
   LENGTH: 255,
   SOURCETYPE: ""
  }
 }
}
addComponent {
 setComponentDefinition {
  TYPE: "tMap",
  NAME: "tMap_1",
  POSITION: 384, 160,
  SIZE: 32, 32,
  OFFSETLABEL: 0, 0
 }
 setSettings {
  MAP : "",
  LINK_STYLE : "AUTO",,
  PREVIEW : "_JPgaEArVEeaWsax5FJa6_g-tMap_1-PREVIEW.bmp",
  DIE_ON_ERROR : "true",
  LKUP_PARALLELIZE : "false",
  ROWS_BUFFER_SIZE : "2000000",
  CHANGE_HASH_AND_EQUALS_FOR_BIGDECIMAL : "false",
  CONNECTION_FORMAT : "row",
  INFORMATION : "false",,
  VALIDATION_RULES : "false"
 }
 addSchema {
  NAME: "easy",
  TYPE: "FLOW",
  LABEL: "easy"
  addColumn {
   NAME: "type",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "id",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "label",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "key",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
 }
 addMapperData {
  addUiProperties {
   SHELLMAXIMIZED: false
  }
  addInputTable {
   NAME: "row5",
   SIZESTATE: INTERMEDIATE,
   MATCHINGMODE: "UNIQUE_MATCH",
   LOOKUPMODE: "LOAD_ONCE"
   addColumn {
    NAME: "type",
    TYPE: "id_String",
    NULLABLE: true
   }
   addColumn {
    NAME: "id",
    TYPE: "id_String",
    NULLABLE: true
   }
   addColumn {
    NAME: "label",
    TYPE: "id_String",
    NULLABLE: true
   }
  }
  addInputTable {
   NAME: "row6",
   SIZESTATE: INTERMEDIATE,
   MATCHINGMODE: "UNIQUE_MATCH",
   LOOKUPMODE: "LOAD_ONCE"
   addColumn {
    NAME: "key",
    TYPE: "id_String",
    NULLABLE: true
   }
   addColumn {
    NAME: "id",
    TYPE: "id_String",
    NULLABLE: true,
    EXPRESSION: "row5.id ",
    OPERATOR: "="
   }
  }
  addVarTable {
   NAME: "Var",
   MINIMIZED: true,
   SIZESTATE: INTERMEDIATE
  }
  addOutputTable {
   ID: "",
   NAME: "easy",
   SIZESTATE: INTERMEDIATE
   addColumn {
    NAME: "type",
    TYPE: "id_String",
    NULLABLE: true,
    EXPRESSION: "row5.type "
   }
   addColumn {
    NAME: "id",
    TYPE: "id_String",
    NULLABLE: true,
    EXPRESSION: "row5.id "
   }
   addColumn {
    NAME: "label",
    TYPE: "id_String",
    NULLABLE: true,
    EXPRESSION: "row5.label "
   }
   addColumn {
    NAME: "key",
    TYPE: "id_String",
    NULLABLE: true,
    EXPRESSION: "row6.key "
   }
  }
 }
}
addComponent {
 setComponentDefinition {
  TYPE: "tLogRow",
  NAME: "tLogRow_4",
  POSITION: 576, 160,
  SIZE: 32, 32,
  OFFSETLABEL: 0, 0
 }
 setSettings {
  END_OF_FLOW : "true",
  BASIC_MODE : "false",
  TABLE_PRINT : "true",
  VERTICAL : "false",
  PRINT_UNIQUE : "true",
  PRINT_LABEL : "false",
  PRINT_UNIQUE_LABEL : "false",
  FIELDSEPARATOR : "\"|\"",
  PRINT_HEADER : "false",
  PRINT_UNIQUE_NAME : "false",
  PRINT_COLNAMES : "false",
  USE_FIXED_LENGTH : "false",
  LENGTHS {
   SCHEMA_COLUMN : "type",
   LENGTH : "10",
   SCHEMA_COLUMN : "id",
   LENGTH : "10",
   SCHEMA_COLUMN : "label",
   LENGTH : "10",
   SCHEMA_COLUMN : "key",
   LENGTH : "10"
  },
  SCHEMA_OPT_NUM : "100",
  CONNECTION_FORMAT : "row",
  INFORMATION : "false",,
  VALIDATION_RULES : "false"
 }
 addSchema {
  NAME: "tLogRow_4",
  TYPE: "FLOW",
  LABEL: "easy"
  addColumn {
   NAME: "type",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "id",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "label",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "key",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
 }
}
addComponent {
 setComponentDefinition {
  TYPE: "tDenormalize",
  NAME: "tDenormalize_1",
  POSITION: 384, 256,
  SIZE: 32, 32,
  OFFSETLABEL: 0, 0
 }
 setSettings {
  DENORMALIZE_COLUMNS {
   INPUT_COLUMN : "key",
   DELIMITER : "\",\"",
   MERGE : "true"
  },,
  CONNECTION_FORMAT : "row",
  INFORMATION : "false",,
  VALIDATION_RULES : "false"
 }
 addSchema {
  NAME: "tDenormalize_1",
  TYPE: "FLOW"
  addColumn {
   NAME: "key",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "id",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
 }
}
addComponent {
 setComponentDefinition {
  TYPE: "tFileInputXML",
  NAME: "tFileInputXML_2",
  POSITION: 128, 256,
  SIZE: 32, 32,
  OFFSETLABEL: 0, 0
 }
 setSettings {
  SUBTREE_START : "false",
  END_OF_FLOW : "false",
  FILENAME : "\"C:/Users/X163842/Documents/xmltest.xml\"",
  LOOP_QUERY : "\"/root/species/properties/*\"",
  MAPPING {
   SCHEMA_COLUMN : "key",
   QUERY : "\"./value\"",
   NODECHECK : "false",
   SCHEMA_COLUMN : "id",
   QUERY : "\"../../identifier\"",
   NODECHECK : "false"
  },,
  DIE_ON_ERROR : "false",
  ADVANCED_SEPARATOR : "false",
  THOUSANDS_SEPARATOR : "\",\"",
  DECIMAL_SEPARATOR : "\".\"",
  IGNORE_NS : "false",
  IGNORE_DTD : "false",
  TMP_FILENAME : "\"E:/x163842/Workspace/temp.xml\"",
  USE_SEPARATOR : "false",
  FIELD_SEPARATOR : "\",\"",
  GENERATION_MODE : "Dom4j",
  CHECK_DATE : "false",
  ENCODING : "\"ISO-8859-15\"",
  ENCODING:ENCODING_TYPE : "ISO-8859-15",
  CONNECTION_FORMAT : "row",
  INFORMATION : "false",,
  VALIDATION_RULES : "false"
 }
 addSchema {
  NAME: "tFileInputXML_2",
  TYPE: "FLOW"
  addColumn {
   NAME: "key",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "id",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
 }
 addSchema {
  NAME: "REJECT",
  TYPE: "REJECT"
  addColumn {
   NAME: "key",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "id",
   TYPE: "id_String",
   NULLABLE: true,
   COMMENT: "",
   PATTERN: "",
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "errorCode",
   TYPE: "id_String",
   NULLABLE: true,
   DEFAULTVALUE: "",
   LENGTH: 255,
   SOURCETYPE: ""
  }
  addColumn {
   NAME: "errorMessage",
   TYPE: "id_String",
   NULLABLE: true,
   DEFAULTVALUE: "",
   LENGTH: 255,
   SOURCETYPE: ""
  }
 }
}
addConnection {
 TYPE: "FLOW",
 NAME: "row5",
 METANAME: "tFileInputXML_1",
 SOURCE: "tFileInputXML_1",
 TARGET: "tMap_1",
 OFFSETLABEL: 0, 0,
 UNIQUE_NAME : "row5",
 TRACES_CONNECTION_FILTER {
  TRACE_COLUMN : "type",
  TRACE_COLUMN_CHECKED : "true",
  TRACE_COLUMN_CONDITION : "",
  TRACE_COLUMN : "id",
  TRACE_COLUMN_CHECKED : "true",
  TRACE_COLUMN_CONDITION : "",
  TRACE_COLUMN : "label",
  TRACE_COLUMN_CHECKED : "true",
  TRACE_COLUMN_CONDITION : ""
 },
 MONITOR_CONNECTION : "false",
 USEROWLABEL : "true",
 METTERLABEL : "\"MyLabel\"",
 ABSOLUTE : "Absolute",,,
 PARTITIONER : "false",
 DEPARTITIONER : "false",
 REPARTITIONER : "false",
 NONE : "true",
 NUM_PARTITIONS : "5",
 PART_QUEUE_SIZE : "1000",
 HASH_PARTITION : "false",,
 DEPART_QUEUE_SIZE : "1000",
 IS_SORTING : "true",
 ACTIVATE : "true",
 TRACES_CONNECTION_ENABLE : "true",
 RESUMING_CHECKPOINT : "false",,,
 ACTIVEBREAKPOINT : "false",
 LOGICAL_OP : "&&",,
 USE_ADVANCED : "false",
 ADVANCED_COND : "// code sample : use input_row to define the condition.
// input_row.columnName1.equals(\"foo\") ||!(input_row.columnName2.equals(\"bar\"))
// replace the following expression by your own filter condition
input_row.columnName1.equals(input_row.columnName2)
   "
}
addConnection {
 TYPE: "FLOW",
 NAME: "easy",
 METANAME: "easy",
 SOURCE: "tMap_1",
 TARGET: "tLogRow_4",
 OFFSETLABEL: 0, 0,
 UNIQUE_NAME : "easy",
 TRACES_CONNECTION_FILTER {
  TRACE_COLUMN : "type",
  TRACE_COLUMN_CHECKED : "true",
  TRACE_COLUMN_CONDITION : "",
  TRACE_COLUMN : "id",
  TRACE_COLUMN_CHECKED : "true",
  TRACE_COLUMN_CONDITION : "",
  TRACE_COLUMN : "label",
  TRACE_COLUMN_CHECKED : "true",
  TRACE_COLUMN_CONDITION : "",
  TRACE_COLUMN : "key",
  TRACE_COLUMN_CHECKED : "true",
  TRACE_COLUMN_CONDITION : ""
 },
 MONITOR_CONNECTION : "false",
 USEROWLABEL : "true",
 METTERLABEL : "\"MyLabel\"",
 ABSOLUTE : "Absolute",,,
 PARTITIONER : "false",
 DEPARTITIONER : "false",
 REPARTITIONER : "false",
 NONE : "true",
 NUM_PARTITIONS : "5",
 PART_QUEUE_SIZE : "1000",
 HASH_PARTITION : "false",,
 DEPART_QUEUE_SIZE : "1000",
 IS_SORTING : "true",
 ACTIVATE : "true",
 TRACES_CONNECTION_ENABLE : "true",
 RESUMING_CHECKPOINT : "false",,,
 ACTIVEBREAKPOINT : "false",
 LOGICAL_OP : "&&",,
 USE_ADVANCED : "false",
 ADVANCED_COND : "// code sample : use input_row to define the condition.
// input_row.columnName1.equals(\"foo\") ||!(input_row.columnName2.equals(\"bar\"))
// replace the following expression by your own filter condition
input_row.columnName1.equals(input_row.columnName2)
   "
}
addConnection {
 TYPE: "FLOW",
 NAME: "row6",
 LINESTYLE: 8,
 METANAME: "tDenormalize_1",
 SOURCE: "tDenormalize_1",
 TARGET: "tMap_1",
 OFFSETLABEL: 0, 0,
 UNIQUE_NAME : "row6",
 TRACES_CONNECTION_FILTER {
  TRACE_COLUMN : "key",
  TRACE_COLUMN_CHECKED : "true",
  TRACE_COLUMN_CONDITION : "",
  TRACE_COLUMN : "id",
  TRACE_COLUMN_CHECKED : "true",
  TRACE_COLUMN_CONDITION : ""
 },
 MONITOR_CONNECTION : "false",
 USEROWLABEL : "true",
 METTERLABEL : "\"MyLabel\"",
 ABSOLUTE : "Absolute",,,
 PARTITIONER : "false",
 DEPARTITIONER : "false",
 REPARTITIONER : "false",
 NONE : "true",
 NUM_PARTITIONS : "5",
 PART_QUEUE_SIZE : "1000",
 HASH_PARTITION : "false",,
 DEPART_QUEUE_SIZE : "1000",
 IS_SORTING : "true",
 ACTIVATE : "true",
 TRACES_CONNECTION_ENABLE : "true",
 RESUMING_CHECKPOINT : "false",,,
 ACTIVEBREAKPOINT : "false",
 LOGICAL_OP : "&&",,
 USE_ADVANCED : "false",
 ADVANCED_COND : "// code sample : use input_row to define the condition.
// input_row.columnName1.equals(\"foo\") ||!(input_row.columnName2.equals(\"bar\"))
// replace the following expression by your own filter condition
input_row.columnName1.equals(input_row.columnName2)
   "
}
addConnection {
 TYPE: "FLOW",
 NAME: "row3",
 METANAME: "tFileInputXML_2",
 SOURCE: "tFileInputXML_2",
 TARGET: "tDenormalize_1",
 OFFSETLABEL: 0, 0,
 UNIQUE_NAME : "row3",
 TRACES_CONNECTION_FILTER {
  TRACE_COLUMN : "key",
  TRACE_COLUMN_CHECKED : "true",
  TRACE_COLUMN_CONDITION : "",
  TRACE_COLUMN : "id",
  TRACE_COLUMN_CHECKED : "true",
  TRACE_COLUMN_CONDITION : ""
 },
 MONITOR_CONNECTION : "false",
 USEROWLABEL : "true",
 METTERLABEL : "\"MyLabel\"",
 ABSOLUTE : "Absolute",,,
 PARTITIONER : "false",
 DEPARTITIONER : "false",
 REPARTITIONER : "false",
 NONE : "true",
 NUM_PARTITIONS : "5",
 PART_QUEUE_SIZE : "1000",
 HASH_PARTITION : "false",,
 DEPART_QUEUE_SIZE : "1000",
 IS_SORTING : "true",
 ACTIVATE : "true",
 TRACES_CONNECTION_ENABLE : "true",
 RESUMING_CHECKPOINT : "false",,,
 ACTIVEBREAKPOINT : "false",
 LOGICAL_OP : "&&",,
 USE_ADVANCED : "false",
 ADVANCED_COND : "// code sample : use input_row to define the condition.
// input_row.columnName1.equals(\"foo\") ||!(input_row.columnName2.equals(\"bar\"))
// replace the following expression by your own filter condition
input_row.columnName1.equals(input_row.columnName2)
   "
}
addSubjob {
 NAME: "tFileInputXML_1"
 SUBJOB_TITLE_COLOR : "160;190;240",
 SUBJOB_COLOR : "220;220;250"
}