rwdvc
rwdvc

Reputation: 457

R & xml2: parsing values from an XML document into a vector or data.frame

I'm trying to parse the variable names, indexes and values from the below xml. Subsetting on the variables works, but getting the actual values from each of the variables has been a bit of a struggle. Could someone point me in the right direction?

# Attach xml2 with library(): it stops with an error when the package is not
# installed, whereas require() only warns and returns FALSE, letting the script
# fail later with a less obvious "could not find function" error.
library(xml2)
# CPLEX solution document held as one single-quoted string literal. It contains
# a <header>, a <quality> element, a <linearConstraints> list of <constraint>
# elements and a <variables> list of <variable> elements; every data item is
# stored as an XML attribute (name, index, status, value, ...), not as node text.
xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
<CPLEXSolution version="1.2">
 <header
   problemName="Oil-blending.lp"
   objectiveValue="287750"
   solutionTypeValue="1"
   solutionTypeString="basic"
   solutionStatusValue="1"
   solutionStatusString="optimal"
   solutionMethodString="dual"
   primalFeasible="1"
   dualFeasible="1"
   simplexIterations="14"
   writeLevel="1"/>
 <quality
   epRHS="1e-06"
   epOpt="1e-06"
   maxPrimalInfeas="0"
   maxDualInfeas="0"
   maxPrimalResidual="9.66338120633736e-13"
   maxDualResidual="7.105427357601e-15"
   maxX="7500"
   maxPi="57.25"
   maxSlack="4000"
   maxRedCost="40.9"
   kappa="83.7880434782609"/>
 <linearConstraints>
  <constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
  <constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
  <constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
  <constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
  <constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
  <constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
  <constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
  <constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
  <constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
  <constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
  <constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
  <constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
  <constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
 </linearConstraints>
 <variables>
  <variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
  <variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
  <variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
  <variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
  <variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
  <variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
 </variables>
</CPLEXSolution>'

# Parse the XML text stored in `xml_file` into an xml2 document.
x <- read_xml(x = xml_file)
# Select every <variables> element, searching from the document root.
vars <- xml_find_all(x = x, xpath = "//variables")

Upvotes: 0

Views: 486

Answers (2)

Dave2e
Dave2e

Reputation: 24079

The xml2 package is a good choice for this type of problem. Your starting code above was close; you just needed to select the "variable" child nodes and extract the text of each attribute of interest.

library(xml2)

# Parse the XML string into a document.
x <- read_xml(xml_file)

# Read the parent <variables> node.
vars <- xml_find_all(x, "//variables")

# Select the <variable> nodes beneath `vars`. The leading "." keeps the search
# relative to `vars`; a bare "//variable" restarts from the document root and
# ignores the context node, which only works by accident when no other part of
# the document contains <variable> elements.
variable <- xml_find_all(vars, ".//variable")

# Read the "name", "index" and "value" attributes. Attributes come back as
# text, so index and value are converted to integer and numeric respectively.
vnames <- xml_attr(variable, "name")
index <- as.integer(xml_attr(variable, "index"))
values <- as.numeric(xml_attr(variable, "value"))

data.frame(index, values)

Sample output:

data.frame(index, values)
   index    values
1      0 2222.2222
2      1  444.4444
3      2  333.3333
4      3 2111.1111
5      4 4222.2222
6      5 3166.6667
7      6  666.6667
8      7  333.3333
9      8    0.0000
10     9    0.0000
11    10 7500.0000
12    11    0.0000
13    12    0.0000

Upvotes: 2

Ben373
Ben373

Reputation: 971

An alternative using a regular expression with the stringr package:

# CPLEX solution document held as one single-quoted string literal (no leading
# indentation inside the text). Each <variable> element is written on its own
# line, with its "value" attribute immediately followed by "reducedCost".
xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
<CPLEXSolution version="1.2">
<header
problemName="Oil-blending.lp"
objectiveValue="287750"
solutionTypeValue="1"
solutionTypeString="basic"
solutionStatusValue="1"
solutionStatusString="optimal"
solutionMethodString="dual"
primalFeasible="1"
dualFeasible="1"
simplexIterations="14"
writeLevel="1"/>
<quality
epRHS="1e-06"
epOpt="1e-06"
maxPrimalInfeas="0"
maxDualInfeas="0"
maxPrimalResidual="9.66338120633736e-13"
maxDualResidual="7.105427357601e-15"
maxX="7500"
maxPi="57.25"
maxSlack="4000"
maxRedCost="40.9"
kappa="83.7880434782609"/>
<linearConstraints>
<constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
<constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
<constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
<constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
<constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
<constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
<constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
<constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
<constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
<constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
<constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
</linearConstraints>
<variables>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
<variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
<variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
<variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
<variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
</variables>
</CPLEXSolution>'

library(stringr)

# Extract the text of every value="..." attribute that is directly followed by
# a reducedCost attribute, then convert it to numeric.
#   (?<=\svalue=")        the attribute must be named exactly "value" (the
#                         whitespace excludes names that merely end in "value")
#   [^"]+                 everything up to the closing quote, so signs,
#                         decimals and exponents are all captured, and a match
#                         can never run past its own attribute
#   (?="\s+reducedCost=)  only <variable>-style entries are picked up
# NOTE: a regex depends on the exact attribute order in the text; an XML parser
# is the more robust choice when the layout may change.
value_pattern <- "(?<=\\svalue=\")[^\"]+(?=\"\\s+reducedCost=)"
as.numeric(str_extract_all(xml_file, value_pattern)[[1]])
[1] 2222.2222  444.4444  333.3333 2111.1111 4222.2222 3166.6667  666.6667  333.3333    0.0000    0.0000
[11] 7500.0000    0.0000    0.0000

Upvotes: 0

Related Questions