Reputation: 457
I'm trying to parse the variable names, indexes and values from the below xml. Subsetting on the variables works, but getting the actual values from each of the variables has been a bit of a struggle. Could someone point me in the right direction?
# library() fails loudly if xml2 is not installed; require() would only
# return FALSE and let the script break later at read_xml().
library(xml2)

# CPLEX solution file, inlined as a string. Double quotes that occur inside
# attribute values must be written as the entity &quot; -- a raw " would end
# the attribute early and make the document unparseable.
xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
<CPLEXSolution version="1.2">
<header
problemName="Oil-blending.lp"
objectiveValue="287750"
solutionTypeValue="1"
solutionTypeString="basic"
solutionStatusValue="1"
solutionStatusString="optimal"
solutionMethodString="dual"
primalFeasible="1"
dualFeasible="1"
simplexIterations="14"
writeLevel="1"/>
<quality
epRHS="1e-06"
epOpt="1e-06"
maxPrimalInfeas="0"
maxDualInfeas="0"
maxPrimalResidual="9.66338120633736e-13"
maxDualResidual="7.105427357601e-15"
maxX="7500"
maxPi="57.25"
maxSlack="4000"
maxRedCost="40.9"
kappa="83.7880434782609"/>
<linearConstraints>
<constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
<constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
<constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
<constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
<constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
<constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
<constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
<constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
<constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
<constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
<constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
</linearConstraints>
<variables>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
<variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
<variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
<variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
<variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
</variables>
</CPLEXSolution>'

# Parse the string into an xml_document.
x <- read_xml(xml_file)

# Select the <variables> container node; its <variable> children carry the
# name/index/value attributes the question is after.
vars <- xml_find_all(x, "//variables")
Upvotes: 0
Views: 486
Reputation: 24079
The xml2
package is a good choice for this type of problem. Your starting code above was close; you just needed to select the "variable" child nodes and extract the text of the attributes of interest.
library(xml2)

# Parse the XML text held in `xml_file` into a document.
x <- read_xml(xml_file)

# Select the parent <variables> node.
vars <- xml_find_all(x, "//variables")

# Select the <variable> children of that node. The leading "." makes the
# XPath relative to `vars`; a path starting with "//" is absolute and would
# search the whole document again, ignoring the context node.
variable <- xml_find_all(vars, "./variable")

# Read the "name", "index" and "value" attributes. xml_attr() always returns
# character, so the numeric attributes are converted explicitly.
vnames <- xml_attr(variable, "name")
index <- as.integer(xml_attr(variable, "index"))
values <- as.numeric(xml_attr(variable, "value"))

# Combine index and value into one table (vnames is kept available above for
# callers that also want the variable names).
data.frame(index, values)
Sample output:
data.frame(index, values)
index values
1 0 2222.2222
2 1 444.4444
3 2 333.3333
4 3 2111.1111
5 4 4222.2222
6 5 3166.6667
7 6 666.6667
8 7 333.3333
9 8 0.0000
10 9 0.0000
11 10 7500.0000
12 11 0.0000
13 12 0.0000
Upvotes: 2
Reputation: 971
With the stringr
library
# CPLEX solution file, inlined as a string. Double quotes inside attribute
# values are written as &quot; so the text stays well-formed XML.
xml_file <- '<?xml version = "1.0" encoding="UTF-8" standalone="yes"?>
<CPLEXSolution version="1.2">
<header
problemName="Oil-blending.lp"
objectiveValue="287750"
solutionTypeValue="1"
solutionTypeString="basic"
solutionStatusValue="1"
solutionStatusString="optimal"
solutionMethodString="dual"
primalFeasible="1"
dualFeasible="1"
simplexIterations="14"
writeLevel="1"/>
<quality
epRHS="1e-06"
epOpt="1e-06"
maxPrimalInfeas="0"
maxDualInfeas="0"
maxPrimalResidual="9.66338120633736e-13"
maxDualResidual="7.105427357601e-15"
maxX="7500"
maxPi="57.25"
maxSlack="4000"
maxRedCost="40.9"
kappa="83.7880434782609"/>
<linearConstraints>
<constraint name="ct_demand({&quot;Super&quot;})" index="0" status="LL" slack="0" dual="-20.8"/>
<constraint name="ct_demand({&quot;Regular&quot;})" index="1" status="LL" slack="0" dual="0.1"/>
<constraint name="ct_demand({&quot;Diesel&quot;})" index="2" status="LL" slack="0" dual="-40.8"/>
<constraint name="ct_capacity({&quot;Crude1&quot;})" index="3" status="LL" slack="0" dual="57.25"/>
<constraint name="ct_capacity({&quot;Crude2&quot;})" index="4" status="LL" slack="0" dual="20.9"/>
<constraint name="ct_capacity({&quot;Crude3&quot;})" index="5" status="BS" slack="1500" dual="0"/>
<constraint name="ct_total_max_prod" index="6" status="BS" slack="499.999999999997" dual="0"/>
<constraint name="ct_octane_min({&quot;Super&quot;})" index="7" status="BS" slack="-2000" dual="-0"/>
<constraint name="ct_octane_min({&quot;Regular&quot;})" index="8" status="LL" slack="0" dual="-1.77635683940025e-15"/>
<constraint name="ct_octane_min({&quot;Diesel&quot;})" index="9" status="BS" slack="-4000" dual="-0"/>
<constraint name="ct_lead_max({&quot;Super&quot;})" index="10" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Regular&quot;})" index="11" status="LL" slack="0" dual="30.9"/>
<constraint name="ct_lead_max({&quot;Diesel&quot;})" index="12" status="LL" slack="0" dual="30.9"/>
</linearConstraints>
<variables>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Super&quot;})" index="0" status="BS" value="2222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Super&quot;})" index="1" status="BS" value="444.444444444444" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Super&quot;})" index="2" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Regular&quot;})" index="3" status="BS" value="2111.11111111111" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Regular&quot;})" index="4" status="BS" value="4222.22222222222" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Regular&quot;})" index="5" status="BS" value="3166.66666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude1&quot;})({&quot;Diesel&quot;})" index="6" status="BS" value="666.666666666667" reducedCost="-0"/>
<variable name="Blend({&quot;Crude2&quot;})({&quot;Diesel&quot;})" index="7" status="BS" value="333.333333333333" reducedCost="-0"/>
<variable name="Blend({&quot;Crude3&quot;})({&quot;Diesel&quot;})" index="8" status="LL" value="0" reducedCost="-7.105427357601e-15"/>
<variable name="Inventory({&quot;Super&quot;})" index="9" status="LL" value="0" reducedCost="-20.9"/>
<variable name="Inventory({&quot;Regular&quot;})" index="10" status="BS" value="7500" reducedCost="-0"/>
<variable name="Inventory({&quot;Diesel&quot;})" index="11" status="LL" value="0" reducedCost="-40.9"/>
<variable name="x13" index="12" status="UL" value="0" reducedCost="1"/>
</variables>
</CPLEXSolution>'

library(stringr)

# Capture everything between value=" and the closing quote that precedes the
# reducedCost attribute. [^\"]+ takes the attribute value as a whole, so signed
# values (e.g. "-12.5") and exponent notation are kept; a pattern that insists
# on a leading digit would silently skip negative values.
value_pattern <- "(?<=value=\")[^\"]+(?=\" reducedCost)"

# str_extract_all() returns a list with one element per input string; the
# input is a single string, so take the first element and convert to numeric.
as.numeric(str_extract_all(xml_file, value_pattern)[[1]])
[1] 2222.2222 444.4444 333.3333 2111.1111 4222.2222 3166.6667 666.6667 333.3333 0.0000 0.0000
[11] 7500.0000 0.0000 0.0000
Upvotes: 0