Reputation: 1
I tried to calculate BDe Score in R without using the built in function of different R packages.
library(bnlearn)
library(tidyverse)
# Load the ALARM network
# load("http://www.bnlearn.com/bnrepository/alarm/alarm.bif.gz")
alarmNetwork_ls <- read.bif("alarm.bif.gz")
# Load the ALARM data
data("alarm")
# Select a subset of the data for testing
test_data <- alarm[sample(nrow(alarm), 1000), ]
# The functions above match on names;
# the name of one of the nodes in the network is "LVFAILURE",
# but this name in the alarm dataset is "LVF".
# We fixed the column name using the code below.
test_data <- test_data %>%
rename(
HISTORY = HIST,
HREKG = HREK,
HRSAT = HRSA,
PRESS = PRSS,
EXPCO2 = ECO2,
MINVOL = MINV,
MINVOLSET = MVS,
HYPOVOLEMIA = HYP,
ANAPHYLAXIS = APL,
INSUFFANESTH = ANES,
PULMEMBOLUS = PMB,
INTUBATION = INT,
KINKEDTUBE = KINK,
DISCONNECT = DISC,
LVEDVOLUME = LVV,
STROKEVOLUME = STKV,
CATECHOL = CCHL,
LVFAILURE = LVF,
ERRLOWOUTPUT = ERLO,
ERRCAUTER = ERCA,
SHUNT = SHNT,
PVSAT = PVS,
ARTCO2 = ACO2,
VENTALV = VALV,
VENTLUNG = VLNG,
VENTTUBE = VTUB,
VENTMACH = VMCH
)
# calculate log-likelihood of data under the network
log_likelihood <- function(data, bn) {
n <- nrow(data)
nodes <- nodes(bn)
parents <- parents(bn)
logprob <- rep(0, n)
for (i in 1:n) {
prob <- 1
for (j in 1:length(nodes)) {
node <- nodes[[j]]
node_name <- node$name
node_parents <- parents[[j]]
if (length(node_parents) == 0) {
prob_node <- cpquery(bn, node_name, list(), data[i,])
} else {
parent_values <- data[i,node_parents]
prob_node <- cpquery(bn, node_name, list(parents = parent_values), data[i,])
}
prob <- prob * prob_node
}
logprob[i] <- log(prob)
}
return(sum(logprob))
}
# calculate number of parameters in the model
num_params <- function(bn) {
nodes <- nodes(bn)
parents <- parents(bn)
n_params <- 0
for (i in 1:length(nodes)) {
node <- nodes[[i]]
node_states <- length(node$levels[[1]])
n_parents <- length(parents[[i]])
n_params <- n_params + node_states * (n_parents + 1)
}
return(n_params)
}
# calculate BDe score
BDe_score <- function(data, bn) {
n <- nrow(data)
LL <- log_likelihood(data, bn)
d <- ncol(data)
k <- num_params(bn)
score <- LL - 0.5 * log(n) * k
return(score)
}
# test function on alarm data and network
BDe_score(test_data, alarmNetwork_ls)
I tried ro run the above code but got follwing error:
Error in check.nodes(nodes = node, graph = x, max.nodes = 1) :
no node specified.
I know there are several R packages to calculate BDe score but can anyone help me to resolve my issue without using those built-in functions? Or if anyone can help me to code the proposition 18.2 of Probabilistic Graphical Models: Principles and Techniques
Book by Daphne Koller and Nir Friedman
Upvotes: 0
Views: 91