R Programming: Efficiently Calculating Keyword Group Presence Using Matrix Multiplication and Data Frames

Here’s how you could implement this using R:

# Example data ----
# `abstracts`: one row per abstract, one 0/1 column per keyword
# (1 = the keyword occurs in that abstract).
abstracts <- data.frame(
  keyword1 = c(0, 1, 1),
  keyword2 = c(1, 0, 0),
  keyword3 = c(1, 0, 0),
  keyword4 = c(0, 0, 0)
)

# `groups`: one row per keyword, one 0/1 column per group
# (1 = the keyword belongs to that group).
# NOTE(review): the original example listed only three keyword rows, which
# cannot be multiplied with the four keyword columns of `abstracts`. A
# fourth row of zeros was added on the assumption that keyword4 belongs to
# no group -- confirm against the real data.
groups <- data.frame(
  group1 = c(1, 1, 1, 0),
  group2 = c(1, 0, 1, 0),
  group3 = c(0, 0, 1, 0),
  group4 = c(1, 1, 1, 0),
  group5 = c(0, 1, 0, 0),
  row.names = names(abstracts)
)

# Convert dataframes to matrices ----
# as.matrix() keeps the values and the dimnames; matrix(nrow =, ncol =)
# would only allocate an empty matrix filled with NA.
abstracts_mat <- as.matrix(abstracts)

abstracts_mat

groups_mat <- as.matrix(groups)

groups_mat

# The keyword axis must line up: columns of abstracts == rows of groups.
stopifnot(identical(colnames(abstracts_mat), rownames(groups_mat)))

# Create the result matrix ----
# hits[i, j] = number of keywords of group j that occur in abstract i.
hits <- abstracts_mat %*% groups_mat

# Subtract each group's size (its column total) from the matching column.
# 0 means every keyword of the group is present; a negative value is minus
# the number of keywords still missing.
result_matrix <- sweep(hits, 2, colSums(groups_mat))

result_matrix

# Check if all keywords from a group are present in an abstract
result_matrix == 0

You could also skip the intermediate matrix variables and convert the data frames inline with as.matrix():

# Example data ----
# `abstracts`: one row per abstract, one 0/1 column per keyword.
abstracts <- data.frame(
  keyword1 = c(0, 1, 1),
  keyword2 = c(1, 0, 0),
  keyword3 = c(1, 0, 0),
  keyword4 = c(0, 0, 0)
)

# `groups`: one row per keyword, one 0/1 column per group.
# NOTE(review): the original example had three keyword rows for four
# keywords, so the product below was non-conformable. The keyword4 row of
# zeros assumes keyword4 belongs to no group -- confirm with the real data.
groups <- data.frame(
  group1 = c(1, 1, 1, 0),
  group2 = c(1, 0, 1, 0),
  group3 = c(0, 0, 1, 0),
  group4 = c(1, 1, 1, 0),
  group5 = c(0, 1, 0, 0),
  row.names = names(abstracts)
)

# Columns of `abstracts` and rows of `groups` must describe the same keywords.
stopifnot(identical(names(abstracts), rownames(groups)))

# Create the result matrix ----
# The product counts, per abstract, how many keywords of each group occur.
# Transposing puts groups on the rows so that the per-group sizes
# (colSums) recycle correctly; the outer t() restores abstracts x groups.
# 0 = all keywords of the group present; negative = minus the number missing.
result_matrix <- t(
  t(as.matrix(abstracts) %*% as.matrix(groups)) - colSums(as.matrix(groups))
)

result_matrix

# Check if all keywords from a group are present in an abstract
result_matrix == 0

Last modified on 2024-01-03