# Build a nodes table (Id, Nodes, Label) of the unique authors and publication
# titles returned by an OpenAlex title search over the given date window.
authorPubNodes <- function(keywords, pub_start_date, pub_end_date){
  
  # helper: query the OpenAlex works endpoint for the given keywords
  search_engine <- function(keywords, pub_start_date, pub_end_date){
    suppressPackageStartupMessages(library(openalexR))
    suppressPackageStartupMessages(library(tidyverse))
    
    # identify yourself to the OpenAlex API so requests join the polite pool
    options(openalexR.mailto = "idiayeifeanyi@yahoo.com")
    
    # search for works whose titles match the keywords, keeping only
    # well-cited works (more than 50 citations), most-cited first
    works_search <- oa_fetch(
      entity = "works",
      title.search = keywords,
      cited_by_count = ">50",
      from_publication_date = pub_start_date,
      to_publication_date = pub_end_date,
      options = list(sort = "cited_by_count:desc"),
      verbose = FALSE
    )
    
    return(works_search)
    
  }
  
  search_data <- search_engine(keywords,pub_start_date,pub_end_date)
  
  
  # grab each work's table of co-authors, keeping only the author display names
  authors_collaboration_groups <- list()
  for (i in 1:nrow(search_data)){
    authors_collaboration_groups[[i]] <- search_data$author[[i]]["au_display_name"]
  }
  
  # grab all authors
  all_authors <- c()
  for (i in 1:length(authors_collaboration_groups)) {
    all_authors <- c(all_authors,authors_collaboration_groups[[i]][[1]])
  }
  
  # count the number of co-authors on each publication
  authors_length <- c()
  for(authors in 1:length(authors_collaboration_groups)){
    authors_length <- c(authors_length,authors_collaboration_groups[[authors]] |> nrow())
  }
  
  # repeat each publication title once per co-author so that the titles
  # line up with the flattened author vector
  publications <- list()
  for (i in 1:nrow(search_data)){
    publications[[i]] <- rep(search_data$display_name[i], each = authors_length[i])
  }
  
  # place all publications in a vector
  all_publications <- c()
  for(i in 1:length(publications)){
    all_publications <- c(all_publications,publications[[i]])
  }
  
  # create author_to_publication data frame
  authors_to_publications <- data.frame(
    Authors = all_authors,
    Publications = all_publications
  )
  
  # stack the df so that authors and publications
  # are combined into a single column of node names
  stacked_df <- stack(authors_to_publications)
  stacked_df <- unique(stacked_df) # remove duplicate rows
  stacked_df <- stacked_df[-2] # drop the "ind" column, keeping only the node names
  
  # build the nodes table: one row per unique author or publication
  author_publication_nodes <- data.frame(
    Id = 1:nrow(stacked_df),
    Nodes = stacked_df$values,
    Label = stacked_df$values
  )
  
  
  return(author_publication_nodes)
  
  
}
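
# A more compact tidyverse alternative to the loops inside authorPubNodes().
# This is only a sketch under two assumptions: the oa_fetch() result keeps its
# `author` list-column, and author names live in an `au_display_name` column.
# It returns raw author-publication pairs rather than the deduplicated node table.
author_pub_pairs <- function(search_data) {
  search_data |>
    dplyr::select(display_name, author) |>
    tidyr::unnest(author) |>
    dplyr::transmute(Authors = au_display_name,
                     Publications = display_name) |>
    dplyr::distinct()
}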


nodes_pub <- authorPubNodes(keywords = c("data","deep learning","analytics"),
                            pub_start_date = "2021-01-01",
                            pub_end_date = "2022-12-31")
nodes_pub |> view()

# write the nodes table to csv
write.csv(nodes_pub, file = "nodes_pub.csv", row.names = FALSE)
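
# Optional sanity check: read the csv back and confirm the round trip kept
# every row and that node names are still unique. (A quick sketch, assuming
# the file was written to the working directory as above.)
nodes_check <- read.csv("nodes_pub.csv")
stopifnot(nrow(nodes_check) == nrow(nodes_pub),
          anyDuplicated(nodes_check$Nodes) == 0)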