Ifeanyi committed on
Commit
4457e6d
·
1 Parent(s): 2c8be03

Delete OpenAlexNodes.R

Browse files
Files changed (1) hide show
  1. OpenAlexNodes.R +0 -106
OpenAlexNodes.R DELETED
@@ -1,106 +0,0 @@
1
#' Build the node table of an author-publication network from OpenAlex works.
#'
#' Searches OpenAlex for works whose titles match `keywords` within the given
#' publication window (unless `search_data` is supplied), then lists every
#' distinct author name followed by every distinct publication title as
#' network nodes.
#'
#' @param keywords Character vector of title search terms.
#' @param pub_start_date Earliest publication date, passed to `oa_fetch()` as
#'   `from_publication_date` (the example in this file uses "YYYY-MM-DD").
#' @param pub_end_date Latest publication date, passed to `oa_fetch()` as
#'   `to_publication_date`.
#' @param search_data Optional, already-fetched works table with an `author`
#'   list column and a `display_name` column. When given, no request is made
#'   and the three search arguments are never evaluated, so they may be
#'   omitted.
#' @return A data frame with columns `Id` (1..n), `Nodes` and `Label` (both the
#'   node name). Authors come first, then publications. Zero rows when the
#'   search matched nothing.
authorPubNodes <- function(keywords, pub_start_date, pub_end_date,
                           search_data = NULL) {
  if (is.null(search_data)) {
    search_data <- fetch_cited_works(keywords, pub_start_date, pub_end_date)
  }

  build_author_publication_nodes(search_data)
}

# Query OpenAlex for works with more than 50 citations whose title matches
# `keywords`, most cited first.
fetch_cited_works <- function(keywords, pub_start_date, pub_end_date) {
  # Both packages are attached (not merely loaded) exactly as before: code
  # that runs after this function, such as a later `view()` call, depends on
  # the tidyverse being on the search path.
  suppressPackageStartupMessages(library(openalexR))
  suppressPackageStartupMessages(library(tidyverse))

  # NOTE(review): this address looks like a redaction placeholder; confirm the
  # real contact e-mail before running against the API.
  options(openalexR.mailto = "[email protected]")

  oa_fetch(
    entity = "works",
    title.search = c(keywords),
    cited_by_count = ">50",
    from_publication_date = pub_start_date,
    to_publication_date = pub_end_date,
    options = list(sort = "cited_by_count:desc"),
    verbose = FALSE
  )
}

# Return the author names of one work as a character vector; `character(0)`
# when the work carries no usable author table.
extract_author_names <- function(author_table) {
  # Works without author information do not hold a data frame here (e.g. NA);
  # skipping them keeps authors and publications aligned.
  if (!is.data.frame(author_table) || nrow(author_table) == 0L) {
    return(character(0))
  }

  # Prefer the named column. The fallback to the second column mirrors the
  # original positional lookup -- presumably `au_display_name` is column 2 in
  # the openalexR version this was written for; TODO confirm.
  if ("au_display_name" %in% names(author_table)) {
    as.character(author_table[["au_display_name"]])
  } else if (ncol(author_table) >= 2L) {
    as.character(author_table[[2L]])
  } else {
    character(0)
  }
}

# Turn a works table into the Id/Nodes/Label node table.
build_author_publication_nodes <- function(search_data) {
  # An empty or NULL search result yields an empty node table instead of
  # tripping over `1:0` style indexing.
  if (is.null(search_data) || NROW(search_data) == 0L) {
    return(
      data.frame(Id = integer(0), Nodes = character(0), Label = character(0))
    )
  }

  missing_cols <- setdiff(c("author", "display_name"), names(search_data))
  if (length(missing_cols) > 0L) {
    stop(
      "search_data is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  names_per_work <- lapply(search_data[["author"]], extract_author_names)
  has_authors <- lengths(names_per_work) > 0L

  all_authors <- as.character(unlist(names_per_work, use.names = FALSE))
  # Only publications with at least one listed author become nodes, matching
  # the original "repeat the title once per author" construction.
  all_publications <- as.character(search_data[["display_name"]][has_authors])

  # De-duplicate within each group separately, so an author and a publication
  # that happen to share a name both stay in the table.
  node_names <- c(unique(all_authors), unique(all_publications))

  data.frame(
    Id = seq_along(node_names),
    Nodes = node_names,
    Label = node_names
  )
}
86
-
87
- # authorPub_data <- search_engine(keywords = c("data","deep learning","analytics"),
88
- # pub_start_date = "2021-01-01",
89
- # pub_end_date = "2022-12-31")
90
- #
91
- # tictoc::tic()
92
- # nodes <- authorPubNodes(search_data = authorPub_data)
93
- # tictoc::toc()
94
- #
95
- # nodes |> view()
96
- #
97
- #
98
- # # write to csv
99
- # write.csv(nodes,file = "authorPubNodes.csv",row.names = F)
100
-
101
# Example run: build the node table for works published in 2021-2022 whose
# titles match the keywords, inspect it, and export it as CSV.
nodes_pub <- authorPubNodes(
  keywords = c("data", "deep learning", "analytics"),
  pub_start_date = "2021-01-01",
  pub_end_date = "2022-12-31"
)

# view() is not base R; it is available here because authorPubNodes() attaches
# the tidyverse while fetching.
nodes_pub |> view()

# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
write.csv(nodes_pub, file = "nodes_pub.csv", row.names = FALSE)