generate-rdata.R 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. #!/usr/bin/env Rscript
  2. library(readr)
  3. library(dplyr)
  4. # TODO rename functions
  #' Build a nested name/children list for a tree or dendrogram.
  #'
  #' Splits `x` on its first column; every distinct value becomes a node whose
  #' `children` are built recursively from the remaining columns. Once a single
  #' column is left, each row becomes a leaf that carries only a `name`.
  #'
  #' @param x A data frame with one column per hierarchy level, ordered from
  #'   the root level to the leaf level.
  #' @return An unnamed list of nodes: `list(name, children)` for inner levels
  #'   and `list(name)` for leaves. A single-column input with zero rows gives
  #'   an empty list.
  rsplit <- function(x) {
    if (ncol(x) > 1) {
      groups <- split(x[-1], x[1], drop = FALSE)
      lapply(names(groups), function(key) {
        list(name = key, children = rsplit(groups[[key]]))
      })
    } else {
      # seq_len() instead of seq(): seq(0) is c(1, 0), which would emit two
      # bogus leaves for an empty input.
      lapply(seq_len(nrow(x)), function(i) {
        list(name = as.character(x[i, 1]))
      })
    }
  }
  #' Build the nested list consumed by the treemap.
  #'
  #' Every hierarchy level occupies eight consecutive columns of `x`: the level
  #' name followed by seven summary columns (count, hour, weekday, age, sex,
  #' causing agent, CNAE class). While more than eight columns remain, the
  #' table is split on the first column, the node's summaries are read from
  #' the first row of each group, and the function recurses on what is left
  #' after those eight columns. The last eight columns yield one leaf per row.
  #'
  #' @param x A data frame whose column count is a multiple of eight, ordered
  #'   from the outermost level to the innermost one.
  #' @return An unnamed list of nodes with fields `name`, `escopo` (the name
  #'   of the column that defines the level), `value`, `Hora`, `Dia`, `Idade`,
  #'   `Sexo`, `Causador`, `CNAE` and, for inner levels, `children`.
  makeList <- function(x) {
    # The level name is the header of the first column in both branches.
    # Reading it from colnames(x) also works when x[i, 1] drops to a bare
    # vector (base data.frame), where colnames(x[i, 1]) would be NULL.
    scope <- colnames(x)[1]
    # 8 = 1 name column + 7 summary columns; more than that means deeper
    # levels still follow this one.
    if (ncol(x) > 8) {
      groups <- split(x[-1], x[1], drop = FALSE)
      lapply(names(groups), function(key) {
        grp <- groups[[key]]
        list(
          name = key,
          escopo = scope,
          # Summaries are constant within a group, so the first row is enough.
          value = as.integer(grp[1, 1]),
          Hora = as.integer(grp[1, 2]),
          Dia = as.character(grp[1, 3]),
          Idade = as.double(grp[1, 4]),
          Sexo = as.double(grp[1, 5]),
          Causador = as.character(grp[1, 6]),
          CNAE = as.character(grp[1, 7]),
          # Drop this level's seven summary columns before descending.
          children = makeList(grp[-(1:7)])
        )
      })
    } else {
      # seq_len() so that a zero-row table yields an empty list, not c(1, 0).
      lapply(seq_len(nrow(x)), function(i) {
        list(
          name = as.character(x[i, 1]),
          escopo = scope,
          value = as.integer(x[i, 2]),
          Hora = as.integer(x[i, 3]),
          Dia = as.character(x[i, 4]),
          Idade = as.double(x[i, 5]),
          Sexo = as.double(x[i, 6]),
          Causador = as.character(x[i, 7]),
          CNAE = as.character(x[i, 8])
        )
      })
    }
  }
  #' Group the `n` column by the first column for the bar chart.
  #'
  #' @param x A data frame whose first column is the grouping key and which
  #'   has a column named `n`.
  #' @return An unnamed list with one `list(name, value)` per distinct key
  #'   (in sorted key order, as produced by `split()`); `value` holds the `n`
  #'   entries of that key in row order.
  make_bardata <- function(x) {
    # Split the counts directly instead of re-filtering the whole table once
    # per key with a one-column logical matrix, which dplyr::filter()
    # deprecated in 1.1.0.
    groups <- split(x[["n"]], x[[1]], drop = FALSE)
    lapply(names(groups), function(key) {
      list(name = key, value = as.vector(groups[[key]]))
    })
  }
  #' Group the `n` column by the first two columns for the bar chart.
  #'
  #' @param x A data frame whose first two columns form the grouping key and
  #'   which has a column named `n`.
  #' @return An unnamed list with one `list(name, value)` per distinct key
  #'   pair, in order of first appearance. `name` is the pair as a character
  #'   vector of length two; `value` holds the matching `n` entries.
  make_bardata2 <- function(x) {
    first_key <- x[[1]]
    second_key <- x[[2]]
    counts <- x[["n"]]
    # Walk the first row of every distinct pair by index. apply() over the
    # data frame would go through as.matrix(), which format()s numeric key
    # columns into space-padded strings that then fail to match the data.
    starts <- which(!duplicated(x[c(1, 2)]))
    lapply(starts, function(i) {
      # which() drops NA comparisons, so NA keys match no rows.
      hit <- which(first_key == first_key[i] & second_key == second_key[i])
      list(
        name = c(as.character(first_key[i]), as.character(second_key[i])),
        value = as.vector(counts[hit])
      )
    })
  }
  #' Turn each row of a two-column table into `list(axis, value)`.
  #'
  #' @param d A data frame with two columns: the axis label and its value.
  #' @return An unnamed list with one element per row; each element is a list
  #'   named `axis` and `value` whose entries keep their column's type. A
  #'   zero-row input gives an empty list.
  row_to_list <- function(d) {
    lapply(seq_len(nrow(d)), function(i) {
      # Pick the i-th cell column by column. apply() would coerce the row to
      # a single type, so a character axis would turn numeric values into
      # padded strings.
      cells <- lapply(d, function(column) column[[i]])
      setNames(cells, c("axis", "value"))
    })
  }
  #' Group radar-chart rows by the first column.
  #'
  #' @param x A data frame whose first column is the grouping key; the
  #'   remaining columns are handed to `row_to_list()`.
  #' @return An unnamed list with one `list(name, value)` per distinct key
  #'   (sorted key order, as produced by `split()`); `value` is the result of
  #'   `row_to_list()` on that key's rows without the key column.
  make_radardata <- function(x) {
    # The split groups already hold each key's rows minus the key column, so
    # there is no need to re-filter the table per key with a one-column
    # logical matrix (deprecated in dplyr::filter() since 1.1.0).
    groups <- split(x[-1], x[[1]], drop = FALSE)
    lapply(names(groups), function(key) {
      rows <- groups[[key]]
      # Reset row names so the per-row list stays unnamed for base
      # data frames as well.
      row.names(rows) <- NULL
      list(name = key, value = row_to_list(rows))
    })
  }
  #' Group radar-chart rows by the first two columns.
  #'
  #' @param x A data frame whose first two columns form the grouping key; the
  #'   remaining columns are handed to `row_to_list()`.
  #' @return An unnamed list with one `list(name, value)` per distinct key
  #'   pair, in order of first appearance. `name` is the pair as a character
  #'   vector of length two; `value` is the result of `row_to_list()` on the
  #'   matching rows without the two key columns.
  make_radardata2 <- function(x) {
    first_key <- x[[1]]
    second_key <- x[[2]]
    # Walk the first row of every distinct pair by index. apply() over the
    # data frame would go through as.matrix(), which format()s numeric key
    # columns into space-padded strings that then fail to match the data.
    starts <- which(!duplicated(x[c(1, 2)]))
    lapply(starts, function(i) {
      # which() drops NA comparisons, so NA keys match no rows.
      hit <- which(first_key == first_key[i] & second_key == second_key[i])
      rows <- x[hit, -c(1, 2), drop = FALSE]
      # Reset row names so the per-row list stays unnamed for base
      # data frames as well.
      row.names(rows) <- NULL
      list(
        name = c(as.character(first_key[i]), as.character(second_key[i])),
        value = row_to_list(rows)
      )
    })
  }
  # ---- Load raw inputs --------------------------------------------------
  # All inputs are semicolon-separated CSV files read with readr::read_csv2().
  print("loading data...")
  data <- read_csv2("../app/data/amostra.csv")
  brasil <- read_csv2("../app/data/brasil.csv")
  acidentes <- read_csv2("../app/data/acidentes_victor.csv")

  # Bar chart inputs, one file per geographic level.
  pais_bar <- read_csv2("../app/data/barchart/ac_pais_2012.csv")
  regioes_bar <- read_csv2("../app/data/barchart/ac_regiao_2012.csv")
  estados_bar <- read_csv2("../app/data/barchart/ac_uf_2012.csv")
  meso_bar <- read_csv2("../app/data/barchart/ac_meso_2012.csv")
  micro_bar <- read_csv2("../app/data/barchart/ac_micro_2012.csv")
  municipios_bar <- read_csv2("../app/data/barchart/ac_mun_2012.csv")

  # Radar chart inputs, one file per geographic level.
  pais_radar <- read_csv2("../app/data/radarchart/pais_by_year.csv")
  regioes_radar <- read_csv2("../app/data/radarchart/regiao_by_year.csv")
  estados_radar <- read_csv2("../app/data/radarchart/uf_by_year.csv")
  meso_radar <- read_csv2("../app/data/radarchart/meso_by_year.csv")
  micro_radar <- read_csv2("../app/data/radarchart/micro_by_year.csv")
  municipios_radar <- read_csv2("../app/data/radarchart/mun_by_year.csv")

  # ---- Datasets that need little or no reshaping ------------------------
  # The scatterplot keeps three columns of the sample as a matrix.
  print("Computing scatterplot data...")
  sct_data <- as.matrix(
    data[c("hora_acidente", "idade_cat", "ds_agente_causador")]
  )

  # The choropleth and parallel-coordinates tables are stored as read.
  print("Computing choropleth map data...")
  choropleth_data <- read_csv2("../app/data/puf.csv")

  print("Computing parallel coordinates data...")
  parallelcoordinates_data <- read_csv2("../app/data/acidentes-sumarizado.csv")
  # ---- Bar chart --------------------------------------------------------
  # The country table is keyed by one column, every finer level by two.
  print("Computing barchart data...")
  pais_bar <- make_bardata(pais_bar)
  regioes_bar <- make_bardata2(regioes_bar)
  estados_bar <- make_bardata2(estados_bar)
  meso_bar <- make_bardata2(meso_bar)
  micro_bar <- make_bardata2(micro_bar)
  municipios_bar <- make_bardata2(municipios_bar)
  # All levels are concatenated into one flat list of entries.
  bar_data <- c(
    pais_bar, regioes_bar, estados_bar,
    meso_bar, micro_bar, municipios_bar
  )

  # ---- Radar chart ------------------------------------------------------
  print("Computing radarchart data...")
  pais_radar <- make_radardata(pais_radar)
  regioes_radar <- make_radardata2(regioes_radar)
  estados_radar <- make_radardata2(estados_radar)
  meso_radar <- make_radardata2(meso_radar)
  micro_radar <- make_radardata2(micro_radar)
  municipios_radar <- make_radardata2(municipios_radar)
  radar_data <- c(
    pais_radar, regioes_radar, estados_radar,
    meso_radar, micro_radar, municipios_radar
  )

  # ---- Treemap ----------------------------------------------------------
  print("Computing treemap data...")
  # De-duplicate the accident table once and reuse it for both the nested
  # treemap and the flat per-municipality table.
  tp <- unique(acidentes)
  treemap_data <- makeList(tp)
  # Flat table: the six level names plus the municipality-level summaries.
  treemap_data_cities <- data.frame(
    tp[c(
      "pais",
      "regiao",
      "uf",
      "mesorregiao",
      "microrregiao",
      "municipio",
      "acidentes_municipio",
      "hora_municipio",
      "diaSemana_municipio",
      "idadeMedia_municipio",
      "sexoPorcentagem_municipio",
      "dsAgenteModa_municipio",
      "classeCnaeModa_municipio"
    )]
  )

  # ---- Dendrogram -------------------------------------------------------
  print("Computing dendrogram data...")
  # Only the first top-level node of the hierarchy is kept as the tree root.
  tree_data <- rsplit(unique(brasil))[[1]]
  # ---- Write generated objects ------------------------------------------
  print("Writing out generated R objects...")
  # saveRDS() does not create missing directories, so make sure the target
  # exists; this is a no-op when it is already there.
  dir.create("../app/data/rds", recursive = TRUE, showWarnings = FALSE)
  saveRDS(sct_data, "../app/data/rds/sct_data.rds")
  saveRDS(choropleth_data, "../app/data/rds/choropleth_data.rds")
  saveRDS(
    parallelcoordinates_data,
    "../app/data/rds/parallelcoordinates_data.rds"
  )
  saveRDS(bar_data, "../app/data/rds/bar_data.rds")
  saveRDS(radar_data, "../app/data/rds/radar_data.rds")
  saveRDS(treemap_data, "../app/data/rds/treemap_data.rds")
  saveRDS(treemap_data_cities, "../app/data/rds/treemap_data_cities.rds")
  saveRDS(tree_data, "../app/data/rds/tree_data.rds")
  print("Done!")