# treemap_summary.R

  1. #!/usr/bin/env Rscript
  2. library(tidyverse)
  3. library(lubridate)
  # Summarise the accidents of every group in `x` (the "current scope").
  #
  # Args:
  #   x: a data frame, normally already grouped with group_by(), holding the
  #      columns referenced below (ds_agente_causador, ds_cnae_classe_cat,
  #      ds_emitente_cat, idade_cat, cd_indica_obito, ds_natureza_lesao,
  #      ds_cbo, ds_parte_corpo_atingida, cd_tipo_sexo_empregado_cat,
  #      ds_tipo_acidente, ds_tipo_local_acidente, turno).
  #
  # Returns:
  #   One row per group with: the number of rows (`acidentes`), the most
  #   frequent value of each categorical column, the mean of `idade_cat`
  #   (`idade`), the share of rows whose `cd_indica_obito` is "N" (`obito`)
  #   and the share of rows whose sex is "Masculino" (`sexo`).
  summarize_scope <- function(x) {
    # Most frequent non-NA value of `values`, as a string. Ties go to the
    # first entry in table() order. Returns NA when the group has no non-NA
    # value, so summarize() gets a length-one result instead of failing.
    most_frequent <- function(values) {
      counts <- table(values)
      if (length(counts) == 0L) {
        return(NA_character_)
      }
      names(which.max(counts))
    }

    summarize(
      x,
      acidentes = n(),
      ds_agente_causador = most_frequent(ds_agente_causador),
      ds_cnae_classe_cat = most_frequent(ds_cnae_classe_cat),
      ds_emitente_cat = most_frequent(ds_emitente_cat),
      idade = mean(idade_cat),
      # NOTE(review): this is the share of rows flagged "N". If "N" means
      # "no death", the column called `obito` holds the non-fatal share;
      # confirm the coding of cd_indica_obito with the data owner.
      obito = sum(cd_indica_obito == "N") / n(),
      ds_natureza_lesao = most_frequent(ds_natureza_lesao),
      ds_cbo = most_frequent(ds_cbo),
      ds_parte_corpo_atingida = most_frequent(ds_parte_corpo_atingida),
      sexo = sum(cd_tipo_sexo_empregado_cat == "Masculino") / n(),
      ds_tipo_acidente = most_frequent(ds_tipo_acidente),
      ds_tipo_local_acidente = most_frequent(ds_tipo_local_acidente),
      turno = most_frequent(turno)
    )
  }
  # Lookup table; readr guesses its column types.
  brasil <- read_csv2(file = "../app/data/brasil.csv")

  # Accident records. Every column gets an explicit parser, and the strings
  # "NA", "" and "Não informado" are all read as missing values.
  complete <- read_csv2(
    file = "../app/data/completo.csv",
    col_types = cols(
      # Geographic hierarchy
      pais = col_character(),
      regiao = col_character(),
      uf = col_character(),
      mesorregiao = col_character(),
      microrregiao = col_character(),
      municipio = col_character(),
      # Accident attributes
      st_acidente_feriado = col_character(),
      ds_agente_causador = col_character(),
      ano_cat = col_integer(),
      ds_cnae_classe_cat = col_character(),
      dt_acidente = col_date(),
      st_dia_semana_acidente = col_character(),
      ds_emitente_cat = col_character(),
      hora_acidente = col_time(),
      idade_cat = col_integer(),
      cd_indica_obito = col_character(),
      ds_natureza_lesao = col_character(),
      ds_cbo = col_character(),
      ds_parte_corpo_atingida = col_character(),
      cd_tipo_sexo_empregado_cat = col_character(),
      ds_tipo_acidente = col_character(),
      ds_tipo_local_acidente = col_character()
    ),
    na = c("NA", "", "Não informado")
  )
  # Drop the columns no summary uses, then discard every row that still has
  # a missing value. Columns are removed by name rather than by position so
  # this step does not depend on the column order of the input file.
  complete <- complete %>%
    select(
      -st_acidente_feriado,
      -ano_cat,
      -dt_acidente,
      -st_dia_semana_acidente
    ) %>%
    drop_na()

  # Replace the time of day with a shift label ("turno") and drop the
  # original time column.
  complete <- complete %>%
    mutate(turno = case_when(
      hora_acidente >= hm("00:00") & hora_acidente < hm("06:00") ~
        "Madrugada",
      hora_acidente >= hm("06:00") & hora_acidente < hm("12:00") ~
        "Manhã",
      hora_acidente >= hm("12:00") & hora_acidente < hm("18:00") ~
        "Tarde",
      # The night shift runs to the end of the day, so times with seconds
      # past 23:59:00 (e.g. 23:59:30) are labelled too instead of falling
      # through to NA.
      hora_acidente >= hm("18:00") & hora_acidente < hours(24) ~
        "Noite",
      TRUE ~ NA_character_
    )) %>%
    select(-hora_acidente)
  # One summary table per geographic level. From the region level down, each
  # level is grouped together with its parent level.
  country <- complete %>%
    group_by(pais) %>%
    summarize_scope()

  by_region <- complete %>%
    group_by(pais, regiao) %>%
    summarize_scope()

  by_uf <- complete %>%
    group_by(regiao, uf) %>%
    summarize_scope()

  by_meso <- complete %>%
    group_by(uf, mesorregiao) %>%
    summarize_scope()

  by_micro <- complete %>%
    group_by(mesorregiao, microrregiao) %>%
    summarize_scope()

  by_town <- complete %>%
    group_by(microrregiao, municipio) %>%
    summarize_scope()

  # Towns need the full information held in `brasil`: keep the rows of
  # `brasil` that have a town summary and append the summary columns.
  by_town <- inner_join(
    brasil,
    by_town,
    by = c("microrregiao", "municipio")
  )
  # Write each summary to its own semicolon-delimited file. The list names
  # are the destination paths; files are written in the order listed.
  list(
    "../app/data/treemap/sumario-pais.csv" = country,
    "../app/data/treemap/sumario-regiao.csv" = by_region,
    "../app/data/treemap/sumario-uf.csv" = by_uf,
    "../app/data/treemap/sumario-meso.csv" = by_meso,
    "../app/data/treemap/sumario-micro.csv" = by_micro,
    "../app/data/treemap/sumario-municipio.csv" = by_town
  ) %>%
    iwalk(function(summary_tbl, csv_path) {
      write_delim(summary_tbl, csv_path, delim = ";")
    })