summary.R 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. #!/usr/bin/env Rscript
  2. library(tidyverse)
  # Load the locality reference table; its column types are left to readr's
  # type guessing.
  brasil <- read_csv2("../app/data/brasil.csv")

  # Load the accident dataset. Every column listed below gets an explicit type
  # so its parsing does not depend on type guessing; the literal string "NA" is
  # the only value read as missing.
  data <- read_csv2(
    "../app/data/dados_cat.csv",
    na = "NA",
    col_types = cols(
      # Integer columns
      ano_cat = col_integer(),
      idade_cat = col_integer(),
      # Date and time columns, with the formats used in the file
      dt_acidente = col_date(format = "%d/%m/%Y"),
      hora_acidente = col_time(format = "%H%M"),
      # Locality columns
      nome_uf = col_character(),
      nm_municipio = col_character(),
      # Remaining columns are read as plain text
      st_acidente_feriado = col_character(),
      st_dia_semana_acidente = col_character(),
      cd_indica_obito = col_character(),
      cd_tipo_sexo_empregado_cat = col_character(),
      ds_agente_causador = col_character(),
      ds_cnae_classe_cat = col_character(),
      ds_emitente_cat = col_character(),
      ds_natureza_lesao = col_character(),
      ds_cbo = col_character(),
      ds_parte_corpo_atingida = col_character(),
      ds_tipo_acidente = col_character(),
      ds_tipo_local_acidente = col_character()
    )
  )
  # Give the locality columns shorter names (`uf`, `municipio`) and move them
  # to the front. select() renames while selecting, and everything() appends
  # the remaining columns in their existing order.
  data <- data %>%
    select(uf = nome_uf, municipio = nm_municipio, everything())
  # Add the corresponding locality data from `brasil` to every accident record.
  # The inner join keeps only rows whose (uf, municipio) pair is present in
  # both tables.
  # NOTE(review): accident records whose uf/municipio has no match in `brasil`
  # are dropped without any message -- confirm that this is intended.
  complete <- inner_join(brasil, data, by = c("uf", "municipio"))
  write_delim(complete, "../app/data/completo.csv", delim = ";")
  # Number of accidents at each locality level: one table per level, each with
  # the grouping column plus an `acidentes` row count.
  country <- complete %>%
    group_by(pais) %>%
    summarise(acidentes = n())
  by_region <- complete %>%
    group_by(regiao) %>%
    summarise(acidentes = n())
  by_uf <- complete %>%
    group_by(uf) %>%
    summarise(acidentes = n())
  by_meso <- complete %>%
    group_by(mesorregiao) %>%
    summarise(acidentes = n())
  by_micro <- complete %>%
    group_by(microrregiao) %>%
    summarise(acidentes = n())
  # NOTE(review): this groups by `municipio` only, whereas `by_uf_municipio`
  # further down groups by `uf` and `municipio`. If the same municipality name
  # occurs in more than one `uf`, their counts are merged here -- confirm.
  by_town <- complete %>%
    group_by(municipio) %>%
    summarise(acidentes = n())
  # Write the summaries: each table is paired with its output file and written
  # as a semicolon-delimited file, in the same order as they were computed.
  walk2(
    list(country, by_region, by_uf, by_meso, by_micro, by_town),
    c(
      "../app/data/acidentes-total.csv",
      "../app/data/acidentes-regiao.csv",
      "../app/data/acidentes-uf.csv",
      "../app/data/acidentes-meso.csv",
      "../app/data/acidentes-micro.csv",
      "../app/data/acidentes-municipio.csv"
    ),
    write_delim,
    delim = ";"
  )
  # Put the accident count of every locality level alongside the locality data
  # (this is temporary). Rows of `brasil` are kept only when they find a match
  # in every summary table, because each step is an inner join.
  #
  # Each summary's `acidentes` column is renamed to its final name *before* the
  # join. This avoids depending on the ".x"/".y" suffixes dplyr generates when
  # column names clash (acidentes.x, acidentes.y.y, acidentes.x.x.x, ...),
  # which are hard to read and are an implementation detail of the join.
  #
  # NOTE(review): the last join matches on `municipio` alone, so it inherits
  # whatever `by_town` holds for that name regardless of `uf` -- confirm.
  acidentes <- brasil %>%
    inner_join(rename(country, total = acidentes), by = "pais") %>%
    inner_join(rename(by_region, acidentes_regiao = acidentes),
               by = "regiao") %>%
    inner_join(rename(by_uf, acidentes_uf = acidentes), by = "uf") %>%
    inner_join(rename(by_meso, acidentes_meso = acidentes),
               by = "mesorregiao") %>%
    inner_join(rename(by_micro, acidentes_micro = acidentes),
               by = "microrregiao") %>%
    inner_join(rename(by_town, acidentes_municipio = acidentes),
               by = "municipio") %>%
    # Keep only the locality keys and their counts, from the widest level to
    # the narrowest.
    select(
      pais, total,
      regiao, acidentes_regiao,
      uf, acidentes_uf,
      mesorregiao, acidentes_meso,
      microrregiao, acidentes_micro,
      municipio, acidentes_municipio
    )
  write_delim(acidentes, "../app/data/acidentes.csv", delim = ";")
  # Summarization by ds_tipo_local_acidente (axis) and ano_cat (one line per
  # year) for the radar chart.
  by_local <- data %>%
    group_by(ano_cat, ds_tipo_local_acidente) %>%
    summarise(acidentes = n())

  # Rows of `by_local` for a single year, with that year's total number of
  # accidents (`total`) and each row's share of it (`porcentagem`).
  local_share_for_year <- function(year) {
    by_local %>%
      filter(ano_cat == year) %>%
      mutate(total = sum(acidentes), porcentagem = acidentes / total)
  }

  b2012 <- local_share_for_year(2012)
  b2013 <- local_share_for_year(2013)
  b2014 <- local_share_for_year(2014)
  b2015 <- local_share_for_year(2015)
  b2016 <- local_share_for_year(2016)
  78. #Summarization by ano_cat(axis) and sex(draw lines) for radarchart
  79. #Uncomment if necessary
  80. #by_sex <- data %>% group_by(ano_cat, cd_tipo_sexo_empregado_cat) %>% summarize(acidentes = n())
  81. #s2012 <- by_sex %>% filter(ano_cat == 2012) %>% mutate(total = sum(acidentes), porcentagem = acidentes / total)
  82. #s2013 <- by_sex %>% filter(ano_cat == 2013) %>% mutate(total = sum(acidentes), porcentagem = acidentes / total)
  83. #s2014 <- by_sex %>% filter(ano_cat == 2014) %>% mutate(total = sum(acidentes), porcentagem = acidentes / total)
  84. #s2015 <- by_sex %>% filter(ano_cat == 2015) %>% mutate(total = sum(acidentes), porcentagem = acidentes / total)
  85. #s2016 <- by_sex %>% filter(ano_cat == 2016) %>% mutate(total = sum(acidentes), porcentagem = acidentes / total)
  # Summarization of accidents per place and year: one row for each
  # combination of uf, municipio and ano_cat, with its row count in
  # `acidentes`.
  by_uf_municipio <- summarise(
    group_by(complete, uf, municipio, ano_cat),
    acidentes = n()
  )
  write_delim(by_uf_municipio, "../app/data/uf-municipio.csv", delim = ";")