# summary.R (3.5 KB)
  # tidyverse supplies readr (read_csv2, cols) and dplyr (joins, summaries).
  library(tidyverse)

  # Locality table; column types are left for readr to guess.
  brasil <- read_csv2("app/data/brasil.csv")

  # Accident sample. Every column gets an explicit parser; they are grouped by
  # type below so the few non-character columns stand out.
  data <- read_csv2(
    "app/data/amostra.csv",
    na = "NA",
    col_types = cols(
      # Integers
      ano_cat = col_integer(),
      idade_cat = col_integer(),
      # Date is day/month/year; time is hour and minute with no separator
      dt_acidente = col_date(format = "%d/%m/%Y"),
      hora_acidente = col_time(format = "%H%M"),
      # Locality
      nm_municipio = col_character(),
      nome_uf = col_character(),
      # Remaining descriptive fields, all kept as text
      st_acidente_feriado = col_character(),
      st_dia_semana_acidente = col_character(),
      ds_agente_causador = col_character(),
      ds_cnae_classe_cat = col_character(),
      ds_emitente_cat = col_character(),
      ds_natureza_lesao = col_character(),
      ds_cbo = col_character(),
      ds_parte_corpo_atingida = col_character(),
      ds_tipo_acidente = col_character(),
      ds_tipo_local_acidente = col_character(),
      cd_indica_obito = col_character(),
      cd_tipo_sexo_empregado_cat = col_character()
    )
  )
  # (Disabled) Remove codenames for localities and use better names for variables
  # brasil <- brasil %>%
  #   select(Nome_UF, Nome_Mesorregião, Nome_Microrregião, Nome_Município) %>%
  #   rename(uf = Nome_UF,
  #          mesorregiao = Nome_Mesorregião,
  #          microrregiao = Nome_Microrregião,
  #          municipio = Nome_Município)
  # Give the two locality columns short names, then move them to the front;
  # everything() keeps all remaining columns in their existing order.
  data <- data %>%
    rename(uf = nome_uf, municipio = nm_municipio) %>%
    select(uf, municipio, everything())
  # Attach the corresponding locality data from `brasil` to every accident
  # record. The inner join keeps only rows whose (uf, municipio) pair is
  # present in both tables.
  complete <- inner_join(brasil, data, by = c("uf", "municipio"))

  # Save the merged table to the working directory.
  write.csv2(complete, file = "completo.csv", row.names = FALSE)
  # Number of accidents at each geographic level ----

  # Count the rows of `df` within each combination of the grouping columns
  # passed in `...`. Returns one row per group with the count in `acidentes`.
  # The result is ungrouped so a multi-column grouping does not leak into the
  # joins further down.
  count_accidents <- function(df, ...) {
    df %>%
      group_by(...) %>%
      summarize(acidentes = n()) %>%
      ungroup()
  }

  country <- count_accidents(complete, pais)
  by_region <- count_accidents(complete, regiao)
  by_uf <- count_accidents(complete, uf)
  # NOTE(review): mesorregiao and microrregiao are keyed by name alone; confirm
  # those names are unique nationwide, otherwise key them by uf as well.
  by_meso <- count_accidents(complete, mesorregiao)
  by_micro <- count_accidents(complete, microrregiao)
  # A municipality is identified by the (uf, municipio) pair -- the same key
  # used to build `complete` -- because town names can repeat across states.
  # Counting by name alone would merge homonymous towns.
  by_town <- count_accidents(complete, uf, municipio)

  # Write the summaries ----
  write.csv2(country, "acidentes-total.csv", row.names = FALSE)
  write.csv2(by_region, "acidentes-regiao.csv", row.names = FALSE)
  write.csv2(by_uf, "acidentes-uf.csv", row.names = FALSE)
  write.csv2(by_meso, "acidentes-meso.csv", row.names = FALSE)
  write.csv2(by_micro, "acidentes-micro.csv", row.names = FALSE)
  write.csv2(by_town, "acidentes-municipio.csv", row.names = FALSE)

  # Put every accident count alongside its locality (this is temporary) ----
  # Each count column is given its final name before the join, so the result
  # does not depend on the ".x"/".y" suffixes dplyr invents for clashing names.
  acidentes <- brasil %>%
    inner_join(rename(country, total = acidentes), by = "pais") %>%
    inner_join(rename(by_region, acidentes_regiao = acidentes),
               by = "regiao") %>%
    inner_join(rename(by_uf, acidentes_uf = acidentes), by = "uf") %>%
    inner_join(rename(by_meso, acidentes_meso = acidentes),
               by = "mesorregiao") %>%
    inner_join(rename(by_micro, acidentes_micro = acidentes),
               by = "microrregiao") %>%
    inner_join(rename(by_town, acidentes_municipio = acidentes),
               by = c("uf", "municipio")) %>%
    select(pais, total,
           regiao, acidentes_regiao,
           uf, acidentes_uf,
           mesorregiao, acidentes_meso,
           microrregiao, acidentes_micro,
           municipio, acidentes_municipio)

  write.csv2(acidentes, "acidentes.csv", row.names = FALSE)