123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- #!/usr/bin/env Rscript
- library(tidyverse)
- library(lubridate)
# Return the most frequent value of `values` as a character string.
#
# Values are tabulated with table(); on a tie the first entry in table()
# order wins (which.max() returns the first maximum). When nothing is
# tabulated (empty or all-missing input) NA is returned instead of a
# zero-length vector, which would not fit a one-row-per-group summary.
.most_frequent <- function(values) {
  counts <- table(values)
  if (length(counts) == 0) {
    return(NA_character_)
  }
  names(which.max(counts))
}

# Generate a summary considering current scope.
#
# `x` is a (typically grouped) data frame of accident records. The result has
# one row per group with:
#   acidentes  - number of records in the group
#   idade      - mean of idade_cat
#   obito      - share of records whose cd_indica_obito equals "N"
#   sexo       - share of records whose cd_tipo_sexo_empregado_cat equals
#                "Masculino"
#   ds_*/turno - most frequent value of the column with the same name
summarize_scope <- function(x) {
  summarize(
    x,
    acidentes = n(),
    ds_agente_causador = .most_frequent(ds_agente_causador),
    ds_cnae_classe_cat = .most_frequent(ds_cnae_classe_cat),
    ds_emitente_cat = .most_frequent(ds_emitente_cat),
    idade = mean(idade_cat),
    # NOTE(review): this is the share of "N" records; if "N" means "no death"
    # the column named `obito` holds the non-fatal share. Left unchanged
    # because the consumer of this file is not visible here - confirm.
    obito = sum(cd_indica_obito == "N") / n(),
    ds_natureza_lesao = .most_frequent(ds_natureza_lesao),
    ds_cbo = .most_frequent(ds_cbo),
    ds_parte_corpo_atingida = .most_frequent(ds_parte_corpo_atingida),
    sexo = sum(cd_tipo_sexo_empregado_cat == "Masculino") / n(),
    ds_tipo_acidente = .most_frequent(ds_tipo_acidente),
    ds_tipo_local_acidente = .most_frequent(ds_tipo_local_acidente),
    turno = .most_frequent(turno)
  )
}
# Locality table; read_csv2() expects ';' as the field separator and lets
# readr guess the column types.
brasil <- read_csv2(file = "../app/data/brasil.csv")

# Accident records. Column types are declared explicitly, and the strings
# "NA", "" and "Não informado" are all read as missing values.
complete <- read_csv2(
  file = "../app/data/completo.csv",
  col_types = cols(
    pais                       = col_character(),
    regiao                     = col_character(),
    uf                         = col_character(),
    mesorregiao                = col_character(),
    microrregiao               = col_character(),
    municipio                  = col_character(),
    st_acidente_feriado        = col_character(),
    ds_agente_causador         = col_character(),
    ano_cat                    = col_integer(),
    ds_cnae_classe_cat         = col_character(),
    dt_acidente                = col_date(),
    st_dia_semana_acidente     = col_character(),
    ds_emitente_cat            = col_character(),
    hora_acidente              = col_time(),
    idade_cat                  = col_integer(),
    cd_indica_obito            = col_character(),
    ds_natureza_lesao          = col_character(),
    ds_cbo                     = col_character(),
    ds_parte_corpo_atingida    = col_character(),
    cd_tipo_sexo_empregado_cat = col_character(),
    ds_tipo_acidente           = col_character(),
    ds_tipo_local_acidente     = col_character()
  ),
  na = c("NA", "", "Não informado")
)
# Remove unnecessary columns, then discard every record that still has a
# missing value.
# The columns are dropped by name rather than by position so this step does
# not depend on the column order of the CSV.
# NOTE(review): assumes the positions dropped before (7, 9, 11, 12) were these
# four columns, i.e. that the file's columns follow the order declared in
# col_types - confirm.
complete <- complete %>%
  select(
    -st_acidente_feriado,
    -ano_cat,
    -dt_acidente,
    -st_dia_semana_acidente
  ) %>%
  drop_na()

# Change absolute hour to "shifts": four six-hour buckets covering the day.
# The last bucket is closed with `< 24:00` so that times between 23:59:01 and
# 23:59:59 are classified as well; anything outside the day stays NA.
complete <- complete %>%
  mutate(turno = case_when(
    hora_acidente >= hm("00:00") & hora_acidente < hm("06:00") ~ "Madrugada",
    hora_acidente >= hm("06:00") & hora_acidente < hm("12:00") ~ "Manhã",
    hora_acidente >= hm("12:00") & hora_acidente < hm("18:00") ~ "Tarde",
    hora_acidente >= hm("18:00") & hora_acidente < hm("24:00") ~ "Noite",
    TRUE ~ NA_character_
  )) %>%
  # Remove absolute hour now that the shift has been derived from it.
  select(-hora_acidente)
# Summarize for every scope. Each level below the country is grouped together
# with its parent level, so the parent column is carried into the output.
country   <- complete %>% group_by(pais) %>% summarize_scope()
by_region <- complete %>% group_by(pais, regiao) %>% summarize_scope()
by_uf     <- complete %>% group_by(regiao, uf) %>% summarize_scope()
by_meso   <- complete %>% group_by(uf, mesorregiao) %>% summarize_scope()
by_micro  <- complete %>% group_by(mesorregiao, microrregiao) %>% summarize_scope()
by_town   <- complete %>% group_by(microrregiao, municipio) %>% summarize_scope()

# We need full information for towns: attach the town summary to the locality
# table, keeping only rows whose (microrregiao, municipio) pair is in both.
by_town <- inner_join(brasil, by_town, by = c("microrregiao", "municipio"))
# Write summaries: one ';'-delimited file per scope. The list names are the
# destination paths, which iwalk() passes to the writer as `.y`.
list(
  "../app/data/treemap/sumario-pais.csv"      = country,
  "../app/data/treemap/sumario-regiao.csv"    = by_region,
  "../app/data/treemap/sumario-uf.csv"        = by_uf,
  "../app/data/treemap/sumario-meso.csv"      = by_meso,
  "../app/data/treemap/sumario-micro.csv"     = by_micro,
  "../app/data/treemap/sumario-municipio.csv" = by_town
) %>%
  iwalk(~ write_delim(.x, .y, delim = ";"))
|