text.py 601 B

123456789101112131415161718192021
  1. def get_separated_text_from_file(file: str, repl_dict=None) -> list[str]:
  2. new_list = []
  3. with open(file, "r") as f:
  4. text = f.read()
  5. text_list = text.split('\n')
  6. for t in text_list:
  7. if len(t.strip()) > 1:
  8. if len(t.strip()) > 1000:
  9. p = [s for s in t.split(". ") if len(s.strip()) > 1]
  10. new_list += p
  11. else:
  12. new_list.append(t)
  13. return replace_by_dict(new_list, repl_dict) if repl_dict else new_list
  14. def replace_by_dict(paragraphs: list[str], repl_dict: dict) -> list[str]:
  15. return paragraphs