# generator.py
import asyncio
import os
import random
from pprint import pprint

import requests
from bs4 import BeautifulSoup

from parsing.task_text_parser import get_task_text
from . import get_files
from . import task_text_parser
# from . import *
# from task_text_parser import get_task_text
  11. async def generate_random_variant(var_num):
  12. URL = f'https://kpolyakov.spb.ru/school/ege/gen.php?action=viewVar&answers=on&varId={var_num}'
  13. response = requests.get(URL)
  14. soup = BeautifulSoup(response.text, 'html.parser')
  15. center = soup.find('div', class_='center')
  16. tasks_table = center.find('table', class_='vartopic')
  17. tasks_td = tasks_table.findAll('td', class_='topicview')
  18. tasks_script = [i.find('script') for i in tasks_td]
  19. answer_table = center.find('table', class_='varanswer')
  20. answer_td_list = answer_table.findAll('td', class_='answer')
  21. answers = []
  22. for i, elem in enumerate(answer_td_list):
  23. if elem.text:
  24. answers.append(elem.text)
  25. else:
  26. answers.append(str(elem).split("'")[1].replace('<br/>', ' '))
  27. if i == 18:
  28. answers[-1] = answers[-1].replace('\r', '')
  29. answers[-1] = answers[-1].replace('\t', '')
  30. byte_img_list = []
  31. byte_excel_list = []
  32. byte_word_list = []
  33. byte_txt_list = []
  34. for task_number in range(len(tasks_script)):
  35. """
  36. making bytes objects lists
  37. """
  38. task_script_text = str(tasks_script[task_number])
  39. # --- making xls and docx bytes lists ---
  40. begin_index = task_script_text.find('<a') + 9
  41. end_index = 0
  42. for i in range(begin_index, len(task_script_text)):
  43. if task_script_text[i] == '"':
  44. end_index = i
  45. break
  46. address = task_script_text[begin_index:end_index]
  47. if 'xls' in task_script_text:
  48. byte_excel_list.append(get_files.get_excel(address))
  49. else:
  50. byte_excel_list.append(None)
  51. if 'docx' in task_script_text:
  52. byte_word_list.append(get_files.get_word(address))
  53. else:
  54. byte_word_list.append(None)
  55. # --- making img byte list
  56. if 'img' in task_script_text:
  57. begin_index = task_script_text.find('img') + 9
  58. end_index = 0
  59. for i in range(begin_index, len(task_script_text)):
  60. if task_script_text[i] == '"':
  61. end_index = i
  62. break
  63. img_address = task_script_text[begin_index:end_index]
  64. byte_img_list.append(get_files.get_photo(img_address))
  65. else:
  66. byte_img_list.append(None)
  67. # --- making txt bytes list ---
  68. if '<a' in task_script_text and 'txt' in task_script_text and task_number != 10 - 1:
  69. # task_number == 27
  70. if task_number == 27 - 3:
  71. begin_index_1, begin_index_2 = task_script_text.find('<a') + 9, \
  72. task_script_text.rfind('<a') + 9
  73. end_index_1, end_index_2 = 0, 0
  74. for i in range(begin_index_1, len(task_script_text)):
  75. if task_script_text[i] == '"':
  76. end_index_1 = i
  77. break
  78. for i in range(begin_index_2, len(task_script_text)):
  79. if task_script_text[i] == '"':
  80. end_index_2 = i
  81. break
  82. txg_address_1 = task_script_text[begin_index_1:end_index_1]
  83. txg_address_2 = task_script_text[begin_index_2:end_index_2]
  84. byte_txt_list.append((get_files.get_word(txg_address_1), get_files.get_word(txg_address_2)))
  85. else:
  86. begin_index = task_script_text.find('<a') + 9
  87. end_index = 0
  88. for i in range(begin_index, len(task_script_text)):
  89. if task_script_text[i] == '"':
  90. end_index = i
  91. break
  92. txg_address_1 = task_script_text[begin_index:end_index]
  93. byte_txt_list.append(get_files.get_word(txg_address_1))
  94. else:
  95. byte_txt_list.append(None)
  96. tasks_description = []
  97. for task_number in range(len(tasks_script)):
  98. tasks_description.append(await get_task_text(tasks_script[task_number]))
  99. # add a hint to task 19-21, because there are 3 answers
  100. if task_number + 1 == 19:
  101. tasks_description[task_number] += '\n Ответы на каждый из трех вопросов вводите в новой' \
  102. ' строке, а ответы внутри одного' \
  103. ' вопроса пробелом'
  104. variant = []
  105. task_number = 1
  106. for task_description, answer, byte_img, byte_excel, byte_word, byte_txt in zip(tasks_description,
  107. answers,
  108. byte_img_list,
  109. byte_excel_list,
  110. byte_word_list,
  111. byte_txt_list):
  112. if task_number == 20:
  113. task_number = 22
  114. variant.append(None)
  115. variant.append(None)
  116. if isinstance(byte_txt, tuple):
  117. byte_txt_1 = byte_txt[0]
  118. byte_txt_2 = byte_txt[1]
  119. else:
  120. byte_txt_1 = byte_txt
  121. byte_txt_2 = None
  122. task = dict()
  123. task['description'] = task_description
  124. task['answer'] = answer
  125. if byte_img:
  126. with open(f'data/temp_task_files/{task_number}.png', 'wb') as img:
  127. img.write(byte_img)
  128. task['image'] = byte_img
  129. if byte_excel:
  130. with open(f'data/temp_task_files/{task_number}.xlsx', 'wb') as xls:
  131. xls.write(byte_excel)
  132. task['excel'] = byte_excel
  133. if byte_word:
  134. with open(f'data/temp_task_files/{task_number}.docx', 'wb') as docx:
  135. docx.write(byte_word)
  136. task['word'] = byte_word
  137. if byte_txt_1:
  138. with open(f"data/temp_task_files/{task_number}_A.txt", 'wb') as docx:
  139. docx.write(byte_txt_1)
  140. task['txt1'] = byte_txt_1
  141. if byte_txt_2:
  142. with open(f"data/temp_task_files/{task_number}_B.txt", 'wb') as docx:
  143. docx.write(byte_txt_2)
  144. task['txt2'] = byte_txt_2
  145. variant.append(task)
  146. task_number += 1
  147. return variant
  148. if __name__ == '__main__':
  149. asyncio.run(generate_random_variant(1))