#!/bin/python3
# yemekhanejson.py
import json
import subprocess
import urllib.request
  4. days = ["Pazartesi","Salı","Çarşamba","Perşembe","Cuma","Cumartesi","Pazar"]
  5. def main():
  6. urllib.request.urlretrieve('http://www.boun.edu.tr/Assets/Documents/Content/Public/kampus_hayati/yemek_listesi.pdf','yemek_listesi.pdf')
  7. pdfdata = subprocess.run(['pdftotext', '-layout', 'yemek_listesi.pdf', '-'],stdout=subprocess.PIPE)
  8. pdftext = pdfdata.stdout.decode('utf-8')
  9. pdftext = pdftext.splitlines()
  10. days_list = []
  11. for line in pdftext:
  12. for day in days:
  13. if day in line:
  14. day_index = pdftext.index(line)
  15. date_index = day_index - 2
  16. corba_index = date_index - 1
  17. line = line.strip().split()
  18. m_date = pdftext[date_index].strip().split()[0]
  19. m_day = pdftext[day_index].strip().split()[0]
  20. m_corba = list(filter(None, pdftext[corba_index].strip().split(' ')))
  21. m_corba = [x.strip() for x in m_corba]
  22. m_o_corba = None
  23. m_a_corba = None
  24. if not len(m_corba) == 0:
  25. m_o_corba = m_corba[0]
  26. if not len(m_corba) == 1:
  27. m_a_corba = m_corba[1]
  28. m_ana = list(filter(None, pdftext[date_index].strip().split(' ')))
  29. m_ana = [x.strip() for x in m_ana]
  30. m_o_ana = None
  31. m_a_ana = None
  32. if not len(m_ana) == 1:
  33. m_o_ana = m_ana[1]
  34. if not len(m_ana) ==2:
  35. m_a_ana = m_ana[2]
  36. m_vegan = list(filter(None, pdftext[date_index+1].strip().split(' ')))
  37. m_vegan = [x.strip() for x in m_vegan]
  38. m_o_vegan = None
  39. m_a_vegan = None
  40. if not len(m_vegan) == 0:
  41. m_o_vegan = m_vegan[0]
  42. if not len(m_vegan) == 1:
  43. m_a_vegan = m_vegan[1]
  44. m_kar = list(filter(None, pdftext[day_index].strip().split(' ')))
  45. m_kar = [x.strip() for x in m_kar]
  46. m_o_kar = None
  47. m_a_kar = None
  48. if not len(m_kar) == 1:
  49. m_o_kar = m_kar[1]
  50. if not len(m_kar) ==2:
  51. m_a_kar = m_kar[2]
  52. m_tat = list(filter(None, pdftext[day_index+1].strip().split(' ')))
  53. m_tat = [x.strip() for x in m_tat]
  54. m_o_tat = None
  55. m_a_tat = None
  56. if not len(m_tat) == 0:
  57. m_o_tat = m_tat[0]
  58. if not len(m_tat) == 1:
  59. m_a_tat = m_tat[1]
  60. item = {
  61. "date" : m_date,
  62. "day" : m_day,
  63. "o_corba" : m_o_corba,
  64. "a_corba" : m_a_corba,
  65. "o_ana" : m_o_ana,
  66. "a_ana" : m_a_ana,
  67. "o_kar" : m_o_kar,
  68. "a_kar" : m_a_kar,
  69. "o_tat" : m_o_tat,
  70. "a_tat" : m_a_tat
  71. }
  72. days_list.append(item)
  73. break
  74. json_string = json.dumps(days_list, ensure_ascii=False)
  75. f = open('yemek_listesi.json', 'wt', encoding='utf-8')
  76. f.write(json_string)
  77. print(json_string)
  78. if __name__ == '__main__':
  79. main()