123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138 |
- #!/bin/python3
- import subprocess, json
- import urllib.request
# Turkish weekday names, Monday through Sunday; a text line containing any of
# these is treated as a menu row by main().
days = [
    "Pazartesi",
    "Salı",
    "Çarşamba",
    "Perşembe",
    "Cuma",
    "Cumartesi",
    "Pazar",
]
def _split_columns(line, sep=' '):
    """Split one text line into stripped, non-empty column tokens.

    NOTE(review): the separator is a single space, so a multi-word cell is
    broken into several tokens. `pdftotext -layout` pads columns with runs of
    spaces, so a wider separator may have been intended -- confirm against a
    real PDF before changing it.
    """
    return [token.strip() for token in line.strip().split(sep) if token]


def _nth(tokens, position):
    """Return tokens[position], or None when the line has too few tokens."""
    return tokens[position] if position < len(tokens) else None


def _parse_menu(lines, day_names):
    """Build one menu dict for every line that contains a weekday name.

    Relative to the line holding the weekday name (index ``i``) the rows are
    read as follows:

    * ``i - 3``: "corba" row, tokens 0 and 1.
    * ``i - 2``: date row; token 0 is the date, tokens 1 and 2 are "ana".
    * ``i``:     day row; token 0 is the day, tokens 1 and 2 are "kar".
    * ``i + 1``: "tat" row, tokens 0 and 1.

    The row at ``i - 1`` is not exported. Missing tokens become ``None``.
    """
    entries = []
    # enumerate() gives the real position of each line; list.index() would
    # return the first *equal* line and mis-place duplicated rows.
    for day_index, line in enumerate(lines):
        if not any(name in line for name in day_names):
            continue

        date_index = day_index - 2
        corba_index = date_index - 1
        if corba_index < 0:
            # Not enough lines above this one; negative indices would
            # silently wrap around to the end of the document.
            continue

        corba = _split_columns(lines[corba_index])
        ana = _split_columns(lines[date_index])
        kar = _split_columns(lines[day_index])
        # The day row may be the very last line of the document.
        if day_index + 1 < len(lines):
            tat = _split_columns(lines[day_index + 1])
        else:
            tat = []

        date_words = lines[date_index].split()

        entries.append({
            "date": date_words[0] if date_words else None,
            # Non-empty by construction: the line contains a weekday name.
            "day": lines[day_index].split()[0],
            "o_corba": _nth(corba, 0),
            "a_corba": _nth(corba, 1),
            # Token 0 of the date row is the date itself.
            "o_ana": _nth(ana, 1),
            "a_ana": _nth(ana, 2),
            # Token 0 of the day row is the weekday name.
            "o_kar": _nth(kar, 1),
            "a_kar": _nth(kar, 2),
            "o_tat": _nth(tat, 0),
            "a_tat": _nth(tat, 1),
        })
    return entries


def main():
    """Download the menu PDF, convert it to text and export it as JSON.

    Fetches ``yemek_listesi.pdf`` into the current directory, runs the
    external ``pdftotext -layout`` tool on it, parses the resulting lines
    with the module-level ``days`` list, writes the result to
    ``yemek_listesi.json`` (UTF-8) and prints the same JSON string.
    """
    urllib.request.urlretrieve(
        'http://www.boun.edu.tr/Assets/Documents/Content/Public/kampus_hayati/yemek_listesi.pdf',
        'yemek_listesi.pdf',
    )
    pdfdata = subprocess.run(
        ['pdftotext', '-layout', 'yemek_listesi.pdf', '-'],
        stdout=subprocess.PIPE,
    )
    pdftext = pdfdata.stdout.decode('utf-8').splitlines()

    days_list = _parse_menu(pdftext, days)
    json_string = json.dumps(days_list, ensure_ascii=False)

    # Context manager guarantees the file is flushed and closed.
    with open('yemek_listesi.json', 'wt', encoding='utf-8') as f:
        f.write(json_string)
    print(json_string)
-
# Run the export only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|