#!/usr/bin/python3
# sozler.py
# web-crawler-rap v0.0.1
# Copyright (C) 2017 Özcan Oğuz, Zeynep Topsakal
#
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU General
# Public License as published by the Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along with this program.
# If not, see <http://www.gnu.org/licenses/>.
import os
import re
import urllib

import requests
from bs4 import BeautifulSoup
  19. sarkiDosyasi = open('sarkilar.txt', 'r');
  20. sarki = sarkiDosyasi.readlines();
  21. for i in range(0, len(sarki)):
  22. source = requests.get(sarki[i]);
  23. icerik = source.content;
  24. crawler = BeautifulSoup(icerik,"html.parser");
  25. dosya = open('sozler/{}.txt'.format(crawler.html.head.title.string),'a');
  26. sozlerHam = crawler.find('div', {"class" : "reading"});
  27. sozler = str(sozlerHam);
  28. sarkiSozleri = re.sub('<.*?>', '\n', sozler);
  29. sarkiSozleri = re.sub('.*[;:]', '\n', sarkiSozleri);
  30. dosya.write(sarkiSozleri);
  31. print (i, "numaralı" , crawler.html.head.title.string , "yazıldı.");