ooguz
/
web-crawler-rap


			
				
					
						
						
							1234567891011121314151617181920212223242526272829303132333435
							#!/usr/bin/python3

# web-crawler-rap v0.0.1
# Copyright (C) 2017 Özcan Oğuz, Zeynep Topsakal
#
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU General
# Public License as published by the Free Software Foundation, either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the 
# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along with this program.
# If not, see <http://www.gnu.org/licenses/>.

from bs4 import BeautifulSoup;
import requests;
import urllib;
import re;

# Patterns are compiled once here instead of being re-parsed for every song.
TAG_PATTERN = re.compile(r'<.*?>')
LABEL_PATTERN = re.compile(r'.*[;:]')


def strip_markup(html):
    """Turn the HTML of the lyrics container into plain text.

    Two substitutions are applied, in this order:

    1. every ``<...>`` span that sits on a single line becomes a newline;
    2. on each line, everything up to and including the last ``;`` or ``:``
       becomes a newline.

    Args:
        html: The markup to clean, as a string.

    Returns:
        The cleaned text.
    """
    without_tags = TAG_PATTERN.sub('\n', html)
    return LABEL_PATTERN.sub('\n', without_tags)


def page_title(crawler):
    """Return the string of ``html > head > title``, or None if any part is missing.

    Args:
        crawler: A parsed document exposing child tags as attributes.

    Returns:
        The title's ``string`` attribute, or None when the ``html``, ``head``
        or ``title`` element cannot be reached.
    """
    node = crawler
    for tag in ("html", "head", "title"):
        # getattr on None falls through to the default, so a missing level
        # simply propagates None instead of raising AttributeError.
        node = getattr(node, tag, None)
    return None if node is None else node.string


def safe_filename(title):
    """Build a file-name stem from a page title.

    Path separators are replaced so a title such as ``"A/B"`` cannot point
    into a sub-directory that does not exist. A missing title (None) yields
    ``"None"``, which is what formatting None into the path produces.

    Args:
        title: The page title, or None.

    Returns:
        The title as a string with every ``/`` replaced by ``_``.
    """
    return str(title).replace('/', '_')


def main():
    """Fetch every URL listed in ``sarkilar.txt`` and append its lyrics to a file.

    For each non-blank line of ``sarkilar.txt`` the page is downloaded, the
    ``<div class="reading">`` element is located, its markup is stripped and
    the result is appended to ``sozler/<page title>.txt``. A progress line is
    printed for every processed URL.

    Raises:
        OSError: If ``sarkilar.txt`` cannot be read or an output file cannot
            be written.
        requests.exceptions.RequestException: If a download fails or times out.
    """
    import os

    # The output directory is created up front so the first run does not fail.
    os.makedirs('sozler', exist_ok=True)

    with open('sarkilar.txt', 'r') as sarkiDosyasi:
        sarki = sarkiDosyasi.readlines()

    for i, satir in enumerate(sarki):
        # Lines keep their trailing newline; blank lines are not URLs.
        adres = satir.strip()
        if not adres:
            continue

        # A timeout keeps one stalled server from hanging the whole run.
        source = requests.get(adres, timeout=30)
        crawler = BeautifulSoup(source.content, "html.parser")
        baslik = page_title(crawler)

        sozlerHam = crawler.find('div', {"class": "reading"})
        if sozlerHam is None:
            # Without this guard the literal text "None" would be written as lyrics.
            print(i, "numaralı", baslik, "atlandı: sözler bulunamadı.")
            continue

        hedef = 'sozler/{}.txt'.format(safe_filename(baslik))
        # Explicit UTF-8 so Turkish characters do not depend on the locale;
        # the context manager closes the file after each song.
        with open(hedef, 'a', encoding='utf-8') as dosya:
            dosya.write(strip_markup(str(sozlerHam)))
        print(i, "numaralı", baslik, "yazıldı.")


if __name__ == "__main__":
    main()