티스토리 뷰

사용 라이브러리 : BeautifulSoup4

사용 파이썬 버전 : 2.7

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from __future__ import unicode_literals
import requests
from bs4 import BeautifulSoup
from urlparse import urljoin
import re
import sys
# Python 2 only: reload(sys) makes sys.setdefaultencoding reachable again so
# the script can switch implicit str/unicode conversions to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')
 
def krcert_news(interval, timeout=10):
    """Scrape KrCERT trend articles and append them to ``krert_article.txt``.

    For every listing page, the article links found in ``td.colTit`` cells
    are collected, each article page is downloaded, and one record per
    article is appended to the output file: ``<title>[<link>]``, a newline,
    then the text of the article's ``table.basicView`` body.

    Args:
        interval: Number of the last listing page to fetch; pages
            ``1..interval`` are requested. Values below 1 fetch nothing.
        timeout: Seconds to wait for each HTTP request (forwarded to
            ``requests.get``).
    """
    import io  # local import so the module-level import block is untouched

    # Compiled once: the numeric id that follows "bulletin_writing_sequence=".
    # "\d+" instead of a fixed five digits so longer/shorter ids still work.
    sequence_pattern = re.compile(r"\d+")
    list_url_template = (
        'http://krcert.or.kr/data/trendList.do'
        '?page={0}&sort_code=&search_sort=title_name&search_word='
    )
    view_url_template = (
        "https://www.krcert.or.kr/data/trendView.do"
        "?bulletin_writing_sequence={0}"
    )

    # NOTE(review): the original iterated range(interval + 1) and therefore
    # also requested page 0; its unused ``index = 1`` initialiser suggests
    # pages were meant to start at 1 -- confirm against the site.
    for page in range(1, interval + 1):
        title_url = list_url_template.format(page)
        title_response = requests.get(title_url, timeout=timeout)
        title_soup = BeautifulSoup(title_response.content, "html.parser")

        for cell in title_soup.find_all("td", {"class": "colTit"}):
            anchor = cell.find('a')
            if anchor is None or not anchor.get('href'):
                continue  # title cell without a usable link

            krcert_link = urljoin(title_url, anchor['href'])
            _, marker, tail = krcert_link.partition("bulletin_writing_sequence=")
            matched = sequence_pattern.match(tail) if marker else None
            if matched is None:
                continue  # link does not carry an article id
            article_number = matched.group(0)

            content_url = view_url_template.format(article_number)
            content_response = requests.get(content_url, timeout=timeout)
            content_soup = BeautifulSoup(content_response.content, "html.parser")

            bodies = [
                table.find('tbody')
                for table in content_soup.find_all("table", {"class": "basicView"})
            ]
            bodies = [body for body in bodies if body is not None]
            if not bodies:
                # Skip instead of writing a stale or undefined article body.
                continue

            f_title = anchor.get_text() + "[" + krcert_link + "]"
            # As before, the last matching table provides the article text.
            f_content = bodies[-1].text

            # Explicit UTF-8 so writing does not depend on the interpreter's
            # default encoding.
            with io.open('krert_article.txt', 'a', encoding='utf-8') as files:
                files.write(f_title + '\n' + f_content)
                
if __name__ == "__main__":
    # Page number (one page = 10 posts).
    krcert_news(1)
 
cs


※ 문제가 있을 시에 삭제 조치 하겠습니다.

댓글
공지사항
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
링크
«   2024/05   »
1 2 3 4
5 6 7 8 9 10 11
12 13 14 15 16 17 18
19 20 21 22 23 24 25
26 27 28 29 30 31
글 보관함