import requests

# Fetch the Naver homepage and print its raw HTML.
response = requests.get("http://www.naver.com")
# Fail fast on HTTP errors (4xx/5xx) instead of silently printing an error page.
response.raise_for_status()
html = response.text
print(html)
2. 원하는 태그를 선택하기 : BeautifulSoup
import requests
# If you see "no module named 'bs4'": run `pip install bs4` in the terminal
from bs4 import BeautifulSoup

# Ask the Naver server for the homepage.
response = requests.get("http://www.naver.com/")
# Naver returns the page HTML.
html = response.text
# Build a soup object using the built-in 'html.parser'.
soup = BeautifulSoup(html, 'html.parser')
# Find the single element whose id is NM_set_home_btn.
word = soup.select_one('#NM_set_home_btn')
# select_one() returns None when nothing matches; guard before
# touching .text, otherwise this raises AttributeError.
if word is not None:
    # Print only the text content of the element.
    print(word.text)
else:
    print("element not found: #NM_set_home_btn")
3. 뉴스제목과 링크 가져오기 : for문
import requests
from bs4 import BeautifulSoup

# Search Naver News for "ssafy" and print each result's title and URL.
response = requests.get(
    "https://search.naver.com/search.naver?where=news&sm=tab_jum&query=ssafy")
html = response.text
soup = BeautifulSoup(html, 'html.parser')
# select() returns a list of every element with class "news_tit".
links = soup.select(".news_tit")
for link in links:
    title = link.text        # text content inside the tag
    url = link.get('href')   # href attribute value (None if absent, no KeyError)
    print(title, url)
3-1 url 구성요소
4. 검색어 변경하기
import requests
from bs4 import BeautifulSoup

keyword = input("검색어를 입력하세요>>>")
# Pass the query through `params` so requests percent-encodes the keyword
# (spaces, Hangul, special characters) instead of raw string concatenation.
response = requests.get(
    "https://search.naver.com/search.naver",
    params={"where": "news", "sm": "tab_jum", "query": keyword})
html = response.text
soup = BeautifulSoup(html, 'html.parser')
links = soup.select(".news_tit")
for link in links:
    title = link.text        # text content of the result anchor
    url = link.get('href')   # link target; None instead of KeyError if missing
    print(title, url)
4-1. 검색창띄우기 (pyautogui ; 파이오토지유아이)
import requests
from bs4 import BeautifulSoup
import pyautogui

# prompt() pops up an input dialog; it returns None when the user cancels.
keyword = pyautogui.prompt("검색어를 입력하세요")
if keyword is None:
    raise SystemExit("search cancelled")

# `params` percent-encodes the keyword safely (replaces the f-string URL).
response = requests.get(
    "https://search.naver.com/search.naver",
    params={"where": "news", "sm": "tab_jum", "query": keyword})
html = response.text
soup = BeautifulSoup(html, 'html.parser')
links = soup.select(".news_tit")
for link in links:
    title = link.text        # result title text
    url = link.get('href')   # result URL; None instead of KeyError if missing
    print(title, url)
5. 여러 페이지 결과 가져오기 : 반복문
for i in range(시작, 끝, 단계)
for i in range(1, 30, 10) #1, 11, 21
import requests
from bs4 import BeautifulSoup
import pyautogui

# Scrape several result pages. Naver News paginates with &start=1, 11, 21, ...
keyword = pyautogui.prompt("검색어를 입력하세요")
lastpage = pyautogui.prompt("마지막 페이지번호를 입력해주세요")
# prompt() returns None when the user cancels either dialog;
# int(None) / a None keyword would crash below.
if keyword is None or lastpage is None:
    raise SystemExit("search cancelled")

page_num = 1
# start = 1, 11, 21, ... up to (lastpage) pages, 10 results per page.
for start in range(1, int(lastpage) * 10, 10):
    print(f"{page_num}페이지 입니다.==============================================")
    # `params` percent-encodes the keyword safely.
    response = requests.get(
        "https://search.naver.com/search.naver",
        params={"where": "news", "sm": "tab_jum",
                "query": keyword, "start": start})
    html = response.text
    soup = BeautifulSoup(html, 'html.parser')
    links = soup.select(".news_tit")
    for link in links:
        title = link.text        # result title text
        url = link.get('href')   # result URL; None instead of KeyError if missing
        print(title, url)
    page_num += 1