## select
Like find_all(), select() returns every matching result as a list.
select_one() can be used instead to return only a single result.
A class is written with a period (.), an id with a hash (#), and a descendant tag with a space.
print(soup.select("p")) #pํ๊ทธ
print(soup.select(".d")) # class๊ฐ d์ธ ํ๊ทธ
print(soup.select("p.d")) # class๊ฐ d์ธ pํ๊ทธ
print(soup.select("#i")) # id๊ฐ i์ธ ํ๊ทธ
print(soup.select("p#i")) # id๊ฐ i์ธ pํ๊ทธ
print(soup.select("body p")) #body์ ์์์ธ p ํ๊ทธ
Crawling example
Inspect the page structure with the F12 developer tools.
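A static-crawling sketch of that workflow, assuming the blog used in the later examples; the div.post selector is only a guessed placeholder, so replace it with whatever the F12 developer tools actually show:
import requests
from bs4 import BeautifulSoup

url = "https://pjt3591oo.github.io"     # the blog used in the examples below
res = requests.get(url)                 # fetch the raw HTML
soup = BeautifulSoup(res.text, "lxml")

# "div.post" is an assumed selector; adjust it to the markup seen in the developer tools
for post in soup.select("div.post"):
    print(post.text.strip())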
Dynamic crawling
https://chromedriver.chromium.org/downloads
Download the chromedriver version that matches your installed Chrome version.
Selenium is not installed by default.
Enter pip install selenium in a cmd window.
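The snippets below create the driver with webdriver.Chrome("chromedriver"). On newer Selenium 4 releases the positional driver path is no longer accepted, so, depending on the version you installed, the path may have to go through a Service object instead; a sketch under that assumption:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service

# On recent Selenium 4 releases the driver path is passed through a Service object
service = Service("chromedriver")           # path to the downloaded chromedriver
driver = webdriver.Chrome(service=service)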
# How to access the DOM with Selenium
Returns a single object (same form as bs4's find()):
find_element
Returns a list of objects (same form as bs4's find_all()):
find_elements
# Connecting to a web page
from selenium import webdriver

url = "https://www.naver.com"
driver = webdriver.Chrome("chromedriver")  # path to the downloaded chromedriver
driver.get(url)  # open the url
## css_selector
Same as bs4's select().
url = "https://pjt3591oo.github.io"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
selected = driver.find_element(by = By.CSS_SELECTOR, value = "div.p")
print(selected)
print(selected.tag_name)
print(selected.text)
selected = driver.find_elements(By.CSS_SELECTOR, "div.p")
print(selected)
Accessing an element that does not exist
- Unlike bs4, accessing an element that does not exist raises an error:
NoSuchElementException
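A sketch of guarding against this, reusing the driver from the snippet above; div.missing is a selector assumed not to exist on the page:
from selenium.common.exceptions import NoSuchElementException

try:
    # "div.missing" is assumed to match nothing on this page
    selected = driver.find_element(By.CSS_SELECTOR, "div.missing")
except NoSuchElementException:
    selected = None
    print("element not found")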
Mouse control
url = "https://pjt3591oo.github.io"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
selected = driver.find_element(by = By.CSS_SELECTOR, value = "div.p a")
print(selected)
print(selected.text)
selected.click()
Page errors
If you load the DOM tree on the main page and then move to another page, the page you fetched earlier can no longer be used.
So it is best to avoid using click() for page navigation whenever possible.
It is recommended for cases where data changes within the page without a page transition.
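One way to follow this advice (my own workaround, not from the original post) is to read the link's href first and navigate with driver.get(), reusing the driver and selector from the mouse-control example above:
# Read the target URL while the element is still valid, then navigate explicitly
link = driver.find_element(By.CSS_SELECTOR, "div.p a")
href = link.get_attribute("href")  # grab the URL before leaving the page
driver.get(href)                   # navigate without relying on the old DOM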
Keyboard control
from selenium.webdriver.common.keys import Keys

url = "https://pjt3591oo.github.io/search"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
selected = driver.find_element(By.CSS_SELECTOR, "input#search-box")
selected.send_keys("test")  # type "test" into the search box
Enter key:
selected.send_keys(Keys.ENTER)  # press the Enter key
Combining selenium and bs4
page_source: returns the HTML source of the page currently loaded in the browser.
url = "https://pjt3591oo.github.io"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
soup = BeautifulSoup(driver.page_source, "lxml")
print(soup.select("div"))
url = "https://pjt3591oo.github.io/search"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
selected = driver.find_element(By.CSS_SELECTOR, "input#search-box")
selected.send_keys("test")
selected.send_keys(Keys.ENTER)
soup = BeautifulSoup(driver.page_source, "lxml")
items = soup.select("ul#search-results li")
for item in items:
    title = item.select_one("h3").text
    description = item.select_one("p").text
    print(title)
    print(description)
Example: search Naver for 고슴도치 (hedgehog), then open its 지식백과 (Naver encyclopedia) entry
Method 1
# Search Naver for 고슴도치, then open the 지식백과 entry
url = "https://www.naver.com/"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
driver.implicitly_wait(3)  # implicit wait
search = driver.find_element(By.CSS_SELECTOR, "input#query")
search.send_keys("고슴도치")  # type the search keyword
search.send_keys(Keys.ENTER)
post = driver.find_element(By.CSS_SELECTOR, "a.area_text_title")
post.click()
Method 2
url = "https://www.naver.com/"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
driver.implicitly_wait(3)  # implicit wait
search = driver.find_element(By.CSS_SELECTOR, "input#query")
search.send_keys("고슴도치")  # type the search keyword
search.send_keys(Keys.ENTER)
selected = driver.find_element(By.CSS_SELECTOR, "div.title_area a")
selected.click()
Implicit wait
driver.implicitly_wait(3)  # wait up to 3 seconds for elements to appear
import time
time.sleep(1)  # fixed wait: always pause for 1 second
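As an alternative to a fixed time.sleep(), an explicit wait blocks only until a specific element appears; a sketch that reuses the search-results selector from the earlier example:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Block for at most 3 seconds until the search results are present, then continue
WebDriverWait(driver, 3).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "ul#search-results li"))
)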