## select
find_all()과 마찬가지로 매칭되는 모든 결과를 리스트로 반환
select_one()으로 하나의 결과만 반환하는 것도 가능
클래스는 마침표(.), 아이디는 샵(#)으로, 자손태그는 띄어쓰기로 표현
print(soup.select("p")) #pํ๊ทธ
print(soup.select(".d")) # class๊ฐ d์ธ ํ๊ทธ
print(soup.select("p.d")) # class๊ฐ d์ธ pํ๊ทธ
print(soup.select("#i")) # id๊ฐ i์ธ ํ๊ทธ
print(soup.select("p#i")) # id๊ฐ i์ธ pํ๊ทธ
print(soup.select("body p")) #body์ ์์์ธ p ํ๊ทธ
크롤링 예제
f12 개발자 도구로 조회
동적 크롤링
https://chromedriver.chromium.org/downloads
ChromeDriver - WebDriver for Chrome - Downloads
Current Releases If you are using Chrome version 114, please download ChromeDriver 114.0.5735.16 If you are using Chrome version 113, please download ChromeDriver 113.0.5672.63 If you are using Chrome version 112, please download ChromeDriver 112.0.5615.49
chromedriver.chromium.org
내 크롬 버전과 맞는 버전을 다운로드 받는다.
์ ๋ ๋์ด ์์๋์ง ์๋๋ค.
cmd์ฐฝ์ pip install senium์ ์ ๋ ฅํ๋ค.
#Selenum์ผ๋ก Dom์ ์ ๊ทผํ๋ ๋ฐฉ๋ฒ
๋จ์ผ ๊ฐ์ฒด ๋ฐํ(bs4์ find()์ ๊ฐ์ ํํ)
find_element
๋ฆฌ์คํธ ๊ฐ์ฒด ๋ฐํ(bs4์ find_all()๊ณผ ๊ฐ์ ํํ)
find_elements
# 웹 접속하기
url = "https://www.naver.com"
driver = webdriver.Chrome("chromedriver")
driver.get(url) # url 접속
## css_selector
bs4의 select()와 동일
url = "https://pjt3591oo.github.io"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
selected = driver.find_element(by = By.CSS_SELECTOR, value = "div.p")
print(selected)
print(selected.tag_name)
print(selected.text)
selected = driver.find_elements(By.CSS_SELECTOR, "div.p")
print(selected)
없는 요소 접근
- bs4와는 다르게 없는 요소에 접근하면 에러를 띄움
NoSuchElementException
마우스 제어
url = "https://pjt3591oo.github.io"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
selected = driver.find_element(by = By.CSS_SELECTOR, value = "div.p a")
print(selected)
print(selected.text)
selected.click()
์ค๋ฅ ํ์ด์ง
๋ฉ์ธ ํ์ด์ง์์ ๋ํธ๋ฆฌ๋ฅผ ๋ถ๋ฌ์ค๊ณ ๋ค๋ฅธ ํ์ด์ง๋ก ๋์ด๊ฐ๋ฉด ๊ทธ ์ ์ ๊ฐ์ ธ์จ ํ์ด์ง๋ ์ฌ์ฉํ ์ ์๊ฒ ๋จ
๋ฐ๋ผ์ click์ ํ์ด์ง ์ด๋ ์ฉ๋๋ก ์ฌ์ฉํ๋ ๊ฒ์ ๊ฐ๊ธ์ ํผํ๋ ๊ฒ์ด ์ข์
ํ์ด์ง ๋ณํ ์์ด ํ์ด์ง ๋ด์์ ๋ฐ์ดํฐ๊ฐ ๋ณํ๋๋ ๊ฒฝ์ฐ์ ์ฌ์ฉํ๋ ๊ฒ์ ๊ถ์ฅ
키보드 제어
url = "https://pjt3591oo.github.io/search"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
selected = driver.find_element(By.CSS_SELECTOR, "input#search-box")
selected.send_keys("test")
엔터키
selected.send_keys(Keys.ENTER)
selenium๊ณผ bs4์ ์กฐํฉ
page_source : ํ์ฌ ์น ๋ธ๋ผ์ฐ์ ์ HTML ์ฝ๋๋ฅผ ๊ฐ์ ธ์ด
url = "https://pjt3591oo.github.io"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
soup = BeautifulSoup(driver.page_source, "lxml")
print(soup.select("div"))
url = "https://pjt3591oo.github.io/search"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
selected = driver.find_element(By.CSS_SELECTOR, "input#search-box")
selected.send_keys("test")
selected.send_keys(Keys.ENTER)
soup = BeautifulSoup(driver.page_source, "lxml")
items = soup.select("ul#search-results li")
for item in items:
title = item.select_one("h3").text
description = item.select_one("p").text
print(title)
print(description)
예제 # 네이버에서 고슴도치 검색 후 고슴도치 지식백과 접속
방법 1
# 네이버에서 고슴도치 검색 후 고슴도치 지식백과 접속
url = "https://www.naver.com/"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
driver.implicitly_wait(3) # 묵시적 대기
search = driver.find_element(By.CSS_SELECTOR, "input#query")
search.send_keys("고슴도치")
search.send_keys(Keys.ENTER)
post = driver.find_element(By.CSS_SELECTOR, "a.area_text_title")
post.click()
방법 2
url = "https://www.naver.com/"
driver = webdriver.Chrome("chromedriver")
driver.get(url)
driver.implicitly_wait(3) # 묵시적 대기
search = driver.find_element(By.CSS_SELECTOR, "input#query")
search.send_keys("고슴도치")
search.send_keys(Keys.ENTER)
selected = driver.find_element(By.CSS_SELECTOR, "div.title_area a")
selected.click()
묵시적 대기
driver.implicitly_wait(3) # 요소를 찾을 때까지 최대 3초 대기
import time
time.sleep(1)
'๐์น ๊ฐ๋ฐ(Web) > ๐ํ์ด์ฌ(Python)' ์นดํ ๊ณ ๋ฆฌ์ ๋ค๋ฅธ ๊ธ
ํ์ด์ฌ ์น ํฌ๋กค๋ง Beautiful Soup (2) | 2023.05.09 |
---|---|
ํ์ด์ฌ json (0) | 2023.02.08 |
ํ์ผ๋ณต์ฌ/csvํ์ผ ์ฝ๊ธฐ ์ฐ๊ธฐ (0) | 2023.02.08 |
ํ์ผ ๋ง๋ค๊ณ ์ฝ๊ธฐ (0) | 2023.02.06 |
์ง์ญ๋ณ์(local) ์ ์ญ๋ณ์(global) (0) | 2023.02.01 |