Selenium
네이버 주식 크롤링
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import requests
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
service = Service()
d = webdriver.Chrome(service=service)
try:
url = 'https://finance.naver.com/'
d.get(url)
trs = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#_topItems1 > tr')))
for tr in trs:
updown = tr.get_attribute("class")
if updown == "up":
sign = "+"
elif updown == "down":
sign = "-"
else: sign = ""
item = tr.find_element(By.CSS_SELECTOR, "th > a").text.strip()
tds = tr.find_elements(By.CSS_SELECTOR, "td")
price = tds[0].text.strip()
num = tds[1].text.strip().replace("\n", "")
rate = tds[2].text.strip()
print(f"{item}, {price}, {num}, {rate}")
except Exception as e:
print(e)
finally:
d.close()
d.quit()
인스타그램
Selenium 정보 저장
- 빈 파일 생성
- 로그인정보를 저장하기 위해 추가된 코드
# 크롬에 정보 저장
options = webdriver.ChromeOptions()
options.add_argument("--user-data-dir={디렉토리 경로}")
service = Service()
d = webdriver.Chrome(service=service, options=options)
- 코드 중간에
input()
을 넣어 두고 실행 (코드 일시정지 용도)
- 열린 크롬 창에서 인스타 로그인
input()
에 입력 하여 코드 종료
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import requests
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
# 크롬에 정보 저장
options = webdriver.ChromeOptions()
options.add_argument("--user-data-dir=/Users/syshin/Desktop/Syshin/SeSAC/파이썬 데이터 처리 프로그래밍/4일차-/MyChrome")
service = Service()
d = webdriver.Chrome(service=service, options=options)
try:
url = 'https://www.instagram.com/'
d.get(url)
# 검색 버튼
buttons = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".xvy4d1p")))
# buttons = d.find_elements(By.CSS_SELECTOR, ".xvy4d1p")
search_button = buttons[2]
# 검색 버튼 클릭
ac = ActionChains(d)
ac.move_to_element(search_button).click().pause(1)
ac.perform()
elem = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".x7xwk5j")))
ac.reset_actions()
# ac.move_to_element(elem).click().send_keys("#파이썬")
ac.send_keys_to_element(elem, "#파이썬")
ac.pause(1)
ac.move_by_offset(0,100)
ac.click()
ac.perform()
posts = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "._aabd")))
for post in posts:
ac.reset_actions()
ac.click(post)
# ac.pause(1)
ac.perform()
content = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "._a9zs > ._aade")))
print(content.text)
ac.reset_actions()
ac.send_keys(Keys.ESCAPE)
ac.perform()
except Exception as e:
print(e)
finally:
time.sleep(3)
d.close()
d.quit()
인스타 크롤링 고양이 검색, 엑셀에 저장 실습
from selenium import webdriver
from openpyxl import Workbook
from selenium.webdriver import ChromeOptions
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
wb = Workbook()
ws = wb.active
options = ChromeOptions()
# options.add_argument("--headless") # 브라우저 안뜬 상태로 진행
# options.add_argument("--window-size=1920,1080)") # 브라우저 사이즈 조절
options.add_argument("--user-data-dir=/Users/syshin/Desktop/Syshin/SeSAC/파이썬 데이터 처리 프로그래밍/4일차-/MyChrome")
service = Service()
d = webdriver.Chrome(service=service, options=options)
url = "https://www.instagram.com"
try:
d.get(url)
buttons = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.xvy4d1p')))
search_button = buttons[2]
ac = ActionChains(d)
ac.move_to_element(search_button).click().pause(1).perform()
elem = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "input.x7xwk5j")))
ac.reset_actions()
ac.send_keys_to_element(elem, "#고양이").pause(2)
ac.send_keys(Keys.TAB, Keys.TAB, Keys.ENTER).perform()
ac = ActionChains(d)
posts = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div._al3l > a._a6hd")))
for post in posts:
ac.reset_actions()
ac.click(post).perform()
content = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div._a9zs> h1._aade"))).text
ac.reset_actions()
ws.append([content])
ac.send_keys(Keys.ESCAPE).perform()
except Exception as e:
print(e)
finally:
time.sleep(3)
d.close()
d.quit()
wb.save("instagram.xlsx")
미니프로젝트-멜론, 인스타에서 TOP 100 가수,곡 정보 크롤링, DB 저장
Except 위치 나타내기
> import traceback
try:
except:
traceback.print_exc()
- 가수 테이블 생성
- id(Autoincrement 필드, PK)
- name(가수이름)
- follower(팔로우 수)
- song 테이블 생성
- id(Autoincrement, PK)
- singer_id(FK)
- title(제목)
- album(앨범)
- 멜론 TOP 100 수집
- 멜론 TOP 100에 존재하는 가수들을 singer테이블에 저장
- 각 가수의 곡들은 SONG 테이블에 저장
- 단, singer테이블에 follower으로 0으로 저장
- instagram에서 가수 검색 follower 수집
import traceback
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import pymysql
from selenium.webdriver.chrome.service import Service
import time
db = pymysql.connect(host="localhost", port=3306, user="root", password="{비밀번호}", db="sba")
cursor = db.cursor()
options = webdriver.ChromeOptions()
options.add_argument("--user-data-dir=/Users/syshin/Desktop/Syshin/SeSAC/파이썬 데이터 처리 프로그래밍/4일차-/MyChrome")
service = Service()
d = webdriver.Chrome(service=service, options=options)
url_melon = "https://www.melon.com/chart/index.htm"
url_instagram = "https://www.instagram.com"
try:
# 멜론 TOP 100
d.get(url_melon)
trs = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#frm > div > table > tbody > tr")))
already = []
# DB 초기화
sql_delete_song = 'DELETE FROM song;'
cursor.execute(sql_delete_song)
sql_delete_singer = 'DELETE FROM singer;'
cursor.execute(sql_delete_singer)
for tr in trs:
# 곡, 가수, 앨범명 수집
title = tr.find_element(By.CSS_SELECTOR, 'div.rank01 > span > a').text.strip().replace("'", '"')
singer = tr.find_element(By.CSS_SELECTOR, 'div.ellipsis.rank02 > a').text.strip().replace("'", '"')
album = tr.find_element(By.CSS_SELECTOR, 'div.ellipsis.rank03 > a').text.strip().replace("'", '"')
# 쿼리 생성 (SINGER)
if singer not in already:
sql_singer = f"""
INSERT INTO singer VALUES(NULL, '{singer}', 0)
"""
cursor.execute(sql_singer)
already.append(singer) # 중복 확인
# 쿼리 생성 (SONG)
sql_song = f"""
INSERT INTO song VALUES(NULL, (SELECT id FROM singer WHERE name='{singer}' LIMIT 1), '{title}', '{album}')
"""
cursor.execute(sql_song)
db.commit()
# 가수명 DB에서 검색
select_sql = "SELECT name FROM singer"
cursor.execute(select_sql)
singer_list = cursor.fetchmany(size=100)
# 인스타 접속, 공식 계정의 팔로워 수 수집, DB에 저장
d.get(url_instagram)
authorized = []
ac = ActionChains(d)
# 검색 버튼
buttons = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.xvy4d1p')))
search_button = buttons[2]
ac.move_to_element(search_button).click().pause(1).perform()
first = True # 첫번째 검색시에만 검색버튼을 눌러줘야 하므로 첫번째인지 변수에 저장
# 가수마다 공식계정 검색
for singers in singer_list:
for singer in singers:
elem = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "input.x7xwk5j")))
ac.reset_actions()
ac.send_keys_to_element(elem, singer).pause(2).perform()
# 공식계정 있을 시 팔로워수 DB에 저장
authorized = []
authorized = d.find_elements(By.CSS_SELECTOR, 'div.x9f619.xjbqb8w.x1rg5ohu.x168nmei.x13lgxp2.x5pf9jr.xo71vjh.xsgj6o6.x1uhb9sk.x1plvlek.xryxfnj.x1c4vz4f.x2lah0s.xdt5ytf.xqjyukv.x1qjc9v5.x1oa3qoh.x1nhvcw1')
ac.reset_actions()
if authorized:
ac.move_to_element(authorized[0]).click().pause(2).perform()
followers = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'span._ac2a > span')))
# DB 저장
sql_update_singer = f"""
UPDATE singer SET follower = '{followers[1].text}' WHERE name = '{singer}'
"""
cursor.execute(sql_update_singer)
# 검색 버튼 다시 열기
if first:
buttons = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.xvy4d1p')))
search_button = buttons[2]
ac.reset_actions()
ac.move_to_element(search_button).click().pause(1).perform()
first = False
else:
elem = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "input.x7xwk5j")))
elem.clear()
# 공식 계정이 없을 시, 검색창 비움
else:
time.sleep(1)
elem = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "input.x7xwk5j")))
elem.clear()
except Exception as e:
# print(e)
traceback.print_exc()
finally:
# websriver 종료
d.close()
d.quit()
# DB 커밋, 종료
db.commit()
db.close()