Selenium
네이버 주식 크롤링
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import requests
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
 
# Crawl the "#_topItems1" table on the Naver Finance front page and print
# one line per row: item name, price, signed change, rate.
service = Service()
d = webdriver.Chrome(service=service)

try:
    url = 'https://finance.naver.com/'
    d.get(url)
    # Wait up to 10 seconds for the table rows to be present in the DOM.
    trs = WebDriverWait(d, 10).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#_topItems1 > tr'))
    )
    # The row's class attribute ("up" / "down") selects the sign prefix;
    # any other class yields no sign.
    signs = {"up": "+", "down": "-"}
    for tr in trs:
        sign = signs.get(tr.get_attribute("class"), "")
        item = tr.find_element(By.CSS_SELECTOR, "th > a").text.strip()
        tds = tr.find_elements(By.CSS_SELECTOR, "td")
        price = tds[0].text.strip()
        num = tds[1].text.strip().replace("\n", "")
        rate = tds[2].text.strip()
        # The sign was previously computed but never shown; prefix it to
        # the second column's value.
        print(f"{item}, {price}, {sign}{num}, {rate}")
except Exception as e:
    print(e)
finally:
    # quit() closes every window and ends the driver session. A preceding
    # close() is redundant and, if it raised, would skip quit().
    d.quit()
인스타그램
Selenium 정보 저장
- 빈 파일 생성
- 로그인정보를 저장하기 위해 추가된 코드
# Save Chrome profile data in a user-data directory so that information such
# as the login session is kept. Replace the {...} placeholder in the argument
# below with a real directory path.
options = webdriver.ChromeOptions()
options.add_argument("--user-data-dir={디렉토리 경로}")
service = Service()
d = webdriver.Chrome(service=service, options=options)
- 코드 중간에 input()을 넣어 두고 실행 (코드 일시정지 용도)
- 열린 크롬 창에서 인스타 로그인
- input()에 입력 하여 코드 종료
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import requests
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
 
# Store Chrome profile data in a dedicated user-data directory so the
# saved login information is reused on every run.
options = webdriver.ChromeOptions()
options.add_argument("--user-data-dir=/Users/syshin/Desktop/Syshin/SeSAC/파이썬 데이터 처리 프로그래밍/4일차-/MyChrome")
service = Service()
d = webdriver.Chrome(service=service, options=options)


try:
    url = 'https://www.instagram.com/'
    d.get(url)

    # Search button: the third element matching ".xvy4d1p".
    buttons = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".xvy4d1p")))
    # Alternative without an explicit wait:
    # buttons = d.find_elements(By.CSS_SELECTOR, ".xvy4d1p")
    search_button = buttons[2]

    # Click the search button.
    ac = ActionChains(d)
    ac.move_to_element(search_button).click().pause(1)
    ac.perform()

    # Type the hashtag into the search input, then click 100px below the
    # current pointer position.
    elem = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, ".x7xwk5j")))
    ac.reset_actions()
    # Alternative: ac.move_to_element(elem).click().send_keys("#파이썬")
    ac.send_keys_to_element(elem, "#파이썬")
    ac.pause(1)
    ac.move_by_offset(0, 100)
    ac.click()
    ac.perform()

    # Open each post, print its caption text, then close it with ESC.
    posts = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "._aabd")))
    for post in posts:
        ac.reset_actions()
        ac.click(post)
        ac.perform()
        content = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "._a9zs > ._aade")))
        print(content.text)
        ac.reset_actions()
        ac.send_keys(Keys.ESCAPE)
        ac.perform()

except Exception as e:
    print(e)
finally:
    time.sleep(3)
    # quit() closes every window and ends the driver session. A preceding
    # close() is redundant and, if it raised, would skip quit().
    d.quit()
    
인스타 크롤링 고양이 검색, 엑셀에 저장 실습
from selenium import webdriver
from openpyxl import Workbook
from selenium.webdriver import ChromeOptions
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
 
# This script uses Service and time, which its own import list omits;
# import them here so the script runs on its own.
import time
from selenium.webdriver.chrome.service import Service

# Workbook that collects one post caption per row.
wb = Workbook()
ws = wb.active

options = ChromeOptions()

# options.add_argument("--headless")  # run without showing the browser window
# options.add_argument("--window-size=1920,1080")  # set the browser window size
options.add_argument("--user-data-dir=/Users/syshin/Desktop/Syshin/SeSAC/파이썬 데이터 처리 프로그래밍/4일차-/MyChrome")
service = Service()
d = webdriver.Chrome(service=service, options=options)
url = "https://www.instagram.com"

try:
    d.get(url)
    # Search button: the third element matching ".xvy4d1p".
    buttons = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.xvy4d1p')))
    search_button = buttons[2]

    ac = ActionChains(d)
    ac.move_to_element(search_button).click().pause(1).perform()

    elem = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "input.x7xwk5j")))

    # Type the hashtag, wait 2 seconds, then send TAB, TAB, ENTER.
    ac.reset_actions()
    ac.send_keys_to_element(elem, "#고양이").pause(2)
    ac.send_keys(Keys.TAB, Keys.TAB, Keys.ENTER).perform()

    ac = ActionChains(d)
    posts = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div._al3l > a._a6hd")))

    # Open each post, append its caption text to the sheet, then close the
    # post with ESC.
    for post in posts:
        ac.reset_actions()
        ac.click(post).perform()
        content = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "div._a9zs> h1._aade"))).text
        ac.reset_actions()
        ws.append([content])
        ac.send_keys(Keys.ESCAPE).perform()
except Exception as e:
    print(e)

finally:
    try:
        time.sleep(3)
        # quit() alone closes all windows and ends the driver session.
        d.quit()
    finally:
        # Save the workbook even if shutting down the browser fails, so
        # the rows collected so far are not lost.
        wb.save("instagram.xlsx")
미니프로젝트-멜론, 인스타에서 TOP 100 가수,곡 정보 크롤링, DB 저장
예외(Exception) 발생 위치 나타내기
import traceback

try:
    ...  # code that may raise an exception goes here
except Exception:
    # Print the full stack trace, which shows where the exception occurred.
    traceback.print_exc()
- 가수 테이블 생성
- id(Autoincrement 필드, PK)
- name(가수이름)
- follower(팔로워 수)
- song 테이블 생성
- id(Autoincrement, PK)
- singer_id(FK)
- title(제목)
- album(앨범)
- 멜론 TOP 100 수집
- 멜론 TOP 100에 존재하는 가수들을 singer테이블에 저장
- 각 가수의 곡들은 SONG 테이블에 저장
- 단, singer 테이블의 follower는 0으로 저장
- instagram에서 가수 검색 follower 수집
import traceback
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import pymysql
from selenium.webdriver.chrome.service import Service
import time
 
# Mini project: collect the Melon chart into the `singer` and `song` tables,
# then look each singer up on Instagram and store the follower count.
db = pymysql.connect(host="localhost", port=3306, user="root", password="{비밀번호}", db="sba")
cursor = db.cursor()

options = webdriver.ChromeOptions()
options.add_argument("--user-data-dir=/Users/syshin/Desktop/Syshin/SeSAC/파이썬 데이터 처리 프로그래밍/4일차-/MyChrome")
service = Service()
d = webdriver.Chrome(service=service, options=options)
url_melon = "https://www.melon.com/chart/index.htm"
url_instagram = "https://www.instagram.com"

try:
    # Melon chart rows.
    d.get(url_melon)
    trs = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "#frm > div > table > tbody > tr")))
    already = set()  # singers already inserted in this run

    # Reset the tables (song first, since it references singer).
    sql_delete_song = 'DELETE FROM song;'
    cursor.execute(sql_delete_song)
    sql_delete_singer = 'DELETE FROM singer;'
    cursor.execute(sql_delete_singer)

    # Parameterized statements: the driver escapes the scraped values, so
    # quotes in titles/names no longer need to be rewritten and cannot
    # break or inject into the SQL.
    sql_singer = "INSERT INTO singer VALUES(NULL, %s, 0)"
    sql_song = "INSERT INTO song VALUES(NULL, (SELECT id FROM singer WHERE name=%s LIMIT 1), %s, %s)"

    for tr in trs:
        # Collect title, singer and album for this row.
        title = tr.find_element(By.CSS_SELECTOR, 'div.rank01 > span > a').text.strip()
        singer = tr.find_element(By.CSS_SELECTOR, 'div.ellipsis.rank02 > a').text.strip()
        album = tr.find_element(By.CSS_SELECTOR, 'div.ellipsis.rank03 > a').text.strip()

        # Insert each singer once, with follower initialised to 0.
        if singer not in already:
            cursor.execute(sql_singer, (singer,))
            already.add(singer)

        # Insert the song, linked to its singer by name lookup.
        cursor.execute(sql_song, (singer, title, album))
    db.commit()

    # Read the singer names back from the DB.
    select_sql = "SELECT name FROM singer"
    cursor.execute(select_sql)
    singer_list = cursor.fetchmany(size=100)

    # Open Instagram and collect follower counts.
    d.get(url_instagram)
    ac = ActionChains(d)

    # Search button: the third element matching ".xvy4d1p".
    buttons = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.xvy4d1p')))
    search_button = buttons[2]
    ac.move_to_element(search_button).click().pause(1).perform()

    # The search button is re-clicked only after the first profile visit;
    # later visits just clear the search input.
    first = True

    sql_update_singer = "UPDATE singer SET follower = %s WHERE name = %s"

    # Each fetched row is a 1-tuple holding the singer name.
    for (singer,) in singer_list:
        elem = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "input.x7xwk5j")))
        ac.reset_actions()
        ac.send_keys_to_element(elem, singer).pause(2).perform()

        # Elements matching this selector are treated as the official
        # account marker in the search results.
        authorized = d.find_elements(By.CSS_SELECTOR, 'div.x9f619.xjbqb8w.x1rg5ohu.x168nmei.x13lgxp2.x5pf9jr.xo71vjh.xsgj6o6.x1uhb9sk.x1plvlek.xryxfnj.x1c4vz4f.x2lah0s.xdt5ytf.xqjyukv.x1qjc9v5.x1oa3qoh.x1nhvcw1')
        ac.reset_actions()
        if authorized:
            ac.move_to_element(authorized[0]).click().pause(2).perform()
            followers = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'span._ac2a > span')))

            # Store the text of the second matched span as the follower value.
            cursor.execute(sql_update_singer, (followers[1].text, singer))

            # Re-open the search panel (first time) or clear the input.
            if first:
                buttons = WebDriverWait(d, 10).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.xvy4d1p')))
                search_button = buttons[2]
                ac.reset_actions()
                ac.move_to_element(search_button).click().pause(1).perform()
                first = False
            else:
                elem = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "input.x7xwk5j")))
                elem.clear()

        # No official account found: clear the search input.
        else:
            time.sleep(1)
            elem = WebDriverWait(d, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "input.x7xwk5j")))
            elem.clear()
except Exception as e:
    # print(e)
    traceback.print_exc()
finally:
    try:
        # Shut down the webdriver; quit() alone closes all windows and
        # ends the session.
        d.quit()
    finally:
        # Commit and close the DB even if the browser shutdown fails.
        try:
            db.commit()
        finally:
            db.close()