네이버 지도 리뷰 크롤링 문제

조회수 36회
from selenium.webdriver.common.by import By
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from openpyxl import Workbook
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
import time
import datetime
import requests

# 크롬 옵션 설정
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options as ChromeOptions

# Launch options for an unattended Chrome session.
options = ChromeOptions()
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"
options.add_argument('--user-agent=' + user_agent)
options.add_argument("--lang=ko_KR")
options.add_argument('--headless')  # run in headless mode (no browser window is shown)
# Chrome documents this switch as "width,height" (comma-separated); the
# "1920x1080" spelling is not the documented form and may be ignored.
options.add_argument('--window-size=1920,1080')
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")

# Resolve the chromedriver executable through webdriver_manager and wrap it in a Service
service = ChromeService(
    executable_path=ChromeDriverManager().install(),
)

# Start Chrome using that service together with the configured options
driver = webdriver.Chrome(options=options, service=service)

# Page to scrape: the visitor-review tab of one place, requested with reviewSort=recent
url = (
    'https://m.place.naver.com/restaurant/1085956231'
    '/review/visitor'
    '?entry=ple&reviewSort=recent'
)

# BS4 setting for secondary access: a requests session with automatic retries
session = requests.Session()
# NOTE(review): `headers` holds a placeholder value and is not attached to the
# session here; pass it per request or call session.headers.update(headers)
# once a real User-Agent is filled in.
headers = {
    "User-Agent": "user value"}

# Retry up to 5 times on the listed 5xx responses, with a short backoff between tries
retries = Retry(total=5,
                backoff_factor=0.1,
                status_forcelist=[500, 502, 503, 504])

# Mount the retrying adapter for both schemes. The scraped URL is https, so an
# http-only mount would leave its requests without the retry policy.
adapter = HTTPAdapter(max_retries=retries)
session.mount('http://', adapter)
session.mount('https://', adapter)

# New xlsx file
now = datetime.datetime.now()  # timestamp later embedded in the output file name
xlsx = Workbook()
# Workbook() already contains one empty default sheet. Reuse it as the output
# sheet rather than creating a second one, so the saved file does not open on
# a blank sheet placed in front of the data.
list_sheet = xlsx.active
list_sheet.title = 'output'
# Header row; each review is appended below it in the same column order
list_sheet.append(['nickname', 'content', 'date', 'revisit'])

# Start crawling/scraping!
#
# Flow: open the review page, click the "더보기" (load more) button until it can
# no longer be clicked, parse the rendered HTML with BeautifulSoup, append one
# row per review to `list_sheet`, and save the workbook. The workbook is also
# saved when an error interrupts the run, and the browser is always closed.

# One file name for both the normal and the error path
file_name = 'naver_review_' + now.strftime('%Y-%m-%d_%H-%M-%S') + '.xlsx'

try:
    print("Starting webdriver and accessing URL...")
    driver.get(url)
    # Implicit wait: subsequent element lookups wait up to 30 s for a match
    driver.implicitly_wait(30)

    print("Page loaded. Scrolling down...")
    driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)

    # NOTE(review): this is an absolute, position-based XPath; it stops matching
    # as soon as the page layout shifts. If the loop ends after 0 clicks, verify
    # this locator against the live DOM first.
    more_button_xpath = '//*[@id="app-root"]/div/div/div/div[6]/div[2]/div[3]/div[2]/div/a'

    count = 0
    try:
        while True:
            print("Clicking on '더보기' button...")
            driver.find_element(By.XPATH, more_button_xpath).click()
            count += 1
            print(f"'더보기' button clicked {count} times.")
            time.sleep(0.4)
    except Exception as exc:
        # Any failure to locate or click the button ends the loop. Report which
        # error it was so a broken locator is distinguishable from a genuine
        # "no more pages" condition.
        print(f'No more "더보기" button found, finished scrolling after {count} clicks.')
        print(f"Reason: {type(exc).__name__}")

    # Presumably gives late-loading reviews time to render before the snapshot
    time.sleep(25)
    bs = BeautifulSoup(driver.page_source, 'lxml')
    reviews = bs.select('li.YlrAu')
    print(f"Found {len(reviews)} reviews.")
    if not reviews:
        print("No element matched 'li.YlrAu'; verify the selectors against the current page markup.")

    for review in reviews:
        nickname_tag = review.select_one('div.VYGLG')
        content_tag = review.select_one('div.vg7Fp.CyA_N')
        # select_one yields None instead of raising when nothing matches, so a
        # review without a date no longer aborts the whole loop.
        date_tag = review.select_one('div.D40bm>span.CKUdu>time')
        # The revisit info is read from the second matching span; guard the
        # index because a review may carry fewer spans.
        meta_spans = review.select('div.D40bm>span.CKUdu')
        revisit_tag = meta_spans[1] if len(meta_spans) > 1 else None

        # Missing elements become empty cells
        nickname = nickname_tag.text if nickname_tag is not None else ''
        content = content_tag.text if content_tag is not None else ''
        date = date_tag.text if date_tag is not None else ''
        revisit = revisit_tag.text if revisit_tag is not None else ''

        print(f"Review: {nickname} / {content} / {date} / {revisit}")
        list_sheet.append([nickname, content, date, revisit])

    # Save the file
    xlsx.save(file_name)
    print(f"File saved as {file_name}")

except Exception as e:
    print(f"Exception occurred: {e}")
    # Save whatever rows were collected before the failure
    xlsx.save(file_name)
    print(f"File saved as {file_name} after exception")

finally:
    # Always release the browser, whether the run succeeded or not
    driver.quit()
    print("Webdriver closed.")

네이버 지도 리뷰 크롤링 코드이며,

위와 같이 작성 시

엑셀 파일은 생성되지만, 크롤링이 전혀 안 되고 있는 상황입니다.

DevTools listening on ws://127.0.0.1:51574/devtools/browser/16977761-9899-4120-9ba5-2ef94f71fbc6
Starting webdriver and accessing URL...
Page loaded. Scrolling down...
Clicking on '더보기' button...
No more "더보기" button found, finished scrolling after 0 clicks.
Found 0 reviews.
File saved as naver_review_2024-06-07_10-45-31.xlsx
Webdriver closed.
PS C:\Users\ooooo\Downloads\test> 

위는 결과값입니다.

도움을 주시면 감사드리겠습니다.

답변을 하려면 로그인이 필요합니다.

프로그래머스 커뮤니티는 개발자들을 위한 Q&A 서비스입니다. 로그인해야 답변을 작성하실 수 있습니다.

(ಠ_ಠ)
(ಠ‿ಠ)