네이버 지도 리뷰 크롤링 문제

Question

네이버 지도 리뷰 크롤링 문제

조회수 204회

python

0

싫어요

from selenium.webdriver.common.by import By
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from openpyxl import Workbook
from bs4 import BeautifulSoup
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
import time
import datetime
import requests

# 크롬 옵션 설정
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.chrome.options import Options as ChromeOptions

# Configure Chrome for an unattended (headless) run.
options = ChromeOptions()
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36"
options.add_argument('user-agent=' + user_agent)
options.add_argument("lang=ko_KR")
options.add_argument('headless')  # headless mode: do not open a browser window
# Chrome's window-size switch takes "width,height"; the previous
# "1920x1080" form is not a valid value, so the viewport was never applied.
options.add_argument('--window-size=1920,1080')
options.add_argument("disable-gpu")
options.add_argument("--no-sandbox")

# Let webdriver_manager download/locate a chromedriver binary.
service = ChromeService(executable_path=ChromeDriverManager().install())

# Launch the Chrome driver.
driver = webdriver.Chrome(service=service, options=options)

# Page to crawl (per its path/query: visitor reviews, reviewSort=recent).
url = 'https://m.place.naver.com/restaurant/1085956231/review/visitor?entry=ple&reviewSort=recent'

# requests session prepared for secondary (non-Selenium) access.
session = requests.Session()
# NOTE(review): placeholder User-Agent; nothing in this section attaches
# `headers` to the session or to a request.
headers = {
    "User-Agent": "user value"}

# Retry transient server errors up to 5 times with a short backoff.
retries = Retry(total=5,
                backoff_factor=0.1,
                status_forcelist=[500, 502, 503, 504])

# Adapters are selected by URL prefix. The target url is https, so mounting
# only 'http://' left the retry policy unused for it; mount both schemes.
retry_adapter = HTTPAdapter(max_retries=retries)
session.mount('http://', retry_adapter)
session.mount('https://', retry_adapter)

# New xlsx file
now = datetime.datetime.now()  # captured once at start-up
xlsx = Workbook()
# Workbook() already contains one empty default sheet. Reuse it as the
# 'output' sheet instead of creating a second one, so the saved file does
# not open on a blank sheet with the data hidden behind it.
list_sheet = xlsx.active
list_sheet.title = 'output'
# Header row; each review is appended below it in the same column order.
list_sheet.append(['nickname', 'content', 'date', 'revisit'])

# Start crawling/scraping!
from selenium.common.exceptions import WebDriverException

# NOTE(review): both locators depend on the page's current markup. The XPath
# is an absolute path and the CSS class names look auto-generated, so either
# may stop matching when the site is redeployed -- verify them in DevTools
# if the crawl finds nothing.
_MORE_BUTTON_XPATH = '//*[@id="app-root"]/div/div/div/div[6]/div[2]/div[3]/div[2]/div/a'
_REVIEW_ITEM_SELECTOR = 'li.YlrAu'


def _text_or_empty(node):
    """Return the text of a parsed node, or '' when the node is missing."""
    return node.text if node is not None else ''


def _parse_review(review):
    """Extract [nickname, content, date, revisit] strings from one review item.

    Any field whose element is absent is returned as '' instead of raising,
    so a single incomplete review cannot abort the whole export.
    """
    nickname = review.select_one('div.VYGLG')
    content = review.select_one('div.vg7Fp.CyA_N')
    date = review.select_one('div.D40bm>span.CKUdu>time')
    # The revisit label is read from the second span.CKUdu, when there is one.
    spans = review.select('div.D40bm>span.CKUdu')
    revisit = spans[1] if len(spans) > 1 else None
    return [_text_or_empty(node) for node in (nickname, content, date, revisit)]


def _save_workbook(note=''):
    """Save the workbook under a timestamped name and report the file name."""
    file_name = 'naver_review_' + now.strftime('%Y-%m-%d_%H-%M-%S') + '.xlsx'
    xlsx.save(file_name)
    print(f"File saved as {file_name}{note}")


try:
    print("Starting webdriver and accessing URL...")
    driver.get(url)
    # Every later find_element call waits up to 30 s for its element.
    driver.implicitly_wait(30)

    print("Page loaded. Scrolling down...")
    driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.PAGE_DOWN)

    # Keep clicking the "more" button until Selenium can no longer find or
    # click it; that failure is treated as the end of the list.
    count = 0
    try:
        while True:
            print("Clicking on '더보기' button...")
            driver.find_element(By.XPATH, _MORE_BUTTON_XPATH).click()
            count += 1
            print(f"'더보기' button clicked {count} times.")
            time.sleep(0.4)
    except WebDriverException as e:
        print(f'No more "더보기" button found, finished scrolling after {count} clicks.')
        # Show why the loop stopped: with 0 clicks this distinguishes a
        # stale locator from a genuinely short review list.
        print(f"Stopped because of: {type(e).__name__}")

    # Fixed settle delay kept from the original script.
    time.sleep(25)
    html = driver.page_source
    bs = BeautifulSoup(html, 'lxml')
    reviews = bs.select(_REVIEW_ITEM_SELECTOR)
    print(f"Found {len(reviews)} reviews.")
    if not reviews:
        print(f"No element matched '{_REVIEW_ITEM_SELECTOR}'; "
              "the page markup or class names may have changed.")

    for r in reviews:
        row = _parse_review(r)
        print(f"Review: {row[0]} / {row[1]} / {row[2]} / {row[3]}")
        list_sheet.append(row)

    # Save the file
    _save_workbook()

except Exception as e:
    print(f"Exception occurred: {e}")
    # Save whatever was collected before the failure.
    _save_workbook(' after exception')

finally:
    # Always release the browser, even if saving failed.
    driver.quit()
    print("Webdriver closed.")

네이버 지도 리뷰 크롤링 코드이며,

위와 같이 작성 시

엑셀 파일은 생성되지만, 크롤링이 전혀 안 되고 있는 상황입니다.

DevTools listening on ws://127.0.0.1:51574/devtools/browser/16977761-9899-4120-9ba5-2ef94f71fbc6
Starting webdriver and accessing URL...
Page loaded. Scrolling down...
Clicking on '더보기' button...
No more "더보기" button found, finished scrolling after 0 clicks.
Found 0 reviews.
File saved as naver_review_2024-06-07_10-45-31.xlsx
Webdriver closed.
PS C:\Users\ooooo\Downloads\test>

위는 결과값입니다.

도움을 주시면 감사드리겠습니다.

ooooogoon 0 points

2024-06-08 01:32:23에 작성됨

네이버 지도 리뷰 크롤링 문제

조회수 204회

python

0

ooooogoon 0 points

2024-06-08 01:32:23에 작성됨

댓글 입력

답변을 하려면 로그인이 필요합니다.