Study/연습

여기어때 홈페이지 크롤링 연습

상맹 2021. 11. 15. 21:37
반응형
from bs4.element import Stylesheet
import requests
from bs4 import BeautifulSoup
import re
import gettext
from requests.api import delete
import json
import urllib.request as req
# 링크, 사진, 숙박업소이름, 평점, 위치, 설명, 가격


def Callthedata(uri=None, limit=10, timeout=10):
    """Scrape a goodchoice.kr search-result page and return its rows as JSON.

    The page is fetched with ``requests`` and parsed with BeautifulSoup.
    Five parallel lists are collected from the product list area (names,
    image URLs, links, locations, prices) and combined position by position.

    Args:
        uri: Search-result URL to fetch. Defaults to the fixed query this
            script was written for (``sel_date=2021-11-15``,
            ``sel_date2=2021-11-16``).
        limit: Maximum number of listings to include, or ``None`` for all.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A JSON string encoding a flat list of ``[label, value]`` pairs, five
        consecutive pairs per listing, labelled ``"name"``, ``"image"``,
        ``"link"``, ``"location"`` and ``"price"``. Values are stringified
        with ``str``. ``"[]"`` is returned when the product list container
        is not present in the page.

    Raises:
        ValueError: If ``limit`` is negative.
        KeyError: If a ``lazy`` element lacks an ``alt`` or ``src`` attribute.
    """
    if limit is not None and limit < 0:
        raise ValueError("limit must be non-negative or None")

    if uri is None:
        uri = (
            'https://www.goodchoice.kr/product/result'
            '?sel_date=2021-11-15&sel_date2=2021-11-16'
            '&keyword=%EC%95%A0%EA%B2%AC'
        )

    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(uri, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Walk down the nested containers to the product list. The id
    # "poduct_list_area" is spelled exactly as in the original selector;
    # presumably it mirrors the site's own markup -- verify before "fixing".
    hotellistbox = soup
    for element_id in ("product_filter_form", "content", "poduct_list_area"):
        hotellistbox = hotellistbox.find(id=element_id)
        if hotellistbox is None:
            return json.dumps([], ensure_ascii=False)

    # Names and image URLs both come from the elements with class "lazy".
    lazy_images = hotellistbox.find_all(class_="lazy")
    namelist = [img['alt'] for img in lazy_images]
    imglist = [img['src'] for img in lazy_images]

    # Links: the href of every <a> tag inside the list area.
    hotellist = [anchor.get('href') for anchor in hotellistbox.find_all('a')]

    # Locations: strip the known child nodes out of each ".stage .name" box
    # and keep the text that remains.
    # NOTE(review): an entry missing any of these children is skipped, which
    # can leave this list out of step with the name/image/link lists --
    # confirm against the live markup.
    locationlist = []
    for stage in hotellistbox.select('.stage'):
        try:
            name_box = stage.find(class_='name')
            name_box.div.extract()
            name_box.find(class_='score').extract()
            name_box.find('strong').extract()
            name_box.find(class_="txt_evt").extract()
        except AttributeError:
            # A lookup returned None: the entry lacks the expected structure.
            continue
        locationlist.append(name_box.text.strip())

    # Prices: text of the "map_html" node once its <em> child is removed.
    pricelist = []
    for price_box in hotellistbox.select(".price"):
        try:
            price_node = price_box.find(class_='map_html')
            price_node.find('em').extract()
        except AttributeError:
            # No "map_html" node or no <em> inside it: skip this entry.
            continue
        pricelist.append(price_node.text)

    # Combine the lists position by position. zip() stops at the shortest
    # list, so a page with fewer than `limit` complete entries yields fewer
    # rows instead of raising IndexError.
    labels = ("name", "image", "link", "location", "price")
    rows = zip(
        namelist[:limit],
        imglist[:limit],
        hotellist[:limit],
        locationlist[:limit],
        pricelist[:limit],
    )
    dict_list = []
    for row in rows:
        dict_list.extend(zip(labels, (str(value) for value in row)))

    return json.dumps(dict_list, ensure_ascii=False)
반응형