added Hasaki crawler
This commit is contained in:
parent
28f584f829
commit
67df30ff1d
|
@ -2,35 +2,41 @@ from seleniumwire import webdriver
|
|||
from selenium.webdriver.chrome.service import Service
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
from fake_useragent import UserAgent
|
||||
import brotli
|
||||
import json
|
||||
|
||||
|
||||
def _decode_response_body(body, encoding):
    """Return *body* decompressed according to its Content-Encoding value."""
    import gzip
    import zlib

    normalized = (encoding or '').strip().lower()
    if normalized in ('', 'identity'):
        return body
    if normalized == 'br':
        return brotli.decompress(body)
    if normalized == 'gzip':
        return gzip.decompress(body)
    if normalized == 'deflate':
        return zlib.decompress(body)
    raise ValueError(f"unsupported content-encoding: {encoding!r}")


def get_raw_product(url):
    """Open *url* in Chrome and return the captured product-detail payload.

    A Chrome session is started with a random mobile user agent, the page is
    loaded, and the requests recorded by Selenium Wire are scanned for one
    whose URL contains ``/wap/v2/product/detail``. The body of the last such
    response is decompressed if necessary and parsed as JSON.

    Args:
        url: Address of the product page to load.

    Returns:
        The parsed JSON document from the product-detail response.

    Raises:
        LookupError: If no product-detail response was captured.
        ValueError: If the response uses an unsupported content-encoding, or
            if the body is not valid JSON (``json.JSONDecodeError``).
    """
    random_mobile_ua = UserAgent(platforms='mobile').random

    op = webdriver.ChromeOptions()
    op.add_argument(f"user-agent={random_mobile_ua}")
    op.add_experimental_option("useAutomationExtension", False)
    op.add_argument('--no-sandbox')
    op.add_argument('--disable-notifications')
    op.add_argument("--lang=en-GB")
    op.add_argument("--log-level=3")
    op.headless = False

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=op)
    iteminfo = None
    try:
        driver.get(url)

        # Scan every request the browser made; the last matching response wins.
        for request in driver.requests:
            response = request.response
            if not response:
                continue
            if '/wap/v2/product/detail' not in request.url:
                continue
            encoding = response.headers.get('content-encoding')
            iteminfo = _decode_response_body(response.body, encoding)
    finally:
        # Always shut the browser down, even when loading or decoding fails.
        driver.quit()

    if iteminfo is None:
        raise LookupError(
            f"no '/wap/v2/product/detail' response captured for {url!r}"
        )

    return json.loads(iteminfo)
|
||||
|
||||
|
||||
# Module-level call: runs whenever this file is executed or imported, and the
# returned product data is discarded.
get_raw_product('https://hasaki.vn/san-pham/nuoc-tay-trang-bioderma-danh-cho-da-nhay-cam-500ml-9740.html')
|
Loading…
Reference in New Issue