raena-crawler-engine/hasaki_crawler_engine/test.py

38 lines
1.3 KiB
Python
Raw Normal View History

2024-03-15 07:47:28 +00:00
import playwright
2024-03-14 05:16:59 +00:00
from playwright.sync_api import sync_playwright
2024-03-15 07:02:44 +00:00
from fake_useragent import UserAgent
import logging
2024-03-14 05:16:59 +00:00
# Scratch script: open a Hasaki product page in Chromium with a random mobile
# user agent, capture the product-detail API response that the page requests,
# and print its JSON body.
PRODUCT_URL = "https://hasaki.vn/san-pham/kem-duong-skin1004-lam-diu-da-chiet-xuat-rau-ma-75ml-89637.html"
DETAIL_API_PATTERN = "**/wap/v2/product/detail**"

with sync_playwright() as p:
    browser = p.chromium.launch(headless=False, args=[
        "--disable-dev-shm-usage",
        "--disable-blink-features=AutomationControlled",
        "--disable-component-extensions-with-background-pages",
    ])
    try:
        ua = UserAgent(platforms='mobile')
        random_mobile_ua = ua.random
        # NOTE(review): the root logger defaults to WARNING, so these info
        # messages are dropped unless logging is configured elsewhere.
        logging.info("using user agent: {}".format(random_mobile_ua))
        context = browser.new_context(user_agent=random_mobile_ua)
        # Make navigator.webdriver read as undefined in every page of this
        # context, before any page script runs.
        context.add_init_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
        page = context.new_page()
        try:
            # Register the response waiter BEFORE navigating: a response that
            # arrives while goto() is still running would otherwise be missed
            # and the waiter would block until its own timeout.
            with page.expect_response(DETAIL_API_PATTERN) as response:
                page.goto(PRODUCT_URL, timeout=5000)
            api_requests = response.value.json()
        except playwright.sync_api.TimeoutError:
            # Public alias of the timeout error; avoids depending on the
            # private playwright._impl module layout.
            logging.info("Timeout occurred. Retrying.....")
            with page.expect_response(DETAIL_API_PATTERN) as response:
                page.reload()
            api_requests = response.value.json()
    finally:
        # Close the browser even when the retry fails as well.
        browser.close()

print(api_requests)