51 lines
2.0 KiB
Python
51 lines
2.0 KiB
Python
import logging
import time

import playwright
from fake_useragent import UserAgent
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
from playwright.sync_api import sync_playwright
|
|
|
|
def get_raw_product_data(url, retries=2):
    """Fetch the product-detail API payload that a product page requests.

    Opens ``url`` in a Chromium instance driven by Playwright, reloads the
    page, and captures the first response whose URL matches
    ``**/wap/v2/product/detail**``.

    Args:
        url: Address of the product page to open.
        retries: Maximum number of attempts; each attempt starts a fresh
            browser. Defaults to 2.

    Returns:
        The decoded JSON body of the captured response, or ``None`` when
        every attempt failed (timeout or any other error).
    """
    launch_args = [
        "--disable-dev-shm-usage",
        "--disable-blink-features=AutomationControlled",
        "--disable-component-extensions-with-background-pages",
    ]

    for _ in range(retries):
        try:
            with sync_playwright() as p:
                browser = p.chromium.launch(headless=False, args=launch_args)
                try:
                    user_agent = UserAgent().random
                    logging.info("Using user agent: %s", user_agent)

                    context = browser.new_context(user_agent=user_agent)
                    # Hide the automation flag from scripts running in the page.
                    context.add_init_script(
                        "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
                    )
                    page = context.new_page()

                    page.goto(url, timeout=5000)
                    time.sleep(1)

                    # Register the response listener *before* triggering the
                    # reload; otherwise a fast response can arrive before the
                    # listener exists and the wait runs into its timeout.
                    with page.expect_response("**/wap/v2/product/detail**") as response_info:
                        page.reload()
                    return response_info.value.json()
                except PlaywrightTimeoutError:
                    # The browser is closed by ``finally`` below; the next
                    # loop iteration starts over with a new one.
                    logging.info("Timeout occurred. Retrying.....")
                finally:
                    browser.close()
        except Exception as e:
            # Best-effort scraper: log any other failure and try again.
            logging.error("An error occurred: %s", e)
            logging.info("Retrying...")

    return None
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: only executes when the file is run as a script, so
    # importing this module does not launch a browser.
    print(get_raw_product_data("https://hasaki.vn/san-pham/mat-na-naruko-y-di-nhan-do-duong-sang-da-25ml-moi-92613.html"))