added Hasaki crawler
This commit is contained in:
parent
0fa1dc963d
commit
1fa170d8a7
|
@ -2,6 +2,8 @@ import hashlib
|
|||
import logging
|
||||
import string
|
||||
import re
|
||||
|
||||
import playwright
|
||||
import psycopg2
|
||||
from playwright.sync_api import sync_playwright
|
||||
from hasaki_db_writer import hasaki_db_writer
|
||||
|
@ -97,13 +99,19 @@ class HasakiProductInfo:
|
|||
context = browser.new_context(user_agent=random_mobile_ua)
|
||||
page = context.new_page()
|
||||
|
||||
page.goto(url)
|
||||
page.reload()
|
||||
api_requests = {}
|
||||
|
||||
with page.expect_response("**/wap/v2/product/detail**") as response:
|
||||
api_requests = response.value.json()
|
||||
|
||||
browser.close()
|
||||
try:
|
||||
page.goto(url, timeout=5000)
|
||||
with page.expect_response("**/wap/v2/product/detail**") as response:
|
||||
api_requests = response.value.json()
|
||||
except playwright._impl._errors.TimeoutError:
|
||||
logging.info("Timeout occurred. Retrying.....")
|
||||
page.reload()
|
||||
with page.expect_response("**/wap/v2/product/detail**") as response:
|
||||
api_requests = response.value.json()
|
||||
finally:
|
||||
browser.close()
|
||||
|
||||
return api_requests
|
||||
except Exception as e:
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
|
||||
import playwright
|
||||
from playwright.sync_api import sync_playwright
|
||||
from fake_useragent import UserAgent
|
||||
import logging
|
||||
|
@ -14,12 +14,17 @@ with sync_playwright() as p:
|
|||
context = browser.new_context(user_agent=random_mobile_ua)
|
||||
page = context.new_page()
|
||||
|
||||
page.goto("https://hasaki.vn/san-pham/kem-duong-skin1004-lam-diu-da-chiet-xuat-rau-ma-75ml-89637.html")
|
||||
try:
|
||||
|
||||
page.reload()
|
||||
|
||||
with page.expect_response("**/wap/v2/product/detail**") as response:
|
||||
api_requests = response.value.json()
|
||||
page.goto("https://hasaki.vn/san-pham/kem-duong-skin1004-lam-diu-da-chiet-xuat-rau-ma-75ml-89637.html",
|
||||
timeout=5000)
|
||||
with page.expect_response("**/wap/v2/product/detail**") as response:
|
||||
api_requests = response.value.json()
|
||||
except playwright._impl._errors.TimeoutError:
|
||||
logging.info("Timeout occurred. Retrying.....")
|
||||
page.reload()
|
||||
with page.expect_response("**/wap/v2/product/detail**") as response:
|
||||
api_requests = response.value.json()
|
||||
|
||||
|
||||
browser.close()
|
||||
|
|
Loading…
Reference in New Issue