"""Scrape the variant list of one hasaki.vn product via its mobile detail API.

The script opens the product page in Chromium with an iPhone user agent,
captures the JSON response of the ``/wap/v2/product/detail`` request the page
issues, flattens the variant/option/product hierarchy into a table, and prints
both the raw table and a per-SKU merged view.
"""
import time

from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright
import pandas as pd

PRODUCT_URL = (
    "https://hasaki.vn/san-pham/nuoc-tay-trang-tuoi-mat-l-oreal-3-in-1-"
    "danh-cho-da-dau-da-hon-hop-400ml-19325.html"
)

# Glob matched against response URLs to pick out the product-detail API call.
DETAIL_API_PATTERN = "**/wap/v2/product/detail**"

# Only the user agent is overridden; viewport and touch emulation are not set.
MOBILE_USER_AGENT = (
    "Mozilla/5.0 (iPhone X; CPU iPhone OS 12_2 like Mac OS X) "
    "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.1 "
    "Mobile/15E148 Safari/604.1"
)

# Columns kept in the printed tables (a subset of the keys of each row dict).
VARIANT_COLUMNS = [
    'product_variant_name',
    'rce_source_variant_id',
    'product_variant_price',
    'product_variant_stock',
    'product_variant_sku',
]


def fetch_product_detail(url: str = PRODUCT_URL) -> dict:
    """Load *url* in Chromium and return the product-detail API payload.

    Args:
        url: Product page to open.

    Returns:
        The decoded JSON body of the first response whose URL matches
        ``DETAIL_API_PATTERN``.

    Raises:
        playwright.sync_api.TimeoutError: If navigation or the expected
            response does not complete within Playwright's default timeout.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context(user_agent=MOBILE_USER_AGENT)
            page = context.new_page()
            # The listener must be registered BEFORE the navigation that
            # triggers the request; otherwise a response that arrives while
            # the page is loading is missed and the wait times out.
            with page.expect_response(DETAIL_API_PATTERN) as response_info:
                page.goto(url)
                page.wait_for_load_state('load')
            # `.value` is read after the `with` exits, i.e. once the
            # response has actually been received.
            return response_info.value.json()
        finally:
            # Close the browser even when navigation or parsing fails.
            browser.close()


def extract_variant_rows(data: dict) -> list:
    """Flatten the API payload into one dict per purchasable product.

    The payload is read as ``data['attribute']['items'][*]['options'][*]
    ['products'][*]``; each product inherits the ``long_label`` of its option.

    Args:
        data: Decoded product-detail JSON.

    Returns:
        A list of row dicts, in the order the API lists them.

    Raises:
        KeyError: If the payload lacks one of the keys read above.
    """
    rows = []
    for variant in data['attribute']['items']:
        for option in variant['options']:
            for product in option['products']:
                # A fresh dict per product: rows never share state.
                rows.append({
                    'product_variant_name': option['long_label'],
                    'rce_source_variant_id': product['id'],
                    'rce_product_id': "",
                    'product_variant_price': product['price'],
                    'product_variant_price_before_discount': "",
                    'product_variant_stock': product['quantity'],
                    'product_variant_sku': product['sku'],
                })
    return rows


def build_variant_frame(rows: list) -> pd.DataFrame:
    """Build the variant table from *rows*, sorted by SKU.

    The frame is constructed in a single call rather than by repeated
    ``pd.concat``, which is quadratic and gives every row the same index.
    A stable sort keeps rows with the same SKU in API order.
    """
    df = pd.DataFrame(rows, columns=VARIANT_COLUMNS)
    return df.sort_values(by=['product_variant_sku'], kind='mergesort')


def merge_variants_by_sku(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse rows sharing a SKU into one row.

    Variant names are joined with a single space; id, price and stock are
    taken from the first row of each SKU group.
    """
    return df.groupby('product_variant_sku').agg({
        'product_variant_name': ' '.join,
        'rce_source_variant_id': 'first',
        'product_variant_price': 'first',
        'product_variant_stock': 'first'
    }).reset_index()


def main() -> None:
    """Fetch the product detail, then print the raw and merged variant tables."""
    data = fetch_product_detail()
    rows = extract_variant_rows(data)

    # Debug aid: show the last extracted record (empty dict when none found).
    print(rows[-1] if rows else {})

    df = build_variant_frame(rows)
    print(df.to_string())
    print("======================================")
    merged_df = merge_variants_by_sku(df)
    print(merged_df.to_string())


if __name__ == "__main__":
    main()