# oliveyoung.py
"""Scrapy spider for the Olive Young Global landing page, plus a TikTok
hashtag-views helper that the spider does not call yet."""
from typing import Any, Dict, Iterator, Optional, Tuple

import requests
import scrapy

# Endpoint queried by get_hashtag_views(); the query string is built from
# keyword arguments so that values are URL-encoded by requests.
_TIKTOK_HASHTAG_DETAIL_URL = (
    'https://ads.tiktok.com/creative_radar_api/v1/popular_trend/hashtag/detail'
)


def _clean_text(value: Optional[str]) -> Optional[str]:
    """Return *value* without surrounding whitespace, or None if it is None."""
    return value.strip() if value is not None else None


class OliveYoungSpider(scrapy.Spider):
    """Scrape brand, product name and price from the landing-page sections."""

    name = 'oliveyoung_bk'
    start_urls = [
        'https://global.oliveyoung.com/?gad=1&gclid=CjwKCAjwq4imBhBQEiwA9Nx1Bi5w7mSF9wgKTFqfX37hyG_c3ocYHldGoXbIX1XfYKQQFxLOPECJCxoCxpEQAvD_BwE']

    def parse(self, response) -> Iterator[Dict[str, Any]]:
        """Yield one item per product node that has a brand name.

        Each item has the keys ``brand_name``, ``product_name``, ``price``
        and ``section``. ``product_name`` and ``price`` are ``None`` when the
        corresponding node is missing from the product markup.
        """
        # Section label -> XPath selecting that section's product nodes.
        # The disabled entries are kept for reference.
        sections = {
            "Best Sellers": "//div[@class='slick-slider-customized']/div[contains(@class,'slick-slide')]",
            # "MD's Pick": "//section[@id='md_pick']/div[@class='item']/div[@class='product-item']",
            # "Featured Brands": "//section[@id='brand_list']/div[@class='product-item']",
            # "K-Pop": "//section[@id='kpop_list']/div[@class='product-item']",
            # "INNISFREE": "//section[@id='brand_zone']/div[contains(@class,'brand-inn-store')]//div["
            #              "@class='product-item']",
            # "Recommendation": "//section[@id='recommendation']/div[contains(@class,'product-item')]",
        }

        # Extract data from each section
        for section_name, section_xpath in sections.items():
            for product in response.xpath(section_xpath):
                brand_name = product.xpath(".//span[@class='brand']/text()").get()
                if not brand_name:
                    # Nodes without a brand are skipped entirely.
                    continue

                product_name = product.xpath(".//span[@class='name']/text()").get()
                price = product.xpath(".//span[@class='num']/text()").get()

                # .get() returns None when nothing matches, so the optional
                # fields are stripped through a None-safe helper instead of
                # calling .strip() on them directly.
                yield {
                    "brand_name": brand_name.strip(),
                    "product_name": _clean_text(product_name),
                    "price": _clean_text(price),
                    "section": section_name,
                }

                # Disabled follow-up steps, kept for reference:
                #
                # # Generate hashtags for each brand name
                # hashtags = [word.lower() for word in brand_name.split()]
                # hashtags = '#'.join(hashtags)
                # yield {
                #     "brand_name": brand_name.strip(),
                #     "hashtags": f"#{hashtags}",
                # }
                #
                # # Fetch views data from TikTok API using tiktok_api.py
                # views_all, views = get_hashtag_views(hashtags)
                # yield {
                #     "brand_name": brand_name.strip(),
                #     "hashtags": f"#{hashtags}",
                #     "views_all": views_all,
                #     "views": views,
                # }


def get_hashtag_views(
    hashtag: str,
    *,
    period: int = 7,
    country_code: str = 'IS',
    timeout: float = 10.0,
) -> Tuple[Any, Any]:
    """Fetch view counts for *hashtag* from the TikTok hashtag-detail endpoint.

    Args:
        hashtag: Value sent as the ``hashtag_name`` query parameter.
        period: Value sent as the ``period`` query parameter.
        country_code: Value sent as the ``country_code`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(video_views_all, video_views)`` pair read from the ``hashtag``
        object of the JSON response; each value falls back to ``0`` when
        absent.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    # Passing the query as params lets requests URL-encode the values, so a
    # hashtag containing '&', '#' or spaces cannot corrupt the query string.
    params = {
        'period': period,
        'hashtag_name': hashtag,
        'country_code': country_code,
    }
    headers = {
        # Add the headers from the CURL request here
    }
    response = requests.get(
        _TIKTOK_HASHTAG_DETAIL_URL,
        params=params,
        headers=headers,
        timeout=timeout,
    )
    data = response.json()
    # NOTE(review): assumes the payload is a JSON object with a top-level
    # 'hashtag' key - confirm against a real response.
    details = data.get('hashtag') or {}
    return details.get('video_views_all', 0), details.get('video_views', 0)