# raena-crawler-engine/oliveyoung_crawler/spiders/oliveyoung_bk.py

# oliveyoung.py
import scrapy
import requests


class OliveYoungSpider(scrapy.Spider):
    """Scrape product cards from the Olive Young Global landing page.

    The spider requests the single URL in ``start_urls`` and, for every
    enabled page section, yields one item per product card that carries a
    brand name.
    """

    name = 'oliveyoung_bk'
    start_urls = [
        'https://global.oliveyoung.com/?gad=1&gclid=CjwKCAjwq4imBhBQEiwA9Nx1Bi5w7mSF9wgKTFqfX37hyG_c3ocYHldGoXbIX1XfYKQQFxLOPECJCxoCxpEQAvD_BwE']

    @staticmethod
    def _clean(text):
        """Return ``text`` stripped of surrounding whitespace, or ``None``.

        ``Selector.get()`` returns ``None`` when the XPath matches nothing,
        so the value must be checked before calling ``str.strip``.
        """
        return text.strip() if text is not None else None

    def parse(self, response):
        """Yield one dict per product card found in each enabled section.

        Args:
            response: The Scrapy response for the landing page.

        Yields:
            dict: ``brand_name``, ``product_name``, ``price`` and ``section``.
            ``product_name`` and ``price`` are ``None`` when the card has no
            matching element; cards without a brand name are skipped.
        """
        # Section label -> XPath selecting that section's product cards.
        # The commented-out entries are disabled sections kept for reference.
        sections = {
            "Best Sellers": "//div[@class='slick-slider-customized']/div[contains(@class,'slick-slide')]",
            # "MD's Pick": "//section[@id='md_pick']/div[@class='item']/div[@class='product-item']",
            # "Featured Brands": "//section[@id='brand_list']/div[@class='product-item']",
            # "K-Pop": "//section[@id='kpop_list']/div[@class='product-item']",
            # "INNISFREE": "//section[@id='brand_zone']/div[contains(@class,'brand-inn-store')]//div["
            #              "@class='product-item']",
            # "Recommendation": "//section[@id='recommendation']/div[contains(@class,'product-item')]",
        }

        # Extract data from each section
        for section_name, section_xpath in sections.items():
            for product in response.xpath(section_xpath):
                brand_name = product.xpath(".//span[@class='brand']/text()").get()
                product_name = product.xpath(".//span[@class='name']/text()").get()
                price = product.xpath(".//span[@class='num']/text()").get()

                # A card without a brand is not treated as a product.
                if not brand_name:
                    continue

                # product_name / price may be missing on a card; cleaning
                # them through _clean avoids an AttributeError that would
                # abort the generator and drop every remaining item.
                yield {
                    "brand_name": brand_name.strip(),
                    "product_name": self._clean(product_name),
                    "price": self._clean(price),
                    "section": section_name,
                }

                # Disabled: hashtag generation and TikTok view lookup.
                # # Generate hashtags for each brand name
                # hashtags = [word.lower() for word in brand_name.split()]
                # hashtags = '#'.join(hashtags)
                # yield {
                #     "brand_name": brand_name.strip(),
                #     "hashtags": f"#{hashtags}",
                # }
                #
                # # Fetch views data from TikTok API using tiktok_api.py
                # views_all, views = get_hashtag_views(hashtags)
                # yield {
                #     "brand_name": brand_name.strip(),
                #     "hashtags": f"#{hashtags}",
                #     "views_all": views_all,
                #     "views": views,
                # }


def get_hashtag_views(hashtag, *, period=7, country_code='IS', timeout=10):
    """Fetch video-view counts for a hashtag from TikTok's Creative Radar API.

    Args:
        hashtag: Hashtag name to look up. It is sent as a query parameter,
            so characters such as ``#``, ``&`` or spaces are URL-encoded
            instead of corrupting the request URL.
        period: Value of the ``period`` query parameter (default ``7``).
        country_code: Value of the ``country_code`` query parameter
            (default ``'IS'``).
        timeout: Seconds to wait for the server before giving up; without
            it ``requests`` may block indefinitely.

    Returns:
        tuple: ``(video_views_all, video_views)`` read from the ``hashtag``
        object of the JSON response; each falls back to ``0`` when absent.

    Raises:
        requests.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = 'https://ads.tiktok.com/creative_radar_api/v1/popular_trend/hashtag/detail'
    # Parameter order matches the original hand-built query string.
    params = {
        'period': period,
        'hashtag_name': hashtag,
        'country_code': country_code,
    }
    headers = {
        # Add the headers from the CURL request here
    }
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    data = response.json()
    # `or {}` also covers a response where "hashtag" is present but null.
    hashtag_info = data.get('hashtag') or {}
    return hashtag_info.get('video_views_all', 0), hashtag_info.get('video_views', 0)