raena-crawler-engine/oliveyoung_crawler/spiders/oliveyoung_bk.py

# oliveyoung.py
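"""Scrapy spider for the Olive Young Global homepage.

Crawls the configured homepage sections (currently only "Best Sellers") and
yields brand name, product name, price, and section for each product. A
module-level helper for fetching TikTok hashtag view counts is included, but
its call sites are commented out.
"""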
import scrapy
import requests


class OliveYoungSpider(scrapy.Spider):
    name = 'oliveyoung_bk'
    start_urls = [
        'https://global.oliveyoung.com/?gad=1&gclid=CjwKCAjwq4imBhBQEiwA9Nx1Bi5w7mSF9wgKTFqfX37hyG_c3ocYHldGoXbIX1XfYKQQFxLOPECJCxoCxpEQAvD_BwE'
    ]

    def parse(self, response):
        sections = {
            "Best Sellers": "//div[@class='slick-slider-customized']/div[contains(@class,'slick-slide')]",
            # "MD's Pick": "//section[@id='md_pick']/div[@class='item']/div[@class='product-item']",
            # "Featured Brands": "//section[@id='brand_list']/div[@class='product-item']",
            # "K-Pop": "//section[@id='kpop_list']/div[@class='product-item']",
            # "INNISFREE": "//section[@id='brand_zone']/div[contains(@class,'brand-inn-store')]//div["
            #              "@class='product-item']",
            # "Recommendation": "//section[@id='recommendation']/div[contains(@class,'product-item')]",
        }
        # Extract data from each section
        for section_name, section_xpath in sections.items():
            products = response.xpath(section_xpath)
            for product in products:
                brand_name = product.xpath(".//span[@class='brand']/text()").get()
                product_name = product.xpath(".//span[@class='name']/text()").get()
                price = product.xpath(".//span[@class='num']/text()").get()
                if brand_name:
                    yield {
                        "brand_name": brand_name.strip(),
                        # Guard against missing nodes so .strip() is never called on None
                        "product_name": (product_name or "").strip(),
                        "price": (price or "").strip(),
                        "section": section_name,
                    }
                    # # Generate hashtags for each brand name
                    # hashtags = [word.lower() for word in brand_name.split()]
                    # hashtags = '#'.join(hashtags)
                    # yield {
                    #     "brand_name": brand_name.strip(),
                    #     "hashtags": f"#{hashtags}",
                    # }
                    #
                    # # Fetch views data from TikTok API using tiktok_api.py
                    # views_all, views = get_hashtag_views(hashtags)
                    # yield {
                    #     "brand_name": brand_name.strip(),
                    #     "hashtags": f"#{hashtags}",
                    #     "views_all": views_all,
                    #     "views": views,
                    # }


def get_hashtag_views(hashtag):
    """Return (video_views_all, video_views) for a hashtag from TikTok's Creative Radar API."""
    url = f'https://ads.tiktok.com/creative_radar_api/v1/popular_trend/hashtag/detail?period=7&hashtag_name={hashtag}&country_code=IS'
    headers = {
        # Add the headers from the CURL request here
    }
    # Time-box the request and surface HTTP errors instead of parsing an error body
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    data = response.json()
    hashtag_data = data.get('hashtag', {})
    return hashtag_data.get('video_views_all', 0), hashtag_data.get('video_views', 0)
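
# Example usage (a sketch; assumes this file sits in a standard Scrapy project):
#   scrapy crawl oliveyoung_bk -o best_sellers.json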