64 lines
2.9 KiB
Python
64 lines
2.9 KiB
Python
# oliveyoung.py
|
|
import scrapy
|
|
import requests
|
|
|
|
|
|
class OliveYoungSpider(scrapy.Spider):
    """Scrape product entries from the Olive Young Global landing page.

    Each yielded item carries the brand name, product name, price text and
    the label of the page section the product was found in.
    """

    name = 'oliveyoung_bk'
    start_urls = [
        'https://global.oliveyoung.com/?gad=1&gclid=CjwKCAjwq4imBhBQEiwA9Nx1Bi5w7mSF9wgKTFqfX37hyG_c3ocYHldGoXbIX1XfYKQQFxLOPECJCxoCxpEQAvD_BwE',
    ]

    @staticmethod
    def _clean_text(value):
        """Return ``value`` stripped of surrounding whitespace, or None if it is None."""
        return value.strip() if value is not None else None

    def parse(self, response):
        """Yield one item per product node found in each enabled section.

        Args:
            response: The downloaded page; it is queried via ``response.xpath``.

        Yields:
            dict: Keys ``brand_name``, ``product_name``, ``price`` and
            ``section``. ``product_name`` and ``price`` are ``None`` when the
            matching ``<span>`` is absent from the product node. Products
            whose brand text is missing or blank are skipped.
        """
        # Section label -> XPath selecting that section's product nodes.
        # Commented-out entries are sections that are currently disabled.
        sections = {
            "Best Sellers": "//div[@class='slick-slider-customized']/div[contains(@class,'slick-slide')]",
            # "MD's Pick": "//section[@id='md_pick']/div[@class='item']/div[@class='product-item']",
            # "Featured Brands": "//section[@id='brand_list']/div[@class='product-item']",
            # "K-Pop": "//section[@id='kpop_list']/div[@class='product-item']",
            # "INNISFREE": "//section[@id='brand_zone']/div[contains(@class,'brand-inn-store')]//div["
            #              "@class='product-item']",
            # "Recommendation": "//section[@id='recommendation']/div[contains(@class,'product-item')]",
        }

        # Extract data from each section
        for section_name, section_xpath in sections.items():
            for product in response.xpath(section_xpath):
                brand_name = self._clean_text(
                    product.xpath(".//span[@class='brand']/text()").get()
                )
                if not brand_name:
                    # No usable brand text: nothing meaningful to report.
                    continue

                # .get() returns None when the span is missing, so the name
                # and price go through the None-safe helper instead of a
                # bare .strip(), which would raise AttributeError.
                yield {
                    "brand_name": brand_name,
                    "product_name": self._clean_text(
                        product.xpath(".//span[@class='name']/text()").get()
                    ),
                    "price": self._clean_text(
                        product.xpath(".//span[@class='num']/text()").get()
                    ),
                    "section": section_name,
                }

                # TODO: optional enrichment step (currently disabled): derive
                # lowercase hashtags from the words of brand_name, look up
                # their TikTok view counts with get_hashtag_views(), and
                # yield an item with "brand_name", "hashtags", "views_all"
                # and "views".
|
|
|
|
|
|
def get_hashtag_views(hashtag, *, period=7, country_code='IS', timeout=10):
    """Fetch view counts for a hashtag from the TikTok creative-radar endpoint.

    Args:
        hashtag: Hashtag name sent as the ``hashtag_name`` query parameter.
        period: Value sent as the ``period`` query parameter (default 7).
        country_code: Value sent as the ``country_code`` query parameter
            (default ``'IS'``).
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error, so the call cannot block indefinitely.

    Returns:
        tuple: ``(video_views_all, video_views)`` read from the ``hashtag``
        object of the JSON response; each falls back to 0 when absent.
    """
    url = 'https://ads.tiktok.com/creative_radar_api/v1/popular_trend/hashtag/detail'
    # Passing the query through ``params`` lets requests percent-encode the
    # hashtag; interpolating it into the URL breaks on '#', '&' or spaces.
    params = {
        'period': period,
        'hashtag_name': hashtag,
        'country_code': country_code,
    }
    headers = {
        # Add the headers from the CURL request here
    }
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    data = response.json()
    # ``or {}`` also covers a response where "hashtag" is present but null.
    hashtag_info = data.get('hashtag') or {}
    return hashtag_info.get('video_views_all', 0), hashtag_info.get('video_views', 0)
|