From 295ac413059668205c4c038ad76a3caf4271409d Mon Sep 17 00:00:00 2001 From: "shariar@raenabeauty.com" Date: Tue, 26 Mar 2024 15:22:33 +0400 Subject: [PATCH] added Hasaki crawler --- .../hasaki_category_products.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/hasaki_crawler_engine/hasaki_category_products.py b/hasaki_crawler_engine/hasaki_category_products.py index 03ab918..67a143d 100644 --- a/hasaki_crawler_engine/hasaki_category_products.py +++ b/hasaki_crawler_engine/hasaki_category_products.py @@ -154,14 +154,28 @@ class HasakiCategoryProducts: pass + + self.cur.execute(f"""delete from {self.config.get('crawler_schema')}.{self.config.get('tracker_tab')} where flag=1""") + sql = f""" - insert into {self.config.get('crawler_schema')}.{self.config.get('tracker_tab')}(crawler_name,product_section, product_name, product_url, product_image, product_sold, product_brand, gift, product_rank, categoryid) - values('{self.crawler_name}','{product_section}','{product_name.replace("'","")}','{product_url}','{product_image}',{product_sold},'{product_brand}','{gift}',{product_rank},{categoryId}) + select * from {self.config.get('crawler_schema')}.{self.config.get('tracker_tab')} where product_url = '{product_url}' """ - logging.info(sql) - self.cur.execute(sql) + res = self.cur.fetchall() + + if not res: + + sql = f""" + insert into {self.config.get('crawler_schema')}.{self.config.get('tracker_tab')}(crawler_name,product_section, product_name, product_url, product_image, product_sold, product_brand, gift, product_rank, categoryid) + values('{self.crawler_name}','{product_section}','{product_name.replace("'","")}','{product_url}','{product_image}',{product_sold},'{product_brand}','{gift}',{product_rank},{categoryId}) + """ + + logging.info(sql) + + self.cur.execute(sql) + else: + logging.info("Product already present. skipping.....") except Exception as e: print(e)