diff --git a/hasaki_crawler_engine/hasaki_category_products.py b/hasaki_crawler_engine/hasaki_category_products.py index 03ab918..67a143d 100644 --- a/hasaki_crawler_engine/hasaki_category_products.py +++ b/hasaki_crawler_engine/hasaki_category_products.py @@ -154,14 +154,28 @@ class HasakiCategoryProducts: pass + + self.cur.execute(f"""delete from {self.config.get('crawler_schema')}.{self.config.get('tracker_tab')} where flag=1""") + sql = f""" - insert into {self.config.get('crawler_schema')}.{self.config.get('tracker_tab')}(crawler_name,product_section, product_name, product_url, product_image, product_sold, product_brand, gift, product_rank, categoryid) - values('{self.crawler_name}','{product_section}','{product_name.replace("'","")}','{product_url}','{product_image}',{product_sold},'{product_brand}','{gift}',{product_rank},{categoryId}) + select * from {self.config.get('crawler_schema')}.{self.config.get('tracker_tab')} where product_url = '{product_url}' """ - logging.info(sql) - self.cur.execute(sql) + res = self.cur.fetchall() + + if not res: + + sql = f""" + insert into {self.config.get('crawler_schema')}.{self.config.get('tracker_tab')}(crawler_name,product_section, product_name, product_url, product_image, product_sold, product_brand, gift, product_rank, categoryid) + values('{self.crawler_name}','{product_section}','{product_name.replace("'","")}','{product_url}','{product_image}',{product_sold},'{product_brand}','{gift}',{product_rank},{categoryId}) + """ + + logging.info(sql) + + self.cur.execute(sql) + else: + logging.info("Product already present. skipping.....") except Exception as e: print(e)