From f8db895e10d0d825e8271ed0aa7f8bffb7d12968 Mon Sep 17 00:00:00 2001 From: "shariar@raenabeauty.com" Date: Mon, 1 Apr 2024 10:46:40 +0400 Subject: [PATCH] added Hasaki crawler --- hasaki_crawler_engine/hasaki_crawler.py | 57 +++++++++++++++++++++---- 1 file changed, 49 insertions(+), 8 deletions(-) diff --git a/hasaki_crawler_engine/hasaki_crawler.py b/hasaki_crawler_engine/hasaki_crawler.py index 50bc697..7fc4315 100644 --- a/hasaki_crawler_engine/hasaki_crawler.py +++ b/hasaki_crawler_engine/hasaki_crawler.py @@ -3,6 +3,7 @@ import json import time import smtplib import sqlite3 +import psycopg2 from hasaki_categories import HasakiCategories from hasaki_category_products import HasakiCategoryProducts @@ -56,7 +57,7 @@ def send_mail(msg): EMAIL_PASSWORD = "BAs9W772KNxLL1xnMzYhdIkpflQ8H+KP0Zbl8dphQZWh" From = 'data_reporting@raenabeauty.com' To = 'shariar@raenabeauty.com, data_reporting@raenabeauty.com' - # To = 'shariar@raenabeauty.com' + #To = 'shariar@raenabeauty.com' html = f''' @@ -64,12 +65,12 @@ def send_mail(msg):

Hasaki Crawler Status

-
-
-
- {msg} -
-

This is system generated mail. Please do not reply

+
+
+
+ {msg} +
+

This is system generated mail. Please do not reply.

@@ -116,6 +117,44 @@ def init_tracker_tab(cur): logging.info("++++++++++++++++ process tracker tab status ++++++++++++++++++++++") cur.execute(f"""select * from process_tracker""") logging.info(cur.fetchall()) + + +def get_status(): + conn = psycopg2.connect(database=config.get('database'), user=config.get('db_user'), + password=config.get('db_pass'), host=config.get('db_host'), + port=config.get('db_port')) + + conn.autocommit = True + cur = conn.cursor() + + cur.execute( + f"""select count(1) from raena_spider_management.rce_category where rce_source_id = (select id from raena_spider_management.rce_source where source_name = 'Hasaki')""") + cat_count = cur.fetchone()[0] + + cur.execute(f"""select count(1) from raena_spider_management.crawler_tracker_hasaki""") + product_total = cur.fetchone()[0] + + cur.execute(f"""select count(1) from raena_spider_management.crawler_tracker_hasaki where flag = 1""") + product_successful = cur.fetchone()[0] + + cur.execute(f"""select count(1) from raena_spider_management.crawler_tracker_hasaki where flag = 0""") + product_failed = cur.fetchone()[0] + + msg = f""" +

Hasaki Crawler run is completed. Please check the status below,

+
+
    +
  • Total Collected categories: {cat_count}
  • +
  • Total Collected products for categories: {product_total}
  • +
  • Total successfully collected products: {product_successful}
  • +
  • Total failed to collect products: 0 else 'style="color: green;"'}>{product_failed}
  • +
+ """ + + cur.close() + conn.close() + + return msg @@ -143,8 +182,10 @@ if __name__ == "__main__": cur.close() conn.close() + + msg = get_status() - send_mail("Hasaki crawler run complete.") + send_mail(msg) except Exception as e: logging.info("Error: ".format(e))