added Hasaki crawler

This commit is contained in:
Shariar Imtiaz 2024-04-01 10:46:40 +04:00
parent 291f094cfc
commit f8db895e10
1 changed file with 49 additions and 8 deletions

View File

@ -3,6 +3,7 @@ import json
import time import time
import smtplib import smtplib
import sqlite3 import sqlite3
import psycopg2
from hasaki_categories import HasakiCategories from hasaki_categories import HasakiCategories
from hasaki_category_products import HasakiCategoryProducts from hasaki_category_products import HasakiCategoryProducts
@ -56,7 +57,7 @@ def send_mail(msg):
EMAIL_PASSWORD = "BAs9W772KNxLL1xnMzYhdIkpflQ8H+KP0Zbl8dphQZWh" EMAIL_PASSWORD = "BAs9W772KNxLL1xnMzYhdIkpflQ8H+KP0Zbl8dphQZWh"
From = 'data_reporting@raenabeauty.com' From = 'data_reporting@raenabeauty.com'
To = 'shariar@raenabeauty.com, data_reporting@raenabeauty.com' To = 'shariar@raenabeauty.com, data_reporting@raenabeauty.com'
# To = 'shariar@raenabeauty.com' #To = 'shariar@raenabeauty.com'
html = f''' html = f'''
<!DOCTYPE html> <!DOCTYPE html>
@ -69,7 +70,7 @@ def send_mail(msg):
<div style="height: 800px;width:800px"> <div style="height: 800px;width:800px">
{msg} {msg}
<div style="text-align:Left;"> <div style="text-align:Left;">
<p>This is system generated mail. Please do not reply</p> <p>This is system generated mail. Please do not reply.</p>
</div> </div>
</div> </div>
</div> </div>
@ -118,6 +119,44 @@ def init_tracker_tab(cur):
logging.info(cur.fetchall()) logging.info(cur.fetchall())
def get_status():
    """Build the HTML status summary for a completed Hasaki crawler run.

    Connects to the PostgreSQL database described by the module-level
    ``config`` mapping (keys ``database``, ``db_user``, ``db_pass``,
    ``db_host``, ``db_port``) and collects four counts:

    * categories in ``rce_category`` whose source is named 'Hasaki',
    * all rows in ``crawler_tracker_hasaki``,
    * rows in ``crawler_tracker_hasaki`` with ``flag = 1`` (reported as
      successfully collected products),
    * rows in ``crawler_tracker_hasaki`` with ``flag = 0`` (reported as
      failed products).

    Returns:
        str: an HTML fragment (heading plus bullet list) with the counts.
        The success count is green only when it equals the total; the
        failure count is red whenever it is greater than zero.

    Raises:
        Any exception raised while connecting or querying propagates to the
        caller; the cursor and connection are closed before it does.
    """
    conn = psycopg2.connect(database=config.get('database'), user=config.get('db_user'),
                            password=config.get('db_pass'), host=config.get('db_host'),
                            port=config.get('db_port'))
    # Close the cursor and connection even when a query fails, so a broken
    # status report does not leak a database connection.
    try:
        conn.autocommit = True
        cur = conn.cursor()
        try:
            cur.execute(
                """select count(1) from raena_spider_management.rce_category where rce_source_id = (select id from raena_spider_management.rce_source where source_name = 'Hasaki')""")
            cat_count = cur.fetchone()[0]

            cur.execute("""select count(1) from raena_spider_management.crawler_tracker_hasaki""")
            product_total = cur.fetchone()[0]

            cur.execute("""select count(1) from raena_spider_management.crawler_tracker_hasaki where flag = 1""")
            product_successful = cur.fetchone()[0]

            cur.execute("""select count(1) from raena_spider_management.crawler_tracker_hasaki where flag = 0""")
            product_failed = cur.fetchone()[0]
        finally:
            cur.close()
    finally:
        conn.close()

    # Inline style attributes, precomputed to keep the template readable.
    green = 'style="color: green;"'
    red = 'style="color: red;"'
    successful_style = green if product_successful == product_total else red
    failed_style = red if product_failed > 0 else green

    msg = f"""
<p><b>Hasaki Crawler run is completed. Please check the status below,</b></p>
<br>
<ul style="list-style-type:disc">
<li>Total Collected categories: <b>{cat_count}</b></li>
<li>Total Collected products for categories: <b>{product_total}</b></li>
<li>Total successfully collected products: <b {successful_style}>{product_successful}</b></li>
<li>Total failed to collect products: <b {failed_style}>{product_failed}</b></li>
</ul>
"""
    return msg
if __name__ == "__main__": if __name__ == "__main__":
logging.info("Starting Hasaki Crawler.......") logging.info("Starting Hasaki Crawler.......")
@ -144,7 +183,9 @@ if __name__ == "__main__":
cur.close() cur.close()
conn.close() conn.close()
send_mail("Hasaki crawler run complete.") msg = get_status()
send_mail(msg)
except Exception as e: except Exception as e:
logging.info("Error: ".format(e)) logging.info("Error: ".format(e))