added Hasaki crawler
This commit is contained in:
parent
291f094cfc
commit
f8db895e10
|
@ -3,6 +3,7 @@ import json
|
||||||
import time
|
import time
|
||||||
import smtplib
|
import smtplib
|
||||||
import sqlite3
|
import sqlite3
|
||||||
|
import psycopg2
|
||||||
|
|
||||||
from hasaki_categories import HasakiCategories
|
from hasaki_categories import HasakiCategories
|
||||||
from hasaki_category_products import HasakiCategoryProducts
|
from hasaki_category_products import HasakiCategoryProducts
|
||||||
|
@ -56,7 +57,7 @@ def send_mail(msg):
|
||||||
EMAIL_PASSWORD = "BAs9W772KNxLL1xnMzYhdIkpflQ8H+KP0Zbl8dphQZWh"
|
EMAIL_PASSWORD = "BAs9W772KNxLL1xnMzYhdIkpflQ8H+KP0Zbl8dphQZWh"
|
||||||
From = 'data_reporting@raenabeauty.com'
|
From = 'data_reporting@raenabeauty.com'
|
||||||
To = 'shariar@raenabeauty.com, data_reporting@raenabeauty.com'
|
To = 'shariar@raenabeauty.com, data_reporting@raenabeauty.com'
|
||||||
# To = 'shariar@raenabeauty.com'
|
#To = 'shariar@raenabeauty.com'
|
||||||
|
|
||||||
html = f'''
|
html = f'''
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
|
@ -64,12 +65,12 @@ def send_mail(msg):
|
||||||
<body>
|
<body>
|
||||||
<div style="background-color:#eee;padding:10px 20px;">
|
<div style="background-color:#eee;padding:10px 20px;">
|
||||||
<h2 style="font-family:Georgia, 'Times New Roman', Times, serif;color#454349;">Hasaki Crawler Status</h2>
|
<h2 style="font-family:Georgia, 'Times New Roman', Times, serif;color#454349;">Hasaki Crawler Status</h2>
|
||||||
</div>
|
</div>
|
||||||
<div style="padding:20px 0px">
|
<div style="padding:20px 0px">
|
||||||
<div style="height: 800px;width:800px">
|
<div style="height: 800px;width:800px">
|
||||||
{msg}
|
{msg}
|
||||||
<div style="text-align:Left;">
|
<div style="text-align:Left;">
|
||||||
<p>This is system generated mail. Please do not reply</p>
|
<p>This is system generated mail. Please do not reply.</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
@ -118,6 +119,44 @@ def init_tracker_tab(cur):
|
||||||
logging.info(cur.fetchall())
|
logging.info(cur.fetchall())
|
||||||
|
|
||||||
|
|
||||||
|
def get_status():
    """Build the HTML status summary for the Hasaki crawler run.

    Opens a PostgreSQL connection using the module-level ``config`` mapping
    (keys ``database``, ``db_user``, ``db_pass``, ``db_host``, ``db_port``),
    runs four count queries against the ``raena_spider_management`` schema,
    and renders the counts into an HTML fragment.

    Returns:
        str: HTML fragment listing the number of categories for the
        'Hasaki' source, the total rows in ``crawler_tracker_hasaki``, and
        the rows with ``flag = 1`` (reported as successful) and ``flag = 0``
        (reported as failed).

    Raises:
        Whatever ``psycopg2`` raises on connection or query failure; the
        cursor and connection are closed before the exception propagates.
    """
    conn = psycopg2.connect(
        database=config.get('database'),
        user=config.get('db_user'),
        password=config.get('db_pass'),
        host=config.get('db_host'),
        port=config.get('db_port'),
    )

    # try/finally guarantees the connection and cursor are released even if
    # one of the queries fails. (psycopg2's ``with conn:`` only ends the
    # transaction; it does not close the connection.)
    try:
        conn.autocommit = True
        cur = conn.cursor()
        try:
            cur.execute(
                """select count(1) from raena_spider_management.rce_category where rce_source_id = (select id from raena_spider_management.rce_source where source_name = 'Hasaki')""")
            cat_count = cur.fetchone()[0]

            cur.execute("""select count(1) from raena_spider_management.crawler_tracker_hasaki""")
            product_total = cur.fetchone()[0]

            cur.execute("""select count(1) from raena_spider_management.crawler_tracker_hasaki where flag = 1""")
            product_successful = cur.fetchone()[0]

            cur.execute("""select count(1) from raena_spider_management.crawler_tracker_hasaki where flag = 0""")
            product_failed = cur.fetchone()[0]
        finally:
            cur.close()
    finally:
        conn.close()

    # Successful count is green only when every tracked product succeeded;
    # failed count is red as soon as there is at least one failure.
    msg = f"""
    <p><b>Hasaki Crawler run is completed. Please check the status below,</b></p>
    <br>
    <ul style="list-style-type:disc">
    <li>Total Collected categories: <b>{cat_count}</b></li>
    <li>Total Collected products for categories: <b>{product_total}</b></li>
    <li>Total successfully collected products: <b {'style="color: green;"' if product_successful == product_total else 'style="color: red;"'}>{product_successful}</b></li>
    <li>Total failed to collect products: <b {'style="color: red;"' if product_failed > 0 else 'style="color: green;"'}>{product_failed}</b></li>
    </ul>
    """

    return msg
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
logging.info("Starting Hasaki Crawler.......")
|
logging.info("Starting Hasaki Crawler.......")
|
||||||
|
@ -144,7 +183,9 @@ if __name__ == "__main__":
|
||||||
cur.close()
|
cur.close()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
send_mail("Hasaki crawler run complete.")
|
msg = get_status()
|
||||||
|
|
||||||
|
send_mail(msg)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.info("Error: ".format(e))
|
logging.info("Error: ".format(e))
|
||||||
|
|
Loading…
Reference in New Issue