# raena-crawler-engine/noon_crawler_engine/noon_crawler.py
#
# 115 lines
# 3.9 KiB
# Python
# Raw Normal View History
#
# 2024-01-24 13:05:07 +00:00
import logging
import psycopg2
import json
from datetime import datetime
import smtplib
from email.message import EmailMessage
import requests
from noon_products import noon_products
##### Logger ######
# Log record layout: timestamp, a colon, then the message.
# NOTE(review): this module-level name shadows the builtin ``format()``; it is
# left unchanged here because it is a module-level name.
format = "%(asctime)s: %(message)s"
# Configure the root logger at INFO level with a second-resolution timestamp.
logging.basicConfig(format=format, level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S")
# Crawler settings; starts empty and is replaced with the parsed contents of
# conf.json when this file is run as a script.
config = {}
def slack_notification(message):
    """Post a Noon crawler error report to Slack through an incoming webhook.

    Args:
        message: The error (or any other object) to report. It is converted
            with ``str()`` and appended to a fixed prefix.

    Raises:
        ValueError: If Slack answers with a status code other than 200.
        requests.exceptions.RequestException: If the HTTP request fails or
            does not complete within the timeout.
    """
    import os  # local import: only needed for the optional override below

    # SECURITY(review): a webhook URL is a credential and should not live in
    # source control. The literal is kept as a fallback so existing
    # deployments keep working; set SLACK_WEBHOOK_URL to override it, then
    # rotate the hook and remove the literal.
    webhook_url = os.environ.get("SLACK_WEBHOOK_URL") or (
        "https://hooks.slack.com/services/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"
    )
    slack_data = {"text": "Issue occurred on Noon Crawler. Error: " + str(message)}
    response = requests.post(
        webhook_url,
        data=json.dumps(slack_data),
        headers={"Content-Type": "application/json"},
        # Without a timeout requests waits indefinitely, which would leave the
        # crawler's failure path hanging instead of exiting.
        timeout=10,
    )
    if response.status_code != 200:
        raise ValueError(
            f"Request to Slack returned an error {response.status_code}, {response.text}"
        )
def send_mail():
    """Send the Noon crawler failure e-mail through the Amazon SES SMTP endpoint.

    The call is best-effort: any exception raised while building or sending
    the message is logged (with traceback) and swallowed, so the function
    always returns ``None`` and never raises to the caller.
    """
    import os  # local import: only needed for the optional overrides below

    try:
        # SECURITY(review): SMTP credentials should not live in source control.
        # The literals are kept as fallbacks so existing deployments keep
        # working; set SES_SMTP_USERNAME / SES_SMTP_PASSWORD to override them,
        # then rotate the key pair and remove the literals.
        smtp_username = os.environ.get("SES_SMTP_USERNAME") or "AKIAR2YL57QC6NITTJN5"
        smtp_password = (
            os.environ.get("SES_SMTP_PASSWORD")
            or "BAs9W772KNxLL1xnMzYhdIkpflQ8H+KP0Zbl8dphQZWh"
        )
        sender = 'data_reporting@raenabeauty.com'
        recipient = 'shariar@raenabeauty.com'

        # Static HTML body (no interpolation, so a plain string is enough).
        # The wording names the Noon crawler, matching the Slack alert text.
        html = '''
<!DOCTYPE html>
<html>
<body>
<div style="background-color:#eee;padding:10px 20px;">
<h2 style="font-family:Georgia, 'Times New Roman', Times, serif;color:#454349;">Noon Crawler Status</h2>
</div>
<div style="padding:20px 0px">
<div style="height: 800px;width:800px">
Error occurred. Please check Noon Pipeline.
<div style="text-align:Left;">
<p>This is system generated mail. Please do not reply</p>
</div>
</div>
</div>
</body>
</html>
'''
        msg = EmailMessage()
        msg['Subject'] = 'Noon Crawler Status'
        msg['From'] = sender
        msg['To'] = recipient
        msg.set_content(html, subtype='html')

        # The timeout keeps an unreachable SMTP endpoint from blocking forever.
        with smtplib.SMTP(
            'email-smtp.ap-southeast-1.amazonaws.com', 587, timeout=30
        ) as smtp:
            smtp.ehlo()
            smtp.starttls()
            smtp.login(smtp_username, smtp_password)
            smtp.send_message(msg)
    except Exception as e:
        # Deliberately best-effort: report the failure but do not propagate it.
        logging.exception("Error while sending mail: %s", e)
def main():
    """Run the Noon product crawl.

    Builds a ``noon_products`` processor from the module-level ``config`` and
    starts it. Whatever ``start_processing()`` raises propagates to the caller.
    """
    noon_products(config).start_processing()
if __name__ == "__main__":
    # Script entry point: load conf.json into the module-level ``config``,
    # run the crawl, and on any failure log it, notify Slack and exit with
    # status 1.
    logging.info("Starting Noon Crawler.......")
    try:
        logging.info("Loading config file.......")
        with open("conf.json", "r") as jsonfile:
            config = json.load(jsonfile)
        logging.info("Config file loaded.......")
        # NOTE(review): this echoes the whole config to stdout; confirm it
        # holds no secrets before keeping this in production runs.
        print(config)
        main()
    except Exception as e:
        # The original format string had no placeholder, so the error text
        # was silently dropped; log it together with the traceback.
        logging.exception("Error: %s", e)
        try:
            slack_notification(e)
        except Exception:
            # A failed alert must not hide the original error or skip the
            # non-zero exit below.
            logging.exception("Could not send Slack notification")
        # ``exit()`` is injected by the ``site`` module and is not guaranteed
        # to exist; raising SystemExit is the portable equivalent.
        raise SystemExit(1)