from shopee_sub_categories import shopee_sub_categories
from shopee_category_products import shopee_category_products
from shopee_products import shopee_products
import logging
import psycopg2
import json

###### Logger ######
format = "%(asctime)s: %(message)s"
logging.basicConfig(format=format, level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S")

config = {}


def get_sub_category():
    sub_cat = shopee_sub_categories(config)
    sub_cat.get_sub_categories()


def get_category_products(cur, slave01, slave02):
    products = shopee_category_products(config)
    products.browse_category_page()
    # Signal both slaves (flag=1) that category products are ready to be crawled.
    if not slave01:
        sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_slave01',1)"
        cur.execute(sql)
    else:
        sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=1 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave01'"
        cur.execute(sql)
    if not slave02:
        sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_slave02',1)"
        cur.execute(sql)
    else:
        sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=1 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave02'"
        cur.execute(sql)


def get_products_info():
    product_info = shopee_products(config)
    product_info.get_shopee_products()


def main():
    crawler_main = int(config.get('crawler_main'))
    crawler_slave_no = int(config.get('crawler_slave_no')) if config.get('crawler_slave_no') else None
    if crawler_main:
        crawler_master()
    else:
        if crawler_slave_no == 1:
            crawler_slave1()
        elif crawler_slave_no == 2:
            crawler_slave2()


def crawler_master():
    # Master flag lifecycle: 0 = not started, 1 = sub-categories done,
    # 2 = category products done, 3 = product info done.
    conn = psycopg2.connect(database=config.get('database'), user=config.get('db_user'), password=config.get('db_pass'), host=config.get('db_host'), port=config.get('db_port'))
    conn.autocommit = True
    cur = conn.cursor()
    sql = "select crawler_name,keyword,flag from "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
    cur.execute(sql)
    res = cur.fetchone()
    sql = "select crawler_name,keyword,flag from "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave01'"
    cur.execute(sql)
    slave01 = cur.fetchone()
    sql = "select crawler_name,keyword,flag from "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave02'"
    cur.execute(sql)
    slave02 = cur.fetchone()
    if not res:
        # First run: register the master and slave trackers with flag=0, then run the full pipeline.
        sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_master',0)"
        cur.execute(sql)
        if not slave01:
            sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_slave01',0)"
            cur.execute(sql)
        else:
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=0 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave01'"
            cur.execute(sql)
        if not slave02:
            sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_slave02',0)"
            cur.execute(sql)
        else:
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=0 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave02'"
            cur.execute(sql)
        get_sub_category()
        sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=1 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
        cur.execute(sql)
        get_category_products(cur, slave01, slave02)
        sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=2 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
        cur.execute(sql)
        get_products_info()
        sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=3 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
        cur.execute(sql)
    else:
        # Resume the pipeline from the stage recorded in the master flag.
        if res[2] == 0:
            if not slave01:
                sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_slave01',0)"
                cur.execute(sql)
            else:
                sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=0 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave01'"
                cur.execute(sql)
            if not slave02:
                sql = "insert into "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" (crawler_name,keyword,flag) values('flag','"+config.get('crawler_name')+"_slave02',0)"
                cur.execute(sql)
            else:
                sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=0 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave02'"
                cur.execute(sql)
            get_sub_category()
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=1 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
            cur.execute(sql)
            get_category_products(cur, slave01, slave02)
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=2 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
            cur.execute(sql)
            get_products_info()
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=3 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
            cur.execute(sql)
        elif res[2] == 1:
            get_category_products(cur, slave01, slave02)
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=2 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
            cur.execute(sql)
            get_products_info()
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=3 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
            cur.execute(sql)
        elif res[2] == 2:
            get_products_info()
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=3 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
            cur.execute(sql)
        elif res[2] == 3:
            # Previous cycle finished; restart only once both slaves report done (flag=2).
            if slave01[2] == 2 and slave02[2] == 2:
                sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=0 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_master'"
                cur.execute(sql)
                main()
            else:
                logging.info("Slaves are working.....")
                conn.close()
    conn.close()


def crawler_slave1():
    conn = psycopg2.connect(database=config.get('database'), user=config.get('db_user'), password=config.get('db_pass'), host=config.get('db_host'), port=config.get('db_port'))
    conn.autocommit = True
    cur = conn.cursor()
    sql = "select crawler_name,keyword,flag from "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave01'"
    cur.execute(sql)
    res = cur.fetchone()
    if res:
        if res[2] == 1:
            get_products_info()
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=2 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave01'"
            cur.execute(sql)
        else:
            logging.info("Slave02 or Master are working.....")
    conn.close()


def crawler_slave2():
    conn = psycopg2.connect(database=config.get('database'), user=config.get('db_user'), password=config.get('db_pass'), host=config.get('db_host'), port=config.get('db_port'))
    conn.autocommit = True
    cur = conn.cursor()
    sql = "select crawler_name,keyword,flag from "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave02'"
    cur.execute(sql)
    res = cur.fetchone()
    if res:
        if res[2] == 1:
            get_products_info()
            sql = "update "+config.get('crawler_schema')+"."+config.get('tracker_tab')+" set flag=2 where crawler_name='flag' and keyword='"+config.get('crawler_name')+"_slave02'"
            cur.execute(sql)
        else:
            logging.info("Slave01 or Master are working.....")
    conn.close()


if __name__ == "__main__":
    logging.info("Starting Shopee Crawler.......")
    try:
        logging.info("Loading config file.......")
        with open("conf.json", "r") as jsonfile:
            config = json.load(jsonfile)
        logging.info("Config file loaded.......")
        main()
    except Exception as e:
        logging.info("Error: {}".format(e))
        logging.info("Cannot load config file. Please check. Exiting......")
        exit(1)
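# ---------------------------------------------------------------------------
# Example conf.json (a minimal sketch). The keys below are exactly the ones
# this script reads via config.get(); the values are hypothetical placeholders
# and should be replaced with your own database and crawler settings. For a
# master instance set crawler_main to 1; for a slave instance set crawler_main
# to 0 and crawler_slave_no to 1 or 2.
#
# {
#     "crawler_name": "shopee",
#     "crawler_main": 1,
#     "crawler_slave_no": "",
#     "crawler_schema": "crawler",
#     "tracker_tab": "tracker",
#     "database": "shopee_db",
#     "db_user": "postgres",
#     "db_pass": "postgres",
#     "db_host": "localhost",
#     "db_port": 5432
# }
# ---------------------------------------------------------------------------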