raena-crawler-engine/amazon_crawler_engine/amazon_db_writer.py

590 lines
38 KiB
Python
Raw Normal View History

2024-01-24 13:05:07 +00:00
import logging
import psycopg2
# ----- Logger -----
# Configure the root logger once at import time: records are rendered as
# "<timestamp>: <message>" and everything at INFO level or above is emitted.
format = "%(asctime)s: %(message)s"
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format=format,
)
class amazon_db_writer:
    """Write crawled records into the crawler schema and its ``aud_`` audit tables.

    Every public ``rce_*`` method takes one ``data`` dict and applies the same
    three-way logic to its table:

    * no row matches the lookup key -> insert the row, then copy it into the
      audit table;
    * a row matches and every compared field is equal -> only refresh
      ``updatedat`` on the row and on its audit copy;
    * a row matches but a compared field differs -> update the row, then copy
      the new state into the audit table.

    Schema and table names are read from ``config``.  Values taken from
    ``data`` are always sent as query parameters (never spliced into the SQL
    text), so apostrophes in crawled text cannot break or alter a statement,
    and the caller's ``data`` dict is left unmodified.
    """

    def __init__(self, config):
        """Open the PostgreSQL connection described by ``config``.

        ``config`` must provide ``database``, ``db_user``, ``db_pass``,
        ``db_host`` and ``db_port`` via ``.get``.  The connection runs in
        autocommit mode, so every statement is committed on its own.
        """
        self.config = config
        self.conn = psycopg2.connect(
            database=self.config.get('database'),
            user=self.config.get('db_user'),
            password=self.config.get('db_pass'),
            host=self.config.get('db_host'),
            port=self.config.get('db_port'),
        )
        self.conn.autocommit = True
        self.cur = self.conn.cursor()

    def __del__(self):
        """Close the connection when the writer is garbage collected."""
        logging.info("Closing connection.....")
        # ``conn`` is missing when ``__init__`` failed before connecting.
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.close()

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _table(self, tab_key, audit=False):
        """Return the schema-qualified table name, or its ``aud_`` twin."""
        prefix = "aud_" if audit else ""
        return self.config.get('crawler_schema') + "." + prefix + self.config.get(tab_key)

    @staticmethod
    def _literal(value):
        """Convert a value that used to be written unquoted into a parameter.

        Numbers pass through untouched.  Strings are accepted in the
        spellings that were valid as a bare SQL token: ``"NULL"`` (any case)
        becomes ``None`` and numeric strings become ``int``/``float``.  Any
        other string is passed on unchanged and left to the server to coerce.
        """
        if not isinstance(value, str):
            return value
        token = value.strip()
        if token.lower() == "null":
            return None
        for cast in (int, float):
            try:
                return cast(token)
            except ValueError:
                continue
        return value

    @classmethod
    def _params(cls, data, columns, unquoted):
        """Build the parameter list for ``columns`` from ``data``.

        Columns named in ``unquoted`` go through :meth:`_literal`; all others
        are sent as ``str(value)``, i.e. as a quoted literal that the server
        coerces to the column type.
        """
        return [
            cls._literal(data[column]) if column in unquoted else str(data[column])
            for column in columns
        ]

    def _sync(self, tab_key, data, key, insert, update, compare,
              unquoted=(), key_unquoted=None):
        """Insert, touch or update one row and mirror the result to the audit table.

        Args:
            tab_key: config key holding the table name.
            data: record supplied by the caller; only read, never modified.
            key: column names that identify the row (the lookup predicate).
            insert: column names written when the row does not exist yet.
                The audit copy uses ``id`` + these columns + ``createdat`` +
                ``updatedat``.
            update: column names rewritten when the stored row differs.
            compare: ``(column, index)`` pairs; ``str(data[column])`` is
                compared with ``str(row[index])`` where ``row`` is the
                ``select *`` result.  The indices depend on the physical
                column order of the table.
            unquoted: columns of ``insert``/``update`` sent as numeric
                parameters (see :meth:`_literal`).
            key_unquoted: same for the ``key`` columns; defaults to
                ``unquoted``.
        """
        if key_unquoted is None:
            key_unquoted = unquoted
        table = self._table(tab_key)
        audit = self._table(tab_key, audit=True)

        where = " and ".join(column + " = %s" for column in key)
        where_params = self._params(data, key, key_unquoted)

        audit_columns = ",".join(
            ["id"] + [column for column in insert if column != "id"] + ["createdat", "updatedat"]
        )
        copy_to_audit = (
            "insert into " + audit + " (" + audit_columns + ") select " + audit_columns
            + " from " + table + " where " + where
        )

        self.cur.execute("select * from " + table + " where " + where, where_params)
        row = self.cur.fetchone()

        if not row:
            # New record: create it, then snapshot it into the audit table.
            placeholders = ",".join(["%s"] * len(insert))
            self.cur.execute(
                "insert into " + table + " (" + ",".join(insert) + ") values (" + placeholders + ")",
                self._params(data, insert, unquoted),
            )
            self.cur.execute(copy_to_audit, where_params)
        elif all(str(data[column]) == str(row[index]) for column, index in compare):
            # Nothing changed: only bump the timestamps.
            self.cur.execute(
                "update " + table + " set updatedat=now() where " + where,
                where_params,
            )
            self.cur.execute(
                "update " + audit + " a set updatedat=b.updatedat from " + table
                + " b where a.id=b.id and b.id = %s",
                [row[0]],
            )
        else:
            # Something changed: rewrite the row and snapshot the new state.
            assignments = ", ".join(column + "=%s" for column in update)
            self.cur.execute(
                "update " + table + " set " + assignments + ", updatedat=now() where " + where,
                self._params(data, update, unquoted) + where_params,
            )
            self.cur.execute(copy_to_audit, where_params)

    # ------------------------------------------------------------------
    # Public writers (one per table)
    # ------------------------------------------------------------------
    def rce_category(self, data):
        """Upsert a category, identified by ``category_name``."""
        self._sync(
            'category_tab', data,
            key=('category_name',),
            insert=('parent_category_id', 'rce_source_id', 'rce_source_category_id',
                    'rce_source_status', 'category_page_url', 'category_page_url_hash',
                    'category_name'),
            update=('parent_category_id', 'rce_source_category_id', 'category_name',
                    'category_page_url', 'category_page_url_hash'),
            compare=(('parent_category_id', 1), ('rce_source_category_id', 3),
                     ('category_name', 7), ('category_page_url', 5)),
            unquoted=('parent_category_id', 'rce_source_id', 'rce_source_category_id',
                      'rce_source_status'),
        )

    def rce_product(self, data):
        """Upsert a product, identified by ``product_page_url``.

        ``product_description`` is written but deliberately not part of the
        change detection, exactly as before.
        """
        columns = (
            'rce_source_product_id', 'rce_source_product_status', 'product_page_url',
            'product_page_url_hash', 'rce_category_id', 'rce_brand_id', 'rce_store_id',
            'rce_source_product_name', 'product_images', 'product_description',
            'product_sold_total', 'product_sold', 'product_price_min',
            'product_price_min_before_discount', 'product_price_max',
            'product_price_max_before_discount', 'ratings', 'product_section',
            'rce_source_id',
        )
        self._sync(
            'product_tab', data,
            key=('product_page_url',),
            insert=columns,
            update=columns,
            compare=(
                ('rce_source_product_id', 1), ('rce_source_product_status', 2),
                ('product_page_url', 3), ('product_page_url_hash', 4),
                ('rce_category_id', 5), ('rce_brand_id', 6), ('rce_store_id', 7),
                ('rce_source_product_name', 8), ('product_images', 9),
                ('product_sold_total', 11), ('product_sold', 12),
                ('product_price_min', 13), ('product_price_min_before_discount', 14),
                ('product_price_max', 15), ('product_price_max_before_discount', 16),
                ('ratings', 17), ('rce_source_id', 21), ('product_section', 22),
            ),
            unquoted=('rce_source_product_id', 'rce_source_product_status',
                      'rce_category_id', 'rce_brand_id', 'rce_store_id',
                      'product_sold_total', 'product_sold', 'rce_source_id'),
        )

    def rce_product_variant(self, data):
        """Upsert a product variant, identified by ``product_variant_name``.

        NOTE(review): the lookup key is the variant name alone, so equally
        named variants of different products share one row - confirm that
        this is intended.
        """
        columns = ('rce_source_variant_id', 'rce_product_id', 'product_variant_name',
                   'product_variant_price', 'product_variant_price_before_discount',
                   'product_variant_stock')
        self._sync(
            'variant_tab', data,
            key=('product_variant_name',),
            insert=columns,
            update=columns,
            compare=(('rce_source_variant_id', 1), ('rce_product_id', 2),
                     ('product_variant_name', 3), ('product_variant_price', 4),
                     ('product_variant_price_before_discount', 5),
                     ('product_variant_stock', 6)),
            unquoted=('rce_source_variant_id', 'rce_product_id', 'product_variant_stock'),
        )

    def rce_brand(self, data):
        """Upsert a brand, identified by ``brand_page_url``."""
        columns = ('rce_source_id', 'rce_source_brand_status', 'brand_page_url',
                   'brand_page_url_hash', 'brand_name')
        self._sync(
            'brand_tab', data,
            key=('brand_page_url',),
            insert=columns,
            update=columns,
            compare=(('rce_source_id', 1), ('rce_source_brand_status', 3),
                     ('brand_page_url', 4), ('brand_page_url_hash', 5),
                     ('brand_name', 6)),
            unquoted=('rce_source_id', 'rce_source_brand_status'),
        )

    def rce_reseller(self, data):
        """Upsert a reseller, identified by ``reseller_name``."""
        columns = ('rce_source_id', 'rce_source_reseller_status', 'reseller_name',
                   'reseller_average_rating', 'reseller_description')
        self._sync(
            'reseller_tab', data,
            key=('reseller_name',),
            insert=columns,
            update=columns,
            compare=(('rce_source_reseller_status', 3), ('reseller_name', 4),
                     ('reseller_average_rating', 5), ('reseller_description', 6)),
            unquoted=('rce_source_id', 'rce_source_reseller_status'),
        )

    def rce_reseller_store(self, data):
        """Upsert a reseller store, identified by ``store_page_url``."""
        columns = ('rce_source_store_status', 'store_page_url', 'store_page_url_hash',
                   'store_location', 'rce_reseller_id', 'rce_source_id')
        self._sync(
            'reseller_store_tab', data,
            key=('store_page_url',),
            insert=columns,
            update=columns,
            compare=(('rce_source_store_status', 2), ('store_page_url', 3),
                     ('store_page_url_hash', 4), ('store_location', 5),
                     ('rce_reseller_id', 6), ('rce_source_id', 9)),
            unquoted=('rce_source_store_status', 'rce_reseller_id', 'rce_source_id'),
        )

    def rce_ratings_reviews(self, data):
        """Upsert a review, identified by ``rce_product_id`` + ``username``.

        ``data['id']`` is only used when the review is inserted.
        """
        columns = ('rce_product_id', 'username', 'review', 'img_url',
                   'review_like_count', 'user_tier', 'shop_id', 'video_url', 'rating')
        self._sync(
            'review_tab', data,
            key=('rce_product_id', 'username'),
            insert=('id',) + columns,
            update=columns,
            compare=(('rce_product_id', 1), ('username', 2), ('review', 3),
                     ('img_url', 4), ('review_like_count', 5), ('user_tier', 6),
                     ('shop_id', 7), ('video_url', 8), ('rating', 9)),
            unquoted=('id', 'rce_product_id', 'review_like_count', 'shop_id'),
        )

    def rce_ratings_reviews_productmodels(self, data):
        """Upsert the model of a review, identified by ``rce_rating_id``."""
        self._sync(
            'review_productmodels_tab', data,
            key=('rce_rating_id',),
            insert=('rce_rating_id', 'model_id'),
            update=('model_id',),
            compare=(('rce_rating_id', 1), ('model_id', 2)),
            unquoted=('rce_rating_id',),
        )

    def rce_tags(self, data):
        """Upsert a review tag, identified by ``description``.

        ``data['id']`` is only used when the tag is inserted.
        """
        self._sync(
            'review_tags_tab', data,
            key=('description',),
            insert=('id', 'description'),
            update=('description',),
            compare=(('description', 1),),
            unquoted=('id',),
        )

    def rce_ratings_reviews_producttags(self, data):
        """Upsert the tag list of a review, identified by ``rce_rating_id``.

        ``tag_ids`` takes part in the change detection so that a changed tag
        list is written back and audited.  Its row index (2) is assumed from
        the ``id, rce_rating_id, tag_ids`` column order used for the audit
        table - verify against the schema.
        """
        self._sync(
            'review_producttags_tab', data,
            key=('rce_rating_id',),
            insert=('rce_rating_id', 'tag_ids'),
            update=('tag_ids',),
            compare=(('rce_rating_id', 1), ('tag_ids', 2)),
            unquoted=('rce_rating_id',),
            # The lookup has always compared against a quoted literal here.
            key_unquoted=(),
        )