760 lines
48 KiB
Python
Executable File
760 lines
48 KiB
Python
Executable File
import logging
|
|
import psycopg2
|
|
|
|
###### Looger ######
|
|
logname = '/home/ubuntu/logs/hasaki_crawler.log'
|
|
#logname = 'hasaki_crawler.log'
|
|
logging.basicConfig(filename=logname,
|
|
filemode='a',
|
|
format='%(asctime)s,%(msecs)d %(name)s %(levelname)s: %(message)s',
|
|
datefmt="%Y-%m-%d %H:%M:%S",
|
|
level=logging.INFO)
|
|
|
|
class hasaki_db_writer:
|
|
def __init__(self, config):
|
|
self.config = config
|
|
self.conn = psycopg2.connect(database=self.config.get('database'), user=self.config.get('db_user'), password=self.config.get('db_pass'), host=self.config.get('db_host'), port=self.config.get('db_port'))
|
|
self.conn.autocommit = True
|
|
self.cur = self.conn.cursor()
|
|
|
|
def __del__(self):
|
|
logging.info("Closing connection.....")
|
|
self.conn.close()
|
|
|
|
def get_id(self, schema, table):
|
|
sql = f"""
|
|
select max(id) from {schema}.{table}
|
|
"""
|
|
self.cur.execute(sql)
|
|
res = self.cur.fetchone()
|
|
|
|
if res[0] != None:
|
|
id = res[0] + 1
|
|
else:
|
|
id = 1
|
|
|
|
return id
|
|
|
|
def get_aud_id(self, schema, table):
|
|
sql = f"""
|
|
select max(auditid) from {schema}.{table}
|
|
"""
|
|
self.cur.execute(sql)
|
|
res = self.cur.fetchone()
|
|
|
|
if res[0] != None:
|
|
id = res[0] + 1
|
|
else:
|
|
id = 1
|
|
|
|
return id
|
|
|
|
def rce_category(self, data):
|
|
sql = f"""
|
|
select * from {self.config.get('crawler_schema')}.{self.config.get('category_tab')} where category_name = '{data['category_name']}' and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
self.cur.execute(sql)
|
|
res = self.cur.fetchone()
|
|
|
|
id_main = self.get_id(self.config.get('crawler_schema'), self.config.get('category_tab'))
|
|
id_aud = self.get_aud_id(self.config.get('crawler_schema'), "aud_" + self.config.get('category_tab'))
|
|
|
|
|
|
if not res:
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.{self.config.get('category_tab')}(id,parent_category_id,rce_source_id,rce_source_category_id,rce_source_status,category_page_url,category_page_url_hash,category_name,category_parent_name)
|
|
values({id_main},{data['parent_category_id']},{data['rce_source_id']},{data['rce_source_category_id']},{data['rce_source_status']},'{data['category_page_url']}','{data['category_page_url_hash']}','{data['category_name']}','{data['category_parent_name']}')
|
|
"""
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('category_tab')}(auditid,id,parent_category_id,rce_source_id,rce_source_category_id,rce_source_status,category_page_url,category_page_url_hash,category_name,createdat,updatedat,category_parent_name)
|
|
select {id_aud},id,parent_category_id,rce_source_id,rce_source_category_id,rce_source_status,category_page_url,category_page_url_hash,category_name,createdat,updatedat,category_parent_name from {self.config.get('crawler_schema')}.{self.config.get('category_tab')}
|
|
where category_name = '{data['category_name']}' and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
else:
|
|
if str(data['parent_category_id'])==str(res[1]) and str(data['rce_source_category_id'])==str(res[3]) and str(data['category_name']) == str(res[7]) and \
|
|
str(data['category_page_url'])==str(res[5]) and str(data['category_parent_name'])==str(res[12]):
|
|
sql = f"""
|
|
update {self.config.get('crawler_schema')}.{self.config.get('category_tab')} set updatedat=GETDATE()
|
|
where category_name = '{data['category_name']}' and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = "update "+self.config.get('crawler_schema')+".aud_"+self.config.get('category_tab')+" a set updatedat=b.updatedat " \
|
|
"from "+self.config.get('crawler_schema')+"."+self.config.get('category_tab')+" b where a.id=b.id and b.id = "+str(res[0])
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
else:
|
|
sql = f"""
|
|
update {self.config.get('crawler_schema')}.{self.config.get('category_tab')} set parent_category_id={data['parent_category_id']}, rce_source_category_id = {data['rce_source_category_id']},
|
|
category_name = '{data['category_name']}', category_page_url = '{data['category_page_url']}', category_page_url_hash = '{data['category_page_url_hash']}', category_parent_name = '{data['category_parent_name']}',
|
|
updatedat=GETDATE() where category_name = '{data['category_name']}' and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('category_tab')+"(auditid,id,parent_category_id,rce_source_id," \
|
|
"rce_source_category_id,rce_source_status,category_page_url,category_page_url_hash,category_name,createdat,updatedat,category_parent_name) " \
|
|
"select "+str(id_aud)+", id,parent_category_id,rce_source_id,rce_source_category_id,rce_source_status,category_page_url,category_page_url_hash," \
|
|
"category_name,createdat,updatedat,category_parent_name from "+self.config.get('crawler_schema')+"."+self.config.get('category_tab')+" " \
|
|
"where category_name = '"+ str(res[7])+"'"
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
def rce_product(self, data):
|
|
sql = f"""
|
|
select * from {self.config.get('crawler_schema')}.{self.config.get('product_tab')}
|
|
where rce_source_product_id = {data['rce_source_product_id']} and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
self.cur.execute(sql)
|
|
res = self.cur.fetchone()
|
|
|
|
id_main = self.get_id(self.config.get('crawler_schema'), self.config.get('product_tab'))
|
|
id_aud = self.get_aud_id(self.config.get('crawler_schema'), "aud_" + self.config.get('product_tab'))
|
|
|
|
if not res:
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.{self.config.get('product_tab')}(id,rce_source_product_id,rce_source_product_status,product_page_url,
|
|
product_page_url_hash,rce_category_id,rce_brand_id,rce_store_id,rce_source_product_name,product_images,product_description,product_sold_total,product_sold,
|
|
product_price_min,product_price_min_before_discount,product_price_max,product_price_max_before_discount,ratings,product_section,
|
|
rce_source_id,countryoforigin,rank,ships_from) values({id_main},{data['rce_source_product_id']},{data['rce_source_product_status']},'{data['product_page_url']}',
|
|
'{data['product_page_url_hash']}',{data['rce_category_id']},{data['rce_brand_id']},{data['rce_store_id']},'{data['rce_source_product_name']}','{data['product_images']}','{data['product_description']}',{data['product_sold_total']},{data['product_sold']},
|
|
{data['product_price_min']},{data['product_price_min_before_discount']},{data['product_price_max']},{data['product_price_max_before_discount']},{data['ratings']},'{data['product_section']}',
|
|
{data['rce_source_id']},'{data['countryoforigin']}',{data['rank']},'{data['ships_from']}')
|
|
"""
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('product_tab')}(auditid,id,rce_source_product_id,rce_source_product_status,product_page_url,product_page_url_hash,
|
|
rce_category_id,rce_brand_id,rce_store_id,rce_source_product_name,product_images,product_description,product_sold_total,product_sold,product_price_min,product_price_min_before_discount,
|
|
product_price_max,product_price_max_before_discount,ratings,ships_from,product_section,createdat,updatedat,rce_source_id,countryoforigin,rank)
|
|
select {id_aud},id,rce_source_product_id,rce_source_product_status,product_page_url,product_page_url_hash,
|
|
rce_category_id,rce_brand_id,rce_store_id,rce_source_product_name,product_images,product_description,product_sold_total,product_sold,product_price_min,product_price_min_before_discount,
|
|
product_price_max,product_price_max_before_discount,ratings,ships_from,product_section,createdat,updatedat,rce_source_id,countryoforigin,rank from {self.config.get('crawler_schema')}.{self.config.get('product_tab')}
|
|
where rce_source_product_id = {data['rce_source_product_id']} and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
else:
|
|
|
|
if str(data['rce_source_product_id'])==str(res[1]) and str(data['rce_source_product_status'])==str(res[2]) and \
|
|
str(data['product_page_url'])==str(res[3]) and str(data['product_page_url_hash'])==str(res[4]) and str(data['rce_category_id'])==str(res[5]) and \
|
|
str(data['rce_brand_id'])==str(res[6]) and str(data['rce_store_id'])==str(res[7]) and str(data['rce_source_product_name'])==str(res[8]) and \
|
|
str(data['product_images'])==str(res[9]) and str(data['product_sold_total'])==str(res[11]) and \
|
|
str(data['product_sold'])==str(res[12]) and str(data['product_price_min'])==str(res[13]) and str(data['product_price_min_before_discount'])==str(res[14]) and \
|
|
str(data['product_price_max'])==str(res[15]) and str(data['product_price_max_before_discount'])==str(res[16]) and str(data['ratings'])==str(res[17]) \
|
|
and str(data['ships_from'])==str(res[18]) and str(data['rce_source_id'])==str(res[21]) \
|
|
and str(data['product_section'])==str(res[22]) and str(data['countryoforigin'])==str(res[23])\
|
|
and str(data['rank'])==str(res[24]):
|
|
|
|
sql = f"""
|
|
update {self.config.get('crawler_schema')}.{self.config.get('product_tab')} set updatedat=GETDATE()
|
|
where rce_source_product_id = {data['rce_source_product_id']} and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = "update "+self.config.get('crawler_schema')+".aud_"+self.config.get('product_tab')+" a set updatedat=b.updatedat " \
|
|
"from "+self.config.get('crawler_schema')+"."+self.config.get('product_tab')+" b where a.id=b.id and b.id = "+str(res[0])
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
else:
|
|
sql = f"""
|
|
update {self.config.get('crawler_schema')}.{self.config.get('product_tab')} set rce_source_product_id = {data['rce_source_product_id']}, rce_source_product_status={data['rce_source_product_status']}, product_page_url='{data['product_page_url']}',
|
|
product_page_url_hash='{data['product_page_url_hash']}', rce_category_id={data['rce_category_id']}, rce_brand_id={data['rce_brand_id']}, rce_store_id={data['rce_store_id']},
|
|
rce_source_product_name='{data['rce_source_product_name']}', product_images='{data['product_images']}', product_description='{data['product_description']}', product_sold_total={data['product_sold_total']},
|
|
product_sold={data['product_sold']}, product_price_min='{data['product_price_min']}',product_price_min_before_discount='{data['product_price_min_before_discount']}',
|
|
product_price_max='{data['product_price_max']}', product_price_max_before_discount='{data['product_price_max_before_discount']}', ratings={data['ratings']},
|
|
ships_from='{data['ships_from']}',product_section='{data['product_section']}',countryoforigin='{data['countryoforigin']}',rank={data['rank']}, updatedat=GETDATE()
|
|
where rce_source_product_id = {data['rce_source_product_id']} and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('product_tab')}(auditid,id,rce_source_product_id,rce_source_product_status,product_page_url,product_page_url_hash,
|
|
rce_category_id,rce_brand_id,rce_store_id,rce_source_product_name,product_images,product_description,product_sold_total,product_sold,product_price_min,product_price_min_before_discount,
|
|
product_price_max,product_price_max_before_discount,ratings,ships_from,product_section,createdat,updatedat,rce_source_id,countryoforigin,rank)
|
|
select {id_aud},id,rce_source_product_id,rce_source_product_status,product_page_url,product_page_url_hash,
|
|
rce_category_id,rce_brand_id,rce_store_id,rce_source_product_name,product_images,product_description,product_sold_total,product_sold,product_price_min,product_price_min_before_discount,
|
|
product_price_max,product_price_max_before_discount,ratings,ships_from,product_section,createdat,updatedat,rce_source_id,countryoforigin,rank from {self.config.get('crawler_schema')}.{self.config.get('product_tab')}
|
|
where rce_source_product_id = {data['rce_source_product_id']} and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
|
|
def rce_product_variant(self, data):
|
|
sql = f"""
|
|
select * from {self.config.get('crawler_schema')}.{self.config.get('variant_tab')} where
|
|
rce_source_variant_id = {data['rce_source_variant_id']} and rce_product_id = {data['rce_product_id']}
|
|
"""
|
|
self.cur.execute(sql)
|
|
res = self.cur.fetchone()
|
|
|
|
id_main = self.get_id(self.config.get('crawler_schema'), self.config.get('variant_tab'))
|
|
id_aud = self.get_aud_id(self.config.get('crawler_schema'), "aud_" + self.config.get('variant_tab'))
|
|
|
|
if not res:
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.{self.config.get('variant_tab')}(id,rce_source_variant_id,rce_product_id,product_variant_name,product_variant_price,product_variant_price_before_discount,product_variant_stock,product_variant_sku)
|
|
values({id_main},{data['rce_source_variant_id']},{data['rce_product_id']},'{data['product_variant_name']}',{data['product_variant_price']},{data['product_variant_price_before_discount']},{data['product_variant_stock']},'{data['product_variant_sku']}')
|
|
"""
|
|
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('variant_tab')}(auditid,id,rce_source_variant_id,rce_product_id,product_variant_name,product_variant_price,product_variant_price_before_discount,product_variant_stock,product_variant_sku,createdat,updatedat)
|
|
select {id_aud},id,rce_source_variant_id,rce_product_id,product_variant_name,product_variant_price,product_variant_price_before_discount,product_variant_stock,product_variant_sku,createdat,updatedat
|
|
from {self.config.get('crawler_schema')}.{self.config.get('variant_tab')} where rce_source_variant_id = {data['rce_source_variant_id']} and rce_product_id = {data['rce_product_id']}
|
|
"""
|
|
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
else:
|
|
if str(data['rce_source_variant_id'])==str(res[1]) and str(data['rce_product_id'])==str(res[2]) and str(data['product_variant_name'])==str(res[3]) and \
|
|
str(data['product_variant_price'])==str(res[4]) and str(data['product_variant_price_before_discount'])==str(res[5]) and str(data['product_variant_stock'])==str(res[6])\
|
|
and str(data['product_variant_sku'])==str(res[9]):
|
|
|
|
sql = f"""
|
|
update {self.config.get('crawler_schema')}.{self.config.get('variant_tab')} set updatedat=GETDATE()
|
|
where rce_source_variant_id = {data['rce_source_variant_id']} and rce_product_id = {data['rce_product_id']}
|
|
"""
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
|
|
sql = f"""
|
|
update {self.config.get('crawler_schema')}.aud_{self.config.get('variant_tab')} a set updatedat=b.updatedat
|
|
from {self.config.get('crawler_schema')}.{self.config.get('variant_tab')} b where a.id=b.id and b.id = {res[0]}
|
|
"""
|
|
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
else:
|
|
|
|
sql = f"""
|
|
update {self.config.get('crawler_schema')}.{self.config.get('variant_tab')} set rce_source_variant_id={data['rce_source_variant_id']},
|
|
rce_product_id={data['rce_product_id']},product_variant_name='{data['product_variant_name']}',product_variant_price={data['product_variant_price']},
|
|
product_variant_price_before_discount={data['product_variant_price_before_discount']},product_variant_stock={data['product_variant_stock']},
|
|
product_variant_sku={data['product_variant_sku']}, updatedat=GETDATE()
|
|
where rce_source_variant_id = {data['rce_source_variant_id']} and rce_product_id = {data['rce_product_id']}
|
|
"""
|
|
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('variant_tab')}(auditid,id,rce_source_variant_id,rce_product_id,product_variant_name,product_variant_price,product_variant_price_before_discount,product_variant_stock,product_variant_sku,createdat,updatedat)
|
|
select {id_aud},id,rce_source_variant_id,rce_product_id,product_variant_name,product_variant_price,product_variant_price_before_discount,product_variant_stock,product_variant_sku,createdat,updatedat
|
|
from {self.config.get('crawler_schema')}.{self.config.get('variant_tab')} where rce_source_variant_id = {data['rce_source_variant_id']} and rce_product_id = {data['rce_product_id']}
|
|
"""
|
|
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
|
|
def rce_brand(self, data):
|
|
sql = f"""
|
|
select * from {self.config.get('crawler_schema')}.{self.config.get('brand_tab')} where rce_source_brand_id = {data['rce_source_brand_id']}
|
|
and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
self.cur.execute(sql)
|
|
res = self.cur.fetchone()
|
|
|
|
id_main = self.get_id(self.config.get('crawler_schema'), self.config.get('brand_tab'))
|
|
id_aud = self.get_aud_id(self.config.get('crawler_schema'), "aud_" + self.config.get('brand_tab'))
|
|
|
|
if not res:
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.{self.config.get('brand_tab')}(id,rce_source_id,rce_source_brand_id,rce_source_brand_status,brand_page_url,brand_page_url_hash,brand_name,brand_following,brand_rating)
|
|
values({id_main},{data['rce_source_id']},{data['rce_source_brand_id']},{data['rce_source_brand_status']},'{data['brand_page_url']}','{data['brand_page_url_hash']}','{data['brand_name']}',{data['brand_following']},{data['brand_rating']})
|
|
"""
|
|
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('brand_tab')}(auditid,id,rce_source_id,rce_source_brand_id,rce_source_brand_status,brand_page_url,brand_page_url_hash,brand_name,brand_following,brand_rating,createdat,updatedat)
|
|
select {id_aud}, id,rce_source_id,rce_source_brand_id,rce_source_brand_status,brand_page_url,brand_page_url_hash,brand_name,brand_following,brand_rating,createdat,updatedat from {self.config.get('crawler_schema')}.{self.config.get('brand_tab')}
|
|
where rce_source_brand_id={data['rce_source_brand_id']} and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
else:
|
|
|
|
if str(data['rce_source_id'])==str(res[1]) and str(data['rce_source_brand_status'])==str(res[3]) and str(data['brand_page_url'])==str(res[4]) and \
|
|
str(data['brand_page_url_hash'])==str(res[5]) and str(data['brand_name'])==str(res[6]) and str(data['rce_source_brand_id'])==str(res[2]):
|
|
|
|
sql = f"""
|
|
update {self.config.get('crawler_schema')}.{self.config.get('brand_tab')} set updatedat=GETDATE() where rce_source_brand_id={data['rce_source_brand_id']} and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = f"""
|
|
update {self.config.get('crawler_schema')}.aud_{self.config.get('brand_tab')} a set updatedat=b.updatedat
|
|
from {self.config.get('crawler_schema')}.{self.config.get('brand_tab')} b where a.id=b.id and b.id = {res[0]} and
|
|
b.rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
else:
|
|
sql = f"""
|
|
update {self.config.get('crawler_schema')}.{self.config.get('brand_tab')} set rce_source_id={data['rce_source_id']}, rce_source_brand_id={data['rce_source_brand_id']},
|
|
rce_source_brand_status={data['rce_source_brand_status']}, brand_page_url='{data['brand_page_url']}', brand_page_url_hash='{data['brand_page_url_hash']}',
|
|
brand_name='{data['brand_name']}', brand_following={data['brand_following']}, brand_rating={data['brand_rating']}, updatedat=GETDATE() where rce_source_brand_id={data['rce_source_brand_id']}
|
|
and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('brand_tab')}(auditid,id,rce_source_id,rce_source_brand_id,rce_source_brand_status,brand_page_url,brand_page_url_hash,brand_name,brand_following,brand_rating,createdat,updatedat)
|
|
select {id_aud}, id,rce_source_id,rce_source_brand_id,rce_source_brand_status,brand_page_url,brand_page_url_hash,brand_name,brand_following,brand_rating,createdat,updatedat from {self.config.get('crawler_schema')}.{self.config.get('brand_tab')}
|
|
where rce_source_brand_id={data['rce_source_brand_id']} and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
def rce_reseller(self, data):
|
|
data['reseller_name'] = data['reseller_name']
|
|
|
|
sql = "select * from "+self.config.get('crawler_schema')+"."+self.config.get('reseller_tab')+" where reseller_name = '"+str(data['reseller_name'])+"'"
|
|
self.cur.execute(sql)
|
|
res = self.cur.fetchone()
|
|
|
|
id_main = self.get_id(self.config.get('crawler_schema'), self.config.get('reseller_tab'))
|
|
id_aud = self.get_aud_id(self.config.get('crawler_schema'), "aud_" + self.config.get('reseller_tab'))
|
|
|
|
|
|
if not res:
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.{self.config.get('reseller_tab')}(id,rce_source_id,rce_source_reseller_status,reseller_name)
|
|
values({id_main},'{data['rce_source_id']}','{data['rce_source_reseller_status']}','{data['reseller_name']}')
|
|
"""
|
|
#logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('reseller_tab')}(auditid,id,rce_source_id,rce_source_reseller_status,reseller_name,createdat,updatedat)
|
|
select {id_aud}, id,rce_source_id,rce_source_reseller_status,reseller_name,createdat,updatedat from {self.config.get('crawler_schema')}.{self.config.get('reseller_tab')}
|
|
where reseller_name='{data['reseller_name']}'
|
|
"""
|
|
#logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
else:
|
|
|
|
if str(data['rce_source_reseller_status'])==str(res[3]) and str(data['reseller_name'])==str(res[4]):
|
|
|
|
sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('reseller_tab')+" set updatedat=GETDATE() " \
|
|
"where reseller_name = '"+ str(res[4])+"'"
|
|
#logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = "update "+self.config.get('crawler_schema')+".aud_"+self.config.get('reseller_tab')+" a set updatedat=b.updatedat " \
|
|
"from "+self.config.get('crawler_schema')+"."+self.config.get('reseller_tab')+" b where a.id=b.id and b.id = "+str(res[0])
|
|
#logging.info(sql)
|
|
self.cur.execute(sql)
|
|
else:
|
|
|
|
sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('reseller_tab')+" set rce_source_id="+str(data['rce_source_id'])+", " \
|
|
"rce_source_reseller_status="+str(data['rce_source_reseller_status'])+", reseller_name='"+str(data['reseller_name'])+"', reseller_average_rating=" \
|
|
"'"+str(data['reseller_average_rating'])+"',reseller_description='"+str(data['reseller_description'])+"', updatedat=GETDATE() where reseller_name = '"+ str(res[4])+"'"
|
|
#logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('reseller_tab')+" (auditid,id,rce_source_id,rce_source_reseller_status," \
|
|
"reseller_name,reseller_average_rating,reseller_description,createdat,updatedat) select "+str(id_aud)+", id,rce_source_id,rce_source_reseller_status," \
|
|
"reseller_name,reseller_average_rating,reseller_description,createdat,updatedat from " \
|
|
""+self.config.get('crawler_schema')+"."+self.config.get('reseller_tab')+" where reseller_name='"+str(res[4])+"'"
|
|
#logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
def rce_reseller_store(self, data):
|
|
|
|
data['store_page_url'] = data['store_page_url'].replace("'","''")
|
|
|
|
sql = "select * from "+self.config.get('crawler_schema')+"."+self.config.get('reseller_store_tab')+" where store_page_url = '"+str(data['store_page_url'])+"'"
|
|
self.cur.execute(sql)
|
|
res = self.cur.fetchone()
|
|
|
|
id_main = self.get_id(self.config.get('crawler_schema'), self.config.get('reseller_store_tab'))
|
|
id_aud = self.get_aud_id(self.config.get('crawler_schema'), "aud_" + self.config.get('reseller_store_tab'))
|
|
|
|
if not res:
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.{self.config.get('reseller_store_tab')}(id,rce_source_store_status,store_page_url,store_page_url_hash,rce_reseller_id,rce_source_id)
|
|
values({id_main},'{data['rce_source_store_status']}','{data['store_page_url']}','{data['store_page_url_hash']}',{data['rce_reseller_id']},{data['rce_source_id']})
|
|
"""
|
|
#logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('reseller_store_tab')}(auditid,id,rce_source_store_status,store_page_url,store_page_url_hash,rce_reseller_id,createdat,updatedat,rce_source_id)
|
|
select {id_aud}, id,rce_source_store_status,store_page_url,store_page_url_hash,rce_reseller_id,createdat,updatedat,rce_source_id from {self.config.get('crawler_schema')}.{self.config.get('reseller_store_tab')}
|
|
where store_page_url= '{data['store_page_url']}'
|
|
"""
|
|
#logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
else:
|
|
|
|
if str(data['rce_source_store_status'])==str(res[2]) and str(data['store_page_url'])==str(res[3]) and \
|
|
str(data['store_page_url_hash'])==str(res[4]) and \
|
|
str(data['rce_reseller_id'])==str(res[6]) and str(data['rce_source_id'])==str(res[9]):
|
|
|
|
sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('reseller_store_tab')+" set updatedat=GETDATE() " \
|
|
"where store_page_url = '"+ str(res[3])+"'"
|
|
#logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = "update "+self.config.get('crawler_schema')+".aud_"+self.config.get('reseller_store_tab')+" a set updatedat=b.updatedat " \
|
|
"from "+self.config.get('crawler_schema')+"."+self.config.get('reseller_store_tab')+" b where a.id=b.id and b.id = "+str(res[0])
|
|
#logging.info(sql)
|
|
self.cur.execute(sql)
|
|
else:
|
|
|
|
sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('reseller_store_tab')+" set " \
|
|
"rce_source_store_status="+str(data['rce_source_store_status'])+", store_page_url='"+str(data['store_page_url'])+"', store_page_url_hash=" \
|
|
"'"+str(data['store_page_url_hash'])+"',store_location='"+str(data['store_location'])+"', rce_reseller_id="+str(data['rce_reseller_id'])+", " \
|
|
"updatedat=GETDATE(), rce_source_id="+str(data['rce_source_id'])+" where store_page_url = '"+ str(res[3])+"'"
|
|
#logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('reseller_store_tab')+" (auditid,id,rce_source_store_status," \
|
|
"store_page_url,store_page_url_hash,store_location,rce_reseller_id,createdat,updatedat,rce_source_id) select "+id_aud+", id,rce_source_store_status," \
|
|
"store_page_url,store_page_url_hash,store_location,rce_reseller_id,createdat,updatedat,rce_source_id from " \
|
|
""+self.config.get('crawler_schema')+"."+self.config.get('reseller_store_tab')+" where store_page_url='"+str(res[3])+"'"
|
|
#logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
def rce_ratings_reviews(self, data):
|
|
|
|
sql = f"""
|
|
select * from {self.config.get('crawler_schema')}.{self.config.get('review_tab')}
|
|
where rce_product_id = {data['rce_product_id']} and username = '{data['username']}'
|
|
"""
|
|
|
|
self.cur.execute(sql)
|
|
res = self.cur.fetchone()
|
|
|
|
data['username'] = data['username'].replace("'","")
|
|
|
|
id_main = self.get_id(self.config.get('crawler_schema'), self.config.get('review_tab'))
|
|
id_aud = self.get_aud_id(self.config.get('crawler_schema'), "aud_" + self.config.get('review_tab'))
|
|
|
|
if not res:
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.{self.config.get('review_tab')}(id,rce_product_id,username,review,img_url,review_like_count,user_tier,shop_id,video_url,rating)
|
|
values({id_main},{data['rce_product_id']},'{data['username']}','{data['review']}','{data['img_url']}',{data['review_like_count']},'{data['user_tier']}',{data['shop_id']},'{data['video_url']}',{data['rating']})
|
|
"""
|
|
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('review_tab')}(auditid,id,rce_product_id,username,review,img_url,review_like_count,user_tier,shop_id,video_url,rating,createdat,updatedat)
|
|
select {id_aud},id,rce_product_id,username,review,img_url,review_like_count,user_tier,shop_id,video_url,rating,createdat,updatedat from {self.config.get('crawler_schema')}.{self.config.get('review_tab')}
|
|
where rce_product_id = {data['rce_product_id']} and username = '{data['username']}'
|
|
"""
|
|
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
else:
|
|
|
|
if str(data['rce_product_id'])==str(res[1]) and str(data['username'])==str(res[2]) and str(data['review'])==str(res[3]) and \
|
|
str(data['img_url'])==str(res[4]) and str(data['review_like_count'])==str(res[5]) and str(data['user_tier'])==str(res[6]) and \
|
|
str(data['shop_id'])==str(res[7]) and str(data['video_url'])==str(res[8]) and str(data['rating'])==str(res[9]):
|
|
|
|
|
|
sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_tab')+" set updatedat=GETDATE() " \
|
|
"where rce_product_id = "+ str(res[1])+" and username ='"+res[2]+"'"
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = "update "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_tab')+" a set updatedat=b.updatedat " \
|
|
"from "+self.config.get('crawler_schema')+"."+self.config.get('review_tab')+" b where a.id=b.id and b.id = "+str(res[0])
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
else:
|
|
|
|
sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_tab')+" set rce_product_id="+str(data['rce_product_id'])+", " \
|
|
"username='"+str(data['username'])+"', review='"+str(data['review'])+"', img_url=" \
|
|
"'"+str(data['img_url'])+"',review_like_count="+str(data['review_like_count'])+", user_tier='"+str(data['user_tier'])+"', " \
|
|
"shop_id="+str(data['shop_id'])+", video_url='"+str(data['video_url'])+"', rating='"+str(data['rating'])+"', updatedat=GETDATE() " \
|
|
"where rce_product_id = "+ str(res[1])+" and username ='"+str(data['username'])+"'"
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('review_tab')}(auditid,id,rce_product_id,username,review,img_url,review_like_count,user_tier,shop_id,video_url,rating,createdat,updatedat)
|
|
select {id_aud},id,rce_product_id,username,review,img_url,review_like_count,user_tier,shop_id,video_url,rating,createdat,updatedat from {self.config.get('crawler_schema')}.{self.config.get('review_tab')}
|
|
where rce_product_id = {data['rce_product_id']} and username = '{data['username']}'
|
|
"""
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
def rce_seo(self, data):
|
|
sql = f"""
|
|
select * from {self.config.get('crawler_schema')}.{self.config.get('seo_tab')}
|
|
where rce_product_id = {data['rce_product_id']} and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
|
|
self.cur.execute(sql)
|
|
res = self.cur.fetchone()
|
|
|
|
id_main = self.get_id(self.config.get('crawler_schema'), self.config.get('seo_tab'))
|
|
id_aud = self.get_aud_id(self.config.get('crawler_schema'), "aud_" + self.config.get('seo_tab'))
|
|
|
|
if not res:
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.{self.config.get('seo_tab')}(id,rce_product_id,rce_source_id,seo_title,seo_description,seo_url,seo_url_hash,seo_image,seo_price_amount,seo_price_currency,seo_product_band,seo_product_availability,seo_product_category,seo_product_condition,seo_product_retailer_item_id,seo_product_robots)
|
|
values({id_main},{data['rce_product_id']},{data['rce_source_id']},'{data['seo_title']}','{data['seo_description']}','{data['seo_url']}','{data['seo_url_hash']}','{data['seo_image']}',{data['seo_price_amount']},'{data['seo_price_currency']}','{data['seo_product_band']}','{data['seo_product_availability']}','{data['seo_product_category']}',
|
|
'{data['seo_product_condition']}',{data['seo_product_retailer_item_id']},'{data['seo_product_robots']}')
|
|
"""
|
|
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('seo_tab')}(auditid,id,rce_product_id,rce_source_id,seo_title,seo_description,seo_url,seo_url_hash,seo_image,seo_price_amount,seo_price_currency,seo_product_band,seo_product_availability,seo_product_category,seo_product_condition,seo_product_retailer_item_id,seo_product_robots,createdat,updatedat)
|
|
select {id_aud},id,rce_product_id,rce_source_id,seo_title,seo_description,seo_url,seo_url_hash,seo_image,seo_price_amount,seo_price_currency,seo_product_band,seo_product_availability,seo_product_category,seo_product_condition,seo_product_retailer_item_id,seo_product_robots,createdat,updatedat from {self.config.get('crawler_schema')}.{self.config.get('seo_tab')}
|
|
where rce_product_id = {data['rce_product_id']} and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
else:
|
|
|
|
if (str(data['rce_product_id']) == str(res[1]) and str(data['rce_source_id']) == str(res[2]) and str(data['seo_title']) == str(res[3]) and \
|
|
str(data['seo_description']) == str(res[4]) and str(data['seo_url']) == str(res[5]) and str(data['seo_url_hash']) == str(res[6]) and \
|
|
str(data['seo_image']) == str(res[7]) and str(data['seo_price_amount']) == str(res[8]) and str(data['seo_price_currency']) == str(res[9]) and \
|
|
str(data['seo_product_band']) == str(res[10])) and str(data['seo_product_availability']) == str(res[11]) and str(data['seo_product_category']) == str(res[12]) and \
|
|
str(data['seo_product_condition']) == str(res[13]) and str(data['seo_product_retailer_item_id']) == str(res[14]) and str(data['seo_product_robots']) == str(res[15]):
|
|
|
|
sql = "update " + self.config.get('crawler_schema') + "." + self.config.get('seo_tab') + " set updatedat=GETDATE() " \
|
|
"where rce_product_id = " + str(res[1]) + " and rce_source_id =" + str(data['rce_source_id'])
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = "update " + self.config.get('crawler_schema') + ".aud_" + self.config.get('seo_tab') + " a set updatedat=b.updatedat " \
|
|
"from " + self.config.get('crawler_schema') + "." + self.config.get('seo_tab') + " b where a.id=b.id and b.id = " + str(res[0])
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
else:
|
|
|
|
sql = f"""
|
|
update {self.config.get('crawler_schema')}.{self.config.get('seo_tab')} set rce_product_id={data['rce_product_id']}, rce_source_id={data['rce_source_id']}, seo_title='{data['seo_title']}', seo_description='{data['seo_description']}',
|
|
seo_url='{data['seo_url']}', seo_url_hash='{data['seo_url_hash']}', seo_image='{data['seo_image']}', seo_price_amount='{data['seo_price_amount']}', seo_price_currency='{data['seo_price_currency']}', seo_product_band='{data['seo_product_band']}',
|
|
seo_product_availability='{data['seo_product_availability']}', seo_product_category='{data['seo_product_category']}', seo_product_condition='{data['seo_product_condition']}', seo_product_retailer_item_id={data['seo_product_retailer_item_id']},
|
|
seo_product_robots='{data['seo_product_robots']}' where rce_product_id = {data['rce_product_id']} and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
|
|
logging.info(sql)
|
|
self.cur.execute(sql)
|
|
|
|
sql = f"""
|
|
insert into {self.config.get('crawler_schema')}.aud_{self.config.get('seo_tab')}(auditid,id,rce_product_id,rce_source_id,seo_title,seo_description,seo_url,seo_url_hash,seo_image,seo_price_amount,seo_price_currency,seo_product_band,seo_product_availability,seo_product_category,seo_product_condition,seo_product_retailer_item_id,seo_product_robots,createdat,updatedat)
|
|
select {id_aud},id,rce_product_id,rce_source_id,seo_title,seo_description,seo_url,seo_url_hash,seo_image,seo_price_amount,seo_price_currency,seo_product_band,seo_product_availability,seo_product_category,seo_product_condition,seo_product_retailer_item_id,seo_product_robots,createdat,updatedat from {self.config.get('crawler_schema')}.{self.config.get('seo_tab')}
|
|
where rce_product_id = {data['rce_product_id']} and rce_source_id = {data['rce_source_id']}
|
|
"""
|
|
logging.info(sql)
|
|
|
|
self.cur.execute(sql)
|
|
|
|
|
|
|
|
# def rce_ratings_reviews_productmodels(self,data):
|
|
#
|
|
# sql = "select * from "+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" where rce_rating_id = "+str(data['rce_rating_id'])
|
|
# self.cur.execute(sql)
|
|
# res = self.cur.fetchone()
|
|
#
|
|
#
|
|
# if not res:
|
|
#
|
|
# sql = "insert into "+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" (rce_rating_id,model_id) " \
|
|
# "values("+str(data['rce_rating_id'])+",'"+str(data['model_id'])+"')"
|
|
# #logging.info(sql)
|
|
#
|
|
# self.cur.execute(sql)
|
|
#
|
|
# sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_productmodels_tab')+" (id,rce_rating_id,model_id," \
|
|
# "createdat,updatedat) select id,rce_rating_id,model_id,createdat,updatedat from " \
|
|
# ""+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" where rce_rating_id="+str(data['rce_rating_id'])+""
|
|
# #logging.info(sql)
|
|
#
|
|
# self.cur.execute(sql)
|
|
#
|
|
# else:
|
|
#
|
|
# if str(data['rce_rating_id'])==str(res[1]) and str(data['model_id'])==str(res[2]):
|
|
#
|
|
# sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" set updatedat=GETDATE() " \
|
|
# "where rce_rating_id = "+ str(res[1])
|
|
# #logging.info(sql)
|
|
# self.cur.execute(sql)
|
|
#
|
|
# sql = "update "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_productmodels_tab')+" a set updatedat=b.updatedat " \
|
|
# "from "+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" b where a.id=b.id and b.id = "+str(res[0])
|
|
# #logging.info(sql)
|
|
# self.cur.execute(sql)
|
|
# else:
|
|
#
|
|
# sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" set model_id="+str(data['model_id'])+", " \
|
|
# "updatedat=GETDATE() where rce_source_store_id = "+ str(res[1])
|
|
# #logging.info(sql)
|
|
# self.cur.execute(sql)
|
|
#
|
|
# sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_productmodels_tab')+" (id,rce_rating_id,model_id," \
|
|
# "createdat,updatedat) select id,rce_rating_id,model_id,createdat,updatedat from " \
|
|
# ""+self.config.get('crawler_schema')+"."+self.config.get('review_productmodels_tab')+" where rce_rating_id="+str(res[1])+""
|
|
# #logging.info(sql)
|
|
#
|
|
# self.cur.execute(sql)
|
|
#
|
|
#
|
|
# def rce_tags(self,data):
|
|
#
|
|
# sql = "select * from "+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" where description = '"+str(data['description'])+"'"
|
|
# self.cur.execute(sql)
|
|
# res = self.cur.fetchone()
|
|
#
|
|
#
|
|
# if not res:
|
|
#
|
|
# sql = "insert into "+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" (id,description) " \
|
|
# "values("+str(data['id'])+",'"+str(data['description'])+"')"
|
|
# #logging.info(sql)
|
|
#
|
|
# self.cur.execute(sql)
|
|
#
|
|
# sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_tags_tab')+" (id,description," \
|
|
# "createdat,updatedat) select id,description,createdat,updatedat from " \
|
|
# ""+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" where description='"+str(data['description'])+"'"
|
|
# #logging.info(sql)
|
|
#
|
|
# self.cur.execute(sql)
|
|
#
|
|
# else:
|
|
#
|
|
# if str(data['description'])==str(res[1]):
|
|
#
|
|
# sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" set updatedat=GETDATE() " \
|
|
# "where description = '"+ str(res[1])+"'"
|
|
# #logging.info(sql)
|
|
# self.cur.execute(sql)
|
|
#
|
|
# sql = "update "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_tags_tab')+" a set updatedat=b.updatedat " \
|
|
# "from "+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" b where a.id=b.id and b.id = "+str(res[0])
|
|
# #logging.info(sql)
|
|
# self.cur.execute(sql)
|
|
# else:
|
|
#
|
|
# sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" set description='"+str(data['description'])+"', " \
|
|
# "updatedat=GETDATE() where description = "+ str(res[1])
|
|
# #logging.info(sql)
|
|
# self.cur.execute(sql)
|
|
#
|
|
# sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_tags_tab')+" (id,description," \
|
|
# "createdat,updatedat) select id,description,createdat,updatedat from " \
|
|
# ""+self.config.get('crawler_schema')+"."+self.config.get('review_tags_tab')+" where description='"+str(res[1])+"'"
|
|
# #logging.info(sql)
|
|
#
|
|
# self.cur.execute(sql)
|
|
#
|
|
#
|
|
# def rce_ratings_reviews_producttags(self,data):
|
|
#
|
|
# sql = "select * from "+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" where rce_rating_id = '"+str(data['rce_rating_id'])+"'"
|
|
# self.cur.execute(sql)
|
|
# res = self.cur.fetchone()
|
|
#
|
|
#
|
|
# if not res:
|
|
#
|
|
# sql = "insert into "+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" (rce_rating_id,tag_ids) " \
|
|
# "values("+str(data['rce_rating_id'])+",'"+str(data['tag_ids'])+"')"
|
|
# #logging.info(sql)
|
|
#
|
|
# self.cur.execute(sql)
|
|
#
|
|
# sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_producttags_tab')+" (id,rce_rating_id,tag_ids," \
|
|
# "createdat,updatedat) select id,rce_rating_id,tag_ids,createdat,updatedat from " \
|
|
# ""+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" where rce_rating_id='"+str(data['rce_rating_id'])+"'"
|
|
# #logging.info(sql)
|
|
#
|
|
# self.cur.execute(sql)
|
|
#
|
|
# else:
|
|
#
|
|
# if str(data['rce_rating_id'])==str(res[1]):
|
|
#
|
|
# sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" set updatedat=GETDATE() " \
|
|
# "where rce_rating_id = '"+ str(res[1])+"'"
|
|
# #logging.info(sql)
|
|
# self.cur.execute(sql)
|
|
#
|
|
# sql = "update "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_producttags_tab')+" a set updatedat=b.updatedat " \
|
|
# "from "+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" b where a.id=b.id and b.id = "+str(res[0])
|
|
# #logging.info(sql)
|
|
# self.cur.execute(sql)
|
|
# else:
|
|
#
|
|
# sql = "update "+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" set rce_rating_id='"+str(data['rce_rating_id'])+"', " \
|
|
# "updatedat=GETDATE() where rce_rating_id = "+ str(res[1])
|
|
# #logging.info(sql)
|
|
# self.cur.execute(sql)
|
|
#
|
|
# sql = "insert into "+self.config.get('crawler_schema')+".aud_"+self.config.get('review_producttags_tab')+" (id,rce_rating_id,tag_ids," \
|
|
# "createdat,updatedat) select id,description,createdat,updatedat from " \
|
|
# ""+self.config.get('crawler_schema')+"."+self.config.get('review_producttags_tab')+" where description='"+str(res[1])+"'"
|
|
# #logging.info(sql)
|
|
#
|
|
# self.cur.execute(sql)
|
|
#
|
|
#
|
|
|