raena_analytics_scripts/postgresql/revenue_leakage/revenue_leakage_etl.sh

640 lines
23 KiB
Bash
Raw Normal View History

2024-06-24 12:26:08 +00:00
#!/bin/bash
echo -e " \n----------- ACCEPTING NUMBER OF DAYS BEFORE THE RUN DATE FOR WHICH THE REPORT IS TO BE RUN --------------\n"
backDay=$1
echo $backDay
echo -e " \n------------- DATE IN THE REQUIRED FORMAT --------------\n"
reportDate=$(date -d"$backDay day ago" "+%Y-%m-%d")
date
echo 'reportDate'=$reportDate
echo "
DROP TABLE IF EXISTS raena_analytics.revenue_leakage_new_stage1;
CREATE TABLE raena_analytics.revenue_leakage_new_stage1 AS
SELECT order_placed_by,
reseller_id,
customer_id,
sku,
payment_status,
created_at,
payment_amount,
payment_status_sort,
quantity
FROM
(SELECT *,
row_number() over (partition BY reseller_id,sku,payment_status,cast(created_at AS date)
ORDER BY created_at,payment_status_sort,payment_amount DESC) AS R
FROM
(SELECT DISTINCT o.order_placed_by,
o.reseller_id,
o.customer_id,
sku,
payment_status,
o.created_at,
oi.payment_price AS payment_amount,
CASE WHEN payment_status='Paid' THEN 1 WHEN payment_status='Expired' THEN 2 ELSE 3 END AS payment_status_sort,
oi.quantity
FROM RAENA_ORDER_MANAGEMENT.order o
INNER JOIN RAENA_ORDER_MANAGEMENT.order_item oi ON o.id=oi.order_id
WHERE o.payment_status IN ('Expired',
'Failed')
AND o.status='Order_Placed'
AND cast(o.created_at AS date) >='2022-01-01') inn) m
WHERE R=1 ;
DROP TABLE IF EXISTS raena_analytics.revenue_leakage_new_stage2;
CREATE TABLE raena_analytics.revenue_leakage_new_stage2 AS
SELECT DISTINCT o.order_placed_by,
o.reseller_id,
o.customer_id,
sku,
payment_status,
o.created_at,
oi.quantity,
oi.payment_price AS payment_amount
FROM
(SELECT id,
order_placed_by ,
reseller_id,
customer_id,
payment_status,
created_at
FROM RAENA_ORDER_MANAGEMENT.order
WHERE payment_status='Paid'
AND cast(created_at AS date) >='2022-01-01') o
INNER JOIN
(SELECT order_id,
sku,
quantity,
payment_price
FROM RAENA_ORDER_MANAGEMENT.order_item
WHERE cast(created_at AS date) >='2022-01-01') oi ON o.id=oi.order_id;
DROP TABLE IF EXISTS raena_analytics.om_revenue_leakage_qty_match;
CREATE TABLE raena_analytics.om_revenue_leakage_qty_match AS
SELECT DISTINCT a_order_placed_by,
a_reseller_id,
a_customer_id,
a_sku,
a_payment_status,
a_created_at,
(a_payment_amount) AS revenue_leakage,
a_order_placed_by || a_reseller_id || a_sku || a_payment_status || cast(a_created_at AS date) AS concat1
FROM
(SELECT a.order_placed_by AS a_order_placed_by,
a.reseller_id AS a_reseller_id,
a.customer_id AS a_customer_id,
a.sku AS a_sku,
a.payment_status AS a_payment_status,
a.created_at AS a_created_at,
a.quantity AS a_quantity,
a.payment_amount AS a_payment_amount,
b.order_placed_by AS b_order_placed_by,
b.reseller_id AS b_reseller_id,
b.customer_id AS b_customer_id,
b.sku AS b_sku,
b.payment_status AS b_payment_status,
b.created_at AS b_created_at,
b.quantity AS b_quantity,
b.payment_amount AS b_payment_amount
FROM raena_analytics.revenue_leakage_new_stage1 a
LEFT JOIN raena_analytics.revenue_leakage_new_stage2 b ON a.order_placed_by=b.order_placed_by
AND a.reseller_id=b.reseller_id
AND a.sku=b.sku
AND a.quantity=b.quantity
AND a.customer_id IS NULL
AND b.customer_id IS NULL
AND b.created_at BETWEEN a.created_at AND a.created_at + INTERVAL '24 HOURS'
WHERE a.customer_id IS NULL) c
WHERE b_reseller_id IS NULL
UNION
SELECT DISTINCT a_order_placed_by,
a_reseller_id,
a_customer_id,
a_sku,
a_payment_status,
a_created_at,
(a_payment_amount) AS payment_amount,
a_order_placed_by || a_reseller_id || a_customer_id || a_sku || a_payment_status || cast(a_created_at AS date)
FROM
(SELECT a.order_placed_by AS a_order_placed_by,
a.reseller_id AS a_reseller_id,
a.customer_id AS a_customer_id,
a.sku AS a_sku,
a.payment_status AS a_payment_status,
a.created_at AS a_created_at,
a.quantity AS a_quantity,
a.payment_amount AS a_payment_amount,
b.order_placed_by AS b_order_placed_by,
b.reseller_id AS b_reseller_id,
b.customer_id AS b_customer_id,
b.sku AS b_sku,
b.payment_status AS b_payment_status,
b.created_at AS b_created_at,
b.quantity AS b_quantity,
b.payment_amount AS b_payment_amount
FROM raena_analytics.revenue_leakage_new_stage1 a
LEFT JOIN raena_analytics.revenue_leakage_new_stage2 b ON a.order_placed_by=b.order_placed_by
AND a.reseller_id=b.reseller_id
AND a.sku=b.sku
AND a.quantity=b.quantity
AND a.customer_id = b.customer_id
AND b.created_at BETWEEN a.created_at AND a.created_at + INTERVAL '24 HOURS'
WHERE a.customer_id IS NOT NULL) c
WHERE b_reseller_id IS NULL;
DROP TABLE IF EXISTS raena_analytics.revenue_leakage_new_stage3;
CREATE TABLE raena_analytics.revenue_leakage_new_stage3 AS
SELECT ist.order_placed_by,
ist.reseller_id,
ist.customer_id,
ist.sku,
ist.payment_status,
nxt.created_at,
ist.quantity,
ist.payment_price
FROM
(SELECT DISTINCT o.order_placed_by,
o.reseller_id,
o.customer_id,
sku,
payment_status,
cast(o.created_at AS date) AS created_at,
sum(oi.quantity) AS quantity,
sum(oi.payment_price) AS payment_price
FROM RAENA_ORDER_MANAGEMENT.order o
INNER JOIN RAENA_ORDER_MANAGEMENT.order_item oi ON o.id=oi.order_id
WHERE o.payment_status='Paid'
AND cast(o.created_at AS date) >='2022-01-01'
AND customer_id IS NOT NULL
GROUP BY 1,
2,
3,
4,
5,
6) ist
INNER JOIN
(SELECT order_placed_by,
reseller_id,
customer_id,
sku,
payment_status,
created_at
FROM
(SELECT DISTINCT o.order_placed_by,
o.reseller_id,
o.customer_id,
sku,
payment_status,
o.created_at,
row_number() over (partition BY o.order_placed_by,o.reseller_id,o.customer_id,sku,cast(o.created_at AS date)
ORDER BY o.created_at DESC) AS R
FROM RAENA_ORDER_MANAGEMENT.order o
INNER JOIN RAENA_ORDER_MANAGEMENT.order_item oi ON o.id=oi.order_id
WHERE o.payment_status='Paid'
AND cast(o.created_at AS date) >='2022-01-01'
AND customer_id IS NOT NULL) a
WHERE a.R=1) nxt ON ist.order_placed_by=nxt.order_placed_by
AND ist.reseller_id=nxt.reseller_id
AND ist.customer_id=nxt.customer_id
AND ist.sku=nxt.sku
AND ist.created_at=cast(nxt.created_at AS date);
DROP TABLE IF EXISTS public.om_revenue_leakage_qty_not_match;
CREATE TABLE public.om_revenue_leakage_qty_not_match AS
SELECT DISTINCT a_order_placed_by,
a_reseller_id,
a_customer_id,
a_sku,
a_payment_status,
a_created_at,
(a_payment_amount)-(b_payment_amount) AS revenue_leakage,
a_order_placed_by || a_reseller_id || a_sku || a_payment_status || cast(a_created_at AS date) AS concat2
FROM
(SELECT a.order_placed_by AS a_order_placed_by,
a.reseller_id AS a_reseller_id,
a.customer_id AS a_customer_id,
a.sku AS a_sku,
a.payment_status AS a_payment_status,
a.created_at AS a_created_at,
a.quantity AS a_quantity,
a.payment_amount AS a_payment_amount,
b.order_placed_by AS b_order_placed_by,
b.reseller_id AS b_reseller_id,
b.customer_id AS b_customer_id,
b.sku AS b_sku,
b.payment_status AS b_payment_status,
b.created_at AS b_created_at,
b.quantity AS b_quantity,
b.payment_price AS b_payment_amount
FROM raena_analytics.revenue_leakage_new_stage1 a
INNER JOIN raena_analytics.revenue_leakage_new_stage3 b ON a.order_placed_by=b.order_placed_by
AND a.reseller_id=b.reseller_id
AND a.sku=b.sku
AND b.quantity < a.quantity
AND a.payment_amount>b.payment_price
AND a.customer_id ISNULL
AND b.customer_id ISNULL
AND b.created_at BETWEEN a.created_at AND a.created_at + INTERVAL '24 HOURS') c
UNION
SELECT DISTINCT a_order_placed_by,
a_reseller_id,
a_customer_id,
a_sku,
a_payment_status,
a_created_at,
(a_payment_amount)-(b_payment_amount) AS revenue_leakage,
a_order_placed_by || a_reseller_id || a_customer_id || a_sku || a_payment_status || cast(a_created_at AS date)
FROM
(SELECT a.order_placed_by AS a_order_placed_by,
a.reseller_id AS a_reseller_id,
a.customer_id AS a_customer_id,
a.sku AS a_sku,
a.payment_status AS a_payment_status,
a.created_at AS a_created_at,
a.quantity AS a_quantity,
a.payment_amount AS a_payment_amount,
b.order_placed_by AS b_order_placed_by,
b.reseller_id AS b_reseller_id,
b.customer_id AS b_customer_id,
b.sku AS b_sku,
b.payment_status AS b_payment_status,
b.created_at AS b_created_at,
b.quantity AS b_quantity,
b.payment_price AS b_payment_amount
FROM raena_analytics.revenue_leakage_new_stage1 a
INNER JOIN raena_analytics.revenue_leakage_new_stage3 b ON a.order_placed_by=b.order_placed_by
AND a.reseller_id=b.reseller_id
AND a.sku=b.sku
AND b.quantity < a.quantity
AND a.payment_amount>b.payment_price
AND a.customer_id = b.customer_id
AND b.created_at BETWEEN a.created_at AND a.created_at + INTERVAL '24 HOURS') c
ORDER BY 2,
4;
drop table if exists raena_analytics.revenue_leakage_new_stage4;
create table raena_analytics.revenue_leakage_new_stage4
as
SELECT order_placed_by,
reseller_id,
customer_id,
sku,
payment_status,
created_at,
payment_price,
payment_status_sort,
quantity
FROM
(SELECT *,
row_number() over (partition BY reseller_id,sku,payment_status,cast(created_at AS date)
ORDER BY created_at,payment_status_sort,payment_price DESC) AS R
FROM
(SELECT DISTINCT o.order_placed_by,
o.reseller_id,
o.customer_id,
sku,
payment_status,
o.created_at,
oi.payment_price,
CASE WHEN payment_status='Paid' THEN 1 WHEN payment_status='Expired' THEN 2 ELSE 3 END AS payment_status_sort,
oi.quantity
FROM RAENA_ORDER_MANAGEMENT.
ORDER o
INNER JOIN RAENA_ORDER_MANAGEMENT.order_item oi ON o.id=oi.order_id
WHERE o.payment_status!='Paid'
AND payment_status!='Pending'
AND o.status='Order_Placed'
AND cast(o.created_at AS date) >= '2022-01-01') inn) m
WHERE R=1;
drop table if exists raena_analytics.revenue_leakage_new_stage5;
create table raena_analytics.revenue_leakage_new_stage5
as
SELECT ist.order_placed_by,
ist.reseller_id,
ist.customer_id,
ist.sku,
ist.payment_status,
nxt.created_at,
ist.quantity,
ist.payment_price
FROM
(SELECT DISTINCT o.order_placed_by,
o.reseller_id,
o.customer_id,
sku,
payment_status,
cast(o.created_at AS date) AS created_at,
sum(oi.quantity) AS quantity,
sum(oi.payment_price) AS payment_price
FROM RAENA_ORDER_MANAGEMENT.
ORDER o
INNER JOIN RAENA_ORDER_MANAGEMENT.order_item oi ON o.id=oi.order_id
WHERE o.payment_status='Paid'
AND cast(o.created_at AS date) >= '2022-01-01'
AND customer_id ISNULL
GROUP BY 1,
2,
3,
4,
5,
6) ist
INNER JOIN
(SELECT order_placed_by,
reseller_id,
customer_id,
sku,
payment_status,
created_at
FROM
(SELECT DISTINCT o.order_placed_by,
o.reseller_id,
o.customer_id,
sku,
payment_status,
o.created_at,
row_number() over (partition BY o.order_placed_by,o.reseller_id,o.customer_id,sku,cast(o.created_at AS date)
ORDER BY o.created_at DESC) AS R
FROM RAENA_ORDER_MANAGEMENT.
ORDER o
INNER JOIN RAENA_ORDER_MANAGEMENT.order_item oi ON o.id=oi.order_id
WHERE o.payment_status='Paid'
AND cast(o.created_at AS date) >= '2022-01-01'
AND customer_id ISNULL) a
WHERE a.R=1) nxt ON ist.order_placed_by=nxt.order_placed_by
AND ist.reseller_id=nxt.reseller_id
AND ist.sku=nxt.sku
AND ist.created_at=cast(nxt.created_at AS date);
drop table raena_analytics.revenue_leakage_new_stage6;
create table raena_analytics.revenue_leakage_new_stage6
as
SELECT ist.order_placed_by,
ist.reseller_id,
ist.customer_id,
ist.sku,
ist.payment_status,
nxt.created_at,
ist.quantity,
ist.payment_price
FROM
(SELECT DISTINCT o.order_placed_by,
o.reseller_id,
o.customer_id,
sku,
payment_status,
cast(o.created_at AS date) AS created_at,
sum(oi.quantity) AS quantity,
sum(oi.payment_price) AS payment_price
FROM RAENA_ORDER_MANAGEMENT.
ORDER o
INNER JOIN RAENA_ORDER_MANAGEMENT.order_item oi ON o.id=oi.order_id
WHERE o.payment_status='Paid'
AND cast(o.created_at AS date) >= '2022-01-01'
AND customer_id NOTNULL
GROUP BY 1,
2,
3,
4,
5,
6) ist
INNER JOIN
(SELECT order_placed_by,
reseller_id,
customer_id,
sku,
payment_status,
created_at
FROM
(SELECT DISTINCT o.order_placed_by,
o.reseller_id,
o.customer_id,
sku,
payment_status,
o.created_at,
row_number() over (partition BY o.order_placed_by,o.reseller_id,o.customer_id,sku,cast(o.created_at AS date)
ORDER BY o.created_at DESC) AS R
FROM RAENA_ORDER_MANAGEMENT.
ORDER o
INNER JOIN RAENA_ORDER_MANAGEMENT.order_item oi ON o.id=oi.order_id
WHERE o.payment_status='Paid'
AND cast(o.created_at AS date) >= '2022-01-01'
AND customer_id NOTNULL) a
WHERE a.R=1) nxt ON ist.order_placed_by=nxt.order_placed_by
AND ist.reseller_id=nxt.reseller_id
AND ist.customer_id=nxt.customer_id
AND ist.sku=nxt.sku
AND ist.created_at=cast(nxt.created_at AS date);
DROP TABLE IF EXISTS raena_analytics.OM_Revenue_leakage;
CREATE TABLE raena_analytics.OM_Revenue_leakage AS
SELECT DISTINCT a_order_placed_by,
a_reseller_id,
a_customer_id,
a_payment_status,
a_created_at,
sum(revenue_leakage) AS revenue_leakage
FROM
(SELECT *
FROM
(SELECT qm.*,
qnm.concat2
FROM raena_analytics.om_revenue_leakage_qty_match qm
LEFT JOIN raena_analytics.om_revenue_leakage_qty_not_match qnm ON qm.concat1=qnm.concat2) n
WHERE concat2 ISNULL) s
GROUP BY 1,
2,
3,
4,
5
UNION
SELECT DISTINCT a_order_placed_by,
a_reseller_id,
a_customer_id,
a_payment_status,
a_created_at,
sum(a_payment_amount)-sum(b_payment_amount) AS revenue_leakage
FROM
(SELECT a.order_placed_by AS a_order_placed_by,
a.reseller_id AS a_reseller_id,
a.customer_id AS a_customer_id,
a.sku AS a_sku,
a.payment_status AS a_payment_status,
a.created_at AS a_created_at,
a.quantity AS a_quantity,
a.payment_price AS a_payment_amount,
b.order_placed_by AS b_order_placed_by,
b.reseller_id AS b_reseller_id,
b.customer_id AS b_customer_id,
b.sku AS b_sku,
b.payment_status AS b_payment_status,
b.created_at AS b_created_at,
b.quantity AS b_quantity,
b.payment_price AS b_payment_amount
from raena_analytics.revenue_leakage_new_stage4 a
INNER join raena_analytics.revenue_leakage_new_stage5 b ON a.order_placed_by=b.order_placed_by
AND a.reseller_id=b.reseller_id
AND a.sku=b.sku
AND b.quantity < a.quantity
AND a.payment_price>b.payment_price
AND a.customer_id ISNULL
AND b.customer_id ISNULL
AND b.created_at BETWEEN a.created_at AND a.created_at + INTERVAL '24 HOURS') c
GROUP BY 1,
2,
3,
4,
5
UNION
SELECT DISTINCT a_order_placed_by,
a_reseller_id,
a_customer_id,
a_payment_status,
a_created_at,
sum(a_payment_amount)-sum(b_payment_amount) AS revenue_leakage
FROM
(SELECT a.order_placed_by AS a_order_placed_by,
a.reseller_id AS a_reseller_id,
a.customer_id AS a_customer_id,
a.sku AS a_sku,
a.payment_status AS a_payment_status,
a.created_at AS a_created_at,
a.quantity AS a_quantity,
a.payment_price AS a_payment_amount,
b.order_placed_by AS b_order_placed_by,
b.reseller_id AS b_reseller_id,
b.customer_id AS b_customer_id,
b.sku AS b_sku,
b.payment_status AS b_payment_status,
b.created_at AS b_created_at,
b.quantity AS b_quantity,
b.payment_price AS b_payment_amount
from raena_analytics.revenue_leakage_new_stage4 a
INNER JOIN raena_analytics.revenue_leakage_new_stage6 b ON a.order_placed_by=b.order_placed_by
AND a.reseller_id=b.reseller_id
AND a.sku=b.sku
AND b.quantity < a.quantity
AND a.payment_price>b.payment_price
AND a.customer_id = b.customer_id
AND b.created_at BETWEEN a.created_at AND a.created_at + INTERVAL '24 HOURS') c
GROUP BY 1,
2,
3,
4,
5
ORDER BY 2,
4;
DROP TABLE IF EXISTS raena_analytics.OM_Revenue_leakage_dump;
CREATE TABLE raena_analytics.OM_Revenue_leakage_dump AS
SELECT *
FROM raena_analytics.om_revenue_leakage_qty_match
UNION
SELECT *
FROM raena_analytics.om_revenue_leakage_qty_not_match;
DROP TABLE IF EXISTS raena_analytics.om_conversion_order_data_1;
CREATE TABLE raena_analytics.om_conversion_order_data_1 AS
SELECT DISTINCT cast(created_at + interval '7 Hours' AS date) AS created_date,
date_part('year',created_at + interval '7 Hours') AS order_year,
date_part('month',created_at + interval '7 Hours') AS order_month,
To_char(created_at + interval '7 Hours','month') Month_name,
a.reseller_id,
id AS order_id,
reseller_info::json->>'email' AS transacted_email,
replace(reseller_info::json->>'mobile','+','') AS transacted_mobile,
payment_amount,
shipping_to,
order_placed_by,
b.Monthly_Tier,
c.Max_Tier_of_any_month,
rank() over(partition BY a.reseller_id
ORDER BY date_part('year',created_at + interval '7 Hours'),date_part('month',created_at + interval '7 Hours')) AS R
FROM raena_order_management.ORDER a
LEFT JOIN
( SELECT DISTINCT reseller_id,
order_year,
order_month,
CASE
WHEN revenue<2000000 THEN '<2 mn'
WHEN revenue>=2000000
AND revenue<=10000000 THEN '2-10 mn'
WHEN revenue>10000000 THEN '10+ mn'
END AS Monthly_Tier
FROM
( SELECT date_part('year',created_at + interval '7 Hours') AS order_year,
date_part('month',created_at + interval '7 Hours') AS order_month,
reseller_id ,
sum(payment_amount) AS revenue
FROM raena_order_management.ORDER
WHERE payment_status='Paid'
AND cast(is_archived AS varchar)='false'
GROUP BY date_part('year',created_at + interval '7 Hours'),
date_part('month',created_at + interval '7 Hours'),
reseller_id ) AA
WHERE reseller_id is not NULL) b ON a.reseller_id=b.reseller_id
AND date_part('year',a.created_at + interval '7 Hours') =b.order_year
AND date_part('month',a.created_at + interval '7 Hours')=b.order_month
LEFT JOIN
( SELECT DISTINCT reseller_id,
CASE
WHEN revenue<2000000 THEN '<2 mn'
WHEN revenue>=2000000
AND revenue<=10000000 THEN '2-10 mn'
WHEN revenue>10000000 THEN '10+ mn'
END AS Max_Tier_of_any_month
FROM
( SELECT reseller_id,
max(payment_amount) AS revenue
FROM raena_order_management.ORDER
WHERE payment_status='Paid'
AND cast(is_archived AS varchar)='false'
GROUP BY reseller_id ) BB
WHERE reseller_id is not NULL) c ON a.reseller_id=c.reseller_id
WHERE payment_status='Paid'
AND cast(is_archived AS varchar)='false'
AND cast(is_campaign AS varchar)='false'
ORDER BY 1,
3;
" > /home/ec2-user/cronjob/postgresql/revenue_leakage/revenue_leakage_etl.sql
psql "host=analytics-db-instance-1.cd7qipz3esdx.ap-southeast-1.rds.amazonaws.com user=dbadmin dbname=analytics port=5432 password=5qCif6eyY3Kmg4z" -f /home/ec2-user/cronjob/postgresql/revenue_leakage/revenue_leakage_etl.sql