COMPLEX QUERIES: ALL POSSIBLE QUERIES

–find duplicate record ALL WAYS

SELECT * FROM USERS;

–USING ROWNUM

SELECT * FROM(
SELECT USER_ID,USER_NAME,EMAIL,
ROW_NUMBER() OVER (PARTITION BY USER_NAME,EMAIL ORDER BY USER_ID) AS RN
FROM USERS
) WHERE RN=2;

–OUTPUT:IF WE HAVE TO FETCH ONLY DUPLICATE RECORDS ONCE ALL ROWS WONT BE RETURNS, BELOW IS THE SOLUTION

–RETURN ALL DUPLICATE RECORDS

SELECT * FROM USERS WHERE (USER_NAME,EMAIL) IN (
SELECT USER_NAME,EMAIL FROM(
SELECT USER_ID,USER_NAME,EMAIL,
ROW_NUMBER() OVER (PARTITION BY USER_NAME,EMAIL ORDER BY USER_ID) AS RN
FROM USERS
) WHERE RN=2
);

–USING GROUP BY
SELECT * FROM USERS WHERE (USER_NAME,EMAIL) IN (
SELECT USER_NAME,EMAIL FROM(
SELECT USER_NAME,EMAIL, COUNT(1) CNT
FROM USERS
GROUP BY USER_NAME,EMAIL
) WHERE CNT>1
);

–second last record
–USING WINDOW FUNCTION
SELECT * FROM(
SELECT
EMPLOYEE_ID, FIRST_NAME, LAST_NAME, EMAIL, PHONE_NUMBER, HIRE_DATE, JOB_ID, SALARY, MANAGER_ID, DEPARTMENT_ID,
ROW_NUMBER() OVER ( ORDER BY EMPLOYEE_ID DESC) AS RN
FROM EMPLOYEES
) WHERE RN=2;

–FIND MAX AND MIN SALARY BY DEPARMENT
–WAY 1 USING GROUP BY
SELECT MAX(SALARY),MIN(SALARY) , DEPARTMENT_ID FROM EMPLOYEES GROUP BY DEPARTMENT_ID having department_id is not null;

–WAY 2 using window function
select DISTINCT DEPARTMENT_ID,
MAX(SALARY) OVER (PARTITION BY DEPARTMENT_ID) MAX_SAL,
MIN(SALARY) OVER (PARTITION BY DEPARTMENT_ID) MIN_SAL
FROM EMPLOYEES;

–DEPARTMENT WISE COMPLETE EMPLOYEE DETAILS WHO ARE WITHDRAWING EITHER MAX OR MIN SALARY
WITH MIN_MAX_SAL AS(select DISTINCT DEPARTMENT_ID,
MAX(SALARY) OVER (PARTITION BY DEPARTMENT_ID) MAX_SAL,
MIN(SALARY) OVER (PARTITION BY DEPARTMENT_ID) MIN_SAL
FROM EMPLOYEES)
SELECT EMP.EMPLOYEE_ID, FIRST_NAME, LAST_NAME, EMAIL, PHONE_NUMBER, HIRE_DATE, JOB_ID, SALARY, MANAGER_ID, EMP.DEPARTMENT_ID
FROM EMPLOYEES EMP, MIN_MAX_SAL WHERE MIN_MAX_SAL.DEPARTMENT_ID = EMP.DEPARTMENT_ID AND
(MIN_MAX_SAL.MAX_SAL = EMP.SALARY OR MIN_MAX_SAL.MIN_SAL = EMP.SALARY);

— NTH HIGHEST AND NTH LOWEST SALARY
–THERE IS NO WAY TO BUILD QUERY USING GROUP BY SO WE HAVE TO USE WINDOW FUNCTION
SELECT EMPLOYEE_ID, FIRST_NAME, LAST_NAME, EMAIL, PHONE_NUMBER, HIRE_DATE, JOB_ID, SALARY, MANAGER_ID, DEPARTMENT_ID FROM(
SELECT
ROW_NUMBER() OVER ( ORDER BY SALARY DESC) AS MAX_RN,
ROW_NUMBER() OVER ( ORDER BY SALARY) AS MIN_RN,
EMPLOYEE_ID, FIRST_NAME, LAST_NAME, EMAIL, PHONE_NUMBER, HIRE_DATE, JOB_ID, SALARY, MANAGER_ID, DEPARTMENT_ID
FROM EMPLOYEES
) WHERE MAX_RN=2 OR MIN_RN=2;

— ABOVE QUERY WILL ONLY GIVE SINGLE RECORD PER DEPARTMENT WHAT IF 2 EMPLOYEES ARE WITHDRWAING SAME MAX OR MIN SALARY
–IN THAT CASE ONLY 1 RECORD WILL BE SKIPPED BELOW IS THE SOLUTION
SELECT * FROM EMPLOYEES WHERE SALARY IN (SELECT SALARY FROM(
SELECT DISTINCT
ROW_NUMBER() OVER ( ORDER BY SALARY DESC) AS MAX_RN,
ROW_NUMBER() OVER ( ORDER BY SALARY) AS MIN_RN,
SALARY,department_id
FROM EMPLOYEES
) WHERE MAX_RN=3 OR MIN_RN=3);

–NOW FETCH EMPLOYEES WHO ARE WITHDRAWING NTH MAX AND NTH MIN SALARY BUT DEPRTEMENT WISE

SELECT * FROM EMPLOYEES WHERE SALARY IN (SELECT SALARY FROM(
SELECT DISTINCT
ROW_NUMBER() OVER ( PARTITION BY DEPARTMENT_ID ORDER BY SALARY DESC) AS MAX_RN,
ROW_NUMBER() OVER ( PARTITION BY DEPARTMENT_ID ORDER BY SALARY) AS MIN_RN,
SALARY,department_id
FROM EMPLOYEES
) WHERE MAX_RN=3 OR MIN_RN=3);

–From the doctors table, fetch the details of doctors who work in the same hospital but in different specialty
–WINDOW FUNCTION CANT BE APPLIED HERE BECAUSE WE CAN APPLY SELF JOIN
SELECT * FROM DOCTORS D1 JOIN DOCTORS D2
ON D2.ID <> D1.ID
AND D2.HOSPITAL = D1.HOSPITAL
AND D2.SPECIALITY <> D1.SPECIALITY;

–From the doctors table, fetch the details of doctors who work in the same hospital but in SAME specialty
–WINDOW FUNCTION CANT BE APPLIED HERE BECAUSE WE CAN APPLY SELF JOIN
SELECT * FROM DOCTORS D1 JOIN DOCTORS D2
ON D2.ID <> D1.ID
AND D2.HOSPITAL = D1.HOSPITAL
AND D2.SPECIALITY = D1.SPECIALITY;

–From the doctors table, fetch the details of doctors who work in the same hospital IRRESPECTIVE specialty
–WINDOW FUNCTION CANT BE APPLIED HERE BECAUSE WE CAN APPLY SELF JOIN
SELECT * FROM DOCTORS D1 JOIN DOCTORS D2
ON D2.ID <> D1.ID
AND D2.HOSPITAL = D1.HOSPITAL;

SELECT USER_NAME FROM(
SELECT DISTINCT USER_NAME,
case when USER_NAME = lead(USER_NAME) over (order by login_id) and USER_NAME = lead(USER_NAME,2) over (order by login_id) then USER_NAME else null
END AS REPEATED_NAMES
FROM login_details
) WHERE REPEATED_NAMES IS NOT NULL;

–From the students table, write a SQL query to interchange the adjacent student names.
create table students
(
id int primary key,
student_name varchar(50) not null
);
insert into students values(1, ‘James’);
insert into students values(2, ‘Michael’);
insert into students values(3, ‘George’);
insert into students values(4, ‘Stewart’);
insert into students values(5, ‘Robin’);

SELECT ID,STUDENT_NAME,
CASE WHEN MOD(ID,2) <> 0 THEN LEAD(STUDENT_NAME,1,STUDENT_NAME) OVER (ORDER BY ID)
WHEN MOD(ID,2) = 0 THEN LAG(STUDENT_NAME) OVER (ORDER BY ID)
END AS ADJACENT_NAME
FROM STUDENTS;

–From the weather table, fetch all the records when London had extremely cold temperature for 3 consecutive days or more

create table weather
(
id int,
city varchar(50),
temperature int,
day date
);

delete from weather;

insert into weather values(1, ‘London’, -1, to_date(‘2021-01-01′,’yyyy-mm-dd’));
insert into weather values(2, ‘London’, -2, to_date(‘2021-01-02′,’yyyy-mm-dd’));
insert into weather values(3, ‘London’, 4, to_date(‘2021-01-03′,’yyyy-mm-dd’));
insert into weather values(4, ‘London’, 1, to_date(‘2021-01-04′,’yyyy-mm-dd’));
insert into weather values(5, ‘London’, -2, to_date(‘2021-01-05′,’yyyy-mm-dd’));
insert into weather values(6, ‘London’, -5, to_date(‘2021-01-06′,’yyyy-mm-dd’));
insert into weather values(7, ‘London’, -7, to_date(‘2021-01-07′,’yyyy-mm-dd’));
insert into weather values(8, ‘London’, 5, to_date(‘2021-01-08′,’yyyy-mm-dd’));

SELECT * FROM(
SELECT ID, CITY, TEMPERATURE, DAY,
CASE WHEN TEMPERATURE<0 AND TEMPERATURE > LEAD(TEMPERATURE) OVER (ORDER BY ID) AND
TEMPERATURE > LEAD(TEMPERATURE,2) OVER (ORDER BY ID) THEN ‘YES’
WHEN TEMPERATURE<0 AND TEMPERATURE < LAG(TEMPERATURE) OVER (ORDER BY ID) AND
TEMPERATURE > LEAD(TEMPERATURE) OVER (ORDER BY ID) THEN ‘YES’
WHEN TEMPERATURE<0 AND TEMPERATURE < LAG(TEMPERATURE) OVER (ORDER BY ID) AND
TEMPERATURE < LAG(TEMPERATURE,2) OVER (ORDER BY ID) THEN ‘YES’ ELSE NULL
END AS CONSIQUITIVE_FLAG
FROM WEATHER) WHERE CONSIQUITIVE_FLAG=’YES’;

 

–Find the top 2 accounts with the maximum number of unique patients on a monthly basis.

–Note: Prefer the account if with the least value in case of same number of unique patients
–Table Structure:

drop table patient_Logs;

create table patient_logs
(
account_id int,
date1 date,
patient_id int
);

insert into patient_logs values (1, to_date(’02-01-2020′,’dd-mm-yyyy’), 100);
insert into patient_logs values (1, to_date(’27-01-2020′,’dd-mm-yyyy’), 200);
insert into patient_logs values (2, to_date(’01-01-2020′,’dd-mm-yyyy’), 300);
insert into patient_logs values (2, to_date(’21-01-2020′,’dd-mm-yyyy’), 400);
insert into patient_logs values (2, to_date(’21-01-2020′,’dd-mm-yyyy’), 300);
insert into patient_logs values (2, to_date(’01-01-2020′,’dd-mm-yyyy’), 500);
insert into patient_logs values (3, to_date(’20-01-2020′,’dd-mm-yyyy’), 400);
insert into patient_logs values (1, to_date(’04-03-2020′,’dd-mm-yyyy’), 500);
insert into patient_logs values (3, to_date(’20-01-2020′,’dd-mm-yyyy’), 450);

SELECT * FROM patient_logs;

–USING ROW NUM, GROUP BY
SELECT ACCOUNT_ID, MONTH, PATINET_PER_MONTH FROM(
SELECT ACCOUNT_ID,MONTH,PATINET_PER_MONTH,
ROW_NUMBER() OVER (PARTITION BY MONTH ORDER BY ACCOUNT_ID DESC) AS RN
FROM
(
SELECT ACCOUNT_ID,MONTH, COUNT(2) PATINET_PER_MONTH FROM(
select DISTINCT ACCOUNT_ID, PATIENT_ID,TO_CHAR(date1,’MONTH’) MONTH
from patient_logs
)
GROUP BY ACCOUNT_ID,MONTH)
) WHERE RN<3;

Popular posts from this blog

Window function in PySpark with Joins example using 2 Dataframes (inner join)

Complex SQL: fetch the users who logged in consecutively 3 or more times (lead perfect example)

Credit Card Data Analysis using PySpark (how to use auto broadcast join after disabling it)