-- Check jobs with same job ID
SELECT *
FROM (
SELECT
*,
COUNT(*) OVER (PARTITION BY job_id) AS cnt
FROM `arbet-470613.dataset.6jobs`
)
WHERE cnt > 1;
-- Because each role's data is scripted separately, a job can be both taken for data analyst and bi analyst
SELECT *
FROM `arbet-470613.dataset.ex_digital`
WHERE search_term = 'data analytics'
AND LOWER(title) LIKE '%business intelligence%';
-- Delete the one that doesn't seem reasonable, and keep only one
DELETE
FROM `arbet-470613.dataset.ex_digital`
WHERE search_term = 'data analytics'
AND LOWER(title) LIKE '%business intelligence%';
-- Change some job's role type
SELECT *
FROM `arbet-470613.dataset.6jobs`
WHERE role = 'Data Analyst'
AND regexp_contains(LOWER(title), r'marketing')
UPDATE `arbet-470613.dataset.6jobs`
SET role = 'Operation Analyst'
WHERE role = 'Data Analyst'
AND REGEXP_CONTAINS(LOWER(title), r'supply planning');
DELETE FROM `arbet-470613.dataset.5jobs`
WHERE role = 'Data Analyst'
AND REGEXP_CONTAINS(LOWER(title), r'marketing');
-- Check and delete obvious spam
SELECT *
FROM `arbet-470613.dataset.6jobs`
WHERE description IN(
SELECT description
FROM `arbet-470613.dataset.6jobs`
GROUP BY description
HAVING COUNT(*) >10
)