-- Check jobs with same job ID
SELECT *
FROM (
  SELECT 
    *,
    COUNT(*) OVER (PARTITION BY job_id) AS cnt
  FROM `arbet-470613.dataset.6jobs`
)
WHERE cnt > 1;

-- Because each role's data is scripted separately, a job can be both taken for data analyst and bi analyst
SELECT *
FROM `arbet-470613.dataset.ex_digital`
WHERE search_term = 'data analytics'
  AND LOWER(title) LIKE '%business intelligence%';
  
-- Delete the one that doesn't seem reasonable, and keep only one  
DELETE 
FROM `arbet-470613.dataset.ex_digital`
WHERE search_term = 'data analytics'
  AND LOWER(title) LIKE '%business intelligence%';
-- Change some job's role type
SELECT *
FROM `arbet-470613.dataset.6jobs`
WHERE role = 'Data Analyst'
  AND regexp_contains(LOWER(title), r'marketing')
  
  
  
  UPDATE `arbet-470613.dataset.6jobs`
SET role = 'Operation Analyst'
WHERE role = 'Data Analyst'
  AND REGEXP_CONTAINS(LOWER(title), r'supply planning');

  

DELETE FROM `arbet-470613.dataset.5jobs`
WHERE role = 'Data Analyst'
  AND REGEXP_CONTAINS(LOWER(title), r'marketing');

-- Check and delete obvious spam
SELECT *
FROM `arbet-470613.dataset.6jobs` 
WHERE description IN(
  SELECT description
  FROM `arbet-470613.dataset.6jobs` 
  GROUP BY description
  HAVING COUNT(*) >10
)