Quote from: Spheron on September 05, 2025, 09:36:46 AMThe script is running about 1h bevor i canceld it.
It was rather inefficient.
Try this one instead:
Code Select
DO $$
DECLARE
node_record RECORD;
tbl_name TEXT;
dup_count INTEGER;
total_duplicates INTEGER := 0;
BEGIN
FOR node_record IN SELECT id FROM nodes
LOOP
tbl_name := 'idata_' || node_record.id;
IF EXISTS (
SELECT 1
FROM information_schema.tables
WHERE table_schema = 'public'
AND table_name = tbl_name
) THEN
RAISE NOTICE 'Processing table %', tbl_name;
EXECUTE format('
WITH ranked AS (
SELECT ctid,
ROW_NUMBER() OVER (PARTITION BY item_id, idata_timestamp ORDER BY ctid) as rn
FROM public.%I
)
SELECT COUNT(*)
FROM ranked
WHERE rn > 1
', tbl_name) INTO dup_count;
IF dup_count > 0 THEN
RAISE NOTICE 'Table % has % duplicate rows', tbl_name, dup_count;
total_duplicates := total_duplicates + dup_count;
END IF;
END IF;
END LOOP;
RAISE NOTICE 'Total duplicate rows found: %', total_duplicates;
END $$;
And then to delete duplicates:
Code Select
DO $$
DECLARE
node_record RECORD;
tbl_name TEXT;
deleted_count INTEGER;
total_deleted INTEGER := 0;
BEGIN
FOR node_record IN SELECT id FROM nodes
LOOP
tbl_name := 'idata_' || node_record.id;
IF EXISTS (
SELECT 1
FROM information_schema.tables
WHERE table_schema = 'public'
AND table_name = tbl_name
) THEN
RAISE NOTICE 'Processing table %', tbl_name;
EXECUTE format('
WITH duplicates AS (
SELECT ctid,
ROW_NUMBER() OVER (
PARTITION BY item_id, idata_timestamp
ORDER BY ctid
) as rn
FROM public.%I
)
DELETE FROM public.%I
WHERE ctid IN (
SELECT ctid
FROM duplicates
WHERE rn > 1
)', tbl_name, tbl_name);
GET DIAGNOSTICS deleted_count = ROW_COUNT;
IF deleted_count > 0 THEN
RAISE NOTICE 'Deleted % duplicate rows from table %', deleted_count, tbl_name;
total_deleted := total_deleted + deleted_count;
ELSE
RAISE NOTICE 'Table % has no duplicates', tbl_name;
END IF;
END IF;
END LOOP;
RAISE NOTICE 'Total duplicate rows deleted: %', total_deleted;
END $$;