Let's create a table with 20 millions of random data. Be sure to index data column for comparing performance results.
CREATE TABLE not_partitioned_table (
id SERIAL PRIMARY KEY,
user_id INTEGER NOT NULL,
transaction_date DATE NOT NULL,
amount DECIMAL(10, 2),
status VARCHAR(20),
description TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Create an index on the transaction_date column
CREATE INDEX idx_transaction_date ON not_partitioned_table(transaction_date);
DO $$
DECLARE
i INT;
BEGIN
FOR i IN 1..20000000 LOOP
INSERT INTO partitioned_table (user_id, transaction_date, amount, status, description)
VALUES (
(RANDOM() * 100000)::INT,
CURRENT_DATE - ((RANDOM() * 3650)::INT), -- Random date within the last 10 years
(RANDOM() * 1000)::DECIMAL(10,2),
CASE WHEN RANDOM() < 0.5 THEN 'completed' ELSE 'pending' END,
md5(random()::text)
);
END LOOP;
END $$;
It may take a while. After process is finished, let's create the partitioned new table and all partitions. We will partition the new table depending on transaction_date per year-month. Just check minimum and maximum date values and run the partitioning script depending on your fits. And we are going to index every partition's transaction_date column because we are querying database according to it.
CREATE TABLE partitioned_table_new (
id SERIAL,
user_id INTEGER NOT NULL,
transaction_date DATE NOT NULL,
amount DECIMAL(10, 2),
status VARCHAR(20),
description TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (id, transaction_date) -- Include transaction_date in the primary key
) PARTITION BY RANGE (transaction_date);
DO $$
DECLARE
start_date DATE := '2014-09-01';
end_date DATE := '2024-10-01';
partition_end DATE;
partition_name TEXT;
BEGIN
WHILE start_date < end_date LOOP
partition_end := start_date + INTERVAL '1 month';
partition_name := 'partition_' || TO_CHAR(start_date, 'YYYY_MM');
EXECUTE format(
'CREATE TABLE %I PARTITION OF partitioned_table_new FOR VALUES FROM (%L) TO (%L);',
partition_name,
start_date,
partition_end
);
start_date := partition_end;
END LOOP;
END $$;
DO $$
DECLARE
partition_name RECORD;
BEGIN
FOR partition_name IN
SELECT tablename
FROM pg_tables
WHERE schemaname = 'public'
AND tablename LIKE 'partition_%'
LOOP
EXECUTE format('CREATE INDEX idx_%I_transaction_date ON %I (transaction_date);', partition_name.tablename, partition_name.tablename);
END LOOP;
END $$;
And now the final step. Add all datas from old table:
INSERT INTO partitioned_table_new (user_id, transaction_date, amount, status, description, created_at, updated_at)
SELECT user_id, transaction_date, amount, status, description, created_at, updated_at FROM not_partitioned_table;
Then query same data from both to see the difference at heap blocks:
select * from not_partitioned_table where transaction_date between '2024-01-01' and '2024-01-10';
select * from partitioned_table_new where transaction_date between '2024-01-01' and '2024-01-10';
Comments
Post a Comment