Partitioning existing table in PostgreSQL

September 03, 2024

Let's create a table with 20 millions of random data. Be sure to index data column for comparing performance results.

CREATE TABLE not_partitioned_table (

id SERIAL PRIMARY KEY,

user_id INTEGER NOT NULL,

transaction_date DATE NOT NULL,

amount DECIMAL(10, 2),

status VARCHAR(20),

description TEXT,

created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP

);

-- Create an index on the transaction_date column

CREATE INDEX idx_transaction_date ON not_partitioned_table(transaction_date);

DO $$

DECLARE

i INT;

BEGIN

FOR i IN 1..20000000 LOOP

INSERT INTO partitioned_table (user_id, transaction_date, amount, status, description)

VALUES (

(RANDOM() * 100000)::INT,

CURRENT_DATE - ((RANDOM() * 3650)::INT), -- Random date within the last 10 years

(RANDOM() * 1000)::DECIMAL(10,2),

CASE WHEN RANDOM() < 0.5 THEN 'completed' ELSE 'pending' END,

md5(random()::text)

);

END LOOP;

END $$;

It may take a while. After process is finished, let's create the partitioned new table and all partitions. We will partition the new table depending on transaction_date per year-month. Just check minimum and maximum date values and run the partitioning script depending on your fits. And we are going to index every partition's transaction_date column because we are querying database according to it.

CREATE TABLE partitioned_table_new (

id SERIAL,

user_id INTEGER NOT NULL,

transaction_date DATE NOT NULL,

amount DECIMAL(10, 2),

status VARCHAR(20),

description TEXT,

created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

PRIMARY KEY (id, transaction_date) -- Include transaction_date in the primary key

) PARTITION BY RANGE (transaction_date);

DO $$

DECLARE

start_date DATE := '2014-09-01';

end_date DATE := '2024-10-01';

partition_end DATE;

partition_name TEXT;

BEGIN

WHILE start_date < end_date LOOP

partition_end := start_date + INTERVAL '1 month';

partition_name := 'partition_' || TO_CHAR(start_date, 'YYYY_MM');

EXECUTE format(

'CREATE TABLE %I PARTITION OF partitioned_table_new FOR VALUES FROM (%L) TO (%L);',

partition_name,

start_date,

partition_end

);

start_date := partition_end;

END LOOP;

END $$;

DO $$

DECLARE

partition_name RECORD;

BEGIN

FOR partition_name IN

SELECT tablename

FROM pg_tables

WHERE schemaname = 'public'

AND tablename LIKE 'partition_%'

LOOP

EXECUTE format('CREATE INDEX idx_%I_transaction_date ON %I (transaction_date);', partition_name.tablename, partition_name.tablename);

END LOOP;

END $$;

And now the final step. Add all datas from old table:

INSERT INTO partitioned_table_new (user_id, transaction_date, amount, status, description, created_at, updated_at)

SELECT user_id, transaction_date, amount, status, description, created_at, updated_at FROM not_partitioned_table;

Then query same data from both to see the difference at heap blocks:

select * from not_partitioned_table where transaction_date between '2024-01-01' and '2024-01-10';

select * from partitioned_table_new where transaction_date between '2024-01-01' and '2024-01-10';

Search This Blog

My Precious Databank

Partitioning existing table in PostgreSQL

Comments

Post a Comment

Popular posts from this blog

Oracle Database Upgrade With OPatch Tool (RHEL/Centos/OEL)

POSTGRESQL UPGRADE WITH PG_UPGRADE UTILITY IN RHEL/CENTOS/OEL

Backup Recovery Scenarios