|
9 | 9 |
|
10 | 10 | PG_CREDS = dlt.secrets.get("sources.pg_replication.credentials", PostgresCredentials)
|
11 | 11 |
|
12 |
| -def replicate_with_initial_load(pipeline_name: str, slot_name: str, pub_name: str) -> None: |
13 |
| - """Production example: Sets up replication with initial load. |
14 | 12 |
|
15 |
| - Demonstrates usage of `persist_snapshots` argument and snapshot resource |
16 |
| - returned by `init_replication` helper. |
| 13 | +def replicate_with_initial_load( |
| 14 | + schema_name: str, slot_name: str, pub_name: str |
| 15 | +) -> None: |
| 16 | + """Production example: Sets up replication with initial load for your existing PostgreSQL database. |
| 17 | +
|
| 18 | + Use this when you have real data that needs to be replicated. This is NOT a simulation: |
| 19 | + it connects to your actual database, performs an initial load, and then replicates subsequent changes. |
| 20 | +
|
| 21 | + Demonstrates usage of `persist_snapshots` argument and snapshot resource returned by `init_replication`. |
17 | 22 | """
|
18 |
| - # create source and destination pipelines |
| 23 | + # create destination pipeline |
19 | 24 | dest_pl = dlt.pipeline(
|
20 |
| - pipeline_name=pipeline_name, |
21 |
| - destination='duckdb', |
22 |
| - dataset_name="replication_postgres", |
| 25 | + pipeline_name="pg_replication_pipeline", |
| 26 | + destination="duckdb", |
| 27 | + dataset_name="replicate_with_initial_load", |
| 28 | + dev_mode=True, |
23 | 29 | )
|
24 |
| - schema_name = "public" |
25 | 30 |
|
26 |
| - creds = dlt.secrets.get( |
27 |
| - f"{pipeline_name}.sources.pg_replication.credentials", PostgresCredentials |
28 |
| - ) |
29 |
| - print(creds) |
30 | 31 | snapshot = init_replication( # requires the Postgres user to have the REPLICATION attribute assigned
|
31 | 32 | slot_name=slot_name,
|
32 |
| - credentials=creds, |
33 | 33 | pub_name=pub_name,
|
| 34 | + table_names=[], |
34 | 35 | schema_name=schema_name,
|
35 | 36 | persist_snapshots=True, # persist snapshot table(s) and let function return resource(s) for initial load
|
36 |
| - reset=True |
| 37 | + reset=True, |
37 | 38 | )
|
38 |
| - print("replication initialized") |
| 39 | + |
39 | 40 | # perform initial load to capture all records present in source table prior to replication initialization
|
40 | 41 | dest_pl.run(snapshot)
|
41 |
| - print("replication run") |
42 |
| - # insert record in source table and propagate change to destination |
43 |
| - changes = replication_resource(slot_name, pub_name, credentials=creds) |
44 |
| - print("changes initialized") |
| 42 | + show_destination_table(dest_pl) |
| 43 | + |
| 44 | + # assuming the source table has changed since the initial load, propagate those changes to the destination |
| 45 | + changes = replication_resource(slot_name, pub_name) |
45 | 46 | dest_pl.run(changes)
|
46 |
| - print("changes run") |
47 |
| - |
| 47 | + show_destination_table(dest_pl) |
| 48 | + |
48 | 49 |
|
49 | 50 | def replicate_single_table_demo() -> None:
|
50 |
| - """Sets up replication for a single Postgres table and loads changes into a destination. |
| 51 | + """Demonstrates PostgreSQL replication by simulating a source table and changes. |
| 52 | +
|
| 53 | + Shows basic usage of `init_replication` helper and `replication_resource` resource. |
| 54 | + This demo creates a source table and simulates INSERT, UPDATE, and DELETE operations |
| 55 | + to show how replication works end-to-end. In production, you would have an existing |
| 56 | + PostgreSQL database with real changes instead of simulating them. |
51 | 57 |
|
52 |
| - Demonstrates basic usage of `init_replication` helper and `replication_resource` resource. |
53 |
| - Uses `src_pl` to create and change the replicated Postgres table—this |
54 |
| - is only for demonstration purposes, you won't need this when you run in production |
55 |
| - as you'll probably have another process feeding your Postgres instance. |
| 58 | + For production use with existing data, see `replicate_with_initial_load()`. |
56 | 59 | """
|
57 | 60 | # create source and destination pipelines
|
58 | 61 | src_pl = get_postgres_pipeline()
|
@@ -100,16 +103,14 @@ def replicate_single_table_demo() -> None:
|
100 | 103 | show_destination_table(dest_pl)
|
101 | 104 |
|
102 | 105 |
|
103 |
| -def demo_initial_load_replication() -> None: |
104 |
| - """Sets up replication with initial load. |
| 106 | +def replicate_with_initial_load_demo() -> None: |
| 107 | + """Demonstrates PostgreSQL replication with initial load by simulating a source table and changes. |
105 | 108 |
|
106 |
| - Demonstrates usage of `persist_snapshots` argument and snapshot resource |
107 |
| - returned by `init_replication` helper. |
| 109 | + Shows usage of `persist_snapshots` argument and snapshot resource returned by `init_replication` helper. |
| 110 | + This demo creates a source table with existing data, then simulates additional changes to show how |
| 111 | + initial load captures pre-existing records and replication handles subsequent changes. |
108 | 112 |
|
109 |
| - Notes: |
110 |
| - - This function also creates the source table itself. That’s only useful for demos or |
111 |
| - when starting with a brand-new database. In production you normally won’t create tables here, |
112 |
| - since your application/database already has them. |
| 113 | + For production use with existing data, see `replicate_with_initial_load()`. |
113 | 114 | """
|
114 | 115 | # create source and destination pipelines
|
115 | 116 | src_pl = get_postgres_pipeline()
|
@@ -154,10 +155,16 @@ def demo_initial_load_replication() -> None:
|
154 | 155 |
|
155 | 156 |
|
156 | 157 | def replicate_entire_schema_demo() -> None:
|
157 |
| - """Demonstrates setup and usage of schema replication. |
| 158 | + """Demonstrates schema-level replication by simulating multiple tables and changes. |
| 159 | +
|
| 160 | + Shows setup and usage of schema replication, which captures changes across all tables |
| 161 | + in a schema. This demo creates multiple source tables and simulates changes to show |
| 162 | + how schema replication works, including tables added after replication starts. |
158 | 163 |
|
159 |
| - Schema replication requires a Postgres server version of 15 or higher. An |
160 |
| - exception is raised if that's not the case. |
| 164 | + Schema replication requires PostgreSQL server version 15 or higher. An exception |
| 165 | + is raised if that's not the case. |
| 166 | +
|
| 167 | + For production use with existing schemas, adapt this pattern to your real database. |
161 | 168 | """
|
162 | 169 | # create source and destination pipelines
|
163 | 170 | src_pl = get_postgres_pipeline()
|
@@ -214,9 +221,13 @@ def replicate_entire_schema_demo() -> None:
|
214 | 221 |
|
215 | 222 |
|
216 | 223 | def replicate_with_column_selection_demo() -> None:
|
217 |
| - """Sets up replication with column selection. |
| 224 | + """Demonstrates column selection in replication by simulating tables with selective column capture. |
| 225 | +
|
| 226 | + Shows usage of `include_columns` argument to replicate only specific columns from tables. |
| 227 | + This demo creates source tables and simulates changes to show how column selection works, |
| 228 | + where some tables have filtered columns while others include all columns by default. |
218 | 229 |
|
219 |
| - Demonstrates usage of `include_columns` argument. |
| 230 | + For production use, apply the `include_columns` pattern to your existing tables. |
220 | 231 | """
|
221 | 232 | # create source and destination pipelines
|
222 | 233 | src_pl = get_postgres_pipeline()
|
@@ -327,7 +338,8 @@ def show_destination_table(
|
327 | 338 |
|
328 | 339 |
|
329 | 340 | if __name__ == "__main__":
|
| 341 | + # replicate_with_initial_load(schema_name="public", slot_name="test_slot", pub_name="test_pub") |
330 | 342 | replicate_single_table_demo()
|
331 |
| - # replicate_with_initial_load() |
| 343 | + # replicate_with_initial_load_demo() |
332 | 344 | # replicate_entire_schema_demo()
|
333 | 345 | # replicate_with_column_selection_demo()
|
0 commit comments