supabase-community · psteinroe · Sep 18, 2025 · Sep 10, 2025 · Sep 10, 2025 · Sep 11, 2025
diff --git a/.gitignore b/.gitignore
@@ -24,3 +24,5 @@ node_modules/
 
 **/dist/
 .claude-session-id
+
+squawk/
diff --git a/...26cacb3c9ed5f915b7e98052d58df98d480b.json → ...4718faaa1005845c8f8f14c5c78e76a258eb.json b/...26cacb3c9ed5f915b7e98052d58df98d480b.json → ...4718faaa1005845c8f8f14c5c78e76a258eb.json
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/agentic/port_squawk_rules.md b/agentic/port_squawk_rules.md
@@ -0,0 +1,106 @@
+The goal is to port all missing rules from Squawk to our analyser.
+
+Our analyser lives in the `pgt_analyser` crate. There is a `CONTRIBUTING.md` guide in that crate which explains how to add new rules. Please also read existing rules to see how it all works.
+
+Then, I want you to check the rules in the squawk project which I copied here for convenience. The rules are in `squawk/linter/src/rules`. The implementation should be very similar to what we have, and porting them straightforward. Here a few things to watch out for though:
+
+- although both libraries are using `libpg_query` to parse the SQL, the bindings can be different. Ours is in the `pgt_query` crate of you need a reference. The `protobuf.rs` file contains the full thing.
+- the context for each rule is different, but you can get the same information out of it:
+```rust
+pub struct RuleContext<'a, R: Rule> {
+    // the ast of the target statement
+    stmt: &'a pgt_query::NodeEnum,
+    // options for that specific rule
+    options: &'a R::Options,
+    // the schema cache - also includes the postgres version
+    schema_cache: Option<&'a SchemaCache>,
+    // the file context which contains other statements in that file in case you need them
+    file_context: &'a AnalysedFileContext,
+}
+
+
+pub struct AnalysedFileContext<'a> {
+    // all statements in this file
+    pub stmts: &'a Vec<pgt_query::NodeEnum>,
+
+    pos: usize,
+}
+
+impl<'a> AnalysedFileContext<'a> {
+    pub fn new(stmts: &'a Vec<pgt_query::NodeEnum>) -> Self {
+        Self { stmts, pos: 0 }
+    }
+
+    // all statements before the currently analysed one
+    pub fn previous_stmts(&self) -> &[pgt_query::NodeEnum] {
+        &self.stmts[0..self.pos]
+    }
+
+    // total count of statements in this file
+    pub fn stmt_count(&self) -> usize {
+        self.stmts.len()
+    }
+}
+```
+
+In squawk, you will see:
+```rust
+    // all statements of that file -> our analyser goes statement by statement but has access to the files content via `file_context`
+    tree: &[RawStmt],
+    // the postgres version -> we store it in the schema cache
+    _pg_version: Option<Version>,
+    // for us, this is always true
+    _assume_in_transaction: bool,
+
+```
+
+Please always write idiomatic code!
+Only add comments to explain WHY the code is doing something. DO NOT write comments to explain WHAT the code is doing.
+
+If you learn something new that might help in porting all the rules, please update this document.
+
+LEARNINGS:
+- Use `cargo clippy` to check your code after writing it
+- The `just new-lintrule` command expects severity to be "info", "warn", or "error" (not "warning")
+- RuleDiagnostic methods: `detail(span, msg)` takes two parameters, `note(msg)` takes only one parameter
+- To check Postgres version: access `ctx.schema_cache().is_some_and(|sc| sc.version.major_version)` which gives e.g. 17
+- NEVER skip anything, or use a subset of something. ALWAYS do the full thing. For example, copy the entire non-volatile functions list from Squawk, not just a subset.
+- If you are missing features from our rule context to be able to properly implement a rule, DO NOT DO IT. Instead, add that rule to the NEEDS FEATURES list below. The node enum is generated from the same source as it is in squawk, so they have feature parity.
+- Remember to run `just gen-lint` after creating a new rule to generate all necessary files
+
+Please update the list below with the rules that we need to migrate, and the ones that are already migrated. Keep the list up-to-date.
+
+NEEDS FEATURES:
+
+TODO:
+
+DONE:
+- adding_field_with_default ✓ (ported from Squawk)
+- adding_foreign_key_constraint ✓ (ported from Squawk)
+- adding_not_null_field ✓ (ported from Squawk)
+- adding_primary_key_constraint ✓ (ported from Squawk)
+- adding_required_field (already exists in pgt_analyser)
+- ban_char_field ✓ (ported from Squawk)
+- ban_concurrent_index_creation_in_transaction ✓ (ported from Squawk)
+- ban_drop_column (already exists in pgt_analyser)
+- changing_column_type ✓ (ported from Squawk)
+- constraint_missing_not_valid ✓ (ported from Squawk)
+- ban_drop_database (already exists in pgt_analyser, as bad_drop_database in squawk)
+- ban_drop_not_null (already exists in pgt_analyser)
+- ban_drop_table (already exists in pgt_analyser)
+- prefer_big_int ✓ (ported from Squawk)
+- prefer_bigint_over_int ✓ (ported from Squawk)
+- prefer_bigint_over_smallint ✓ (ported from Squawk)
+- prefer_identity ✓ (ported from Squawk)
+- prefer_jsonb ✓ (new rule added)
+- prefer_text_field ✓ (ported from Squawk)
+- prefer_timestamptz ✓ (ported from Squawk)
+- disallow_unique_constraint ✓ (ported from Squawk)
+- renaming_column ✓ (ported from Squawk)
+- renaming_table ✓ (ported from Squawk)
+- require_concurrent_index_creation ✓ (ported from Squawk)
+- require_concurrent_index_deletion ✓ (ported from Squawk)
+- transaction_nesting ✓ (ported from Squawk)
+- prefer_robust_stmts ✓ (ported from Squawk - simplified version)
+
+
diff --git a/crates/pgt_analyse/src/analysed_file_context.rs b/crates/pgt_analyse/src/analysed_file_context.rs
@@ -1,7 +1,23 @@
-#[derive(Default)]
-pub struct AnalysedFileContext {}
+pub struct AnalysedFileContext<'a> {
+    pub stmts: &'a Vec<pgt_query::NodeEnum>,
 
-impl AnalysedFileContext {
-    #[allow(unused)]
-    pub fn update_from(&mut self, stmt_root: &pgt_query::NodeEnum) {}
+    pos: usize,
+}
+
+impl<'a> AnalysedFileContext<'a> {
+    pub fn new(stmts: &'a Vec<pgt_query::NodeEnum>) -> Self {
+        Self { stmts, pos: 0 }
+    }
+
+    pub fn previous_stmts(&self) -> &[pgt_query::NodeEnum] {
+        &self.stmts[0..self.pos]
+    }
+
+    pub fn stmt_count(&self) -> usize {
+        self.stmts.len()
+    }
+
+    pub fn next(&mut self) {
+        self.pos += 1;
+    }
 }
diff --git a/crates/pgt_analyse/src/context.rs b/crates/pgt_analyse/src/context.rs
@@ -10,7 +10,7 @@ pub struct RuleContext<'a, R: Rule> {
     stmt: &'a pgt_query::NodeEnum,
     options: &'a R::Options,
     schema_cache: Option<&'a SchemaCache>,
-    file_context: &'a AnalysedFileContext,
+    file_context: &'a AnalysedFileContext<'a>,
 }
 
 impl<'a, R> RuleContext<'a, R>

diff --git a/crates/pgt_analyse/src/registry.rs b/crates/pgt_analyse/src/registry.rs
@@ -159,7 +159,7 @@ impl RuleRegistry {
 pub struct RegistryRuleParams<'a> {
     pub root: &'a pgt_query::NodeEnum,
     pub options: &'a AnalyserOptions,
-    pub analysed_file_context: &'a AnalysedFileContext,
+    pub analysed_file_context: &'a AnalysedFileContext<'a>,
     pub schema_cache: Option<&'a pgt_schema_cache::SchemaCache>,
 }
 

diff --git a/crates/pgt_analyse/src/rule.rs b/crates/pgt_analyse/src/rule.rs
@@ -291,6 +291,8 @@ impl RuleDiagnostic {
 pub enum RuleSource {
     /// Rules from [Squawk](https://squawkhq.com)
     Squawk(&'static str),
+    /// Rules from [Eugene](https://github.com/kaaveland/eugene)
+    Eugene(&'static str),
 }
 
 impl PartialEq for RuleSource {
@@ -303,6 +305,7 @@ impl std::fmt::Display for RuleSource {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             Self::Squawk(_) => write!(f, "Squawk"),
+            Self::Eugene(_) => write!(f, "Eugene"),
         }
     }
 }
@@ -325,18 +328,23 @@ impl RuleSource {
     pub fn as_rule_name(&self) -> &'static str {
         match self {
             Self::Squawk(rule_name) => rule_name,
+            Self::Eugene(rule_name) => rule_name,
         }
     }
 
     pub fn to_namespaced_rule_name(&self) -> String {
         match self {
             Self::Squawk(rule_name) => format!("squawk/{rule_name}"),
+            Self::Eugene(rule_name) => format!("eugene/{rule_name}"),
         }
     }
 
     pub fn to_rule_url(&self) -> String {
         match self {
             Self::Squawk(rule_name) => format!("https://squawkhq.com/docs/{rule_name}"),
+            Self::Eugene(rule_name) => {
+                format!("https://kaveland.no/eugene/hints/{rule_name}/index.html")
+            }
         }
     }
 

diff --git a/crates/pgt_analyser/Cargo.toml b/crates/pgt_analyser/Cargo.toml
@@ -16,12 +16,14 @@ pgt_analyse      = { workspace = true }
 pgt_console      = { workspace = true }
 pgt_diagnostics  = { workspace = true }
 pgt_query        = { workspace = true }
+pgt_query_ext    = { workspace = true }
 pgt_schema_cache = { workspace = true }
 pgt_text_size    = { workspace = true }
 serde            = { workspace = true }
 
 [dev-dependencies]
-insta           = { version = "1.42.1" }
-pgt_diagnostics = { workspace = true }
-pgt_test_macros = { workspace = true }
-termcolor       = { workspace = true }
+insta                  = { version = "1.42.1" }
+pgt_diagnostics        = { workspace = true }
+pgt_statement_splitter = { workspace = true }
+pgt_test_macros        = { workspace = true }
+termcolor              = { workspace = true }
diff --git a/crates/pgt_analyser/src/lib.rs b/crates/pgt_analyser/src/lib.rs
@@ -62,25 +62,29 @@ impl<'a> Analyser<'a> {
     pub fn run(&self, params: AnalyserParams) -> Vec<RuleDiagnostic> {
         let mut diagnostics = vec![];
 
-        let mut file_context = AnalysedFileContext::default();
+        let roots: Vec<pgt_query::NodeEnum> = params.stmts.iter().map(|s| s.root.clone()).collect();
+        let mut file_context = AnalysedFileContext::new(&roots);
+
+        for (i, stmt) in params.stmts.into_iter().enumerate() {
+            let stmt_diagnostics: Vec<_> = {
+                let rule_params = RegistryRuleParams {
+                    root: &roots[i],
+                    options: self.options,
+                    analysed_file_context: &file_context,
+                    schema_cache: params.schema_cache,
+                };
 
-        for stmt in params.stmts {
-            let rule_params = RegistryRuleParams {
-                root: &stmt.root,
-                options: self.options,
-                analysed_file_context: &file_context,
-                schema_cache: params.schema_cache,
-            };
-
-            diagnostics.extend(
                 self.registry
                     .rules
                     .iter()
                     .flat_map(|rule| (rule.run)(&rule_params))
-                    .map(|r| r.span(stmt.range)),
-            );
+                    .map(|r| r.span(stmt.range))
+                    .collect()
+            }; // end immutable borrow
+
+            diagnostics.extend(stmt_diagnostics);
 
-            file_context.update_from(&stmt.root);
+            file_context.next();
         }
 
         diagnostics

diff --git a/crates/pgt_analyser/src/lint/safety.rs b/crates/pgt_analyser/src/lint/safety.rs
@@ -1,10 +1,32 @@
 //! Generated file, do not edit by hand, see `xtask/codegen`
 
 use pgt_analyse::declare_lint_group;
+pub mod adding_field_with_default;
+pub mod adding_foreign_key_constraint;
+pub mod adding_not_null_field;
+pub mod adding_primary_key_constraint;
 pub mod adding_required_field;
+pub mod ban_char_field;
+pub mod ban_concurrent_index_creation_in_transaction;
 pub mod ban_drop_column;
 pub mod ban_drop_database;
 pub mod ban_drop_not_null;
 pub mod ban_drop_table;
 pub mod ban_truncate_cascade;
-declare_lint_group! { pub Safety { name : "safety" , rules : [self :: adding_required_field :: AddingRequiredField , self :: ban_drop_column :: BanDropColumn , self :: ban_drop_database :: BanDropDatabase , self :: ban_drop_not_null :: BanDropNotNull , self :: ban_drop_table :: BanDropTable , self :: ban_truncate_cascade :: BanTruncateCascade ,] } }
+pub mod changing_column_type;
+pub mod constraint_missing_not_valid;
+pub mod disallow_unique_constraint;
+pub mod prefer_big_int;
+pub mod prefer_bigint_over_int;
+pub mod prefer_bigint_over_smallint;
+pub mod prefer_identity;
+pub mod prefer_jsonb;
+pub mod prefer_robust_stmts;
+pub mod prefer_text_field;
+pub mod prefer_timestamptz;
+pub mod renaming_column;
+pub mod renaming_table;
+pub mod require_concurrent_index_creation;
+pub mod require_concurrent_index_deletion;
+pub mod transaction_nesting;
+declare_lint_group! { pub Safety { name : "safety" , rules : [self :: adding_field_with_default :: AddingFieldWithDefault , self :: adding_foreign_key_constraint :: AddingForeignKeyConstraint , self :: adding_not_null_field :: AddingNotNullField , self :: adding_primary_key_constraint :: AddingPrimaryKeyConstraint , self :: adding_required_field :: AddingRequiredField , self :: ban_char_field :: BanCharField , self :: ban_concurrent_index_creation_in_transaction :: BanConcurrentIndexCreationInTransaction , self :: ban_drop_column :: BanDropColumn , self :: ban_drop_database :: BanDropDatabase , self :: ban_drop_not_null :: BanDropNotNull , self :: ban_drop_table :: BanDropTable , self :: ban_truncate_cascade :: BanTruncateCascade , self :: changing_column_type :: ChangingColumnType , self :: constraint_missing_not_valid :: ConstraintMissingNotValid , self :: disallow_unique_constraint :: DisallowUniqueConstraint , self :: prefer_big_int :: PreferBigInt , self :: prefer_bigint_over_int :: PreferBigintOverInt , self :: prefer_bigint_over_smallint :: PreferBigintOverSmallint , self :: prefer_identity :: PreferIdentity , self :: prefer_jsonb :: PreferJsonb , self :: prefer_robust_stmts :: PreferRobustStmts , self :: prefer_text_field :: PreferTextField , self :: prefer_timestamptz :: PreferTimestamptz , self :: renaming_column :: RenamingColumn , self :: renaming_table :: RenamingTable , self :: require_concurrent_index_creation :: RequireConcurrentIndexCreation , self :: require_concurrent_index_deletion :: RequireConcurrentIndexDeletion , self :: transaction_nesting :: TransactionNesting ,] } }
Original file line number	Diff line number	Diff line change
Expand Up		@@ -24,3 +24,5 @@ node_modules/

		**/dist/
		.claude-session-id

		squawk/