Skip to content

Commit c56054f

Browse files
committed
Add support for multiline string sqls.
1 parent 61458f8 commit c56054f

File tree

2 files changed

+73
-3
lines changed

2 files changed

+73
-3
lines changed

lib/activerecord/cte/string_cte_parser.rb

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,29 +110,33 @@ module StringCteParser
110110
# ASS (too many letters)
111111
AS_KEYWORD = /AS/i.freeze
112112

113-
# Matches the SQL expression inside parentheses (greedy match for everything inside)
113+
# Matches the SQL expression inside parentheses (greedy match for everything inside, including newlines)
114+
# Using [\s\S] to match any character including newlines (equivalent to . under Ruby's /m option, called DOTALL in other regex flavors)
114115
# Examples that MATCH:
115116
# (SELECT * FROM posts) → captures "SELECT * FROM posts"
116117
# (SELECT id, name FROM users WHERE active = true) → captures "SELECT id, name FROM users WHERE active = true"
117118
# (SELECT * FROM posts WHERE views > (SELECT AVG(views) FROM posts)) → captures "SELECT * FROM posts WHERE views > (SELECT AVG(views) FROM posts)"
119+
# (SELECT *\n FROM posts\n WHERE views > 100) → captures "SELECT *\n FROM posts\n WHERE views > 100"
118120
# Examples that DON'T match:
119121
# SELECT * FROM posts (no parentheses)
120122
# (SELECT * FROM posts (missing closing paren)
121123
# SELECT * FROM posts) (missing opening paren)
122-
EXPRESSION_PATTERN = /\((.+)\)/.freeze
124+
EXPRESSION_PATTERN = /\(([\s\S]+)\)/.freeze
123125

124126
# Complete CTE string pattern: optional whitespace + table_name + whitespace + AS + whitespace + (expression) + optional whitespace
127+
# The /m option makes . match newlines; newlines inside the expression are matched by [\s\S] in EXPRESSION_PATTERN and by \s between tokens
125128
# Examples that MATCH:
126129
# "popular_posts AS (SELECT * FROM posts WHERE views_count > 100)"
127130
# " `user stats` AS (SELECT COUNT(*) FROM users) "
128131
# '"complex_table" as (SELECT * FROM posts)'
129132
# "table_2023 AS (SELECT id FROM posts WHERE created_at > '2023-01-01')"
133+
# "multiline_cte AS (\n SELECT *\n FROM posts\n WHERE active = true\n)"
130134
# Examples that DON'T match:
131135
# "popular_posts (SELECT * FROM posts)" (missing AS)
132136
# "popular_posts AS SELECT * FROM posts" (missing parentheses)
133137
# "AS (SELECT * FROM posts)" (missing table name)
134138
# "123_table AS (SELECT * FROM posts)" (invalid table name)
135-
CTE_STRING_PATTERN = /\A\s*#{TABLE_NAME_PATTERN}\s+#{AS_KEYWORD}\s+#{EXPRESSION_PATTERN}\s*\z/i.freeze
139+
CTE_STRING_PATTERN = /\A\s*#{TABLE_NAME_PATTERN}\s+#{AS_KEYWORD}\s+#{EXPRESSION_PATTERN}\s*\z/im.freeze
136140

137141
# ---------------------------------------------------------------------------
138142
# Main parsing method that converts a CTE string into an Arel::Nodes::As node

test/activerecord/cte_test.rb

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,4 +326,70 @@ def test_string_cte_with_underscores_and_numbers
326326
popular_posts = Post.where("views_count > 100")
327327
assert_equal popular_posts.to_a, popular_posts_from_cte
328328
end
329+
330+
def test_string_cte_with_multiline_expressions
331+
# Test multiline CTE expressions with complex formatting
332+
multiline_cte = <<~SQL.strip
333+
filtered_tracker_issue_extras AS (
334+
SELECT tie.scheduled_in_external_sprint_ids,
335+
tie.tracker_project_issue_id
336+
FROM tracker_issue_extras tie
337+
JOIN repo_issues ri
338+
ON ri.tracker_project_issue_id = tie.tracker_project_issue_id
339+
WHERE ri.primary_committer_id = ANY(ARRAY[1]::bigint[])
340+
AND ri.repo_id = ANY(ARRAY[2, 3, 1]::bigint[])
341+
)
342+
SQL
343+
344+
# This should parse successfully without raising an error
345+
assert_nothing_raised do
346+
Post.with(multiline_cte).to_sql
347+
end
348+
349+
# Test with newlines in different positions
350+
cte_with_newlines = "popular_posts AS (\n SELECT *\n FROM posts\n WHERE views_count > 100\n)"
351+
assert_nothing_raised do
352+
Post.with(cte_with_newlines).to_sql
353+
end
354+
355+
# Test with complex nested subqueries and multiline formatting
356+
complex_multiline_cte = <<~SQL.strip
357+
complex_analysis AS (
358+
SELECT
359+
p.id,
360+
p.title,
361+
(SELECT COUNT(*)
362+
FROM comments c
363+
WHERE c.post_id = p.id
364+
AND c.created_at > '2023-01-01') as recent_comments,
365+
CASE
366+
WHEN p.views_count > 1000 THEN 'popular'
367+
WHEN p.views_count > 100 THEN 'moderate'
368+
ELSE 'low'
369+
END as popularity
370+
FROM posts p
371+
WHERE p.published_at IS NOT NULL
372+
)
373+
SQL
374+
375+
assert_nothing_raised do
376+
Post.with(complex_multiline_cte).to_sql
377+
end
378+
end
379+
380+
def test_string_cte_with_user_provided_multiline_example
381+
# Test the specific multiline example provided by the user
382+
user_multiline_cte = "filtered_tracker_issue_extras AS (\n SELECT tie.scheduled_in_external_sprint_ids,\n tie.tracker_project_issue_id\n FROM tracker_issue_extras tie\n JOIN repo_issues ri\n ON ri.tracker_project_issue_id = tie.tracker_project_issue_id\n WHERE ri.primary_committer_id = ANY(ARRAY[[1]]::bigint[])\n AND ri.repo_id = ANY(ARRAY[[2, 3, 1]]::bigint[])\n)\n"
383+
384+
# This should parse successfully without raising an error
385+
result = Post.with(user_multiline_cte).to_sql
386+
387+
# The SQL returned by to_sql double-quotes the table name, so check for the quoted version
388+
assert result.include?("WITH \"filtered_tracker_issue_extras\" AS"), "Should include WITH clause with user's table name (quoted)"
389+
assert result.include?("tie.scheduled_in_external_sprint_ids"), "Should include specific column from user's example"
390+
assert result.include?("tracker_issue_extras tie"), "Should include table alias from user's example"
391+
assert result.include?("JOIN repo_issues ri"), "Should include JOIN clause from user's example"
392+
assert result.include?("ARRAY[[1]]::bigint[]"), "Should preserve complex array syntax"
393+
assert result.include?("ARRAY[[2, 3, 1]]::bigint[]"), "Should preserve complex array with multiple values"
394+
end
329395
end

0 commit comments

Comments
 (0)