Skip to content

Commit 388e50c

Browse files
committed
feat(firestore-bigquery-export): add collection group query support
- Add --is-collection-group-query flag to gen-schema-view script
- Support both regular collection and collection group queries in Gemini schema generation
- Update sampleFirestoreDocuments function with isCollectionGroupQuery parameter
- Add interactive prompt for collection group query selection
- Update documentation with examples and explanations
- Add comprehensive test coverage for new functionality
- Maintain backward compatibility with existing functionality

This allows users to generate schemas for subcollections that appear across multiple parent documents by using collection group queries instead of regular collection queries.
1 parent aca45bf commit 388e50c

File tree

11 files changed

+192
-9
lines changed

11 files changed

+192
-9
lines changed

firestore-bigquery-export/guides/GENERATE_SCHEMA_VIEWS.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ You'll be prompted for:
6060
- BigQuery dataset ID
6161
- Table Prefix
6262
- Firestore collection path to sample
63+
- Whether to use a collection group query
6364
- Google AI API key
6465
- Directory and filename for the schema
6566

@@ -78,6 +79,34 @@ npx @firebaseextensions/fs-bq-schema-views \
7879
--gemini-schema-file-name=user_schema
7980
```
8081

82+
For collection group queries (to query all collections with the same name across your database):
83+
84+
```bash
85+
npx @firebaseextensions/fs-bq-schema-views \
86+
--non-interactive \
87+
--project=my-firebase-project \
88+
--big-query-project=my-bq-project \
89+
--dataset=firestore_changelog \
90+
--table-name-prefix=user_profiles \
91+
--use-gemini=secure \
92+
--is-collection-group-query \
93+
--google-ai-key=$GOOGLE_API_KEY \
94+
--schema-directory=./schemas \
95+
--gemini-schema-file-name=user_schema
96+
```
97+
98+
#### Understanding Collection vs Collection Group Queries
99+
100+
- **Collection Query** (default): Queries documents from a specific collection path
101+
102+
- Example: `users/123/orders` - queries orders for a specific user
103+
- Use when you have a specific collection path
104+
105+
- **Collection Group Query** (`--is-collection-group-query`): Queries all collections with the same name across your entire database
106+
- Example: `orders` - queries every collection named `orders`, regardless of its parent path
107+
- Use when you have collections with the same name under different parent documents
108+
- Useful for subcollections that appear in multiple places
109+
81110
⚠️ **Important**: Always review generated schemas before using them in production.
82111

83112
### Option 2: Create a Schema File Manually
@@ -133,6 +162,19 @@ npx @firebaseextensions/fs-bq-schema-views \
133162
--schema-files=./test_schema.json
134163
```
135164

165+
For collection group queries with manual schemas:
166+
167+
```bash
168+
npx @firebaseextensions/fs-bq-schema-views \
169+
--non-interactive \
170+
--project=YOUR_PROJECT_ID \
171+
--big-query-project=YOUR_BIGQUERY_PROJECT_ID \
172+
--dataset=YOUR_DATASET_ID \
173+
--table-name-prefix=YOUR_TABLE_PREFIX \
174+
--schema-files=./test_schema.json \
175+
--is-collection-group-query
176+
```
177+
136178
For multiple schema files, use comma separation:
137179

138180
```

firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/index.test.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ describe("parseConfig", () => {
6868
googleAiKey: undefined,
6969
schemaDirectory: undefined,
7070
useGemini: false,
71+
isCollectionGroupQuery: undefined,
7172
});
7273
});
7374

@@ -107,6 +108,7 @@ describe("parseConfig", () => {
107108
googleAiKey: "test-key",
108109
geminiAnalyzeCollectionPath: "test-collection",
109110
schemaDirectory: "test-directory",
111+
isCollectionGroupQuery: true,
110112
outputHelp: jest.fn(),
111113
};
112114

@@ -120,6 +122,7 @@ describe("parseConfig", () => {
120122
expect(result.geminiAnalyzeCollectionPath).toBe("test-collection");
121123
expect(result.schemaDirectory).toBe("test-directory");
122124
expect(result.agentSampleSize).toBe(100);
125+
expect(result.isCollectionGroupQuery).toBe(true);
123126
});
124127

125128
it("should exit if required parameters are missing", async () => {

firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/interactive.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ describe("Interactive Prompts", () => {
2424

2525
describe("questions array", () => {
2626
it("should have the correct number of questions", () => {
27-
expect(questions).toHaveLength(10);
27+
expect(questions).toHaveLength(11);
2828
});
2929

3030
it("should have properly formatted questions with required properties", () => {

firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/config/non-interactive.test.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ describe("Command Line Parser", () => {
6969
expect(commander.version).toHaveBeenCalledWith("1.0.0");
7070

7171
// Check that all options are configured
72-
expect(commander.option).toHaveBeenCalledTimes(10);
72+
expect(commander.option).toHaveBeenCalledTimes(11);
7373

7474
// Check specific options - just a sample to ensure we're setting up correctly
7575
expect(commander.option).toHaveBeenCalledWith(
@@ -89,6 +89,12 @@ describe("Command Line Parser", () => {
8989
collect,
9090
[]
9191
);
92+
93+
expect(commander.option).toHaveBeenCalledWith(
94+
"--is-collection-group-query",
95+
"Use collection group query instead of regular collection query",
96+
false
97+
);
9298
});
9399

94100
it("should return the configured program", () => {

firestore-bigquery-export/scripts/gen-schema-view/src/__tests__/genkit/sampleFirestoreDocuments.test.ts

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
interface FirestoreModule {
1818
(): {
1919
collection: jest.Mock;
20+
collectionGroup: jest.Mock;
2021
where: jest.Mock;
2122
limit: jest.Mock;
2223
get: jest.Mock;
@@ -31,6 +32,7 @@ interface FirestoreModule {
3132
jest.mock("firebase-admin", () => {
3233
const mockFirestore = {
3334
collection: jest.fn().mockReturnThis(),
35+
collectionGroup: jest.fn().mockReturnThis(),
3436
where: jest.fn().mockReturnThis(),
3537
limit: jest.fn().mockReturnThis(),
3638
get: jest.fn().mockResolvedValue({
@@ -136,4 +138,96 @@ describe("sampleFirestoreDocuments", () => {
136138
sampleFirestoreDocuments(collectionPath, sampleSize)
137139
).rejects.toThrow("Firestore error");
138140
});
141+
142+
describe("collection group queries", () => {
143+
beforeEach(() => {
144+
jest.clearAllMocks();
145+
});
146+
147+
it("should sample documents from Firestore collection group", async () => {
148+
const collectionPath = "orders";
149+
const sampleSize = 2;
150+
const isCollectionGroupQuery = true;
151+
152+
// Mock collection group data (subcollections from different parents)
153+
const firebase = require("firebase-admin");
154+
const mockFirestore = firebase.firestore();
155+
156+
mockFirestore.get.mockResolvedValueOnce({
157+
docs: [
158+
{
159+
data: () => ({ orderId: "order1", amount: 50, userId: "user1" }),
160+
id: "order1",
161+
},
162+
{
163+
data: () => ({ orderId: "order2", amount: 75, userId: "user2" }),
164+
id: "order2",
165+
},
166+
],
167+
});
168+
169+
const result = await sampleFirestoreDocuments(
170+
collectionPath,
171+
sampleSize,
172+
isCollectionGroupQuery
173+
);
174+
175+
expect(mockFirestore.collectionGroup).toHaveBeenCalledWith(
176+
collectionPath
177+
);
178+
expect(mockFirestore.collection).not.toHaveBeenCalled();
179+
expect(mockFirestore.where).toHaveBeenCalledWith(
180+
"__name__",
181+
">=",
182+
expect.any(String)
183+
);
184+
expect(mockFirestore.limit).toHaveBeenCalledWith(sampleSize);
185+
expect(mockFirestore.get).toHaveBeenCalled();
186+
187+
expect(result).toHaveLength(2);
188+
expect(result[0]).toHaveProperty("orderId", "order1");
189+
expect(result[0]).toHaveProperty("amount", 50);
190+
expect(result[0]).toHaveProperty("userId", "user1");
191+
});
192+
193+
it("should default to regular collection query when isCollectionGroupQuery is false", async () => {
194+
const collectionPath = "test-collection";
195+
const sampleSize = 2;
196+
const isCollectionGroupQuery = false;
197+
198+
const result = await sampleFirestoreDocuments(
199+
collectionPath,
200+
sampleSize,
201+
isCollectionGroupQuery
202+
);
203+
204+
const firebase = require("firebase-admin");
205+
const mockFirestore = firebase.firestore();
206+
207+
expect(mockFirestore.collection).toHaveBeenCalledWith(collectionPath);
208+
expect(mockFirestore.collectionGroup).not.toHaveBeenCalled();
209+
expect(result).toHaveLength(2);
210+
});
211+
212+
it("should handle errors properly for collection group queries", async () => {
213+
const firebase = require("firebase-admin");
214+
const mockFirestore = firebase.firestore();
215+
216+
mockFirestore.get.mockRejectedValueOnce(
217+
new Error("Collection group error")
218+
);
219+
220+
const collectionPath = "orders";
221+
const sampleSize = 2;
222+
const isCollectionGroupQuery = true;
223+
224+
await expect(
225+
sampleFirestoreDocuments(
226+
collectionPath,
227+
sampleSize,
228+
isCollectionGroupQuery
229+
)
230+
).rejects.toThrow("Collection group error");
231+
});
232+
});
139233
});

firestore-bigquery-export/scripts/gen-schema-view/src/config/index.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ export interface CliConfig {
3333
googleAiKey?: string;
3434
schemaDirectory?: string;
3535
geminiSchemaFileName?: string;
36+
isCollectionGroupQuery?: boolean;
3637
}
3738

3839
export async function parseConfig(): Promise<CliConfig> {
@@ -55,6 +56,7 @@ export async function parseConfig(): Promise<CliConfig> {
5556
googleAiKey: program.googleAiKey,
5657
schemaDirectory: program.schemaDirectory,
5758
geminiSchemaFileName: program.geminiSchemaFileName,
59+
isCollectionGroupQuery: program.isCollectionGroupQuery,
5860
};
5961
}
6062
const {
@@ -68,6 +70,7 @@ export async function parseConfig(): Promise<CliConfig> {
6870
googleAiKey,
6971
schemaDirectory,
7072
geminiSchemaFileName,
73+
isCollectionGroupQuery,
7174
} = await promptInquirer();
7275

7376
return {
@@ -82,5 +85,6 @@ export async function parseConfig(): Promise<CliConfig> {
8285
googleAiKey,
8386
schemaDirectory,
8487
geminiSchemaFileName,
88+
isCollectionGroupQuery,
8589
};
8690
}

firestore-bigquery-export/scripts/gen-schema-view/src/config/interactive.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,14 @@ export const questions = [
112112
when: (answers) => answers.useGemini,
113113
default: "schema",
114114
},
115+
{
116+
message:
117+
"Do you want to use a collection group query instead of a regular collection query?",
118+
name: "isCollectionGroupQuery",
119+
type: "confirm",
120+
when: (answers) => answers.useGemini,
121+
default: false,
122+
},
115123
];
116124

117125
export const promptInquirer = () => {

firestore-bigquery-export/scripts/gen-schema-view/src/config/non-interactive.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ export const configureProgram = () => {
7474
"--gemini-schema-file-name <file-name>",
7575
"Name of schema json file generated by Gemini (without .json extension)",
7676
"schema"
77+
)
78+
.option(
79+
"--is-collection-group-query",
80+
"Use collection group query instead of regular collection query",
81+
false
7782
);
7883

7984
return program;

firestore-bigquery-export/scripts/gen-schema-view/src/schema/genkit.ts

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,25 @@ import inquirer from "inquirer";
2424

2525
export async function sampleFirestoreDocuments(
2626
collectionPath: string,
27-
sampleSize: number
27+
sampleSize: number,
28+
isCollectionGroupQuery: boolean = false
2829
): Promise<any[]> {
2930
const db = firebase.firestore();
3031

3132
try {
32-
const snapshot = await db
33-
.collection(collectionPath)
34-
.where("__name__", ">=", Math.random().toString())
35-
.limit(sampleSize)
36-
.get();
33+
const query = isCollectionGroupQuery
34+
? db.collectionGroup(collectionPath)
35+
: db.collection(collectionPath);
36+
37+
let snapshot = null;
38+
if (isCollectionGroupQuery) {
39+
snapshot = await query.limit(sampleSize).get();
40+
} else {
41+
snapshot = await query
42+
.where("__name__", ">=", Math.random().toString())
43+
.limit(sampleSize)
44+
.get();
45+
}
3746

3847
const documents = snapshot.docs.map((doc) => {
3948
const data = doc.data();
@@ -197,7 +206,8 @@ export const generateSchemaFilesWithGemini = async (config: CliConfig) => {
197206
// get sample data from Firestore
198207
const sampleData = await sampleFirestoreDocuments(
199208
config.geminiAnalyzeCollectionPath!,
200-
config.agentSampleSize!
209+
config.agentSampleSize!,
210+
config.isCollectionGroupQuery || false
201211
);
202212

203213
if (sampleData.length === 0) {

package-lock.json

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)