Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions firestore-bigquery-export/guides/GENERATE_SCHEMA_VIEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ You'll be prompted for:
- BigQuery dataset ID
- Table Prefix
- Firestore collection path to sample
- Whether to use collection group query
- Google AI API key
- Directory and filename for the schema

Expand All @@ -78,6 +79,34 @@ npx @firebaseextensions/fs-bq-schema-views \
--gemini-schema-file-name=user_schema
```

For collection group queries (to query all collections with the same name across your database):

```bash
npx @firebaseextensions/fs-bq-schema-views \
--non-interactive \
--project=my-firebase-project \
--big-query-project=my-bq-project \
--dataset=firestore_changelog \
--table-name-prefix=user_profiles \
--use-gemini=secure \
--query-collection-group \
--google-ai-key=$GOOGLE_API_KEY \
--schema-directory=./schemas \
--gemini-schema-file-name=user_schema
```

#### Understanding Collection vs Collection Group Queries

- **Collection Query** (default): Queries documents from a specific collection path
  - Example: `users/123/orders` - queries orders for a specific user
  - Use when you have a specific collection path

- **Collection Group Query** (`--query-collection-group`): Queries all collections with the same name across your entire database
  - Example: `orders` - queries all order collections regardless of their parent path
  - Use when you have collections with the same name under different documents
  - Useful for subcollections that appear in multiple places

⚠️ **Important**: Always review generated schemas before using them in production.

### Option 2: Create a Schema File Manually
Expand Down Expand Up @@ -133,6 +162,19 @@ npx @firebaseextensions/fs-bq-schema-views \
--schema-files=./test_schema.json
```

For collection group queries with manual schemas:

```bash
npx @firebaseextensions/fs-bq-schema-views \
--non-interactive \
--project=YOUR_PROJECT_ID \
--big-query-project=YOUR_BIGQUERY_PROJECT_ID \
--dataset=YOUR_DATASET_ID \
--table-name-prefix=YOUR_TABLE_PREFIX \
--schema-files=./test_schema.json \
--query-collection-group
```

For multiple schema files, use comma separation:

```
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ describe("parseConfig", () => {
googleAiKey: undefined,
schemaDirectory: undefined,
useGemini: false,
isCollectionGroupQuery: undefined,
});
});

Expand Down Expand Up @@ -107,6 +108,7 @@ describe("parseConfig", () => {
googleAiKey: "test-key",
geminiAnalyzeCollectionPath: "test-collection",
schemaDirectory: "test-directory",
queryCollectionGroup: true,
outputHelp: jest.fn(),
};

Expand All @@ -120,6 +122,7 @@ describe("parseConfig", () => {
expect(result.geminiAnalyzeCollectionPath).toBe("test-collection");
expect(result.schemaDirectory).toBe("test-directory");
expect(result.agentSampleSize).toBe(100);
expect(result.isCollectionGroupQuery).toBe(true);
});

it("should exit if required parameters are missing", async () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ describe("Interactive Prompts", () => {

describe("questions array", () => {
it("should have the correct number of questions", () => {
expect(questions).toHaveLength(10);
expect(questions).toHaveLength(11);
});

it("should have properly formatted questions with required properties", () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ describe("Command Line Parser", () => {
expect(commander.version).toHaveBeenCalledWith("1.0.0");

// Check that all options are configured
expect(commander.option).toHaveBeenCalledTimes(10);
expect(commander.option).toHaveBeenCalledTimes(11);

// Check specific options - just a sample to ensure we're setting up correctly
expect(commander.option).toHaveBeenCalledWith(
Expand All @@ -89,6 +89,12 @@ describe("Command Line Parser", () => {
collect,
[]
);

expect(commander.option).toHaveBeenCalledWith(
"--query-collection-group",
"Use collection group query instead of regular collection query",
false
);
});

it("should return the configured program", () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
interface FirestoreModule {
(): {
collection: jest.Mock;
collectionGroup: jest.Mock;
where: jest.Mock;
limit: jest.Mock;
get: jest.Mock;
Expand All @@ -31,6 +32,7 @@ interface FirestoreModule {
jest.mock("firebase-admin", () => {
const mockFirestore = {
collection: jest.fn().mockReturnThis(),
collectionGroup: jest.fn().mockReturnThis(),
where: jest.fn().mockReturnThis(),
limit: jest.fn().mockReturnThis(),
get: jest.fn().mockResolvedValue({
Expand Down Expand Up @@ -136,4 +138,95 @@ describe("sampleFirestoreDocuments", () => {
sampleFirestoreDocuments(collectionPath, sampleSize)
).rejects.toThrow("Firestore error");
});

// Test suite for the optional third argument of sampleFirestoreDocuments
// (isCollectionGroupQuery): verifies which Firestore entry point is used and
// that results and errors are passed through to the caller.
describe("collection group queries", () => {
// With the flag set to true, the query must start from collectionGroup(),
// must not apply a `where` filter, and must still apply the sample-size limit.
it("should sample documents from Firestore collection group", async () => {
const collectionPath = "orders";
const sampleSize = 2;
const isCollectionGroupQuery = true;

// Mock collection group data (subcollections from different parents)
const firebase = require("firebase-admin");
const mockFirestore = firebase.firestore();

// Clear mocks and set up specific mock for this test
// (clearing first keeps calls recorded by earlier tests out of the
// `not.toHaveBeenCalled` assertions below).
jest.clearAllMocks();
mockFirestore.get.mockResolvedValueOnce({
docs: [
{
data: () => ({ orderId: "order1", amount: 50, userId: "user1" }),
id: "order1",
},
{
data: () => ({ orderId: "order2", amount: 75, userId: "user2" }),
id: "order2",
},
],
});

const result = await sampleFirestoreDocuments(
collectionPath,
sampleSize,
isCollectionGroupQuery
);

// The collection-group entry point is used instead of collection().
expect(mockFirestore.collectionGroup).toHaveBeenCalledWith(
collectionPath
);
expect(mockFirestore.collection).not.toHaveBeenCalled();
expect(mockFirestore.where).not.toHaveBeenCalled();
expect(mockFirestore.limit).toHaveBeenCalledWith(sampleSize);
expect(mockFirestore.get).toHaveBeenCalled();

// Both mocked documents come back, carrying the fields from data().
expect(result).toHaveLength(2);
expect(result[0]).toHaveProperty("orderId", "order1");
expect(result[0]).toHaveProperty("amount", 50);
expect(result[0]).toHaveProperty("userId", "user1");
});

// With the flag explicitly false, the regular collection() path is taken.
it("should default to regular collection query when isCollectionGroupQuery is false", async () => {
const collectionPath = "test-collection";
const sampleSize = 2;
const isCollectionGroupQuery = false;

const firebase = require("firebase-admin");
const mockFirestore = firebase.firestore();

// Clear mocks for this test
jest.clearAllMocks();

const result = await sampleFirestoreDocuments(
collectionPath,
sampleSize,
isCollectionGroupQuery
);

expect(mockFirestore.collection).toHaveBeenCalledWith(collectionPath);
expect(mockFirestore.collectionGroup).not.toHaveBeenCalled();
// NOTE(review): no `get` result is configured in this test, so the length
// check presumably relies on the module-level default `get` mock still
// resolving with two documents after clearAllMocks() — confirm.
expect(result).toHaveLength(2);
});

// A rejection from get() must surface to the caller as a rejected promise
// with a matching message.
it("should handle errors properly for collection group queries", async () => {
const firebase = require("firebase-admin");
const mockFirestore = firebase.firestore();

// Clear mocks and set up error for this test
jest.clearAllMocks();
mockFirestore.get.mockRejectedValueOnce(
new Error("Collection group error")
);

const collectionPath = "orders";
const sampleSize = 2;
const isCollectionGroupQuery = true;

await expect(
sampleFirestoreDocuments(
collectionPath,
sampleSize,
isCollectionGroupQuery
)
).rejects.toThrow("Collection group error");
});
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export interface CliConfig {
googleAiKey?: string;
schemaDirectory?: string;
geminiSchemaFileName?: string;
isCollectionGroupQuery?: boolean;
}

export async function parseConfig(): Promise<CliConfig> {
Expand All @@ -55,6 +56,7 @@ export async function parseConfig(): Promise<CliConfig> {
googleAiKey: program.googleAiKey,
schemaDirectory: program.schemaDirectory,
geminiSchemaFileName: program.geminiSchemaFileName,
isCollectionGroupQuery: program.queryCollectionGroup,
};
}
const {
Expand All @@ -68,6 +70,7 @@ export async function parseConfig(): Promise<CliConfig> {
googleAiKey,
schemaDirectory,
geminiSchemaFileName,
isCollectionGroupQuery,
} = await promptInquirer();

return {
Expand All @@ -82,5 +85,6 @@ export async function parseConfig(): Promise<CliConfig> {
googleAiKey,
schemaDirectory,
geminiSchemaFileName,
isCollectionGroupQuery,
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,14 @@ export const questions = [
when: (answers) => answers.useGemini,
default: "schema",
},
{
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please update the phrasing of the question "What is the Firestore collection path you want Gemini to analyze?" so that it mentions collection group queries? Thanks!

message:
"Do you want to use a collection group query instead of a regular collection query?",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @superphil0, thank you for your PR. Can we move this question so that it appears before the question that asks the user for the Firestore collection path Gemini will analyze?

name: "isCollectionGroupQuery",
type: "confirm",
when: (answers) => answers.useGemini,
default: false,
},
];

export const promptInquirer = () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,11 @@ export const configureProgram = () => {
"--gemini-schema-file-name <file-name>",
"Name of schema json file generated by Gemini (without .json extension)",
"schema"
)
.option(
"--query-collection-group",
"Use collection group query instead of regular collection query",
false
);

return program;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,25 @@ import inquirer from "inquirer";

export async function sampleFirestoreDocuments(
collectionPath: string,
sampleSize: number
sampleSize: number,
isCollectionGroupQuery: boolean = false
): Promise<any[]> {
const db = firebase.firestore();

try {
const snapshot = await db
.collection(collectionPath)
.where("__name__", ">=", Math.random().toString())
.limit(sampleSize)
.get();
const query = isCollectionGroupQuery
? db.collectionGroup(collectionPath)
: db.collection(collectionPath);

let snapshot = null;
if (isCollectionGroupQuery) {
snapshot = await query.limit(sampleSize).get();
} else {
snapshot = await query
.where("__name__", ">=", Math.random().toString())
.limit(sampleSize)
.get();
}

const documents = snapshot.docs.map((doc) => {
const data = doc.data();
Expand Down Expand Up @@ -197,7 +206,8 @@ export const generateSchemaFilesWithGemini = async (config: CliConfig) => {
// get sample data from Firestore
const sampleData = await sampleFirestoreDocuments(
config.geminiAnalyzeCollectionPath!,
config.agentSampleSize!
config.agentSampleSize!,
config.isCollectionGroupQuery || false
);

if (sampleData.length === 0) {
Expand Down