Skip to content

Commit 2fc2a69

Browse files
authored
fix: return suggested results and add resultType to dataset items (#73)
* also return suggested results and add resultType to output dataset items
* improve readability
1 parent c08a99b commit 2fc2a69

File tree

3 files changed

+24
-7
lines changed

3 files changed

+24
-7
lines changed

.actor/actor.json

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616
"title": "Overview",
1717
"description": "A view showing just basic properties for simplicity.",
1818
"transformation": {
19-
"flatten": ["metadata"],
19+
"flatten": ["metadata", "searchResult"],
2020
"fields": [
2121
"metadata.url",
2222
"metadata.title",
23+
"searchResult.resultType",
2324
"markdown"
2425
]
2526
},
@@ -34,6 +35,10 @@
3435
"label": "Page title",
3536
"format": "text"
3637
},
38+
"searchResult.resultType": {
39+
"label": "Result type",
40+
"format": "text"
41+
},
3742
"text": {
3843
"label": "Extracted Markdown",
3944
"format": "text"
@@ -49,6 +54,7 @@
4954
"fields": [
5055
"searchResult.title",
5156
"searchResult.description",
57+
"searchResult.resultType",
5258
"searchResult.url"
5359
]
5460
},
@@ -63,6 +69,10 @@
6369
"label": "Title",
6470
"format": "text"
6571
},
72+
"searchResult.resultType": {
73+
"label": "Result type",
74+
"format": "text"
75+
},
6676
"searchResult.url": {
6777
"label": "URL",
6878
"format": "text"

src/google-search/google-extractors-urls.ts

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import type { CheerioAPI } from 'cheerio';
22
import type { Element } from 'domhandler';
33

4-
import type { OrganicResult } from '../types.js';
4+
import type { OrganicResult, SearchResultType } from '../types.js';
55

66
/**
77
* Deduplicates search results based on their title and URL (source @apify/google-search).
@@ -64,7 +64,7 @@ const areTheResultsSuggestions = ($: CheerioAPI) => {
6464
/**
6565
* Extracts organic search results from the given Cheerio instance (source: @apify/google-search).
6666
*/
67-
export const scrapeOrganicResults = ($: CheerioAPI) => {
67+
export const scrapeOrganicResults = ($: CheerioAPI): OrganicResult[] => {
6868
const resultSelectors2023January = [
6969
'.hlcw0c', // Top result with site links
7070
'.g.Ww4FFb', // General search results
@@ -75,10 +75,14 @@ export const scrapeOrganicResults = ($: CheerioAPI) => {
7575
'.sATSHe', // another new selector in March 2025
7676
];
7777

78+
const searchResults = extractResultsFromSelectors($, resultSelectors2023January);
79+
const deduplicatedResults = deduplicateResults(searchResults);
80+
let resultType: SearchResultType = 'ORGANIC';
7881
if (areTheResultsSuggestions($)) {
79-
return [];
82+
resultType = 'SUGGESTED';
8083
}
81-
82-
const searchResults = extractResultsFromSelectors($, resultSelectors2023January);
83-
return deduplicateResults(searchResults);
84+
return deduplicatedResults.map((result) => ({
85+
...result,
86+
resultType,
87+
}));
8488
};

src/types.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,14 @@ export type Input = {
3232
scrapingTool: ScrapingTool;
3333
};
3434

35+
export type SearchResultType = 'ORGANIC' | 'SUGGESTED';
36+
3537
export type OrganicResult = {
3638
description?: string;
3739
title?: string;
3840
rank?: number;
3941
url?: string;
42+
resultType?: SearchResultType;
4043
};
4144

4245
export interface TimeMeasure {

0 commit comments

Comments
 (0)