Skip to content

Commit 38acad6

Browse files
authored
Merge pull request #274 from OneNoteDev/bug/fix-duplicate-videos-on-selection-clip
Rework video extraction logic
2 parents 0f61636 + a2daad9 commit 38acad6

15 files changed

+584
-452
lines changed

src/scripts/domParsers/domUtils.ts

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ export module DomUtils {
361361
// If we are on a Domain that has a valid VideoExtractor, get the embedded videos
362362
// to render them later
363363
if (extractor) {
364-
iframes = iframes.concat(extractor.createEmbeddedVideos(pageUrl, pageContent));
364+
iframes = iframes.concat(extractor.createEmbeddedVideosFromPage(pageUrl, pageContent));
365365
}
366366
} catch (e) {
367367
// if we end up here, we're unexpectedly broken
@@ -941,11 +941,7 @@ export module DomUtils {
941941

942942
let domain = VideoUtils.SupportedVideoDomains[supportedDomain];
943943
let extractor = VideoExtractorFactory.createVideoExtractor(domain);
944-
let embeddedVideos = extractor.createEmbeddedVideos(src, doc.body.innerHTML);
945-
if (embeddedVideos && embeddedVideos.length > 0) {
946-
return embeddedVideos[0];
947-
}
948-
return undefined;
944+
return extractor.createEmbeddedVideoFromUrl(src);
949945
});
950946
}
951947

@@ -972,8 +968,6 @@ export module DomUtils {
972968

973969
let scrollValue: number = (elem.scrollTop * 1.0) / (elem.scrollHeight - elem.clientHeight);
974970

975-
// console.warn(elem.scrollTop, elem.scrollHeight, elem.clientHeight, scrollValue);
976-
977971
if (asDecimalValue) {
978972
return scrollValue;
979973
}

src/scripts/domParsers/khanAcademyVideoExtractor.ts

Lines changed: 21 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,48 +4,43 @@ import {YoutubeVideoExtractor} from "./youtubeVideoExtractor";
44

55
import {ObjectUtils} from "../objectUtils";
66

7-
export class KhanAcademyVideoExtractor implements VideoExtractor {
7+
export class KhanAcademyVideoExtractor extends VideoExtractor {
88
private youtubeExtractor: YoutubeVideoExtractor;
99

1010
constructor() {
11+
super();
12+
// KhanAcademy hosts their videos on Youtube
1113
this.youtubeExtractor = new YoutubeVideoExtractor();
1214
}
1315

14-
public getVideoIds(pageUrl: string, pageContent: string): string[] {
16+
public createEmbeddedVideosFromHtml(html: string): HTMLIFrameElement[] {
17+
if (!html) {
18+
return [];
19+
}
20+
1521
// Matches strings of the form id="video_\S+" OR id='video_\S+'
1622
// with any amount of whitespace padding in between strings of interest
17-
let regex1 = /id\s*=\s*("\s*video_(\S+)\s*"|'\s*video_(\S+)\s*')/gi;
23+
let regex1 = /\sid\s*=\s*("\s*video_(\S+)\s*"|'\s*video_(\S+)\s*')/gi;
1824

1925
// Matches strings of the form data-youtubeid="\S+" OR data-youtubeid='\S+'
2026
// with any amount of whitespace padding in between strings of interest
21-
let regex2 = /data-youtubeid\s*=\s*("\s*(\S+)\s*"|'\s*(\S+)\s*')/gi;
27+
let regex2 = /data-youtubeid\s*=\s*("\s*video_(\S+)\s*"|'\s*video_(\S+)\s*')/gi;
2228

23-
return VideoUtils.matchRegexFromPageContent(pageContent, [regex1, regex2]);
24-
}
25-
26-
public getVideoSrcValues(pageUrl: string, pageContent: string): string[] {
27-
if (ObjectUtils.isNullOrUndefined(pageContent)) {
28-
return;
29-
}
30-
31-
let videoIds = this.getVideoIds(pageUrl, pageContent);
32-
if (ObjectUtils.isNullOrUndefined(videoIds) || videoIds.length === 0) {
33-
return;
29+
let ids = VideoUtils.matchRegexFromPageContent(html, [regex1, regex2]);
30+
if (!ids) {
31+
return [];
3432
}
3533

36-
return ["https://www.youtube.com/embed/" + videoIds[0]];
34+
return ids.map((id) => this.createEmbeddedVideoFromId(id));
3735
}
3836

39-
/**
40-
* Create iframe in correct format for KhanAcademy video (hosted on YouTube) embed in OneNote.
41-
*/
42-
public createEmbeddedVideos(pageUrl: string, pageContent: string): HTMLIFrameElement[] {
43-
let youtubeSrcFromKhanAcademyPage = this.getVideoSrcValues(pageUrl, pageContent);
44-
if (youtubeSrcFromKhanAcademyPage.length === 0) {
45-
return [];
46-
}
47-
let firstSource = youtubeSrcFromKhanAcademyPage[0];
48-
return this.youtubeExtractor.createEmbeddedVideos(firstSource, pageContent);
37+
public createEmbeddedVideoFromUrl(url: string): HTMLIFrameElement {
38+
// KhanAcademy does not host their own videos. We can only derive videos
39+
// from their page's html.
40+
return undefined;
4941
}
5042

43+
public createEmbeddedVideoFromId(id: string): HTMLIFrameElement {
44+
return this.youtubeExtractor.createEmbeddedVideoFromId(id);
45+
}
5146
}
Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,40 @@
1-
export interface VideoExtractor {
2-
getVideoIds(pageUrl: string, pageContent: string): string[];
1+
import * as _ from "lodash";
32

4-
// We can use the video IDs to generate or retrieve src values that we can embed into our article preview
5-
getVideoSrcValues(pageUrl: string, pageContent: string): string[];
3+
export abstract class VideoExtractor {
4+
/**
5+
* Given a page's url and its body html, return a unique list of
6+
* OneNoteApi-compliant video embeds. If there's a video associated
7+
* with the page url, it will be first in the list.
8+
*/
9+
public createEmbeddedVideosFromPage(url: string, html: string): HTMLIFrameElement[] {
10+
let allVideoEmbeds: HTMLIFrameElement[] = [];
11+
let videoEmbedFromUrl = this.createEmbeddedVideoFromUrl(url);
12+
if (videoEmbedFromUrl) {
13+
allVideoEmbeds.push(videoEmbedFromUrl);
14+
}
615

7-
createEmbeddedVideos(pageUrl: string, pageContent: string): HTMLIFrameElement[];
16+
allVideoEmbeds = allVideoEmbeds.concat(this.createEmbeddedVideosFromHtml(html));
17+
return _.uniqWith(allVideoEmbeds, (v1: HTMLIFrameElement, v2: HTMLIFrameElement) => {
18+
return v1.src === v2.src;
19+
});
20+
}
21+
22+
/**
23+
* Given some arbitrary html with n embedded videos that the
24+
* extractor recognizes, return a list of n OneNoteApi-compliant
25+
* video embeds.
26+
*/
27+
public abstract createEmbeddedVideosFromHtml(html: string): HTMLIFrameElement[];
28+
29+
/**
30+
* Given the url of a video page belonging to the extractor's domain, or
31+
* an extisting embedded element, return a OneNoteApi-compliant video
32+
* embed.
33+
*/
34+
public abstract createEmbeddedVideoFromUrl(url: string): HTMLIFrameElement;
35+
36+
/**
37+
* Given the id of the video, return a OneNoteApi-compliant video embed.
38+
*/
39+
public abstract createEmbeddedVideoFromId(id: string): HTMLIFrameElement;
840
};

src/scripts/domParsers/videoUtils.ts

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
11
import {ObjectUtils} from "../objectUtils";
2-
32
import {UrlUtils} from "../urlUtils";
43

5-
import {VideoExtractor} from "./VideoExtractor";
6-
import {KhanAcademyVideoExtractor} from "./khanAcademyVideoExtractor";
7-
84
export module VideoUtils {
95
export const youTubeWatchVideoBaseUrl = "https://www.youtube.com/watch";
106
export const youTubeVideoIdQueryKey = "v";
@@ -63,6 +59,9 @@ export module VideoUtils {
6359
if (matches.length === 0) {
6460
return;
6561
}
66-
return matches.filter((element, index, array) => { return array.indexOf(element) === index; }); // unique values only
62+
63+
// We used to return unique values, but since we now support videos in selections, we moved the
64+
// uniqueness logic to VideoExtractor::createEmbeddedVideosFromPage
65+
return matches;
6766
}
6867
}

src/scripts/domParsers/vimeoVideoExtractor.ts

Lines changed: 34 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -4,73 +4,59 @@ import {VideoUtils} from "./videoUtils";
44

55
import {ObjectUtils} from "../objectUtils";
66

7-
export class VimeoVideoExtractor implements VideoExtractor {
7+
export class VimeoVideoExtractor extends VideoExtractor {
88
private dataOriginalSrcAttribute = "data-original-src";
99

10-
/**
11-
* Return id for a video on Vimeo.com
12-
*/
13-
public getVideoIds(pageUrl: string, pageContent: string): string[] {
14-
if (ObjectUtils.isNullOrUndefined(pageContent)) {
15-
return;
10+
public createEmbeddedVideosFromHtml(html: string): HTMLIFrameElement[] {
11+
if (!html) {
12+
return [];
1613
}
1714

18-
// looking for all matches in pageContent of the general format: id="clip_###"
19-
// - where ### could be any number of digits
20-
// - ignore casing
21-
// - ignore possible whitespacing variations between characters
22-
// - accept the use of either double- or single-quotes around clip_###
15+
// Looking for all matches in pageContent of the general format: id="clip_###"
16+
// - where ### could be any number of digits
17+
// - ignore casing
18+
// - ignore possible whitespacing variations between characters
19+
// - accept the use of either double- or single-quotes around clip_###
2320
let regex1 = /id\s*=\s*("\s*clip_(\d+)\s*"|'\s*clip_(\d+)\s*')/gi;
2421

25-
// also account for embedded Vimeo videos
22+
// Also account for embedded Vimeo videos
2623
let regex2 = /player\.vimeo\.com\/video\/((\d+))\d{0}/gi;
2724

28-
return VideoUtils.matchRegexFromPageContent(pageContent, [regex1, regex2]);
25+
let ids = VideoUtils.matchRegexFromPageContent(html, [regex1, regex2]);
26+
if (!ids) {
27+
return [];
28+
}
29+
30+
return ids.map((id) => this.createEmbeddedVideoFromId(id));
2931
}
3032

31-
/**
32-
* Return valid iframe src attribute value for the supported Vimeo domain
33-
*/
34-
public getVideoSrcValues(pageUrl: string, pageContent: string): string[] {
35-
if (ObjectUtils.isNullOrUndefined(pageContent)) {
36-
return;
33+
public createEmbeddedVideoFromUrl(url: string): HTMLIFrameElement {
34+
if (!url) {
35+
return undefined;
3736
}
3837

39-
let vimeoIds = this.getVideoIds(pageUrl, pageContent);
40-
if (ObjectUtils.isNullOrUndefined(vimeoIds)) {
41-
return;
38+
let match = url.match(/^https?:\/\/vimeo\.com\/(\d+)\d{0}/);
39+
if (match) {
40+
return this.createEmbeddedVideoFromId(match[1]);
4241
}
4342

44-
let values = [];
45-
for (let id of vimeoIds) {
46-
values.push("https://player.vimeo.com/video/" + id);
43+
match = url.match(/^https?:\/\/player.vimeo.com\/video\/(\d+)\d{0}/);
44+
if (match) {
45+
return this.createEmbeddedVideoFromId(match[1]);
4746
}
4847

49-
return values;
48+
return undefined;
5049
}
5150

52-
/**
53-
* Create iframes in correct format for Vimeo video embed in OneNote.
54-
* Supports multiple videos.
55-
*/
56-
public createEmbeddedVideos(pageUrl: string, pageContent: string): HTMLIFrameElement[] {
57-
let vimeoSrcs = this.getVideoSrcValues(pageUrl, pageContent);
58-
59-
if (ObjectUtils.isNullOrUndefined(vimeoSrcs)) {
60-
// fast fail: we expect all pages passed into this function in prod to contain clip ids
61-
throw new Error("Vimeo page content does not contain clip ids");
62-
}
63-
64-
let iframes: HTMLIFrameElement[] = [];
65-
66-
for (let vimeoSrc of vimeoSrcs) {
67-
let iframe = DomUtils.createEmbedVideoIframe();
68-
iframe.src = vimeoSrc;
69-
iframe.setAttribute(this.dataOriginalSrcAttribute, vimeoSrc);
70-
71-
iframes.push(iframe);
51+
public createEmbeddedVideoFromId(id: string): HTMLIFrameElement {
52+
if (!id) {
53+
return undefined;
7254
}
7355

74-
return iframes;
56+
let videoEmbed = DomUtils.createEmbedVideoIframe();
57+
let src = "https://player.vimeo.com/video/" + id;
58+
videoEmbed.src = src;
59+
videoEmbed.setAttribute(this.dataOriginalSrcAttribute, src);
60+
return videoEmbed;
7561
}
7662
}

src/scripts/domParsers/youtubeVideoExtractor.ts

Lines changed: 38 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -4,74 +4,63 @@ import {VideoExtractor} from "./videoExtractor";
44
import {ObjectUtils} from "../objectUtils";
55
import {UrlUtils} from "../urlUtils";
66

7-
export class YoutubeVideoExtractor implements VideoExtractor {
7+
export class YoutubeVideoExtractor extends VideoExtractor {
88
private youTubeWatchVideoBaseUrl = "https://www.youtube.com/watch";
99
private youTubeVideoIdQueryKey = "v";
1010
private dataOriginalSrcAttribute = "data-original-src";
1111

12-
/**
13-
* Return the ID of the video in the YouTube URL as an array
14-
*/
15-
public getVideoIds(youTubeUrl: string, pageContent: string): string[] {
16-
if (ObjectUtils.isNullOrUndefined(youTubeUrl)) {
17-
return;
12+
public createEmbeddedVideosFromHtml(html: string): HTMLIFrameElement[] {
13+
if (!html) {
14+
return [];
1815
}
1916

20-
let youTubeId: string;
21-
if (UrlUtils.getPathname(youTubeUrl).indexOf("/watch") === 0) {
22-
youTubeId = UrlUtils.getQueryValue(youTubeUrl, this.youTubeVideoIdQueryKey);
23-
if (ObjectUtils.isNullOrUndefined(youTubeId)) {
24-
return;
25-
}
26-
}
17+
let divContainer = document.createElement("div") as HTMLDivElement;
18+
divContainer.innerHTML = html;
19+
let allIframes = divContainer.getElementsByTagName("iframe") as HTMLCollectionOf<HTMLIFrameElement>;
2720

28-
if (UrlUtils.getPathname(youTubeUrl).indexOf("/embed") === 0) {
29-
let youTubeIdMatch = youTubeUrl.match(/youtube\.com\/embed\/(\S+)/);
30-
if (ObjectUtils.isNullOrUndefined(youTubeIdMatch) || ObjectUtils.isNullOrUndefined(youTubeIdMatch[1])) {
31-
return;
21+
let videoEmbeds: HTMLIFrameElement[] = [];
22+
for (let i = 0; i < allIframes.length; i++) {
23+
if (this.isYoutubeUrl(allIframes[i].src)) {
24+
let videoEmbed = this.createEmbeddedVideoFromUrl(allIframes[i].src);
25+
if (videoEmbed) {
26+
videoEmbeds.push(videoEmbed);
27+
}
3228
}
33-
youTubeId = youTubeIdMatch[1];
34-
}
35-
36-
if (ObjectUtils.isNullOrUndefined(youTubeId)) {
37-
return;
3829
}
30+
return videoEmbeds;
31+
}
3932

40-
// Ensure we remove query parameters
41-
return [youTubeId.split("?")[0]];
33+
private isYoutubeUrl(url: string): boolean {
34+
return /[^\w]youtube\.com\/watch(\?v=(\w+)|.*\&v=(\w+))/.test(url) || /[^\w]youtube\.com\/embed\/(\w+)/.test(url);
4235
}
4336

44-
/**
45-
* Return valid iframe src attribute value for the supported YouTube domain
46-
*/
47-
public getVideoSrcValues(pageUrl: string, pageContent: string): string[] {
48-
if (ObjectUtils.isNullOrUndefined(pageUrl)) {
49-
return;
37+
public createEmbeddedVideoFromUrl(url: string): HTMLIFrameElement {
38+
if (!url) {
39+
return undefined;
40+
}
41+
42+
if (UrlUtils.getPathname(url).indexOf("/watch") === 0) {
43+
return this.createEmbeddedVideoFromId(UrlUtils.getQueryValue(url, this.youTubeVideoIdQueryKey));
5044
}
5145

52-
let youTubeVideoId = this.getVideoIds(pageUrl, pageContent);
53-
if (ObjectUtils.isNullOrUndefined(youTubeVideoId)) {
54-
return;
46+
if (UrlUtils.getPathname(url).indexOf("/embed") === 0) {
47+
let youTubeIdMatch = url.match(/youtube\.com\/embed\/(\S+)/);
48+
return this.createEmbeddedVideoFromId(youTubeIdMatch[1]);
5549
}
5650

57-
return ["https://www.youtube.com/embed/" + youTubeVideoId];
51+
return undefined;
5852
}
5953

60-
/**
61-
* Create iframe in correct format for YouTube video embed in OneNote.
62-
* Supports a single video.
63-
*/
64-
public createEmbeddedVideos(pageUrl: string, pageContent: string): HTMLIFrameElement[] {
65-
let iframe = DomUtils.createEmbedVideoIframe();
66-
let srcValue = this.getVideoSrcValues(pageUrl, pageContent);
67-
let videoId = this.getVideoIds(pageUrl, pageContent)[0];
68-
if (ObjectUtils.isNullOrUndefined(srcValue) || ObjectUtils.isNullOrUndefined(videoId)) {
69-
// fast fail: we expect all page urls passed into this function in prod to contain a video id
70-
throw new Error("YouTube page url does not contain video id");
54+
public createEmbeddedVideoFromId(id: string): HTMLIFrameElement {
55+
if (!id) {
56+
return undefined;
7157
}
72-
iframe.src = srcValue[0];
73-
iframe.setAttribute(this.dataOriginalSrcAttribute, UrlUtils.addUrlQueryValue(this.youTubeWatchVideoBaseUrl, this.youTubeVideoIdQueryKey, videoId));
7458

75-
return [iframe];
59+
let videoEmbed = DomUtils.createEmbedVideoIframe();
60+
let src = "https://www.youtube.com/embed/" + id;
61+
videoEmbed.src = src;
62+
let dataOriginalSrc = UrlUtils.addUrlQueryValue(this.youTubeWatchVideoBaseUrl, this.youTubeVideoIdQueryKey, id);
63+
videoEmbed.setAttribute(this.dataOriginalSrcAttribute, dataOriginalSrc);
64+
return videoEmbed;
7665
}
7766
}

0 commit comments

Comments
 (0)