# Patch summary (commentary only; placed before the first "diff --git" header).
#
# test/tests.js -- inside the "twttr.txt.extractUrls" test, adds two cases:
#   * "http://balancedparensquery.com?q=@s=(1,2)" is expected to be extracted
#     in full, including the balanced "(1,2)" that ends the query string.
#   * the same URL wrapped in an outer "(" ... ")" is expected to be extracted
#     without the extra closing parenthesis that follows the query.
#
# twitter-text.js -- reworks the query-string pieces of twttr.txt.regexen:
#   * validUrlQueryChars: "\(" and "\)" are removed from the character class.
#   * validUrlQueryBalancedParens (new): matches "(" ... ")" whose contents are
#     either one or more query chars, or query chars surrounding a single
#     nested "(" query-chars+ ")" group. An empty "()" and a second level of
#     nesting are not matched by this pattern.
#   * validUrlQueryEndingChars: now built through regexSupplant and accepts
#     either the previous ending class [a-z0-9_&=#\/] or a balanced-paren group.
#   * validUrlQuery (new): query chars*, then any number of
#     (balanced-paren group followed by query chars*), then a required ending.
#   * extractUrl: the $8 "Query String" group now uses "\?#{validUrlQuery}"
#     instead of "\?#{validUrlQueryChars}*#{validUrlQueryEndingChars}".
#
# NOTE(review): in this copy the patch's line breaks and indentation have been
# collapsed, so diff "+" markers appear inline next to JavaScript "+"
# concatenation operators (e.g. "' + + '"). The hunk headers still declare the
# original line counts; presumably the patch must be re-exported from version
# control before it can be applied -- confirm.
diff --git a/test/tests.js b/test/tests.js index 175dbc6..132fa62 100644 --- a/test/tests.js +++ b/test/tests.js @@ -306,4 +306,11 @@ test("twttr.txt.extractUrls", function() { var message_with_www_hyphenated_url = "Message with www.123-hyphenated-url.com"; equal(twttr.txt.extractUrls(message_with_hyphenated_url)[0], "hyphenated-url.com", "Should extract full url with hyphen."); equal(twttr.txt.extractUrls(message_with_www_hyphenated_url)[0], "www.123-hyphenated-url.com", "Should extract full url with hyphen."); + + var message_with_balanced_parens_query = "Message with http://balancedparensquery.com?q=@s=(1,2)"; + var message_with_balanced_parens_query_in_parens = "Message with (http://balancedparensquery.com?q=@s=(1,2))"; + + equal(twttr.txt.extractUrls(message_with_balanced_parens_query)[0], "http://balancedparensquery.com?q=@s=(1,2)", "Should extract balanced parens at end of query."); + equal(twttr.txt.extractUrls(message_with_balanced_parens_query_in_parens)[0], "http://balancedparensquery.com?q=@s=(1,2)", "Should NOT extract extra paren after query."); + }); diff --git a/twitter-text.js b/twitter-text.js index edfd99e..d645083 100644 --- a/twitter-text.js +++ b/twitter-text.js @@ -294,8 +294,29 @@ ')|(?:@#{validGeneralUrlPathChars}+\/)'+ ')', 'i'); - twttr.txt.regexen.validUrlQueryChars = /[a-z0-9!?\*'@\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i; - twttr.txt.regexen.validUrlQueryEndingChars = /[a-z0-9_&=#\/]/i; + twttr.txt.regexen.validUrlQueryChars = /[a-z0-9!?\*'@;:&=\+\$\/%#\[\]\-_\.,~|]/i; + twttr.txt.regexen.validUrlQueryBalancedParens = regexSupplant( + '\\(' + + '(?:' + + '#{validUrlQueryChars}+' + + '|' + + // allow one nested level of balanced parentheses + '(?:' + + '#{validUrlQueryChars}*' + + '\\(' + + '#{validUrlQueryChars}+' + + '\\)' + + '#{validUrlQueryChars}*' + + ')' + + ')' + + '\\)' + , 'i'); + twttr.txt.regexen.validUrlQueryEndingChars = regexSupplant(/[a-z0-9_&=#\/]|(?:#{validUrlQueryBalancedParens})/i); + twttr.txt.regexen.validUrlQuery = 
regexSupplant('(?:' + + '#{validUrlQueryChars}*' + + '(?:#{validUrlQueryBalancedParens}#{validUrlQueryChars}*)*' + + '(?:#{validUrlQueryEndingChars})'+ + ')', 'i'); twttr.txt.regexen.extractUrl = regexSupplant( '(' + // $1 total match '(#{validUrlPrecedingChars})' + // $2 Preceeding chracter @@ -304,7 +325,7 @@ '(#{validDomain})' + // $5 Domain(s) '(?::(#{validPortNumber}))?' + // $6 Port number (optional) '(\\/#{validUrlPath}*)?' + // $7 URL Path - '(\\?#{validUrlQueryChars}*#{validUrlQueryEndingChars})?' + // $8 Query String + '(\\?#{validUrlQuery})?' + // $8 Query String ')' + ')' , 'gi');