Skip to content

Commit d2a4e2e

Browse files
authored
Merge pull request #118 from wp-cli/issue_117
Use grapheme_substr & pcre_match in safe_substr for #117. Ascii::columns fix.
2 parents 37cf6e1 + 90bf8a5 commit d2a4e2e

File tree

10 files changed

+453
-83
lines changed

10 files changed

+453
-83
lines changed

.travis.yml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,14 @@ php:
55
- 5.4
66
- 5.5
77
- 5.6
8+
- 7.0
9+
- 7.1
810

9-
script: phpunit
11+
before_script:
12+
- php -m
13+
- php --info | grep -i 'intl\|icu\|pcre'
14+
15+
script: phpunit --debug
1016

1117
notifications:
1218
email:

lib/cli/Shell.php

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,14 @@ static public function columns() {
5050
}
5151
} else {
5252
if ( ! ( $columns = (int) getenv( 'COLUMNS' ) ) ) {
53-
if ( getenv( 'TERM' ) ) {
54-
$columns = (int) exec( '/usr/bin/env tput cols 2>/dev/null' );
53+
$size = exec( '/usr/bin/env stty size 2>/dev/null' );
54+
if ( '' !== $size && preg_match( '/[0-9]+ ([0-9]+)/', $size, $matches ) ) {
55+
$columns = (int) $matches[1];
56+
}
57+
if ( ! $columns ) {
58+
if ( getenv( 'TERM' ) ) {
59+
$columns = (int) exec( '/usr/bin/env tput cols 2>/dev/null' );
60+
}
5561
}
5662
}
5763
}

lib/cli/cli.php

Lines changed: 150 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -153,87 +153,148 @@ function menu( $items, $default = null, $title = 'Choose an item' ) {
153153
}
154154

155155
/**
156-
* Attempts an encoding-safe way of getting string length. If mb_string extensions aren't
157-
* installed, falls back to basic strlen if no encoding is present
156+
* Attempts an encoding-safe way of getting string length. If intl extension or PCRE with '\X' or mb_string extension aren't
157+
* available, falls back to basic strlen.
158158
*
159159
* @param string $str The string to check.
160160
* @param string|bool $encoding Optional. The encoding of the string. Default false.
161161
* @return int Numeric value that represents the string's length
162162
*/
163163
function safe_strlen( $str, $encoding = false ) {
164+
// Allow for selective testing - "1" bit set tests grapheme_strlen(), "2" preg_match_all( '/\X/u' ), "4" mb_strlen(), "other" strlen().
165+
$test_safe_strlen = getenv( 'PHP_CLI_TOOLS_TEST_SAFE_STRLEN' );
166+
167+
// Assume UTF-8 if no encoding given - `grapheme_strlen()` will return null if given non-UTF-8 string.
168+
if ( ( ! $encoding || 'UTF-8' === $encoding ) && can_use_icu() && null !== ( $length = grapheme_strlen( $str ) ) ) {
169+
if ( ! $test_safe_strlen || ( $test_safe_strlen & 1 ) ) {
170+
return $length;
171+
}
172+
}
173+
// Assume UTF-8 if no encoding given - `preg_match_all()` will return false if given non-UTF-8 string.
174+
if ( ( ! $encoding || 'UTF-8' === $encoding ) && can_use_pcre_x() && false !== ( $length = preg_match_all( '/\X/u', $str, $dummy /*needed for PHP 5.3*/ ) ) ) {
175+
if ( ! $test_safe_strlen || ( $test_safe_strlen & 2 ) ) {
176+
return $length;
177+
}
178+
}
179+
// Legacy encodings and old PHPs will reach here.
164180
if ( function_exists( 'mb_strlen' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
165181
if ( ! $encoding ) {
166182
$encoding = mb_detect_encoding( $str, null, true /*strict*/ );
167183
}
168-
$length = mb_strlen( $str, $encoding );
169-
} else {
170-
// iconv will return PHP notice if non-ascii characters are present in input string
171-
$str = iconv( $encoding ? $encoding : 'ASCII', 'ASCII', $str );
172-
173-
$length = strlen( $str );
184+
$length = mb_strlen( $str, $encoding );
185+
if ( 'UTF-8' === $encoding ) {
186+
// Subtract combining characters.
187+
$length -= preg_match_all( get_unicode_regexs( 'm' ), $str, $dummy /*needed for PHP 5.3*/ );
188+
}
189+
if ( ! $test_safe_strlen || ( $test_safe_strlen & 4 ) ) {
190+
return $length;
191+
}
174192
}
175-
176-
return $length;
193+
return strlen( $str );
177194
}
178195

179196
/**
180-
* Attempts an encoding-safe way of getting a substring. If mb_string extensions aren't
181-
* installed, falls back to ascii substring if no encoding is present
197+
* Attempts an encoding-safe way of getting a substring. If intl extension or PCRE with '\X' or mb_string extension aren't
198+
* available, falls back to substr().
182199
*
183200
* @param string $str The input string.
184201
* @param int $start The starting position of the substring.
185-
* @param int|bool|null $length Optional. Maximum length of the substring. Default false.
186-
* @param int|bool $is_width Optional. If set and encoding is UTF-8, $length is interpreted as spacing width. Default false.
202+
* @param int|bool|null $length Optional, unless $is_width is set. Maximum length of the substring. Default false. Negative not supported.
203+
* @param int|bool $is_width Optional. If set and encoding is UTF-8, $length (which must be specified) is interpreted as spacing width. Default false.
187204
* @param string|bool $encoding Optional. The encoding of the string. Default false.
188-
* @return string Substring of string specified by start and length parameters
205+
* @return bool|string False if given unsupported args, otherwise substring of string specified by start and length parameters
189206
*/
190207
function safe_substr( $str, $start, $length = false, $is_width = false, $encoding = false ) {
208+
// Not supported: a negative $length, or $is_width set without $length being specified.
209+
if ( $length < 0 || ( $is_width && ( null === $length || false === $length ) ) ) {
210+
return false;
211+
}
212+
$have_safe_strlen = false;
191213
// PHP 5.3 substr takes false as full length, PHP > 5.3 takes null - for compat. do `safe_strlen()`.
192214
if ( null === $length || false === $length ) {
193215
$length = safe_strlen( $str, $encoding );
216+
$have_safe_strlen = true;
217+
}
218+
219+
// Allow for selective testing - "1" bit set tests grapheme_substr(), "2" preg_match( '/\X/' ), "4" mb_substr(), "8" substr().
220+
$test_safe_substr = getenv( 'PHP_CLI_TOOLS_TEST_SAFE_SUBSTR' );
221+
222+
// Assume UTF-8 if no encoding given - `grapheme_substr()` will return false (not null like `grapheme_strlen()`) if given non-UTF-8 string.
223+
if ( ( ! $encoding || 'UTF-8' === $encoding ) && can_use_icu() && false !== ( $try = grapheme_substr( $str, $start, $length ) ) ) {
224+
if ( ! $test_safe_substr || ( $test_safe_substr & 1 ) ) {
225+
return $is_width ? _safe_substr_eaw( $try, $length ) : $try;
226+
}
227+
}
228+
// Assume UTF-8 if no encoding given - `preg_match()` will return false if given non-UTF-8 string.
229+
if ( ( ! $encoding || 'UTF-8' === $encoding ) && can_use_pcre_x() ) {
230+
if ( $start < 0 ) {
231+
$start = max( $start + ( $have_safe_strlen ? $length : safe_strlen( $str, $encoding ) ), 0 );
232+
}
233+
if ( $start ) {
234+
if ( preg_match( '/^\X{' . $start . '}(\X{0,' . $length . '})/u', $str, $matches ) ) {
235+
if ( ! $test_safe_substr || ( $test_safe_substr & 2 ) ) {
236+
return $is_width ? _safe_substr_eaw( $matches[1], $length ) : $matches[1];
237+
}
238+
}
239+
} else {
240+
if ( preg_match( '/^\X{0,' . $length . '}/u', $str, $matches ) ) {
241+
if ( ! $test_safe_substr || ( $test_safe_substr & 2 ) ) {
242+
return $is_width ? _safe_substr_eaw( $matches[0], $length ) : $matches[0];
243+
}
244+
}
245+
}
194246
}
247+
// Legacy encodings and old PHPs will reach here.
195248
if ( function_exists( 'mb_substr' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
196249
if ( ! $encoding ) {
197250
$encoding = mb_detect_encoding( $str, null, true /*strict*/ );
198251
}
199-
$substr = mb_substr( $str, $start, $length, $encoding );
200-
201-
if ( $is_width && 'UTF-8' === $encoding ) {
202-
// Set the East Asian Width regex.
203-
$eaw_regex = get_unicode_regexs( 'eaw' );
204-
// If there's any East Asian double-width chars...
205-
if ( preg_match( $eaw_regex, $substr ) ) {
206-
// Note that if the length ends in the middle of a double-width char, the char is excluded, not included.
207-
208-
// See if it's all EAW - the most likely case.
209-
if ( preg_match_all( $eaw_regex, $substr, $dummy /*needed for PHP 5.3*/ ) === $length ) {
210-
// Just halve the length so (rounded down to a minimum of 1).
211-
$substr = mb_substr( $substr, 0, max( (int) ( $length / 2 ), 1 ), $encoding );
212-
} else {
213-
// Explode string into an array of UTF-8 chars. Based on core `_mb_substr()` in "wp-includes/compat.php".
214-
$chars = preg_split( '/([\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*)/', $substr, $length + 1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
215-
$cnt = min( count( $chars ), $length );
216-
$width = $length;
217-
218-
for ( $length = 0; $length < $cnt && $width > 0; $length++ ) {
219-
$width -= preg_match( $eaw_regex, $chars[ $length ] ) ? 2 : 1;
220-
}
221-
// Round down to a minimum of 1.
222-
if ( $width < 0 && $length > 1 ) {
223-
$length--;
224-
}
225-
return join( '', array_slice( $chars, 0, $length ) );
226-
}
227-
}
252+
// Bug: not adjusting for combining chars.
253+
$try = mb_substr( $str, $start, $length, $encoding );
254+
if ( 'UTF-8' === $encoding && $is_width ) {
255+
$try = _safe_substr_eaw( $try, $length );
256+
}
257+
if ( ! $test_safe_substr || ( $test_safe_substr & 4 ) ) {
258+
return $try;
228259
}
229-
} else {
230-
// iconv will return PHP notice if non-ascii characters are present in input string
231-
$str = iconv( $encoding ? $encoding : 'ASCII', 'ASCII', $str );
232-
233-
$substr = substr( $str, $start, $length );
234260
}
261+
return substr( $str, $start, $length );
262+
}
263+
264+
/**
265+
* Internal function used by `safe_substr()` to adjust for East Asian double-width chars.
266+
*
267+
* @return string
268+
*/
269+
function _safe_substr_eaw( $str, $length ) {
270+
// Set the East Asian Width regex.
271+
$eaw_regex = get_unicode_regexs( 'eaw' );
235272

236-
return $substr;
273+
// If there's any East Asian double-width chars...
274+
if ( preg_match( $eaw_regex, $str ) ) {
275+
// Note that if the length ends in the middle of a double-width char, the char is excluded, not included.
276+
277+
// See if it's all EAW.
278+
if ( preg_match_all( $eaw_regex, $str, $dummy /*needed for PHP 5.3*/ ) === $length ) {
279+
// Just halve the length (rounded down to a minimum of 1).
280+
$str = mb_substr( $str, 0, max( (int) ( $length / 2 ), 1 ), 'UTF-8' );
281+
} else {
282+
// Explode string into an array of UTF-8 chars. Based on core `_mb_substr()` in "wp-includes/compat.php".
283+
$chars = preg_split( '/([\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*)/', $str, $length + 1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
284+
$cnt = min( count( $chars ), $length );
285+
$width = $length;
286+
287+
for ( $length = 0; $length < $cnt && $width > 0; $length++ ) {
288+
$width -= preg_match( $eaw_regex, $chars[ $length ] ) ? 2 : 1;
289+
}
290+
// Round down to a minimum of 1.
291+
if ( $width < 0 && $length > 1 ) {
292+
$length--;
293+
}
294+
return join( '', array_slice( $chars, 0, $length ) );
295+
}
296+
}
297+
return $str;
237298
}
238299

239300
/**
@@ -266,18 +327,19 @@ function strwidth( $string, $encoding = false ) {
266327
// Allow for selective testing - "1" bit set tests grapheme_strlen(), "2" preg_match_all( '/\X/u' ), "4" mb_strwidth(), "other" safe_strlen().
267328
$test_strwidth = getenv( 'PHP_CLI_TOOLS_TEST_STRWIDTH' );
268329

269-
// Assume UTF-8 - `grapheme_strlen()` will return null if given non-UTF-8 string.
270-
if ( function_exists( 'grapheme_strlen' ) && null !== ( $width = grapheme_strlen( $string ) ) ) {
330+
// Assume UTF-8 if no encoding given - `grapheme_strlen()` will return null if given non-UTF-8 string.
331+
if ( ( ! $encoding || 'UTF-8' === $encoding ) && can_use_icu() && null !== ( $width = grapheme_strlen( $string ) ) ) {
271332
if ( ! $test_strwidth || ( $test_strwidth & 1 ) ) {
272333
return $width + preg_match_all( $eaw_regex, $string, $dummy /*needed for PHP 5.3*/ );
273334
}
274335
}
275-
// Assume UTF-8 - `preg_match_all()` will return false if given non-UTF-8 string (or if PCRE UTF-8 mode is unavailable).
276-
if ( false !== ( $width = preg_match_all( '/\X/u', $string, $dummy /*needed for PHP 5.3*/ ) ) ) {
336+
// Assume UTF-8 if no encoding given - `preg_match_all()` will return false if given non-UTF-8 string.
337+
if ( ( ! $encoding || 'UTF-8' === $encoding ) && can_use_pcre_x() && false !== ( $width = preg_match_all( '/\X/u', $string, $dummy /*needed for PHP 5.3*/ ) ) ) {
277338
if ( ! $test_strwidth || ( $test_strwidth & 2 ) ) {
278339
return $width + preg_match_all( $eaw_regex, $string, $dummy /*needed for PHP 5.3*/ );
279340
}
280341
}
342+
// Legacy encodings and old PHPs will reach here.
281343
if ( function_exists( 'mb_strwidth' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
282344
if ( ! $encoding ) {
283345
$encoding = mb_detect_encoding( $string, null, true /*strict*/ );
@@ -294,6 +356,40 @@ function strwidth( $string, $encoding = false ) {
294356
return safe_strlen( $string, $encoding );
295357
}
296358

359+
/**
360+
* Returns whether ICU is modern enough not to flake out.
361+
*
362+
* @return bool
363+
*/
364+
function can_use_icu() {
365+
static $can_use_icu = null;
366+
367+
if ( null === $can_use_icu ) {
368+
// Choosing ICU 54, Unicode 7.0.
369+
$can_use_icu = defined( 'INTL_ICU_VERSION' ) && version_compare( INTL_ICU_VERSION, '54.1', '>=' ) && function_exists( 'grapheme_strlen' ) && function_exists( 'grapheme_substr' );
370+
}
371+
372+
return $can_use_icu;
373+
}
374+
375+
/**
376+
* Returns whether PCRE Unicode extended grapheme cluster '\X' is available for use.
377+
*
378+
* @return bool
379+
*/
380+
function can_use_pcre_x() {
381+
static $can_use_pcre_x = null;
382+
383+
if ( null === $can_use_pcre_x ) {
384+
// '\X' introduced (as Unicode extended grapheme cluster) in PCRE 8.32 - see https://vcs.pcre.org/pcre/code/tags/pcre-8.32/ChangeLog?view=markup line 53.
385+
// Older versions of PCRE were bundled with PHP <= 5.3.23 & <= 5.4.13.
386+
$pcre_version = substr( PCRE_VERSION, 0, strspn( PCRE_VERSION, '0123456789.' ) ); // Remove any trailing date stuff.
387+
$can_use_pcre_x = version_compare( $pcre_version, '8.32', '>=' ) && false !== @preg_match( '/\X/u', '' );
388+
}
389+
390+
return $can_use_pcre_x;
391+
}
392+
297393
/**
298394
* Get the regexs generated from Unicode data.
299395
*

lib/cli/table/Ascii.php

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ public function setWidths(array $widths) {
4040
$this->_constraintWidth = (int) Shell::columns();
4141
}
4242
$col_count = count( $widths );
43-
$col_borders_count = $col_count * strlen( $this->_characters['border'] );
44-
$table_borders_count = strlen( $this->_characters['border'] ) * 1;
43+
$col_borders_count = $col_count ? ( ( $col_count - 1 ) * strlen( $this->_characters['border'] ) ) : 0;
44+
$table_borders_count = strlen( $this->_characters['border'] ) * 2;
4545
$col_padding_count = $col_count * strlen( $this->_characters['padding'] ) * 2;
4646
$max_width = $this->_constraintWidth - $col_borders_count - $table_borders_count - $col_padding_count;
4747

@@ -63,11 +63,11 @@ public function setWidths(array $widths) {
6363
foreach( $widths as &$width ) {
6464
if ( in_array( $width, $resize_widths ) ) {
6565
$width = $avg + $avg_extra_width;
66-
$extra_width = $extra_width - $avg_extra_width;
6766
array_shift( $resize_widths );
6867
// Last item gets the cake
6968
if ( empty( $resize_widths ) ) {
70-
$width = $width + $extra_width;
69+
$width = 0; // Zero it so not in sum.
70+
$width = $max_width - array_sum( $widths );
7171
}
7272
}
7373
}
@@ -131,7 +131,7 @@ public function row( array $row ) {
131131

132132
foreach( $row as $col => $value ) {
133133

134-
$value = str_replace( PHP_EOL, ' ', $value );
134+
$value = str_replace( array( "\r\n", "\n" ), ' ', $value );
135135

136136
$col_width = $this->_widths[ $col ];
137137
$encoding = function_exists( 'mb_detect_encoding' ) ? mb_detect_encoding( $value, null, true /*strict*/ ) : false;

tests/bootstrap.php

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22

33
require dirname( dirname( __FILE__ ) ) . '/lib/cli/cli.php';
44

5+
/**
6+
* Compatibility with PHPUnit 6+
7+
*/
8+
if ( class_exists( 'PHPUnit\Runner\Version' ) ) {
9+
require_once dirname( __FILE__ ) . '/phpunit6-compat.php';
10+
}
11+
512
function cli_autoload( $className ) {
613
$className = ltrim($className, '\\');
714
$fileName = '';
@@ -20,4 +27,4 @@ function cli_autoload( $className ) {
2027
require dirname( dirname( __FILE__ ) ) . '/lib/' . $fileName;
2128
}
2229

23-
spl_autoload_register( 'cli_autoload' );
30+
spl_autoload_register( 'cli_autoload' );

tests/phpunit6-compat.php

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<?php
2+
// From core "tests/phpunit/includes/phpunit6-compat.php" without `getTickets()` (see https://core.trac.wordpress.org/ticket/39822).
3+
4+
if ( class_exists( 'PHPUnit\Runner\Version' ) && version_compare( PHPUnit\Runner\Version::id(), '6.0', '>=' ) ) {
5+
6+
class_alias( 'PHPUnit\Framework\TestCase', 'PHPUnit_Framework_TestCase' );
7+
class_alias( 'PHPUnit\Framework\Exception', 'PHPUnit_Framework_Exception' );
8+
class_alias( 'PHPUnit\Framework\ExpectationFailedException', 'PHPUnit_Framework_ExpectationFailedException' );
9+
class_alias( 'PHPUnit\Framework\Error\Notice', 'PHPUnit_Framework_Error_Notice' );
10+
class_alias( 'PHPUnit\Framework\Error\Warning', 'PHPUnit_Framework_Error_Warning' );
11+
class_alias( 'PHPUnit\Framework\Test', 'PHPUnit_Framework_Test' );
12+
class_alias( 'PHPUnit\Framework\Warning', 'PHPUnit_Framework_Warning' );
13+
class_alias( 'PHPUnit\Framework\AssertionFailedError', 'PHPUnit_Framework_AssertionFailedError' );
14+
class_alias( 'PHPUnit\Framework\TestSuite', 'PHPUnit_Framework_TestSuite' );
15+
class_alias( 'PHPUnit\Framework\TestListener', 'PHPUnit_Framework_TestListener' );
16+
class_alias( 'PHPUnit\Util\GlobalState', 'PHPUnit_Util_GlobalState' );
17+
class_alias( 'PHPUnit\Util\Getopt', 'PHPUnit_Util_Getopt' );
18+
19+
}

0 commit comments

Comments
 (0)