Editor: word count: exclude more characters

Also, exclude these characters only for the `words` type; they should still be counted for the other types (`characters` and `all`).
Add the ASCIIOnly option to the uglify config so that escaped Unicode characters are preserved in the minified output.

See #30966. Fixes #27391.



git-svn-id: https://develop.svn.wordpress.org/trunk@33292 602fd350-edb4-49c9-b593-d223f7449a82
This commit is contained in:
Ella Iseulde Van Dorpe 2015-07-16 09:44:43 +00:00
parent 4b975d8278
commit 129a3affe5
3 changed files with 53 additions and 4 deletions

View File

@ -399,6 +399,9 @@ module.exports = function(grunt) {
}
},
uglify: {
options: {
ASCIIOnly: true
},
core: {
expand: true,
cwd: SOURCE_DIR,

View File

@ -14,7 +14,42 @@
WordCounter.prototype.settings = {
HTMLRegExp: /<\/?[a-z][^>]*?>/gi,
spaceRegExp: /&nbsp;|&#160;/gi,
removeRegExp: /[0-9.(),;:!?%#$¿'"_+=\\\/-]+/g,
connectorRegExp: /--|\u2014/gi,
removeRegExp: new RegExp( [
'[',
// Basic Latin (extract)
'\u0021-\u0040\u005B-\u0060\u007B-\u007E',
// Latin-1 Supplement (extract)
'\u0080-\u00BF\u00D7\u00F7',
// General Punctuation
// Superscripts and Subscripts
// Currency Symbols
// Combining Diacritical Marks for Symbols
// Letterlike Symbols
// Number Forms
// Arrows
// Mathematical Operators
// Miscellaneous Technical
// Control Pictures
// Optical Character Recognition
// Enclosed Alphanumerics
// Box Drawing
// Block Elements
// Geometric Shapes
// Miscellaneous Symbols
// Dingbats
// Miscellaneous Mathematical Symbols-A
// Supplemental Arrows-A
// Braille Patterns
// Supplemental Arrows-B
// Miscellaneous Mathematical Symbols-B
// Supplemental Mathematical Operators
// Miscellaneous Symbols and Arrows
'\u2000-\u2BFF',
// Supplemental Punctuation
'\u2E00-\u2E7F',
']'
].join( '' ), 'g' ),
wordsRegExp: /\S\s+/g,
charactersRegExp: /\S/g,
allRegExp: /[^\f\n\r\t\v\u00ad\u2028\u2029]/g,
@ -31,7 +66,11 @@
text = text.replace( this.settings.HTMLRegExp, '\n' );
text = text.replace( this.settings.spaceRegExp, ' ' );
text = text.replace( this.settings.removeRegExp, '' );
if ( type === 'words' ) {
text = text.replace( this.settings.connectorRegExp, ' ' );
text = text.replace( this.settings.removeRegExp, '' );
}
text = text.match( this.settings[ type + 'RegExp' ] );

View File

@ -33,9 +33,16 @@
},
{
message: 'Punctuation.',
string: 'It\'s two three... 4?',
string: 'It\'s two three \u2026 4?',
words: 3,
characters: 11,
characters: 15,
all: 19
},
{
message: 'Em dash.',
string: 'one\u2014two--three',
words: 3,
characters: 14,
all: 14
}
], function( test ) {