diff --git a/Gruntfile.js b/Gruntfile.js index f2a0c0cd0a..ee2fe596c4 100644 --- a/Gruntfile.js +++ b/Gruntfile.js @@ -399,6 +399,9 @@ module.exports = function(grunt) { } }, uglify: { + options: { + ASCIIOnly: true + }, core: { expand: true, cwd: SOURCE_DIR, diff --git a/src/wp-admin/js/word-count.js b/src/wp-admin/js/word-count.js index 40c2934b9e..0740bb6ef1 100644 --- a/src/wp-admin/js/word-count.js +++ b/src/wp-admin/js/word-count.js @@ -14,7 +14,42 @@ WordCounter.prototype.settings = { HTMLRegExp: /<\/?[a-z][^>]*?>/gi, spaceRegExp: / | /gi, - removeRegExp: /[0-9.(),;:!?%#$¿'"_+=\\\/-]+/g, + connectorRegExp: /--|\u2014/gi, + removeRegExp: new RegExp( [ + '[', + // Basic Latin (extract) + '\u0021-\u0040\u005B-\u0060\u007B-\u007E', + // Latin-1 Supplement (extract) + '\u0080-\u00BF\u00D7\u00F7', + // General Punctuation + // Superscripts and Subscripts + // Currency Symbols + // Combining Diacritical Marks for Symbols + // Letterlike Symbols + // Number Forms + // Arrows + // Mathematical Operators + // Miscellaneous Technical + // Control Pictures + // Optical Character Recognition + // Enclosed Alphanumerics + // Box Drawing + // Block Elements + // Geometric Shapes + // Miscellaneous Symbols + // Dingbats + // Miscellaneous Mathematical Symbols-A + // Supplemental Arrows-A + // Braille Patterns + // Supplemental Arrows-B + // Miscellaneous Mathematical Symbols-B + // Supplemental Mathematical Operators + // Miscellaneous Symbols and Arrows + '\u2000-\u2BFF', + // Supplemental Punctuation + '\u2E00-\u2E7F', + ']' + ].join( '' ), 'g' ), wordsRegExp: /\S\s+/g, charactersRegExp: /\S/g, allRegExp: /[^\f\n\r\t\v\u00ad\u2028\u2029]/g, @@ -31,7 +66,11 @@ text = text.replace( this.settings.HTMLRegExp, '\n' ); text = text.replace( this.settings.spaceRegExp, ' ' ); - text = text.replace( this.settings.removeRegExp, '' ); + + if ( type === 'words' ) { + text = text.replace( this.settings.connectorRegExp, ' ' ); + text = text.replace( this.settings.removeRegExp, '' ); + } text = text.match( this.settings[ type + 'RegExp' ] ); diff --git a/tests/qunit/wp-admin/js/word-count.js b/tests/qunit/wp-admin/js/word-count.js index 5d3664f583..c3c147d913 100644 --- a/tests/qunit/wp-admin/js/word-count.js +++ b/tests/qunit/wp-admin/js/word-count.js @@ -33,9 +33,16 @@ }, { message: 'Punctuation.', - string: 'It\'s two three... 4?', + string: 'It\'s two three \u2026 4?', words: 3, - characters: 11, + characters: 15, + all: 19 + }, + { + message: 'Em dash.', + string: 'one\u2014two--three', + words: 3, + characters: 14, all: 14 } ], function( test ) {