/**
 * Convert a string into a lower-case, dash-separated slug.
 *
 * Accented Latin letters are reduced to their base letter via NFKD
 * decomposition. Anything that is still not an ASCII word character
 * (A-Z, a-z, 0-9, underscore), whitespace or a dash afterwards is dropped,
 * so characters with no ASCII decomposition (e.g. Cyrillic, Arabic, 'ß',
 * 'ø') simply disappear from the result.
 *
 * @param {string} value - Text to convert.
 * @returns {string} The slug; may be empty if nothing usable remains.
 */
function slugify(value) {
  // Compatibly-decompose and remove combining characters.
  const decomposed = value.normalize('NFKD').replace(/[\u0300-\u036F]/g, '');
  // Remove all non-word characters, leaving spaces and dashes. Trim and convert to lower case.
  const cleaned = decomposed.replace(/[^\w\s\-]+/g, '').trim().toLowerCase();
  // Replace groups of spaces and dashes with a single dash.
  const dashed = cleaned.replace(/[-\s]+/g, '-');
  // trim() only strips whitespace, so input such as 'Hello -' would otherwise
  // yield a slug with a dangling dash; strip dashes left at either end.
  return dashed.replace(/^-+|-+$/g, '');
}
// The following runs some test cases through it. They are designed for a
// different function, so most fail, but it shows how it works.
// This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
// Learn more about bidirectional Unicode characters
// Self-contained harness: runs a set of sample strings through slugify and
// prints the actual vs. expected slug for each as a console table.
(function() {
  /**
   * Convert a string into a lower-case, dash-separated slug.
   *
   * Only ASCII word characters (A-Z, a-z, 0-9, underscore) survive, after
   * accented letters have been reduced to their base letter via NFKD.
   *
   * @param {string} value - Text to convert.
   * @returns {string} The slug; may be empty if nothing usable remains.
   */
  function slugify(value) {
    // Compatibly-decompose and remove combining characters.
    const decomposed = value.normalize('NFKD').replace(/[\u0300-\u036F]/g, '');
    // Remove all non-word characters, leaving spaces and dashes. Trim and convert to lower case.
    const cleaned = decomposed.replace(/[^\w\s\-]+/g, '').trim().toLowerCase();
    // Replace groups of spaces and dashes with a single dash.
    const dashed = cleaned.replace(/[-\s]+/g, '-');
    // trim() only strips whitespace; also strip dashes left at either end.
    return dashed.replace(/^-+|-+$/g, '');
  }

  // Most of these don't pass. It's debatable as to how some should be handled.
  // Running this gives a good idea of how lots of different cases are handled.
  // Each entry is [input, expected slug].
  const testCases = [
    [' a b ', 'a-b'],
    ['Hello', 'hello'],
    ['Hello World', 'hello-world'],
    ['Привет мир', 'privet-mir'],
    ['Привіт світ', 'privit-svit'],
    ['Hello: World', 'hello-world'],
    ['H+e#l1l--o/W§o r.l:d)', 'h-e-l1l-o-w-o-r-l-d'],
    [': World', 'world'],
    ['Hello World!', 'hello-world'],
    ['Ä ä Ö ö Ü ü ß', 'ae-ae-oe-oe-ue-ue-ss'],
    ['Á À á à É È é è Ó Ò ó ò Ñ ñ Ú Ù ú ù', 'a-a-a-a-e-e-e-e-o-o-o-o-n-n-u-u-u-u'],
    ['Â â Ê ê Ô ô Û û', 'a-a-e-e-o-o-u-u'],
    ['Â â Ê ê Ô ô Û 1', 'a-a-e-e-o-o-u-1'],
    ['°¹²³⁴⁵⁶⁷⁸⁹@₀₁₂₃₄₅₆₇₈₉', '0123456789at0123456789'],
    ['Mórë thån wørds', 'more-thaan-woerds'],
    ['Блоґ їжачка', 'blog-jizhachka'],
    ['фильм', 'film'],
    ['драма', 'drama'],
    ['Ύπαρξη Αυτής η Σκουληκομυρμηγκότρυπα', 'iparxi-autis-i-skoulikomirmigkotripa'],
    ['Français Œuf où à', 'francais-oeuf-ou-a'],
    ['هذه هي اللغة العربية', 'hthh-hy-llgh-laarby'],
    ['مرحبا العالم', 'mrhb-laa-lm'],
    ['Één jaar', 'een-jaar'],
    ['tiếng việt rất khó', 'tieng-viet-rat-kho'],
    ['Nguyễn Đăng Khoa', 'nguyen-dang-khoa'],
    ['နှစ်သစ်ကူးတွင် သတ္တဝါတွေ စိတ်ချမ်းသာ ကိုယ်ကျန်းမာ၍ ကောင်းခြင်း အနန္တနှင့် ပြည့်စုံကြပါစေ', 'nhitthitkutwin-thttwatwe-seikkhyaantha-koekyaanmaywae-kaungkhyin-anntnhin-pyisonkypase'],
    ['Zażółć żółcią gęślą jaźń', 'zazolc-zolcia-gesla-jazn'],
    ['Mężny bądź chroń pułk twój i sześć flag', 'mezny-badz-chron-pulk-twoj-i-szesc-flag'],
    ['ერთი ორი სამი ოთხი ხუთი', 'erti-ori-sami-otkhi-khuti'],
    ['अ ऒ न द', 'a-oii-na-tha'],
    ['Æ Ø Å æ ø å', 'ae-oe-aa-ae-oe-aa'],
    ['Übergrößenträger'.repeat(1000), 'uebergroessentraeger'.repeat(1000)],
    // NOTE(review): two test cases were truncated in this copy of the file and
    // cannot be reconstructed; only their opening fragments survive. Restore
    // them from the original source:
    //   ['my
    //   ['hi
    ['Č Ć Ž Š Đ č ć ž š đ', 'c-c-z-s-d-c-c-z-s-d'],
    ['Ą Č Ę Ė Į Š Ų Ū Ž ą č ę ė į š ų ū ž', 'a-c-e-e-i-s-u-u-z-a-c-e-e-i-s-u-u-z'],
  ];

  // One table row per test case, comparing the actual slug to the expected one.
  console.table(testCases.map(function(testCase) {
    const output = slugify(testCase[0]);
    return {
      'Test String': testCase[0],
      'output': output,
      'expected': testCase[1],
      'pass?': output === testCase[1] ? 'Yes' : 'NO!',
    };
  }));
})();