Category Archives: Node.js

JavaScript ECMAScript 6 slugify

I wanted to slugify a string for use in a URL using JavaScript.  There are lots of implementations out there, but none of them are very clean.  I have taken inspiration from the Django implementation to create the following.  It uses String.prototype.normalize(), which is part of ES6, so it won’t work in environments without ES6 support.  I’m using Node.js, so I’m happy with this, and it’s very neat.  
/**
 * Convert a string into a lower-case, dash-separated slug suitable for a URL.
 *
 * Accented Latin letters are reduced to their base letter (for example "é"
 * becomes "e"). Because `\w` only matches ASCII letters, digits and the
 * underscore, any character that does not decompose to one of those (for
 * example Cyrillic or emoji) is dropped rather than transliterated.
 *
 * @param {string} value - The text to slugify.
 * @returns {string} The slug, with no leading or trailing dashes; may be empty.
 */
function slugify(value) {
  return value
    // Compatibility-decompose, then drop the combining diacritical marks.
    .normalize('NFKD')
    .replace(/[\u0300-\u036F]/g, '')
    // Remove everything except word characters, whitespace and dashes.
    .replace(/[^\w\s-]+/g, '')
    .toLowerCase()
    // Collapse each run of whitespace and dashes into a single dash.
    .replace(/[-\s]+/g, '-')
    // Strip separators left at either end (this also covers surrounding whitespace),
    // so input such as "--Hello World--" does not produce "-hello-world-".
    .replace(/^-+|-+$/g, '');
}
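The following runs some test cases through it.  The expected values were written for a different slugify implementation (one that transliterates non-Latin characters), so most of the cases fail, but the output shows how this function handles a wide range of inputs.  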
(function() {
/**
 * Convert a string into a lower-case, dash-separated slug suitable for a URL.
 *
 * Accented Latin letters are reduced to their base letter (for example "é"
 * becomes "e"). Because `\w` only matches ASCII letters, digits and the
 * underscore, any character that does not decompose to one of those (for
 * example Cyrillic or emoji) is dropped rather than transliterated.
 *
 * @param {string} value - The text to slugify.
 * @returns {string} The slug, with no leading or trailing dashes; may be empty.
 */
function slugify(value) {
  return value
    // Compatibility-decompose, then drop the combining diacritical marks.
    .normalize('NFKD')
    .replace(/[\u0300-\u036F]/g, '')
    // Remove everything except word characters, whitespace and dashes.
    .replace(/[^\w\s-]+/g, '')
    .toLowerCase()
    // Collapse each run of whitespace and dashes into a single dash.
    .replace(/[-\s]+/g, '-')
    // Strip separators left at either end (this also covers surrounding whitespace),
    // so input such as "--Hello World--" does not produce "-hello-world-".
    .replace(/^-+|-+$/g, '');
}
// Each entry is [input, expected slug]. Most of these don't pass, and it's
// debatable how some of them should be handled. Running them still gives a good
// idea of how lots of different cases are handled.
const testCases = [
[' a b ', 'a-b'],
['Hello', 'hello'],
['Hello World', 'hello-world'],
['Привет мир', 'privet-mir'],
['Привіт світ', 'privit-svit'],
['Hello: World', 'hello-world'],
['H+e#l1l--o/W§o r.l:d)', 'h-e-l1l-o-w-o-r-l-d'],
[': World', 'world'],
['Hello World!', 'hello-world'],
['Ä ä Ö ö Ü ü ß', 'ae-ae-oe-oe-ue-ue-ss'],
['Á À á à É È é è Ó Ò ó ò Ñ ñ Ú Ù ú ù', 'a-a-a-a-e-e-e-e-o-o-o-o-n-n-u-u-u-u'],
['Â â Ê ê Ô ô Û û', 'a-a-e-e-o-o-u-u'],
['Â â Ê ê Ô ô Û 1', 'a-a-e-e-o-o-u-1'],
['°¹²³⁴⁵⁶⁷⁸⁹@₀₁₂₃₄₅₆₇₈₉', '0123456789at0123456789'],
['Mórë thån wørds', 'more-thaan-woerds'],
['Блоґ їжачка', 'blog-jizhachka'],
['фильм', 'film'],
['драма', 'drama'],
['Ύπαρξη Αυτής η Σκουληκομυρμηγκότρυπα', 'iparxi-autis-i-skoulikomirmigkotripa'],
['Français Œuf où à', 'francais-oeuf-ou-a'],
['هذه هي اللغة العربية', 'hthh-hy-llgh-laarby'],
['مرحبا العالم', 'mrhb-laa-lm'],
['Één jaar', 'een-jaar'],
['tiếng việt rất khó', 'tieng-viet-rat-kho'],
['Nguyễn Đăng Khoa', 'nguyen-dang-khoa'],
['နှစ်သစ်ကူးတွင် သတ္တဝါတွေ စိတ်ချမ်းသာ ကိုယ်ကျန်းမာ၍ ကောင်းခြင်း အနန္တနှင့် ပြည့်စုံကြပါစေ', 'nhitthitkutwin-thttwatwe-seikkhyaantha-koekyaanmaywae-kaungkhyin-anntnhin-pyisonkypase'],
['Zażółć żółcią gęślą jaźń', 'zazolc-zolcia-gesla-jazn'],
['Mężny bądź chroń pułk twój i sześć flag', 'mezny-badz-chron-pulk-twoj-i-szesc-flag'],
['ერთი ორი სამი ოთხი ხუთი', 'erti-ori-sami-otkhi-khuti'],
['अ ऒ न द', 'a-oii-na-tha'],
['Æ Ø Å æ ø å', 'ae-oe-aa-ae-oe-aa'],
['Übergrößenträger'.repeat(1000), 'uebergroessentraeger'.repeat(1000)],
['my🎉'.repeat(5000), 'my-'.repeat(5000)],
['hi🇦🇹'.repeat(5000), 'hi-'.repeat(5000)],
['Č Ć Ž Š Đ č ć ž š đ', 'c-c-z-s-d-c-c-z-s-d'],
['Ą Č Ę Ė Į Š Ų Ū Ž ą č ę ė į š ų ū ž', 'a-c-e-e-i-s-u-u-z-a-c-e-e-i-s-u-u-z'],
];
// Run every case through slugify and print the input, the actual output,
// the expected output and whether the two match exactly.
console.table(testCases.map(([input, expected]) => {
  const output = slugify(input);
  return {
    'Test String': input,
    'output': output,
    'expected': expected,
    'pass?': output === expected ? 'Yes' : 'NO!',
  };
}));
})();