From 9f9072d8e0eaa147b0498cb5c916e079ade9e3a4 Mon Sep 17 00:00:00 2001
From: Tom Ryder
Date: Tue, 21 Mar 2023 18:57:58 +1300
Subject: Match [:space:] and all Unicode spaces

---
 autoload/strip_trailing_whitespace.vim | 49 ++++++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 2 deletions(-)

diff --git a/autoload/strip_trailing_whitespace.vim b/autoload/strip_trailing_whitespace.vim
index f9e9a1e..17dec7a 100644
--- a/autoload/strip_trailing_whitespace.vim
+++ b/autoload/strip_trailing_whitespace.vim
@@ -1,3 +1,48 @@
+" Set the pattern for trailing horizontal whitespace to match and remove. The
+" `[:space:]` character class suffices for almost everything in practice, but
+" at the time of writing it still only includes ASCII characters. I'm writing
+" this because I had a document with lines with a trailing NO-BREAK SPACE
+" (U+00A0) which `[:space:]` doesn't catch, so we'll round out the collection
+" by adding all the Unicode space characters, since that's easy to do.
+"
+"
+" archived:
+"
+" * U+0020: SPACE
+" * U+00A0: NO-BREAK SPACE
+" * U+1680: OGHAM SPACE MARK
+" * U+180E: MONGOLIAN VOWEL SEPARATOR
+" * U+2000: EN QUAD
+" * U+2001: EM QUAD
+" * U+2002: EN SPACE
+" * U+2003: EM SPACE
+" * U+2004: THREE-PER-EM SPACE
+" * U+2005: FOUR-PER-EM SPACE
+" * U+2006: SIX-PER-EM SPACE
+" * U+2007: FIGURE SPACE
+" * U+2008: PUNCTUATION SPACE
+" * U+2009: THIN SPACE
+" * U+200A: HAIR SPACE
+" * U+200B: ZERO WIDTH SPACE
+" * U+202F: NARROW NO-BREAK SPACE
+" * U+205F: MEDIUM MATHEMATICAL SPACE
+" * U+3000: IDEOGRAPHIC SPACE
+" * U+FEFF: ZERO WIDTH NO-BREAK SPACE
+"
+let s:pattern
+      \ = '['
+      \ . '[:space:]'
+      \ . '\u0020'
+      \ . '\u00A0'
+      \ . '\u1680'
+      \ . '\u180E'
+      \ . '\u2000-\u200B'
+      \ . '\u202F'
+      \ . '\u205F'
+      \ . '\u3000'
+      \ . '\uFEFF'
+      \ . ']\+$'
+
 " Wrapper function to strip both horizontal and vertical trailing whitespace,
 " return the cursor to its previous position, and report changes
 function! strip_trailing_whitespace#(start, end) abort
@@ -46,8 +91,8 @@ function! s:StripHorizontal(start, end) abort
 
     " If the line has trailing whitespace, strip it off and bump the count
     let line = getline(num)
-    if line =~# '\s\+$'
-      call setline(num, substitute(line, '\s*$', '', ''))
+    if line =~# s:pattern
+      call setline(num, substitute(line, s:pattern, '', ''))
       let stripped = stripped + 1
     endif
 
-- 
cgit v1.2.3