Blame view

node_modules/wcwidth/index.js 3.07 KB
ce4c83ff   wxy   初始提交
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
  "use strict"
  
  var defaults = require('defaults')
  var combining = require('./combining')
  
  // Fallback option values: `nul` is the width reported for U+0000 and
  // `control` the width reported for C0/C1 control characters and DEL.
  var DEFAULTS = { nul: 0, control: 0 }
  
  /**
   * Module entry point: measures `str` with the module-level DEFAULTS options.
   *
   * @param {*} str - value forwarded unchanged to wcswidth()
   * @returns {number} whatever wcswidth(str, DEFAULTS) returns
   */
  module.exports = function wcwidth(str) {
    return wcswidth(str, DEFAULTS)
  }
  
  /**
   * Builds a width function bound to caller-supplied options.
   *
   * @param {Object} [opts] - option overrides; a falsy value is replaced by a
   *   fresh `{}` before being handed to defaults() together with DEFAULTS
   * @returns {function(*): number} function that forwards its argument and the
   *   options returned by defaults() to wcswidth()
   */
  module.exports.config = function(opts) {
    var settings = defaults(opts || {}, DEFAULTS)

    return function wcwidth(str) {
      return wcswidth(str, settings)
    }
  }
  
  /*
   *  The following functions define the column width of an ISO 10646
   *  character as follows:
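   *  - The null character (U+0000) has a column width of `opts.nul`
   *    (0 by default).
   *  - Other C0/C1 control characters and DEL have a column width of
   *    `opts.control` (0 by default). A negative value makes the width of
   *    any string containing such a character -1.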
   *  - Non-spacing and enclosing combining characters (general category
   *    code Mn or Me in the
   *    Unicode database) have a column width of 0.
   *  - SOFT HYPHEN (U+00AD) has a column width of 1.
   *  - Other format characters (general category code Cf in the Unicode
   *    database) and ZERO WIDTH
   *    SPACE (U+200B) have a column width of 0.
   *  - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
   *    have a column width of 0.
   *  - Spacing characters in the East Asian Wide (W) or East Asian
   *    Full-width (F) category as
   *    defined in Unicode Technical Report #11 have a column width of 2.
   *  - All remaining characters (including all printable ISO 8859-1 and
   *    WGL4 characters, Unicode control characters, etc.) have a column
   *    width of 1.
   *  This implementation assumes that characters are encoded in ISO 10646.
  */
  
  /**
   * Sums the column widths of every UTF-16 code unit in `str`.
   *
   * The string is walked with charCodeAt(), so a character outside the Basic
   * Multilingual Plane is measured as two separate surrogate code units.
   *
   * @param {*} str - string to measure; any non-string value is passed
   *   straight to wcwidth() as if it were a single character code
   * @param {Object} opts - options object forwarded to wcwidth()
   * @returns {number} total width, or -1 as soon as one unit reports a
   *   negative width
   */
  function wcswidth(str, opts) {
    if (typeof str !== 'string') {
      return wcwidth(str, opts)
    }

    var total = 0
    var index = 0

    while (index < str.length) {
      var width = wcwidth(str.charCodeAt(index), opts)

      // A single negative width poisons the whole string.
      if (width < 0) {
        return -1
      }

      total += width
      index += 1
    }

    return total
  }
  
  /**
   * Column width of a single character code.
   *
   * @param {number} ucs - character code (wcswidth() passes UTF-16 code units)
   * @param {Object} opts - supplies `nul` (returned for 0) and `control`
   *   (returned for codes below 32 and for 0x7f-0x9f)
   * @returns {number} opts.nul, opts.control, 0 for codes found by bisearch(),
   *   2 for the wide ranges listed below, otherwise 1
   */
  function wcwidth(ucs, opts) {
    // NUL and the 8-bit control ranges take their width from the options.
    if (ucs === 0) {
      return opts.nul
    }
    if (ucs < 32 || (ucs >= 0x7f && ucs < 0xa0)) {
      return opts.control
    }

    // Codes found in the combining table occupy no column.
    if (bisearch(ucs)) {
      return 0
    }

    // From here on ucs is neither a combining nor a C0/C1 control character;
    // it is double-width only when it falls in one of these ranges.
    if (
      ucs >= 0x1100 &&
      (ucs <= 0x115f ||                            // Hangul Jamo init. consonants
        ucs == 0x2329 ||
        ucs == 0x232a ||
        (ucs >= 0x2e80 && ucs <= 0xa4cf && ucs != 0x303f) || // CJK ... Yi
        (ucs >= 0xac00 && ucs <= 0xd7a3) ||        // Hangul Syllables
        (ucs >= 0xf900 && ucs <= 0xfaff) ||        // CJK Compatibility Ideographs
        (ucs >= 0xfe10 && ucs <= 0xfe19) ||        // Vertical forms
        (ucs >= 0xfe30 && ucs <= 0xfe6f) ||        // CJK Compatibility Forms
        (ucs >= 0xff00 && ucs <= 0xff60) ||        // Fullwidth Forms
        (ucs >= 0xffe0 && ucs <= 0xffe6) ||
        (ucs >= 0x20000 && ucs <= 0x2fffd) ||
        (ucs >= 0x30000 && ucs <= 0x3fffd))
    ) {
      return 2
    }

    return 1
  }
  
  /**
   * Binary search over the `combining` table of [first, last] pairs.
   *
   * @param {number} ucs - character code to look up
   * @returns {boolean} true when `ucs` lies inside one of the table's ranges
   */
  function bisearch(ucs) {
    var last = combining.length - 1

    // Quick rejection: outside the span covered by the whole table.
    if (ucs < combining[0][0] || ucs > combining[last][1]) {
      return false
    }

    var lo = 0
    var hi = last

    while (hi >= lo) {
      var pivot = Math.floor((lo + hi) / 2)
      var range = combining[pivot]

      if (ucs > range[1]) {
        lo = pivot + 1
      } else if (ucs < range[0]) {
        hi = pivot - 1
      } else {
        return true
      }
    }

    return false
  }