rushstack/api-extractor/src/Tokenizer.ts at gcb-typescript-2.4.1-patch · JavaScriptExpert/rushstack · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
import Token, { TokenType } from './Token';
import TypeScriptHelpers from './TypeScriptHelpers';

/**
 * Handles the tokenization of a JSDoc comment.
 *
 * The doc comment text is split into block tags, inline tags and plain text as soon as the
 * Tokenizer is constructed. The resulting Tokens are then consumed front to back through
 * peekToken() and getToken(). Problems found while tokenizing are passed to the reportError
 * callback and the offending inline tag is dropped from the stream.
 */
export default class Tokenizer {

  /**
   * Match JsDoc block tags and inline tags
   * Example "@a @b@c d@e @f {whatever} {@link a} { @something } \@g" => ["@a", "@f", "{@link a}", "{ @something }"]
   */
  private static _jsdocTagsRegex: RegExp = /{\s*@(\\{|\\}|[^{}])*}|(?:^|\s)(\@[a-z_]+)(?=\s|$)/gi;

  /**
   * Matches a curly brace that is not preceded by a backslash. It is applied to the content of
   * an inline tag, which always starts with '\@', so every brace has a preceding character.
   * The pattern deliberately has no "g" flag, which keeps RegExp.test() free of lastIndex state.
   */
  private static _unescapedCurlyBraceRegex: RegExp = /[^\\][{}]/;

  /**
   * List of Tokens that have been tokenized.
   */
  private _tokenStream: Token[];

  /**
   * Callback that receives a message for every problem found while tokenizing.
   */
  private _reportError: (message: string) => void;

  /**
   * @param docs - the doc comment text to tokenize; an empty string yields an empty token stream
   * @param reportError - invoked with a message for every invalid inline tag that is encountered
   */
  constructor(docs: string, reportError: (message: string) => void) {
    // The callback has to be assigned first, because tokenizing may already report errors.
    this._reportError = reportError;
    this._tokenStream = this._tokenizeDocs(docs);
  }

  /**
   * Converts a doc comment string into an array of Tokens. This processing is done so that docs
   * can be processed more strictly.
   * Example: "This is a JsDoc description with a {@link URL} and more text. \@summary example \@public"
   * => [
   *  {tokenType: 'text', parameter: 'This is a JsDoc description with a'},
   *  {tokenType: '@link', parameter: 'URL'},
   *  {tokenType: '\@summary', parameter: ''},
   *  {tokenType: 'text', parameter: 'example'},
   *  {tokenType: '\@public', parameter: ''}
   * ]
   *
   * @param docs - the doc comment text to tokenize
   * @returns the Tokens in document order; an empty array when docs is empty. Entries that look
   * like an inline tag but are invalid are reported through the error callback and skipped.
   */
  protected _tokenizeDocs(docs: string): Token[] {
    if (!docs) {
      return [];
    }
    const docEntries: string[] = TypeScriptHelpers.splitStringWithRegEx(docs, Tokenizer._jsdocTagsRegex);
    // Collapse white space and drop empty entries before classifying them.
    const sanitizedEntries: string[] = this._sanitizeDocEntries(docEntries);

    // Turn each sanitized doc string into a Token object.
    const tokens: Token[] = [];
    for (const value of sanitizedEntries) {
      let token: Token;
      if (value.charAt(0) === '@') {
        token = new Token(TokenType.Tag, value);
      } else if (value.charAt(0) === '{' && value.charAt(value.length - 1) === '}') {
        token = this._tokenizeInline(value); // undefined when the inline tag is invalid
      } else {
        token = new Token(TokenType.Text, '', value);
      }

      if (token) {
        tokens.push(token);
      }
    }

    return tokens;
  }

  /**
   * Parse an inline tag and returns the Token for it if it is a valid inline tag.
   * Example '{@link https://bing.com | Bing}' => '{type: 'Inline', tag: '@link', text: 'https://bing.com | Bing'}'
   *
   * @param docEntry - the complete inline tag, including the surrounding curly braces
   * @returns the inline Token, or undefined when the entry is not a valid inline tag. In that
   * case the reason has been passed to the error callback.
   */
  protected _tokenizeInline(docEntry: string): Token {
    if (docEntry.charAt(0) !== '{' || docEntry.charAt(docEntry.length - 1) !== '}') {
      this._reportError('All inline tags should be wrapped inside curly braces.');
      return undefined;
    }
    const tokenContent: string = docEntry.slice(1, docEntry.length - 1).trim();

    if (tokenContent.charAt(0) !== '@') {
      this._reportError('Content of inline tags should start with a leading \'@\'.');
      return undefined;
    }

    // The surrounding braces are already stripped, so any brace that is left has to be escaped.
    // This includes a brace at the very end of the content.
    if (Tokenizer._unescapedCurlyBraceRegex.test(tokenContent)) {
      this._reportError(`Unescaped '{' or '}' detected inside an inline tag. ` +
        'Use \\ to escape curly braces inside inline tags.');
      return undefined;
    }

    // Split the inline tag content with whitespace
    // Example: '@link    https://bing.com  |  Bing' => ['@link', 'https://bing.com', '|', 'Bing']
    const tokenChunks: string[] = tokenContent.split(/\s+/);
    const tagName: string = tokenChunks[0];
    const parameters: string[] = tokenChunks.slice(1);

    if (tagName === '@link') {
      if (parameters.length < 1) {
        this._reportError('Too few parameters for @link inline tag.');
        return undefined;
      }
      return new Token(TokenType.Inline, '@link', parameters.join(' '));
    }

    if (tagName === '@inheritdoc') {
      // The parameters are optional for this tag, so the text may be empty.
      return new Token(TokenType.Inline, '@inheritdoc', parameters.join(' '));
    }

    this._reportError('Unknown tag name for inline tag.');
    return undefined;
  }

  /**
   * Returns the next Token without removing it from the stream, or undefined when no Tokens
   * are left.
   */
  public peekToken(): Token {
    return (!this._tokenStream || this._tokenStream.length === 0) ? undefined : this._tokenStream[0];
  }

  /**
   * Removes the next Token from the stream and returns it, or returns undefined when no Tokens
   * are left.
   */
  public getToken(): Token {
    return (!this._tokenStream || this._tokenStream.length === 0) ? undefined : this._tokenStream.shift();
  }

  /**
   * Trims whitespaces on either end of the entry (which is just a string within the doc comments),
   * replaces every run of whitespace (including \r and \n) with a single space, and removes
   * empty entries.
   *
   * @param docEntries - Array of doc strings to be sanitized
   * @returns a new array with the sanitized, non-empty entries in their original order
   */
  private _sanitizeDocEntries(docEntries: string[]): string[] {
    const result: string[] = [];
    for (const entry of docEntries) {
      const sanitized: string = entry.replace(/\s+/g, ' ').trim();
      if (sanitized !== '') {
        result.push(sanitized);
      }
    }

    return result;
  }
}