-
Notifications
You must be signed in to change notification settings - Fork 485
Expand file tree
/
Copy pathparse.ts
More file actions
124 lines (107 loc) · 3.49 KB
/
parse.ts
File metadata and controls
124 lines (107 loc) · 3.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import * as fs from 'fs'
import * as path from 'path'
import Parser from 'tree-sitter'
import { getLanguageConfig } from './languages'
// When true, getFileTokenScores logs timing/external-call counts and writes
// the computed scores to `exported-tokens.json` in the project root, and
// parseTokens logs parse errors instead of silently ignoring them.
export const DEBUG_PARSING = false
// Identifiers that are never given a score by getFileTokenScores.
const IGNORE_TOKENS = ['__init__', '__post_init__', '__call__', 'constructor']
/**
 * Computes a relevance score for every identifier defined in the given files.
 *
 * Each identifier in a file starts from a per-file base score of
 * `0.8 ** depth * sqrt(numLines / (identifierCount + 1))`, where `depth` is the
 * number of segments in the file's directory path (as joined with
 * `projectRoot`). Afterwards each score is multiplied by
 * `1 + ln(1 + n)`, where `n` is how many times the token was called from files
 * that do not define it themselves.
 *
 * @param projectRoot Directory that every entry of `filePaths` is joined onto.
 * @param filePaths File paths, relative to `projectRoot`, to analyse.
 * @returns A map from each processed file path to its `{ token: score }` map.
 */
export async function getFileTokenScores(
  projectRoot: string,
  filePaths: string[]
): Promise<{ [filePath: string]: { [token: string]: number } }> {
  const startTime = Date.now()
  const tokenScores: { [filePath: string]: { [token: string]: number } } = {}
  // A Map (not a plain object) so that tokens named like Object.prototype
  // members ("toString", "valueOf", "constructor", ...) are counted as
  // ordinary keys instead of resolving to the inherited functions.
  const externalCalls = new Map<string, number>()
  const hasOwn = Object.prototype.hasOwnProperty

  for (const filePath of filePaths) {
    const fullPath = path.join(projectRoot, filePath)
    // NOTE(review): parseTokens awaits getLanguageConfig; if it returns a
    // Promise this check is always true and unsupported files simply end up
    // with an empty score map. Left as-is to keep the result shape stable --
    // confirm the intended behaviour before awaiting here.
    if (!!getLanguageConfig(fullPath)) {
      const { identifiers, calls, numLines } = await parseTokens(fullPath)

      const tokenScoresForFile: { [token: string]: number } = {}
      tokenScores[filePath] = tokenScoresForFile

      // Depth counts the segments of the full directory path, so deeper files
      // receive geometrically smaller base scores.
      const depth = path.dirname(fullPath).split(path.sep).length
      const tokenBaseScore =
        0.8 ** depth * Math.sqrt(numLines / (identifiers.length + 1))

      for (const identifier of identifiers) {
        if (!IGNORE_TOKENS.includes(identifier)) {
          tokenScoresForFile[identifier] = tokenBaseScore
        }
      }

      for (const call of calls) {
        // Only calls to tokens NOT defined in this same file count as
        // external. Use an own-property check so inherited members such as
        // `toString` are not mistaken for locally defined tokens.
        if (!hasOwn.call(tokenScoresForFile, call)) {
          externalCalls.set(call, (externalCalls.get(call) ?? 0) + 1)
        }
      }
    }
  }

  // Boost every token by how often other files call it.
  for (const scores of Object.values(tokenScores)) {
    for (const token of Object.keys(scores)) {
      const numCalls = externalCalls.get(token) ?? 0
      scores[token] *= 1 + Math.log(1 + numCalls)
    }
  }

  if (DEBUG_PARSING) {
    const endTime = Date.now()
    console.log(`Parsed ${filePaths.length} files in ${endTime - startTime}ms`)
    console.log('externalCalls', Object.fromEntries(externalCalls))

    // Save exportedTokens to a file
    const exportedTokensFilePath = path.join(
      projectRoot,
      'exported-tokens.json'
    )
    try {
      fs.writeFileSync(
        exportedTokensFilePath,
        JSON.stringify(tokenScores, null, 2)
      )
      console.log(`Exported tokens saved to ${exportedTokensFilePath}`)
    } catch (error) {
      console.error(`Failed to save exported tokens to file: ${error}`)
    }
  }

  return tokenScores
}
/**
 * Reads a source file and extracts the identifiers it defines and the
 * identifiers it calls, using the language configuration resolved for the
 * file's path.
 *
 * @param filePath Path of the file to read and parse.
 * @returns `numLines` (newline count plus one), the captured `identifiers`,
 *   and the captured `calls`. If no language configuration is found, or
 *   reading/parsing throws, returns `numLines: 0` with empty arrays; the error
 *   is only logged when `DEBUG_PARSING` is enabled.
 */
export async function parseTokens(filePath: string) {
  const languageConfig = await getLanguageConfig(filePath)
  if (languageConfig) {
    const { parser, query } = languageConfig
    try {
      const sourceCode = fs.readFileSync(filePath, 'utf8')
      // Parenthesised on purpose: `a ?? 0 + 1` parses as `a ?? (0 + 1)`, which
      // would drop the "+ 1" whenever the file contains at least one newline.
      const numLines = (sourceCode.match(/\n/g)?.length ?? 0) + 1
      const parseResults = parseFile(parser, query, sourceCode)
      const identifiers = parseResults.identifier
      const calls = parseResults['call.identifier']
      return {
        numLines,
        identifiers: identifiers ?? [],
        calls: calls ?? [],
      }
    } catch (e) {
      // Best-effort: a file that cannot be read or parsed contributes nothing.
      if (DEBUG_PARSING) {
        console.error(`Error parsing query: ${e}`)
        console.log(filePath)
      }
    }
  }
  return {
    numLines: 0,
    identifiers: [] as string[],
    calls: [] as string[],
  }
}
/**
 * Parses `sourceCode` with the given parser, runs `query` against the root of
 * the resulting tree, and groups the text of every captured node by its
 * capture name (in capture order).
 *
 * @param parser Parser used to build the syntax tree.
 * @param query Query whose captures are collected.
 * @param sourceCode Text to parse.
 * @returns A map from capture name to the texts of the nodes captured under it.
 */
function parseFile(
  parser: Parser,
  query: Parser.Query,
  sourceCode: string
): { [key: string]: string[] } {
  const parseOptions = { bufferSize: 1024 * 1024 }
  const syntaxTree = parser.parse(sourceCode, undefined, parseOptions)

  const textsByCapture: { [key: string]: string[] } = {}
  query.captures(syntaxTree.rootNode).forEach(({ name, node }) => {
    // Create the bucket on first sight of a capture name, then append.
    const bucket = textsByCapture[name] || (textsByCapture[name] = [])
    bucket.push(node.text)
  })
  return textsByCapture
}