Skip to content

Commit 9251971

Browse files
kddnewton authored and matzbot committed
[ruby/prism] Add a sample for multiplexing constants
ruby/prism@e265dc5862
1 parent 3affd43 commit 9251971

File tree

1 file changed

+138
-0
lines changed

1 file changed

+138
-0
lines changed
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
# This script indexes the classes and modules within a set of files using the
2+
# saved source functionality.
3+
4+
require "prism"
5+
require "etc"
6+
require "tempfile"
7+
8+
module Indexer
9+
# A class that implements the #enter functionality so that it can be passed to
# the various save* APIs. Every call to #enter is recorded as a
# [scope, node_id, field_name] triple so that the collected pairs can be
# written back to the parent process.
class Repository
  # scope   - Array of constant name parts for the definition currently being
  #           visited (joined with "::" when an entry is recorded).
  # entries - Array of [scope_name, node_id, field_name] triples.
  attr_reader :scope, :entries

  def initialize
    @scope = []
    @entries = []
  end

  # Runs the given block with next_scope appended to the current scope and
  # returns the block's value. The previous scope is restored in an ensure
  # clause so that an exception raised by the block cannot leave a stale
  # scope behind for subsequent entries.
  def with(next_scope)
    previous_scope = scope
    # Array#+ builds a new array, so previous_scope is never mutated.
    @scope = scope + next_scope
    yield
  ensure
    @scope = previous_scope
  end

  # Returns true when nothing has been recorded yet.
  def empty?
    entries.empty?
  end

  # Records a node_id/field_name pair under the current scope.
  def enter(node_id, field_name)
    entries << [scope.join("::"), node_id, field_name]
  end
end
35+
36+
# Walks the AST and, for every class and module definition, saves the location
# of its constant path into the repository under the fully qualified name.
class Visitor < Prism::Visitor
  attr_reader :repository

  def initialize(repository)
    @repository = repository
  end

  def visit_class_node(node)
    record_definition(node)
  end

  def visit_module_node(node)
    record_definition(node)
  end

  private

  # Shared handling for class and module nodes: push the constant path onto
  # the repository scope, save the constant path's location, then descend
  # into the body so that nested definitions are recorded beneath it.
  def record_definition(node)
    constant_path = node.constant_path

    repository.with(constant_path.full_name_parts) do
      constant_path.save_location(repository)
      visit(node.body)
    end
  end
end
59+
60+
# Parses the file at filepath and collects its class and module definitions.
# Returns nil when nothing was found; otherwise returns a single string made
# of the filepath followed by every entry field, all separated by "|", ready
# to be sent to the parent process.
def self.index(filepath)
  repository = Repository.new
  visitor = Visitor.new(repository)
  Prism.parse_file(filepath).value.accept(visitor)
  return if repository.empty?

  # Array#join flattens the nested [scope, node_id, field_name] triples.
  "#{filepath}|#{repository.entries.join("|")}"
end
67+
end
68+
69+
# Indexes every file matching glob using count forked worker processes and
# returns a Hash that maps each constant name to the list of values returned
# by the relocation repository's #enter for that name.
#
# glob  - A pattern understood by Dir[].
# count - The number of worker processes to fork. Defaults to one fewer than
#         the number of processors, but never fewer than one: with zero
#         workers there would be nobody to hand filepaths to and the
#         round-robin modulo below would divide by zero. An explicitly passed
#         count should likewise be at least 1.
def index_glob(glob, count = [Etc.nprocessors - 1, 1].max)
  process_ids = []
  filepath_writers = []
  index_reader, index_writer = IO.pipe

  # For each number in count, fork off a worker that has access to two pipes.
  # The first pipe is the index_writer, to which it writes all of the results of
  # indexing the various files. The second pipe is the filepath_reader, from
  # which it reads the filepaths that it needs to index.
  count.times do
    filepath_reader, filepath_writer = IO.pipe

    process_ids << fork do
      # The worker only reads filepaths and writes results, so close the ends
      # it does not use. This includes the write ends inherited from workers
      # forked earlier; otherwise those workers would not see EOF on their
      # filepath pipe until this worker had exited.
      filepath_writer.close
      filepath_writers.each(&:close)
      index_reader.close

      while (filepath = filepath_reader.gets(chomp: true))
        results = Indexer.index(filepath)
        index_writer.puts(results) if results
      end
    end

    filepath_reader.close
    filepath_writers << filepath_writer
  end

  # The parent never writes results itself. Closing its copy lets the reader
  # below see EOF once every worker has exited.
  index_writer.close

  # In a separate thread, write all of the filepaths to the various worker
  # processes. This is done in a separate thread since puts will eventually
  # block when each of the pipe buffers fills up. We write in a round-robin
  # fashion to the various workers. This could be improved using a work-stealing
  # algorithm, but is fine if you don't end up having a ton of variety in the
  # size of your files.
  writer_thread =
    Thread.new do
      Dir[glob].each_with_index do |filepath, position|
        filepath_writers[position % count].puts(filepath)
      end
    end

  index = Hash.new { |hash, key| hash[key] = [] }

  # In a separate thread, read all of the results from the various worker
  # processes and store them in the index. This is done in a separate thread so
  # that reads and writes can be interleaved. This is important so that the
  # index pipe doesn't fill up and block the writer.
  reader_thread =
    Thread.new do
      while (line = index_reader.gets(chomp: true))
        # Each line is the filepath followed by flattened
        # (name, node_id, field_name) triples, all separated by "|".
        filepath, *entries = line.split("|")
        repository =
          Prism::Relocation
            .filepath(filepath)
            .filepath
            .lines
            .code_unit_columns(Encoding::UTF_16LE)
            .leading_comments

        entries.each_slice(3) do |(name, node_id, field_name)|
          index[name] << repository.enter(Integer(node_id), field_name.to_sym)
        end
      end
    end

  # Closing the filepath pipes once everything has been written signals EOF
  # to the workers so that their read loops terminate.
  writer_thread.join
  filepath_writers.each(&:close)

  reader_thread.join
  index_reader.close

  process_ids.each { |process_id| Process.wait(process_id) }
  index
end
137+
138+
# Index every Ruby file beneath the lib directory that sits two levels above
# the directory containing this script.
lib_glob = File.expand_path("../../lib/**/*.rb", __dir__)
index_glob(lib_glob)

0 commit comments

Comments
 (0)