
423 lines
14 KiB

const { spawn } = require('child_process');
const path = require('path');
// `ripgrep` and `scandal` have a different way of handling the trailing and leading
// context lines:
// * `scandal` returns all the context lines that are requested, even if they include
// previous or future results.
// * `ripgrep` is a bit smarter and only returns the context lines that do not correspond
// to any result (in a similar way that is shown in the find and replace UI).
// For example, if we have the following file and we request two leading context lines:
// line 1
// line 2
// result 1
// result 2
// line 3
// line 4
// `scandal` will return two results:
// * First result with `['line 1', 'line 2']` as leading context.
// * Second result with `['line 2', 'result 1']` as leading context.
// `ripgrep` on the other hand will return a JS object that is more similar to the way that
// the results are shown:
// [
// {type: 'begin', ...},
// {type: 'context', ...}, // context for line 1
// {type: 'context', ...}, // context for line 2
// {type: 'match', ...}, // result 1
// {type: 'match', ...}, // result 2
// {type: 'end', ...},
// ]
// In order to keep backwards compatibility, and avoid doing changes to the find and replace logic,
// for `ripgrep` we need to keep some state with the context lines (and matches) to be able to build
// a data structure that has the same behaviour as the `scandal` one.
// We use the `pendingLeadingContext` array to generate the leading context. This array gets mutated
// to always contain the leading `n` lines and is cloned every time a match is found. It's currently
// implemented as a standard array but we can easily change it to use a linked list if we find that
// the shift operations are slow.
// We use the `pendingTrailingContexts` Set to generate the trailing context. Since the trailing
// context needs to be generated after receiving a match, we keep all trailing context arrays that
// haven't been fulfilled in this Set, and mutate them adding new lines until they are fulfilled.
// Maintains the rolling window of leading context lines for the file that is
// currently being processed.
//
// * `message` {Object} parsed ripgrep JSON message. Only `match` and `context`
//   messages carry a line, every other type is ignored.
// * `pendingLeadingContext` {Array} of {String} that is mutated in place so it
//   always holds (at most) the last `options.leadingContextLineCount` lines.
// * `options` {Object} search options; nothing is recorded when
//   `leadingContextLineCount` is falsy.
function updateLeadingContext(message, pendingLeadingContext, options) {
  if (message.type !== 'match' && message.type !== 'context') {
    return;
  }

  if (options.leadingContextLineCount) {
    pendingLeadingContext.push(cleanResultLine(message.data.lines));

    // Drop the oldest line once the window is larger than requested.
    if (pendingLeadingContext.length > options.leadingContextLineCount) {
      pendingLeadingContext.shift();
    }
  }
}
// Feeds the line carried by `message` to every trailing context that is still
// waiting for lines.
//
// * `message` {Object} parsed ripgrep JSON message. Only `match` and `context`
//   messages carry a line, every other type is ignored.
// * `pendingTrailingContexts` {Set} of {Array}s that have not yet received
//   `options.trailingContextLineCount` lines. Each array is mutated in place
//   and removed from the Set as soon as it is complete.
// * `options` {Object} search options; nothing is recorded when
//   `trailingContextLineCount` is falsy.
function updateTrailingContexts(message, pendingTrailingContexts, options) {
  if (message.type !== 'match' && message.type !== 'context') {
    return;
  }

  if (options.trailingContextLineCount) {
    // Deleting the entry that is currently being visited is safe while
    // iterating a Set.
    for (const trailingContextLines of pendingTrailingContexts) {
      trailingContextLines.push(cleanResultLine(message.data.lines));

      if (trailingContextLines.length === options.trailingContextLineCount) {
        pendingTrailingContexts.delete(trailingContextLines);
      }
    }
  }
}
// Returns the text of a ripgrep line object without its trailing line feed.
//
// * `resultLine` {Object} in the shape accepted by `getText()`.
//
// Only a single trailing `\n` is removed; any other character is preserved.
function cleanResultLine(resultLine) {
  const text = getText(resultLine);

  return text.endsWith('\n') ? text.slice(0, -1) : text;
}
// Converts an offset inside a multi-line text into a `[row, column]` pair.
//
// * `lines` {Array} of {String}: the text split by `\n`.
// * `column` {Number} zero-based offset into the original (joined) text, where
//   every line feed counts as one character.
//
// Returns an {Array} `[row, column]`, both zero-based and relative to `lines`.
function getPositionFromColumn(lines, column) {
  let currentLength = 0;
  let currentLine = 0;
  let previousLength = 0;

  // Walk the lines until the accumulated length goes past the offset.
  while (column >= currentLength) {
    previousLength = currentLength;
    // `+ 1` accounts for the line feed removed by the split.
    currentLength += lines[currentLine].length + 1;
    currentLine++;
  }

  return [currentLine - 1, column - previousLength];
}
// ripgrep reports submatch offsets in bytes, while the rest of the code works
// with string (UTF-16 code unit) offsets. This function rewrites, in place,
// the `start` and `end` of every submatch of `match` as string offsets.
//
// * `match` {Object} the `data` of a ripgrep `match` message. It is read
//   through `match.lines` and `match.submatches`.
function processUnicodeMatch(match) {
  const text = getText(match.lines);

  if (text.length === Buffer.byteLength(text)) {
    // fast codepath for lines that only contain characters of 1 byte length.
    return;
  }

  const bytes = Buffer.from(text);
  let convertedLength = 0;
  let previousPosition = 0;

  // Decodes only the bytes located between the previously converted position
  // and `position`, so the whole line is decoded at most once.
  const convertPosition = position => {
    convertedLength += bytes.toString('utf8', previousPosition, position)
      .length;
    previousPosition = position;

    return convertedLength;
  };

  // Iterate over all the submatches to convert the start and end values
  // (which come as bytes from ripgrep) to character positions.
  // We can do this because submatches come ordered by position.
  for (const submatch of match.submatches) {
    submatch.start = convertPosition(submatch.start);
    submatch.end = convertPosition(submatch.end);
  }
}
// This function processes a ripgrep submatch to create the correct
// range. This is mostly needed for multi-line results, since the range
// will have different start and end rows and we need to calculate these
// based on the lines that ripgrep returns.
//
// * `submatch` {Object} with `start` and `end` offsets into `lineText`.
// * `lineText` {String} the text reported by ripgrep for the match.
// * `offsetRow` {Number} row of the first line of `lineText`, added to both
//   rows of the resulting range.
//
// Returns an {Object} with the keys `range` (`[[row, column], [row, column]]`)
// and `lineText` (only the lines covered by the submatch, without the
// trailing line feed).
function processSubmatch(submatch, lineText, offsetRow) {
  const lineParts = lineText.split('\n');

  const start = getPositionFromColumn(lineParts, submatch.start);
  const end = getPositionFromColumn(lineParts, submatch.end);

  // Make sure that the lineText string only contains lines that are
  // relevant to this submatch. This means getting rid of lines above
  // the start row and below the end row.
  const relevantLines = lineParts.slice(start[0], end[0] + 1);

  start[0] += offsetRow;
  end[0] += offsetRow;

  return {
    range: [start, end],
    lineText: cleanResultLine({ text: relevantLines.join('\n') })
  };
}
// Returns the string carried by a ripgrep JSON data object.
//
// * `input` {Object} with either a `text` {String} property, or a `bytes`
//   {String} property holding the base64 encoded content.
//
// The check is done on the type of `text` (not on its truthiness) so an empty
// string is returned as is instead of falling through to the `bytes` branch.
function getText(input) {
  return typeof input.text === 'string'
    ? input.text
    : Buffer.from(input.bytes, 'base64').toString();
}
module.exports = class RipgrepDirectorySearcher {
canSearchDirectory() {
return true;
// Performs a text search for files in the specified `Directory`s, subject to the
// specified parameters.
// Results are streamed back to the caller by invoking methods on the specified `options`,
// such as `didMatch` and `didError`.
// * `directories` {Array} of {Directory} objects to search, all of which have been accepted by
// this searcher's `canSearchDirectory()` predicate.
// * `regex` {RegExp} to search with.
// * `options` {Object} with the following properties:
// * `didMatch` {Function} call with a search result structured as follows:
// * `searchResult` {Object} with the following keys:
// * `filePath` {String} absolute path to the matching file.
// * `matches` {Array} with object elements with the following keys:
// * `lineText` {String} The full text of the matching line (without a line terminator character).
// * `lineTextOffset` {Number} Always 0, present for backwards compatibility
// * `matchText` {String} The text that matched the `regex` used for the search.
// * `range` {Range} Identifies the matching region in the file. (Likely as an array of numeric arrays.)
// * `didError` {Function} call with an Error if there is a problem during the search.
// * `didSearchPaths` {Function} periodically call with the number of paths searched that contain results thus far.
// * `inclusions` {Array} of glob patterns (as strings) to search within. Note that this
// array may be empty, indicating that all files should be searched.
// Each item in the array is a file/directory pattern, e.g., `src` to search in the "src"
// directory or `*.js` to search all JavaScript files. In practice, this often comes from the
// comma-delimited list of patterns in the bottom text input of the ProjectFindView dialog.
// * `includeHidden` {boolean} whether to ignore hidden files.
// * `excludeVcsIgnores` {boolean} whether to exclude VCS ignored paths.
// * `exclusions` {Array} similar to inclusions
// * `follow` {boolean} whether symlinks should be followed.
// Returns a *thenable* `DirectorySearch` that includes a `cancel()` method. If `cancel()` is
// invoked before the `DirectorySearch` is determined, it will resolve the `DirectorySearch`.
search(directories, regexp, options) {
const numPathsFound = { num: 0 };
const allPromises = =>
this.searchInDirectory(directory, regexp, options, numPathsFound)
const promise = Promise.all(allPromises);
promise.cancel = () => {
for (const promise of allPromises) {
return promise;
searchInDirectory(directory, regexp, options, numPathsFound) {
// Delay the require of vscode-ripgrep to not mess with the snapshot creation.
if (!this.rgPath) {
this.rgPath = require('vscode-ripgrep').rgPath.replace(
const directoryPath = directory.getPath();
const regexpStr = this.prepareRegexp(regexp.source);
const args = ['--json', '--regexp', regexpStr];
if (options.leadingContextLineCount) {
args.push('--before-context', options.leadingContextLineCount);
if (options.trailingContextLineCount) {
args.push('--after-context', options.trailingContextLineCount);
if (regexp.ignoreCase) {
for (const inclusion of this.prepareGlobs(
)) {
args.push('--glob', inclusion);
for (const exclusion of this.prepareGlobs(
)) {
args.push('--glob', '!' + exclusion);
if (this.isMultilineRegexp(regexpStr)) {
if (options.includeHidden) {
if (options.follow) {
if (!options.excludeVcsIgnores) {
const child = spawn(this.rgPath, args, {
cwd: directoryPath,
stdio: ['pipe', 'pipe', 'pipe']
const didMatch = options.didMatch || (() => {});
let cancelled = false;
const returnedPromise = new Promise((resolve, reject) => {
let buffer = '';
let bufferError = '';
let pendingEvent;
let pendingLeadingContext;
let pendingTrailingContexts;
child.on('close', (code, signal) => {
// code 1 is used when no results are found.
if (code !== null && code > 1) {
reject(new Error(bufferError));
} else {
child.stderr.on('data', chunk => {
bufferError += chunk;
child.stdout.on('data', chunk => {
if (cancelled) {
buffer += chunk;
const lines = buffer.split('\n');
buffer = lines.pop();
for (const line of lines) {
const message = JSON.parse(line);
updateTrailingContexts(message, pendingTrailingContexts, options);
if (message.type === 'begin') {
pendingEvent = {
filePath: path.join(directoryPath, getText(,
matches: []
pendingLeadingContext = [];
pendingTrailingContexts = new Set();
} else if (message.type === 'match') {
const trailingContextLines = [];
for (const submatch of {
const { lineText, range } = processSubmatch(
getText(, - 1
matchText: getText(submatch.match),
lineTextOffset: 0,
leadingContextLines: [...pendingLeadingContext],
} else if (message.type === 'end') {
pendingEvent = null;
updateLeadingContext(message, pendingLeadingContext, options);
returnedPromise.cancel = () => {
cancelled = true;
return returnedPromise;
// We need to prepare the "globs" that we receive from the user to make their behaviour more
// user-friendly (e.g when adding `src/` the user probably means `src/**/*`).
// This helper function takes care of that.
prepareGlobs(globs, projectRootPath) {
const output = [];
for (let pattern of globs) {
// we need to replace path separators by slashes since globs should
// always use always slashes as path separators.
pattern = pattern.replace(new RegExp(`\\${path.sep}`, 'g'), '/');
if (pattern.length === 0) {
const projectName = path.basename(projectRootPath);
// The user can just search inside one of the opened projects. When we detect
// this scenario we just consider the glob to include every file.
if (pattern === projectName) {
if (pattern.startsWith(projectName + '/')) {
pattern = pattern.slice(projectName.length + 1);
if (pattern.endsWith('/')) {
pattern = pattern.slice(0, -1);
output.push(pattern.endsWith('/**') ? pattern : `${pattern}/**`);
return output;
prepareRegexp(regexpStr) {
// ripgrep handles `--` as the arguments separator, so we need to escape it if the
// user searches for that exact same string.
if (regexpStr === '--') {
return '\\-\\-';
// ripgrep is quite picky about unnecessarily escaped sequences, so we need to unescape
// them:
regexpStr = regexpStr.replace(/\\\//g, '/');
return regexpStr;
isMultilineRegexp(regexpStr) {
if (regexpStr.includes('\\n')) {
return true;
return false;