1/*
2 * Copyright (c) 1998-2007 Matthijs Hollemans
3 * Copyright (c) 2008-2017, Haiku Inc.
4 * Distributed under the terms of the MIT license.
5 *
6 * Authors:
 *      Matthijs Hollemans
 *      Stephan Aßmus <superstippi@gmx.de>
9 *      Philippe Houdoin
10 */
11
12#include "Grepper.h"
13
14#include <errno.h>
15#include <new>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <sys/select.h>
20#include <sys/time.h>
21
22#include <Catalog.h>
23#include <Directory.h>
24#include <image.h>
25#include <List.h>
26#include <Locale.h>
27#include <NodeInfo.h>
28#include <OS.h>
29#include <Path.h>
30#include <UTF8.h>
31
32#include "FileIterator.h"
33#include "Model.h"
34
35#undef B_TRANSLATION_CONTEXT
36#define B_TRANSLATION_CONTEXT "Grepper"
37
38
// End-of-input marker for xargs: it is handed to xargs through its "-E"
// option and written as the last line on xargs' input pipe once all file
// names have been sent.
const char* kEOFTag = "//EOF";
40
41
42using std::nothrow;
43
44char*
45strdup_to_utf8(uint32 encode, const char* src, int32 length)
46{
47	int32 srcLen = length;
48	int32 dstLen = 2 * srcLen;
49	// TODO: stippi: Why the duplicate copy? Why not just return
50	// dst (and allocate with malloc() instead of new)? Is 2 * srcLen
51	// enough space? Check return value of convert_to_utf8 and keep
52	// converting if it didn't fit?
53	char* dst = new (nothrow) char[dstLen + 1];
54	if (dst == NULL)
55		return NULL;
56	int32 cookie = 0;
57	convert_to_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
58	dst[dstLen] = '\0';
59	char* dup = strdup(dst);
60	delete[] dst;
61	if (srcLen != length) {
62		fprintf(stderr, "strdup_to_utf8(%" B_PRId32 ", %" B_PRId32
63			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
64	}
65	return dup;
66}
67
68
69char*
70strdup_from_utf8(uint32 encode, const char* src, int32 length)
71{
72	int32 srcLen = length;
73	int32 dstLen = srcLen;
74	char* dst = new (nothrow) char[dstLen + 1];
75	if (dst == NULL)
76		return NULL;
77	int32 cookie = 0;
78	convert_from_utf8(encode, src, &srcLen, dst, &dstLen, &cookie);
79	// TODO: See above.
80	dst[dstLen] = '\0';
81	char* dup = strdup(dst);
82	delete[] dst;
83	if (srcLen != length) {
84		fprintf(stderr, "strdup_from_utf8(%" B_PRId32 ", %" B_PRId32
85			") dst allocate smalled(%" B_PRId32 ")\n", encode, length, dstLen);
86	}
87	return dup;
88}
89
90
91Grepper::Grepper(const char* pattern, const Model* model,
92		const BHandler* target, FileIterator* iterator)
93	: fPattern(NULL),
94	  fTarget(target),
95	  fRegularExpression(model->fRegularExpression),
96	  fCaseSensitive(model->fCaseSensitive),
97	  fEncoding(model->fEncoding),
98
99	  fIterator(iterator),
100	  fRunnerThreadId(-1),
101	  fXargsInput(-1),
102	  fMustQuit(false)
103{
104	if (fEncoding > 0) {
105		char* src = strdup_from_utf8(fEncoding, pattern, strlen(pattern));
106		_SetPattern(src);
107		free(src);
108	} else
109		_SetPattern(pattern);
110}
111
112
113Grepper::~Grepper()
114{
115	Cancel();
116	free(fPattern);
117	delete fIterator;
118}
119
120
121bool
122Grepper::IsValid() const
123{
124	if (fIterator == NULL || !fIterator->IsValid())
125		return false;
126	return fPattern != NULL;
127}
128
129
130void
131Grepper::Start()
132{
133	Cancel();
134
135	fMustQuit = false;
136	fRunnerThreadId = spawn_thread(
137		_SpawnRunnerThread, "Grep runner", B_NORMAL_PRIORITY, this);
138
139	resume_thread(fRunnerThreadId);
140}
141
142
143void
144Grepper::Cancel()
145{
146	if (fRunnerThreadId < 0)
147		return;
148
149	fMustQuit = true;
150	int32 exitValue;
151	wait_for_thread(fRunnerThreadId, &exitValue);
152	fRunnerThreadId = -1;
153}
154
155
156// #pragma mark - private
157
158
159int32
160Grepper::_SpawnWriterThread(void* cookie)
161{
162	Grepper* self = static_cast<Grepper*>(cookie);
163	return self->_WriterThread();
164}
165
166
167int32
168Grepper::_WriterThread()
169{
170	BMessage message;
171	char fileName[B_PATH_NAME_LENGTH*2];
172	int count = 0;
173	bigtime_t lastProgressReportTime = 0, now;
174
175	printf("paths_writer started.\n");
176
177	while (!fMustQuit && fIterator->GetNextName(fileName)) {
178		BEntry entry(fileName);
179		entry_ref ref;
180		entry.GetRef(&ref);
181		if (!entry.Exists()) {
182			if (fIterator->NotifyNegatives()) {
183				message.MakeEmpty();
184				message.what = MSG_REPORT_RESULT;
185				message.AddString("filename", fileName);
186				message.AddRef("ref", &ref);
187				fTarget.SendMessage(&message);
188			}
189			continue;
190		}
191
192		if (!_EscapeSpecialChars(fileName, sizeof(fileName))) {
193			char tempString[B_PATH_NAME_LENGTH + 32];
194			sprintf(tempString, B_TRANSLATE("%s: Not enough room to escape "
195				"the filename."), fileName);
196			message.MakeEmpty();
197			message.what = MSG_REPORT_ERROR;
198			message.AddString("error", tempString);
199			fTarget.SendMessage(&message);
200			continue;
201		}
202
203		count++;
204
205		// file exists, send it to xargs
206		write(fXargsInput, fileName, strlen(fileName));
207		write(fXargsInput, "\n", 1);
208
209		now = system_time();
210		// to avoid message flood,
211		// report progress no more than 20 times per second
212		if (now - lastProgressReportTime > 50000) {
213			message.MakeEmpty();
214			message.what = MSG_REPORT_FILE_NAME;
215			message.AddString("filename", fileName);
216			fTarget.SendMessage(&message);
217			lastProgressReportTime = now;
218		}
219	}
220
221	write(fXargsInput, kEOFTag, strlen(kEOFTag));
222	write(fXargsInput, "\n", 1);
223	close(fXargsInput);
224
225	printf("paths_writer stopped (%d paths).\n", count);
226
227	return 0;
228}
229
230
231int32
232Grepper::_SpawnRunnerThread(void* cookie)
233{
234	Grepper* self = static_cast<Grepper*>(cookie);
235	return self->_RunnerThread();
236}
237
238
239int32
240Grepper::_RunnerThread()
241{
242	BMessage message;
243	char fileName[B_PATH_NAME_LENGTH];
244
245	const char* argv[32];
246	int argc = 0;
247	argv[argc++] = "xargs";
248
249	// can't use yet the --null mode due to pipe issue
250	// the xargs stdin input pipe closure is not detected
251	// by xargs. Instead, we use eof-string mode
252
253	// argv[argc++] = "--null";
254	argv[argc++] = "-E";
255	argv[argc++] = kEOFTag;
256
257	// Enable parallel mode
258	// Retrieve cpu count for to parallel xargs via -P argument
259	char cpuCount[8];
260	system_info sys_info;
261	get_system_info(&sys_info);
262	snprintf(cpuCount, sizeof(cpuCount), "%" B_PRIu32, sys_info.cpu_count);
263	argv[argc++] = "-P";
264	argv[argc++] = cpuCount;
265
266	// grep command driven by xargs dispatcher
267	argv[argc++] = "grep";
268	argv[argc++] = "-n"; // need matching line(s) number(s)
269	argv[argc++] = "-H"; // need filename prefix
270	if (! fCaseSensitive)
271		argv[argc++] = "-i";
272	if (! fRegularExpression)
273		argv[argc++] = "-F";	 // no a regexp: force fixed string,
274	argv[argc++] = fPattern;
275	argv[argc] = NULL;
276
277	// prepare xargs to run with stdin, stdout and stderr pipes
278
279	int oldStdIn, oldStdOut, oldStdErr;
280	oldStdIn  = dup(STDIN_FILENO);
281	oldStdOut = dup(STDOUT_FILENO);
282	oldStdErr = dup(STDERR_FILENO);
283
284	int fds[2];
285	if (pipe(fds) != 0) {
286		message.MakeEmpty();
287		message.what = MSG_REPORT_ERROR;
288		message.AddString("error",
289			B_TRANSLATE("Failed to open input pipe!"));
290		fTarget.SendMessage(&message);
291		return 0;
292	}
293	dup2(fds[0], STDIN_FILENO);
294	close(fds[0]);
295	fXargsInput = fds[1];	// write to in, appears on command's stdin
296
297	if (pipe(fds) != 0) {
298		close(fXargsInput);
299		message.MakeEmpty();
300		message.what = MSG_REPORT_ERROR;
301		message.AddString("error",
302			B_TRANSLATE("Failed to open output pipe!"));
303		fTarget.SendMessage(&message);
304		return 0;
305	}
306	dup2(fds[1], STDOUT_FILENO);
307	close(fds[1]);
308	int out = fds[0]; // read from out, taken from command's stdout
309
310	if (pipe(fds) != 0) {
311		close(fXargsInput);
312		close(out);
313		message.MakeEmpty();
314		message.what = MSG_REPORT_ERROR;
315		message.AddString("error",
316			B_TRANSLATE("Failed to open errors pipe!"));
317		fTarget.SendMessage(&message);
318		return 0;
319	}
320	dup2(fds[1], STDERR_FILENO);
321	close(fds[1]);
322	int err = fds[0]; // read from err, taken from command's stderr
323
324	// "load" xargs tool
325	thread_id xargsThread = load_image(argc, argv,
326		const_cast<const char**>(environ));
327	// xargsThread is suspended after loading
328
329	// restore our previous stdin, stdout and stderr
330	close(STDIN_FILENO);
331	dup(oldStdIn);
332	close(oldStdIn);
333	close(STDOUT_FILENO);
334	dup(oldStdOut);
335	close(oldStdOut);
336	close(STDERR_FILENO);
337	dup(oldStdErr);
338	close(oldStdErr);
339
340	if (xargsThread < B_OK) {
341		close(fXargsInput);
342		close(out);
343		close(err);
344		message.MakeEmpty();
345		message.what = MSG_REPORT_ERROR;
346		message.AddString("error",
347			B_TRANSLATE("Failed to start xargs program!"));
348		fTarget.SendMessage(&message);
349		return 0;
350	}
351
352	// Listen on xargs's stdout and stderr via select()
353	printf("Running: ");
354	for (int i = 0; i < argc; i++) {
355		printf("%s ", argv[i]);
356	}
357	printf("\n");
358
359	int fdl[2] = { out, err };
360	int maxfd = 0;
361	for (int i = 0; i < 2; i++) {
362		if (maxfd < fdl[i])
363			maxfd = fdl[i];
364	}
365
366	fd_set readSet;
367	struct timeval timeout = { 0, 100000 };
368	char line[B_PATH_NAME_LENGTH * 2];
369
370	FILE* output = fdopen(out, "r");
371	FILE* errors = fdopen(err, "r");
372
373	char currentFileName[B_PATH_NAME_LENGTH];
374	currentFileName[0] = '\0';
375	bool canReadOutput, canReadErrors;
376	canReadOutput = canReadErrors = true;
377
378	thread_id writerThread = spawn_thread(_SpawnWriterThread,
379		"Grep writer", B_LOW_PRIORITY, this);
380	set_thread_priority(xargsThread, B_LOW_PRIORITY);
381
382	// we're ready, let's go!
383	resume_thread(xargsThread);
384	resume_thread(writerThread);
385
386	while (!fMustQuit && (canReadOutput || canReadErrors)) {
387		FD_ZERO(&readSet);
388		if (canReadOutput) {
389			FD_SET(out, &readSet);
390		}
391		if (canReadErrors) {
392			FD_SET(err, &readSet);
393		}
394
395		int result = select(maxfd + 1, &readSet, NULL, NULL, &timeout);
396		if (result == -1 && errno == EINTR)
397			continue;
398		if (result == 0) {
399			// timeout, but meanwhile fMustQuit was changed maybe...
400			continue;
401		}
402		if (result < 0) {
403			perror("select():");
404			message.MakeEmpty();
405			message.what = MSG_REPORT_ERROR;
406			message.AddString("error", strerror(errno));
407			fTarget.SendMessage(&message);
408			break;
409		}
410
411		if (canReadOutput && FD_ISSET(out, &readSet)) {
412			if (fgets(line, sizeof(line), output) != NULL) {
413				// parse grep output
414				int lineNumber = -1;
415				int textPos = -1;
416				sscanf(line, "%[^\n:]:%d:%n", fileName, &lineNumber, &textPos);
417				// printf("sscanf(\"%s\") -> %s %d %d\n", line, fileName,
418				//		lineNumber, textPos);
419				if (textPos > 0) {
420					if (strcmp(fileName, currentFileName) != 0) {
421						fTarget.SendMessage(&message);
422
423						strncpy(currentFileName, fileName,
424							sizeof(currentFileName));
425
426						message.MakeEmpty();
427						message.what = MSG_REPORT_RESULT;
428						message.AddString("filename", fileName);
429
430						BEntry entry(fileName);
431						entry_ref ref;
432						entry.GetRef(&ref);
433						message.AddRef("ref", &ref);
434					}
435
436					char* text = &line[strlen(fileName)+1];
437					// printf("[%s] %s", fileName, text);
438					if (fEncoding > 0) {
439						char* tempdup = strdup_to_utf8(fEncoding, text,
440							strlen(text));
441						message.AddString("text", tempdup);
442						free(tempdup);
443					} else {
444						message.AddString("text", text);
445					}
446					message.AddInt32("line", lineNumber);
447				}
448			} else {
449				canReadOutput = false;
450			}
451		}
452		if (canReadErrors && FD_ISSET(err, &readSet)) {
453			if (fgets(line, sizeof(line), errors) != NULL) {
454				// printf("ERROR: %s", line);
455				if (message.HasString("text"))
456					fTarget.SendMessage(&message);
457				currentFileName[0] = '\0';
458
459				message.MakeEmpty();
460				message.what = MSG_REPORT_ERROR;
461				message.AddString("error", line);
462				fTarget.SendMessage(&message);
463			} else {
464				canReadErrors = false;
465			}
466		}
467	}
468
469	// send last pending message, if any
470	if (message.HasString("text"))
471		fTarget.SendMessage(&message);
472
473	printf("Done.\n");
474	fclose(output);
475	fclose(errors);
476
477	close(out);
478	close(err);
479
480	fMustQuit = true;
481	int32 exitValue;
482	wait_for_thread(xargsThread, &exitValue);
483	wait_for_thread(writerThread, &exitValue);
484
485	message.MakeEmpty();
486	message.what = MSG_SEARCH_FINISHED;
487	fTarget.SendMessage(&message);
488
489	return 0;
490}
491
492
493void
494Grepper::_SetPattern(const char* src)
495{
496	if (src == NULL)
497		return;
498
499	fPattern = strdup(src);
500}
501
502
503bool
504Grepper::_EscapeSpecialChars(char* buffer, ssize_t bufferSize)
505{
506	char* copy = strdup(buffer);
507	char* start = buffer;
508	uint32 len = strlen(copy);
509	bool result = true;
510	for (uint32 count = 0; count < len; ++count) {
511		if (copy[count] == '\'' || copy[count] == '\\'
512			|| copy[count] == ' ' || copy[count] == '\n'
513			|| copy[count] == '"')
514			*buffer++ = '\\';
515		if (buffer - start == bufferSize - 1) {
516			result = false;
517			break;
518		}
519		*buffer++ = copy[count];
520	}
521	*buffer = '\0';
522	free(copy);
523	return result;
524}
525