1/*
2 * Copyright 2011-2016, Haiku, Inc. All rights reserved.
3 * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved.
4 */
5
6
7#include <mail_util.h>
8
9#include <stdlib.h>
10#include <strings.h>
11#include <stdio.h>
12#define __USE_GNU
13#include <regex.h>
14#include <ctype.h>
15#include <errno.h>
16
17#include <FindDirectory.h>
18#include <List.h>
19#include <Locker.h>
20#include <parsedate.h>
21#include <Path.h>
22#include <String.h>
23#include <UTF8.h>
24
25#include <mail_encoding.h>
26
27#include <AttributeUtilities.h>
28#include <CharacterSet.h>
29#include <CharacterSetRoster.h>
30
31
32using namespace BPrivate;
33
34
35#define CRLF   "\r\n"
36
37struct CharsetConversionEntry {
38	const char *charset;
39	uint32 flavor;
40};
41
42extern const CharsetConversionEntry mail_charsets[] = {
43	// In order of authority, so when searching for the name for a particular
44	// numbered conversion, start at the beginning of the array.
45	{"iso-8859-1",  B_ISO1_CONVERSION}, // MIME STANDARD
46	{"iso-8859-2",  B_ISO2_CONVERSION}, // MIME STANDARD
47	{"iso-8859-3",  B_ISO3_CONVERSION}, // MIME STANDARD
48	{"iso-8859-4",  B_ISO4_CONVERSION}, // MIME STANDARD
49	{"iso-8859-5",  B_ISO5_CONVERSION}, // MIME STANDARD
50	{"iso-8859-6",  B_ISO6_CONVERSION}, // MIME STANDARD
51	{"iso-8859-7",  B_ISO7_CONVERSION}, // MIME STANDARD
52	{"iso-8859-8",  B_ISO8_CONVERSION}, // MIME STANDARD
53	{"iso-8859-9",  B_ISO9_CONVERSION}, // MIME STANDARD
54	{"iso-8859-10", B_ISO10_CONVERSION}, // MIME STANDARD
55	{"iso-8859-13", B_ISO13_CONVERSION}, // MIME STANDARD
56	{"iso-8859-14", B_ISO14_CONVERSION}, // MIME STANDARD
57	{"iso-8859-15", B_ISO15_CONVERSION}, // MIME STANDARD
58
59	{"shift_jis",	B_SJIS_CONVERSION}, // MIME STANDARD
60	{"shift-jis",	B_SJIS_CONVERSION},
61	{"iso-2022-jp", B_JIS_CONVERSION}, // MIME STANDARD
62	{"euc-jp",		B_EUC_CONVERSION}, // MIME STANDARD
63
64	{"euc-kr",      B_EUC_KR_CONVERSION}, // Shift encoding 7 bit and KSC-5601 if bit 8 is on. // MIME STANDARD
65	{"ksc5601",		B_EUC_KR_CONVERSION},    // Not sure if 7 or 8 bit. // COMPATIBLE?
66	{"ks_c_5601-1987", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE with stupid MS software
67
68	{"koi8-r",      B_KOI8R_CONVERSION},           // MIME STANDARD
69	{"windows-1251",B_MS_WINDOWS_1251_CONVERSION}, // MIME STANDARD
70	{"windows-1252",B_MS_WINDOWS_CONVERSION},      // MIME STANDARD
71
72	{"dos-437",     B_MS_DOS_CONVERSION},     // WRONG NAME : MIME STANDARD NAME = NONE ( IBM437? )
73	{"dos-866",     B_MS_DOS_866_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM866? )
74	{"x-mac-roman", B_MAC_ROMAN_CONVERSION},  // WRONG NAME : MIME STANDARD NAME = NONE ( macintosh? + x-mac-roman? )
75
76    {"big5",        24}, // MIME STANDARD
77
78    {"gb18030",     25}, // WRONG NAME : MIME STANDARD NAME = NONE ( GB18030? )
79    {"gb2312",      25}, // COMPATIBLE
80    {"gbk",         25}, // COMPATIBLE
81
82	/* {"utf-16",		B_UNICODE_CONVERSION}, Might not work due to NULs in text, needs testing. */
83	{"us-ascii",	B_MAIL_US_ASCII_CONVERSION},                                  // MIME STANDARD
84	{"utf-8",		B_MAIL_UTF8_CONVERSION /* Special code for no conversion */}, // MIME STANDARD
85
86	{NULL, (uint32) -1} /* End of list marker, NULL string pointer is the key. */
87};
88
89
90static int32 gLocker = 0;
91static size_t gNsub = 1;
92static re_pattern_buffer gRe;
93static re_pattern_buffer *gRebuf = NULL;
94static unsigned char gTranslation[256];
95
96
97static int
98handle_non_rfc2047_encoding(char **buffer, size_t *bufferLength,
99	size_t *sourceLength)
100{
101	char *string = *buffer;
102	int32 length = *sourceLength;
103	int32 i;
104
105	// check for 8-bit characters
106	for (i = 0;i < length;i++)
107		if (string[i] & 0x80)
108			break;
109	if (i == length)
110		return false;
111
112	// check for groups of 8-bit characters - this code is not very smart;
113	// it just can detect some sort of single-byte encoded stuff, the rest
114	// is regarded as UTF-8
115
116	int32 singletons = 0,doubles = 0;
117
118	for (i = 0;i < length;i++)
119	{
120		if (string[i] & 0x80)
121		{
122			if ((string[i + 1] & 0x80) == 0)
123				singletons++;
124			else doubles++;
125			i++;
126		}
127	}
128
129	if (singletons != 0)	// can't be valid UTF-8 anymore, so we assume ISO-Latin-1
130	{
131		int32 state = 0;
132		// just to be sure
133		int32 destLength = length * 4 + 1;
134		int32 destBufferLength = destLength;
135		char *dest = (char*)malloc(destLength);
136		if (dest == NULL)
137			return 0;
138
139		if (convert_to_utf8(B_ISO1_CONVERSION, string, &length,dest,
140			&destLength, &state) == B_OK) {
141			*buffer = dest;
142			*bufferLength = destBufferLength;
143			*sourceLength = destLength;
144			return true;
145		}
146		free(dest);
147		return false;
148	}
149
150	// we assume a valid UTF-8 string here, but yes, we don't check it
151	return true;
152}
153
154
155// #pragma mark -
156
157
158status_t
159write_read_attr(BNode& node, read_flags flag)
160{
161	if (node.WriteAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32))
162			< 0)
163		return B_ERROR;
164
165	// Manage the status string only if it currently has a known state
166	BString currentStatus;
167	if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &currentStatus) == B_OK
168		&& currentStatus.ICompare("New") != 0
169		&& currentStatus.ICompare("Read") != 0
170		&& currentStatus.ICompare("Seen") != 0) {
171		return B_OK;
172	}
173
174	BString statusString = flag == B_READ ? "Read"
175		: flag == B_SEEN ? "Seen" : "New";
176	if (node.WriteAttrString(B_MAIL_ATTR_STATUS, &statusString) < 0)
177		return B_ERROR;
178
179	return B_OK;
180}
181
182
183status_t
184read_read_attr(BNode& node, read_flags& flag)
185{
186	if (node.ReadAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32))
187			== sizeof(int32))
188		return B_OK;
189
190	BString statusString;
191	if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &statusString) == B_OK) {
192		if (statusString.ICompare("New"))
193			flag = B_UNREAD;
194		else
195			flag = B_READ;
196
197		return B_OK;
198	}
199
200	return B_ERROR;
201}
202
203
// The next couple of functions are our wrappers around convert_to_utf8 and
// convert_from_utf8 so that they can also convert from UTF-8 to UTF-8 by
// specifying the B_MAIL_UTF8_CONVERSION constant as the conversion operation.
// They also let us add new conversions, like B_MAIL_US_ASCII_CONVERSION.
208
209
210status_t
211mail_convert_to_utf8(uint32 srcEncoding, const char *src, int32 *srcLen,
212	char *dst, int32 *dstLen, int32 *state, char substitute)
213{
214	int32 copyAmount;
215	char *originalDst = dst;
216	status_t returnCode = -1;
217
218	if (srcEncoding == B_MAIL_UTF8_CONVERSION) {
219		copyAmount = *srcLen;
220		if (*dstLen < copyAmount)
221			copyAmount = *dstLen;
222		memcpy (dst, src, copyAmount);
223		*srcLen = copyAmount;
224		*dstLen = copyAmount;
225		returnCode = B_OK;
226	} else if (srcEncoding == B_MAIL_US_ASCII_CONVERSION) {
227		int32 i;
228		unsigned char letter;
229		copyAmount = *srcLen;
230		if (*dstLen < copyAmount)
231			copyAmount = *dstLen;
232		for (i = 0; i < copyAmount; i++) {
233			letter = *src++;
234			if (letter > 0x80U)
235				// Invalid, could also use substitute, but better to strip high bit.
236				*dst++ = letter - 0x80U;
237			else if (letter == 0x80U)
238				// Can't convert to 0x00 since that's NUL, which would cause problems.
239				*dst++ = substitute;
240			else
241				*dst++ = letter;
242		}
243		*srcLen = copyAmount;
244		*dstLen = copyAmount;
245		returnCode = B_OK;
246	} else
247		returnCode = convert_to_utf8 (srcEncoding, src, srcLen,
248			dst, dstLen, state, substitute);
249
250	if (returnCode == B_OK) {
251		// Replace spurious NUL bytes, which should normally not be in the
252		// output of the decoding (not normal UTF-8 characters, and no NULs are
253		// in our usual input strings).  They happen for some odd ISO-2022-JP
254		// byte pair combinations which are improperly handled by the BeOS
255		// routines.  Like "\e$ByD\e(B" where \e is the ESC character $1B, the
256		// first ESC $ B switches to a Japanese character set, then the next
257		// two bytes "yD" specify a character, then ESC ( B switches back to
258		// the ASCII character set.  The UTF-8 conversion yields a NUL byte.
259		int32 i;
260		for (i = 0; i < *dstLen; i++)
261			if (originalDst[i] == 0)
262				originalDst[i] = substitute;
263	}
264	return returnCode;
265}
266
267
268status_t
269mail_convert_from_utf8(uint32 dstEncoding, const char *src, int32 *srcLen,
270	char *dst, int32 *dstLen, int32 *state, char substitute)
271{
272	int32 copyAmount;
273	status_t errorCode;
274	int32 originalDstLen = *dstLen;
275	int32 tempDstLen;
276	int32 tempSrcLen;
277
278	if (dstEncoding == B_MAIL_UTF8_CONVERSION) {
279		copyAmount = *srcLen;
280		if (*dstLen < copyAmount)
281			copyAmount = *dstLen;
282		memcpy (dst, src, copyAmount);
283		*srcLen = copyAmount;
284		*dstLen = copyAmount;
285		return B_OK;
286	}
287
288	if (dstEncoding == B_MAIL_US_ASCII_CONVERSION) {
289		int32 characterLength;
290		int32 dstRemaining = *dstLen;
291		unsigned char letter;
292		int32 srcRemaining = *srcLen;
293
294		// state contains the number of source bytes to skip, left over from a
295		// partial UTF-8 character split over the end of the buffer from last
296		// time.
297		if (srcRemaining <= *state) {
298			*state -= srcRemaining;
299			*dstLen = 0;
300			return B_OK;
301		}
302		srcRemaining -= *state;
303		src += *state;
304		*state = 0;
305
306		while (true) {
307			if (srcRemaining <= 0 || dstRemaining <= 0)
308				break;
309			letter = *src;
310			if (letter < 0x80)
311				characterLength = 1; // Regular ASCII equivalent code.
312			else if (letter < 0xC0)
313				characterLength = 1; // Invalid in-between data byte 10xxxxxx.
314			else if (letter < 0xE0)
315				characterLength = 2;
316			else if (letter < 0xF0)
317				characterLength = 3;
318			else if (letter < 0xF8)
319				characterLength = 4;
320			else if (letter < 0xFC)
321				characterLength = 5;
322			else if (letter < 0xFE)
323				characterLength = 6;
324			else
325				characterLength = 1; // 0xFE and 0xFF are invalid in UTF-8.
326			if (letter < 0x80)
327				*dst++ = *src;
328			else
329				*dst++ = substitute;
330			dstRemaining--;
331			if (srcRemaining < characterLength) {
332				// Character split past the end of the buffer.
333				*state = characterLength - srcRemaining;
334				srcRemaining = 0;
335			} else {
336				src += characterLength;
337				srcRemaining -= characterLength;
338			}
339		}
340		// Update with the amounts used.
341		*srcLen = *srcLen - srcRemaining;
342		*dstLen = *dstLen - dstRemaining;
343		return B_OK;
344	}
345
346	errorCode = convert_from_utf8(dstEncoding, src, srcLen, dst, dstLen, state,
347		substitute);
348	if (errorCode != B_OK)
349		return errorCode;
350
351	if (dstEncoding != B_JIS_CONVERSION)
352		return B_OK;
353
354	// B_JIS_CONVERSION (ISO-2022-JP) works by shifting between different
355	// character subsets.  For E-mail headers (and other uses), it needs to be
356	// switched back to ASCII at the end (otherwise the last character gets
357	// lost or other weird things happen in the headers).  Note that we can't
358	// just append the escape code since the convert_from_utf8 "state" will be
359	// wrong.  So we append an ASCII letter and throw it away, leaving just the
360	// escape code.  Well, it actually switches to the Roman character set, not
361	// ASCII, but that should be OK.
362
363	tempDstLen = originalDstLen - *dstLen;
364	if (tempDstLen < 3) // Not enough space remaining in the output.
365		return B_OK; // Sort of an error, but we did convert the rest OK.
366	tempSrcLen = 1;
367	errorCode = convert_from_utf8(dstEncoding, "a", &tempSrcLen,
368		dst + *dstLen, &tempDstLen, state, substitute);
369	if (errorCode != B_OK)
370		return errorCode;
371	*dstLen += tempDstLen - 1 /* don't include the ASCII letter */;
372	return B_OK;
373}
374
375
376ssize_t
377rfc2047_to_utf8(char **bufp, size_t *bufLen, size_t strLen)
378{
379	char *head, *tail;
380	char *charset, *encoding, *end;
381	ssize_t ret = B_OK;
382
383	if (bufp == NULL || *bufp == NULL)
384		return -1;
385
386	char *string = *bufp;
387
388	//---------Handle *&&^%*&^ non-RFC compliant, 8bit mail
389	if (handle_non_rfc2047_encoding(bufp,bufLen,&strLen))
390		return strLen;
391
392	// set up string length
393	if (strLen == 0)
394		strLen = strlen(*bufp);
395	char lastChar = (*bufp)[strLen];
396	(*bufp)[strLen] = '\0';
397
398	//---------Whew! Now for RFC compliant mail
399	bool encodedWordFoundPreviously = false;
400	for (head = tail = string;
401		((charset = strstr(tail, "=?")) != NULL)
402		&& (((encoding = strchr(charset + 2, '?')) != NULL)
403			&& encoding[1] && (encoding[2] == '?') && encoding[3])
404		&& (end = strstr(encoding + 3, "?=")) != NULL;
405		// found "=?...charset...?e?...text...?=   (e == encoding)
406		//        ^charset       ^encoding    ^end
407		tail = end)
408	{
409		// Copy non-encoded text (from tail up to charset) to the output.
410		// Ignore spaces between two encoded "words".  RFC2047 says the words
411		// should be concatenated without the space (designed for Asian
412		// sentences which have no spaces yet need to be broken into "words" to
413		// keep within the line length limits).
414		bool nonSpaceFound = false;
415		for (int i = 0; i < charset-tail; i++) {
416			if (!isspace (tail[i])) {
417				nonSpaceFound = true;
418				break;
419			}
420		}
421		if (!encodedWordFoundPreviously || nonSpaceFound) {
422			if (string != tail && tail != charset)
423				memmove(string, tail, charset-tail);
424			string += charset-tail;
425		}
426		tail = charset;
427		encodedWordFoundPreviously = true;
428
429		// move things to point at what they should:
430		//   =?...charset...?e?...text...?=   (e == encoding)
431		//     ^charset      ^encoding     ^end
432		charset += 2;
433		encoding += 1;
434		end += 2;
435
436		// find the charset this text is in now
437		size_t cLen = encoding - 1 - charset;
438		bool base64encoded = toupper(*encoding) == 'B';
439
440		uint32 convertID = B_MAIL_NULL_CONVERSION;
441		char charsetName[cLen + 1];
442		memcpy(charsetName, charset, cLen);
443		charsetName[cLen] = '\0';
444		if (strcasecmp(charsetName, "us-ascii") == 0) {
445			convertID = B_MAIL_US_ASCII_CONVERSION;
446		} else if (strcasecmp(charsetName, "utf-8") == 0) {
447			convertID = B_MAIL_UTF8_CONVERSION;
448		} else {
449			const BCharacterSet* charSet
450				= BCharacterSetRoster::FindCharacterSetByName(charsetName);
451			if (charSet != NULL) {
452				convertID = charSet->GetConversionID();
453			}
454		}
455		if (convertID == B_MAIL_NULL_CONVERSION) {
456			// unidentified charset
457			// what to do? doing nothing skips the encoded text;
458			// but we should keep it: we copy it to the output.
459			if (string != tail && tail != end)
460				memmove(string, tail, end-tail);
461			string += end-tail;
462			continue;
463		}
464		// else we've successfully identified the charset
465
466		char *src = encoding+2;
467		int32 srcLen = end - 2 - src;
468		// encoded text: src..src+srcLen
469
470		// decode text, get decoded length (reducing xforms)
471		srcLen = !base64encoded ? decode_qp(src, src, srcLen, 1)
472			: decode_base64(src, src, srcLen);
473
474		// allocate space for the converted text
475		int32 dstLen = end-string + *bufLen-strLen;
476		char *dst = (char*)malloc(dstLen);
477		int32 cvLen = srcLen;
478		int32 convState = 0;
479
480		//
481		// do the conversion
482		//
483		ret = mail_convert_to_utf8(convertID, src, &cvLen, dst, &dstLen,
484			&convState);
485		if (ret != B_OK) {
486			// what to do? doing nothing skips the encoded text
487			// but we should keep it: we copy it to the output.
488
489			free(dst);
490
491			if (string != tail && tail != end)
492				memmove(string, tail, end-tail);
493			string += end-tail;
494			continue;
495		}
496		/* convert_to_ is either returning something wrong or my
497		   test data is screwed up.  Whatever it is, Not Enough
498		   Space is not the only cause of the below, so we just
499		   assume it succeeds if it converts anything at all.
500		else if (cvLen < srcLen)
501		{
502			// not enough room to convert the data;
503			// grow *buf and retry
504
505			free(dst);
506
507			char *temp = (char*)realloc(*bufp, 2*(*bufLen + 1));
508			if (temp == NULL)
509			{
510				ret = B_NO_MEMORY;
511				break;
512			}
513
514			*bufp = temp;
515			*bufLen = 2*(*bufLen + 1);
516
517			string = *bufp + (string-head);
518			tail = *bufp + (tail-head);
519			charset = *bufp + (charset-head);
520			encoding = *bufp + (encoding-head);
521			end = *bufp + (end-head);
522			src = *bufp + (src-head);
523			head = *bufp;
524			continue;
525		}
526		*/
527		else {
528			if (dstLen > end-string) {
529				// copy the string forward...
530				memmove(string+dstLen, end, strLen - (end-head) + 1);
531				strLen += string+dstLen - end;
532				end = string + dstLen;
533			}
534
535			memcpy(string, dst, dstLen);
536			string += dstLen;
537			free(dst);
538			continue;
539		}
540	}
541
542	// copy everything that's left
543	size_t tailLen = strLen - (tail - head);
544	memmove(string, tail, tailLen+1);
545	string += tailLen;
546
547	// replace the last char
548	(*bufp)[strLen] = lastChar;
549
550	return ret < B_OK ? ret : string-head;
551}
552
553
554ssize_t
555utf8_to_rfc2047 (char **bufp, ssize_t length, uint32 charset, char encoding)
556{
557	struct word {
558		BString	originalWord;
559		BString	convertedWord;
560		bool	needsEncoding;
561
562		// Convert the word from UTF-8 to the desired character set.  The
563		// converted version also includes the escape codes to return to ASCII
564		// mode, if relevant.  Also note if it uses unprintable characters,
565		// which means it will need that special encoding treatment later.
566		void ConvertWordToCharset (uint32 charset) {
567			int32 state = 0;
568			int32 originalLength = originalWord.Length();
569			int32 convertedLength = originalLength * 5 + 1;
570			char *convertedBuffer = convertedWord.LockBuffer (convertedLength);
571			mail_convert_from_utf8 (charset, originalWord.String(),
572				&originalLength, convertedBuffer, &convertedLength, &state);
573			for (int i = 0; i < convertedLength; i++) {
574				if ((convertedBuffer[i] & (1 << 7)) ||
575					(convertedBuffer[i] >= 0 && convertedBuffer[i] < 32)) {
576					needsEncoding = true;
577					break;
578				}
579			}
580			convertedWord.UnlockBuffer (convertedLength);
581		};
582	};
583	struct word *currentWord;
584	BList words;
585
586	// Break the header into words.  White space characters (including tabs and
587	// newlines) separate the words.  Each word includes any space before it as
588	// part of the word.  Actually, quotes and other special characters
589	// (",()<>@) are treated as separate words of their own so that they don't
590	// get encoded (because MIME headers get the quotes parsed before character
591	// set unconversion is done).  The reader is supposed to ignore all white
592	// space between encoded words, which can be inserted so that older mail
593	// parsers don't have overly long line length problems.
594
595	const char *source = *bufp;
596	const char *bufEnd = *bufp + length;
597	const char *specialChars = "\"()<>@,";
598
599	while (source < bufEnd) {
600		currentWord = new struct word;
601		currentWord->needsEncoding = false;
602
603		int wordEnd = 0;
604
605		// Include leading spaces as part of the word.
606		while (source + wordEnd < bufEnd && isspace (source[wordEnd]))
607			wordEnd++;
608
609		if (source + wordEnd < bufEnd &&
610			strchr (specialChars, source[wordEnd]) != NULL) {
611			// Got a quote mark or other special character, which is treated as
612			// a word in itself since it shouldn't be encoded, which would hide
613			// it from the mail system.
614			wordEnd++;
615		} else {
616			// Find the end of the word.  Leave wordEnd pointing just after the
617			// last character in the word.
618			while (source + wordEnd < bufEnd) {
619				if (isspace(source[wordEnd]) ||
620					strchr (specialChars, source[wordEnd]) != NULL)
621					break;
622				if (wordEnd > 51 /* Makes Base64 ISO-2022-JP "word" a multiple of 4 bytes */ &&
623					0xC0 == (0xC0 & (unsigned int) source[wordEnd])) {
624					// No English words are that long (46 is the longest),
625					// break up what is likely Asian text (which has no spaces)
626					// at the start of the next non-ASCII UTF-8 character (high
627					// two bits are both ones).  Note that two encoded words in
628					// a row get joined together, even if there is a space
629					// between them in the final output text, according to the
630					// standard.  Next word will also be conveniently get
631					// encoded due to the 0xC0 test.
632					currentWord->needsEncoding = true;
633					break;
634				}
635				wordEnd++;
636			}
637		}
638		currentWord->originalWord.SetTo (source, wordEnd);
639		currentWord->ConvertWordToCharset (charset);
640		words.AddItem(currentWord);
641		source += wordEnd;
642	}
643
644	// Combine adjacent words which contain unprintable text so that the
645	// overhead of switching back and forth between regular text and specially
646	// encoded text is reduced.  However, the combined word must be shorter
647	// than the maximum of 75 bytes, including character set specification and
648	// all those delimiters (worst case 22 bytes of overhead).
649
650	struct word *run;
651
652	for (int32 i = 0; (currentWord = (struct word *) words.ItemAt (i)) != NULL; i++) {
653		if (!currentWord->needsEncoding)
654			continue; // No need to combine unencoded words.
655		for (int32 g = i+1; (run = (struct word *) words.ItemAt (g)) != NULL; g++) {
656			if (!run->needsEncoding)
657				break; // Don't want to combine encoded and unencoded words.
658			if ((currentWord->convertedWord.Length() + run->convertedWord.Length() <= 53)) {
659				currentWord->originalWord.Append (run->originalWord);
660				currentWord->ConvertWordToCharset (charset);
661				words.RemoveItem(g);
662				delete run;
663				g--;
664			} else // Can't merge this word, result would be too long.
665				break;
666		}
667	}
668
669	// Combine the encoded and unencoded words into one line, doing the
670	// quoted-printable or base64 encoding.  Insert an extra space between
671	// words which are both encoded to make word wrapping easier, since there
672	// is normally none, and you're allowed to insert space (the receiver
673	// throws it away if it is between encoded words).
674
675	BString rfc2047;
676	bool	previousWordNeededEncoding = false;
677
678	const char *charset_dec = "none-bug";
679	for (int32 i = 0; mail_charsets[i].charset != NULL; i++) {
680		if (mail_charsets[i].flavor == charset) {
681			charset_dec = mail_charsets[i].charset;
682			break;
683		}
684	}
685
686	while ((currentWord = (struct word *)words.RemoveItem((int32)0)) != NULL) {
687		if ((encoding != quoted_printable && encoding != base64) ||
688		!currentWord->needsEncoding) {
689			rfc2047.Append (currentWord->convertedWord);
690		} else {
691			// This word needs encoding.  Try to insert a space between it and
692			// the previous word.
693			if (previousWordNeededEncoding)
694				rfc2047 << ' '; // Can insert as many spaces as you want between encoded words.
695			else {
696				// Previous word is not encoded, spaces are significant.  Try
697				// to move a space from the start of this word to be outside of
698				// the encoded text, so that there is a bit of space between
699				// this word and the previous one to enhance word wrapping
700				// chances later on.
701				if (currentWord->originalWord.Length() > 1 &&
702					isspace (currentWord->originalWord[0])) {
703					rfc2047 << currentWord->originalWord[0];
704					currentWord->originalWord.Remove (0 /* offset */, 1 /* length */);
705					currentWord->ConvertWordToCharset (charset);
706				}
707			}
708
709			char *encoded = NULL;
710			ssize_t encoded_len = 0;
711			int32 convertedLength = currentWord->convertedWord.Length ();
712			const char *convertedBuffer = currentWord->convertedWord.String ();
713
714			switch (encoding) {
715				case quoted_printable:
716					encoded = (char *) malloc (convertedLength * 3);
717					encoded_len = encode_qp (encoded, convertedBuffer, convertedLength, true /* headerMode */);
718					break;
719				case base64:
720					encoded = (char *) malloc (convertedLength * 2);
721					encoded_len = encode_base64 (encoded, convertedBuffer, convertedLength, true /* headerMode */);
722					break;
723				default: // Unknown encoding type, shouldn't happen.
724					encoded = (char *) convertedBuffer;
725					encoded_len = convertedLength;
726					break;
727			}
728
729			rfc2047 << "=?" << charset_dec << '?' << encoding << '?';
730			rfc2047.Append (encoded, encoded_len);
731			rfc2047 << "?=";
732
733			if (encoding == quoted_printable || encoding == base64)
734				free(encoded);
735		}
736		previousWordNeededEncoding = currentWord->needsEncoding;
737		delete currentWord;
738	}
739
740	free(*bufp);
741
742	ssize_t finalLength = rfc2047.Length ();
743	*bufp = (char *) (malloc (finalLength + 1));
744	memcpy (*bufp, rfc2047.String(), finalLength);
745	(*bufp)[finalLength] = 0;
746
747	return finalLength;
748}
749
750
751void
752FoldLineAtWhiteSpaceAndAddCRLF(BString &string)
753{
754	int inputLength = string.Length();
755	int lineStartIndex;
756	const int maxLineLength = 78; // Doesn't include CRLF.
757	BString output;
758	int splitIndex;
759	int tempIndex;
760
761	lineStartIndex = 0;
762	while (true) {
763		// If we don't need to wrap the text, just output the remainder, if any.
764
765		if (lineStartIndex + maxLineLength >= inputLength) {
766			if (lineStartIndex < inputLength) {
767				output.Insert (string, lineStartIndex /* source offset */,
768					inputLength - lineStartIndex /* count */,
769					output.Length() /* insert at */);
770				output.Append (CRLF);
771			}
772			break;
773		}
774
775		// Look ahead for a convenient spot to split it, between a comma and
776		// space, which you often see between e-mail addresses like this:
777		// "Joe Who" joe@dot.com, "Someone Else" else@blot.com
778
779		tempIndex = lineStartIndex + maxLineLength;
780		if (tempIndex > inputLength)
781			tempIndex = inputLength;
782		splitIndex = string.FindLast (", ", tempIndex);
783		if (splitIndex >= lineStartIndex)
784			splitIndex++; // Point to the space character.
785
786		// If none of those exist, try splitting at any white space.
787
788		if (splitIndex <= lineStartIndex)
789			splitIndex = string.FindLast (" ", tempIndex);
790		if (splitIndex <= lineStartIndex)
791			splitIndex = string.FindLast ("\t", tempIndex);
792
793		// If none of those exist, allow for a longer word - split at the next
794		// available white space.
795
796		if (splitIndex <= lineStartIndex)
797			splitIndex = string.FindFirst (" ", lineStartIndex + 1);
798		if (splitIndex <= lineStartIndex)
799			splitIndex = string.FindFirst ("\t", lineStartIndex + 1);
800
801		// Give up, the whole rest of the line can't be split, just dump it
802		// out.
803
804		if (splitIndex <= lineStartIndex) {
805			if (lineStartIndex < inputLength) {
806				output.Insert (string, lineStartIndex /* source offset */,
807					inputLength - lineStartIndex /* count */,
808					output.Length() /* insert at */);
809				output.Append (CRLF);
810			}
811			break;
812		}
813
814		// Do the split.  The current line up to but not including the space
815		// gets output, followed by a CRLF.  The space remains to become the
816		// start of the next line (and that tells the message reader that it is
817		// a continuation line).
818
819		output.Insert (string, lineStartIndex /* source offset */,
820			splitIndex - lineStartIndex /* count */,
821			output.Length() /* insert at */);
822		output.Append (CRLF);
823		lineStartIndex = splitIndex;
824	}
825	string.SetTo (output);
826}
827
828
// Reads one header line from a stdio stream, joining folded continuation
// lines (lines which start with a space or tab) into a single line.
//
// buffer  in/out, may be NULL: a malloc()ed buffer to use; it is grown with
//         realloc() in steps of 64 bytes as needed and handed back.  If the
//         pointer argument is NULL the line is read and thrown away.
// buflen  in/out, may be NULL: the allocated size of the buffer.
//
// CRLF line ends are turned into LF, and the line break in front of a
// continuation line is replaced by that line's leading white space
// character.  The returned line ends with a LF (one is added at the end of
// the file if missing) and is NUL terminated.  An empty line is returned as
// such, even if the following line starts with white space.
//
// Returns the length of the line (0 at the end of the file), ENOMEM if the
// buffer could not be grown, or a negative value after a read error.
ssize_t
readfoldedline(FILE *file, char **buffer, size_t *buflen)
{
	ssize_t capacity = buflen != NULL ? *buflen : 0;
	char *line = buffer != NULL ? *buffer : NULL;
	ssize_t used = 0; // Number of characters currently in the buffer.

	for (;;) {
		// Keep room for two more characters: the next one read, and the NUL
		// byte at the end of the string.
		if (line == NULL || used + 2 >= capacity) {
			char *grown = (char *)realloc(line, capacity + 64);
			if (grown == NULL) {
				// Out of memory; the existing buffer remains allocated.
				used = ENOMEM;
				break;
			}
			line = grown;
			capacity += 64;
		}

		int ch = fgetc(file);
		if (ch == EOF) {
			if (ferror(file)) {
				// IO error.  Error codes must be negative.
				used = errno;
				if (used >= 0)
					used = -1;
			} else if (used > 0) {
				// End of file in the middle of a line: make it the end of
				// the line as well.  A trailing CR is turned into the LF,
				// otherwise the LF is appended.  (If nothing was read at
				// all, an empty string is returned.)
				if (line[used - 1] == '\r')
					line[used - 1] = '\n';
				else
					line[used++] = '\n';
			}
			break;
		}

		if (ch != '\n') {
			line[used++] = ch;
			continue;
		}

		// End of line.  A CRLF is stored as a single LF: the LF takes the
		// place of the CR.  This is done before looking at folding, in case
		// there is nothing to fold.
		if (used >= 1 && line[used - 1] == '\r')
			line[used - 1] = '\n';
		else
			line[used++] = '\n';

		// An empty line is returned as it is, so that empty lines don't
		// disappear if the next line starts with a space.
		if (used <= 1)
			break;

		// Fold if the first character of the next line is white space.
		int next = fgetc(file);
		if (next != ' ' && next != '\t') {
			// Not folding, the line is complete.  Put the character back;
			// it's OK to do that with EOF, too.
			ungetc(next, file);
			break;
		}

		// Replace the LF with the white space character and read on.
		line[used - 1] = next;
	}

	if (line != NULL && used >= 0)
		line[used] = '\0';

	if (buffer != NULL)
		*buffer = line;
	else if (line != NULL)
		free(line);

	if (buflen != NULL)
		*buflen = capacity;

	return used;
}
910
911
912ssize_t
913readfoldedline(BPositionIO &in, char **buffer, size_t *buflen)
914{
915	ssize_t len = buflen && *buflen ? *buflen : 0;
916	char * buf = buffer && *buffer ? *buffer : NULL;
917	ssize_t cnt = 0; // Number of characters currently in the buffer.
918	char c;
919	status_t errorCode;
920
921	while (true) {
922		// Make sure there is space in the buffer for two more characters (one
923		// for the next character, and one for the end of string NUL byte).
924		if (buf == NULL || cnt + 2 >= len) {
925			char *temp = (char *)realloc(buf, len + 64);
926			if (temp == NULL) {
927				// Out of memory, however existing buffer remains allocated.
928				cnt = ENOMEM;
929				break;
930			}
931			len += 64;
932			buf = temp;
933		}
934
935		errorCode = in.Read (&c,1); // A really slow way of reading - unbuffered.
936		if (errorCode != 1) {
937			if (errorCode < 0) {
938				cnt = errorCode; // IO error encountered, just return the code.
939			} else {
940				// Really is end of file.  Also make it end of line if there is
941				// some text already read in.  If the first thing read was EOF,
942				// just return an empty string.
943				if (cnt > 0) {
944					buf[cnt++] = '\n';
945					if (buf[cnt-2] == '\r') {
946						buf[cnt-2] = '\n';
947						--cnt;
948					}
949				}
950			}
951			break;
952		}
953
954		buf[cnt++] = c;
955
956		if (c == '\n') {
957			// Convert CRLF end of line to just a LF.  Do it before folding, in
958			// case we don't need to fold.
959			if (cnt >= 2 && buf[cnt-2] == '\r') {
960				buf[cnt-2] = '\n';
961				--cnt;
962			}
963			// If the current line is empty then return it (so that empty lines
964			// don't disappear if the next line starts with a space).
965			if (cnt <= 1)
966				break;
967			// if first character on the next line is whitespace, fold lines
968			errorCode = in.Read(&c,1);
969			if (errorCode == 1) {
970				if (c == ' ' || c == '\t')
971					buf[cnt-1] = c; // Replace \n with the white space character.
972				else {
973					// Not folding, we finished reading a whole line.
974					in.Seek(-1,SEEK_CUR); // Undo the look-ahead character read.
975					break;
976				}
977			} else if (errorCode < 0) {
978				cnt = errorCode;
979				break;
980			} else // No next line; at the end of the file.  Return the line.
981				break;
982		}
983	}
984
985	if (buf != NULL && cnt >= 0)
986		buf[cnt] = '\0';
987
988	if (buffer)
989		*buffer = buf;
990	else if (buf)
991		free(buf);
992
993	if (buflen)
994		*buflen = len;
995
996	return cnt;
997}
998
999
/*!	Extracts the next logical (unfolded) header line from the NUL terminated
	text \a *header points to, and advances \a *header behind the consumed
	characters.

	A CRLF line end is reduced to a single LF, and a line break followed by a
	space or tab is treated as folding: the break is replaced by that white
	space character. An empty line is returned as is.

	The cursor is never moved past the terminating NUL byte, so the function
	can safely be called again after the end of the text was reached (it
	then returns 0 and an empty string).

	\param header In/out read cursor into the header text.
	\param buffer In/out line buffer. It is grown with realloc(), so it has to
		be NULL or heap allocated. If \a buffer itself is NULL the internal
		buffer is freed before returning.
	\param buflen In/out size of \a buffer.
	\return The number of characters stored (the terminating NUL byte not
		counted), 0 at the end of the text, or ENOMEM if the buffer could not
		be enlarged.
*/
ssize_t
nextfoldedline(const char** header, char **buffer, size_t *buflen)
{
	ssize_t len = buflen && *buflen ? *buflen : 0;
	char * buf = buffer && *buffer ? *buffer : NULL;
	ssize_t cnt = 0; // Number of characters currently in the buffer.
	char c;

	while (true) {
		// Make sure there is space in the buffer for two more characters (one
		// for the next character, and one for the end of string NUL byte).
		if (buf == NULL || cnt + 2 >= len) {
			char *temp = (char *)realloc(buf, len + 64);
			if (temp == NULL) {
				// Out of memory, however existing buffer remains allocated.
				cnt = ENOMEM;
				break;
			}
			len += 64;
			buf = temp;
		}

		// Peek at the next character; only consume it if it is not the
		// terminating NUL, so that the cursor never leaves the string.
		c = **header;
		if (c == 0) {
			// End of text.  Also make it end of line if there is some text
			// already read in.  If the first thing read was the end, just
			// return an empty string.
			if (cnt > 0) {
				buf[cnt++] = '\n';
				if (buf[cnt-2] == '\r') {
					buf[cnt-2] = '\n';
					--cnt;
				}
			}
			break;
		}
		(*header)++;

		buf[cnt++] = c;

		if (c == '\n') {
			// Convert CRLF end of line to just a LF.  Do it before folding, in
			// case we don't need to fold.
			if (cnt >= 2 && buf[cnt-2] == '\r') {
				buf[cnt-2] = '\n';
				--cnt;
			}
			// If the current line is empty then return it (so that empty lines
			// don't disappear if the next line starts with a space).
			if (cnt <= 1)
				break;

			// If the first character on the next line is white space, fold
			// the lines.  Anything else (including the end of the text) is
			// left unconsumed for the next call.
			c = **header;
			if (c != ' ' && c != '\t')
				break;

			(*header)++;
			buf[cnt-1] = c; // Replace \n with the white space character.
		}
	}

	if (buf != NULL && cnt >= 0)
		buf[cnt] = '\0';

	if (buffer)
		*buffer = buf;
	else if (buf)
		free(buf);

	if (buflen)
		*buflen = len;

	return cnt;
}
1078
1079
1080void
1081trim_white_space(BString &string)
1082{
1083	int32 i;
1084	int32 length = string.Length();
1085	char *buffer = string.LockBuffer(length + 1);
1086
1087	while (length > 0 && isspace(buffer[length - 1]))
1088		length--;
1089	buffer[length] = '\0';
1090
1091	for (i = 0; buffer[i] && isspace(buffer[i]); i++) {}
1092	if (i != 0) {
1093		length -= i;
1094		memmove(buffer,buffer + i,length + 1);
1095	}
1096	string.UnlockBuffer(length);
1097}
1098
1099
1100/*!	Tries to return a human-readable name from the specified
1101	header parameter (should be from "To:" or "From:").
1102	Tries to return the name rather than the eMail address.
1103*/
1104void
1105extract_address_name(BString &header)
1106{
1107	BString name;
1108	const char *start = header.String();
1109	const char *stop = start + strlen (start);
1110
1111	// Find a string S in the header (email foo) that matches:
1112	//   Old style name in brackets: foo@bar.com (S)
1113	//   New style quotes: "S" <foo@bar.com>
1114	//   New style no quotes if nothing else found: S <foo@bar.com>
1115	//   If nothing else found then use the whole thing: S
1116
1117	for (int i = 0; i <= 3; i++) {
1118		// Set p1 to the first letter in the name and p2 to just past the last
1119		// letter in the name.  p2 stays NULL if a name wasn't found in this
1120		// pass.
1121		const char *p1 = NULL, *p2 = NULL;
1122
1123		switch (i) {
1124			case 0: // foo@bar.com (S)
1125				if ((p1 = strchr(start,'(')) != NULL) {
1126					p1++; // Advance to first letter in the name.
1127					size_t nest = 1; // Handle nested brackets.
1128					for (p2 = p1; p2 < stop; ++p2)
1129					{
1130						if (*p2 == ')')
1131							--nest;
1132						else if (*p2 == '(')
1133							++nest;
1134						if (nest <= 0)
1135							break;
1136					}
1137					if (nest != 0)
1138						p2 = NULL; // False alarm, no terminating bracket.
1139				}
1140				break;
1141			case 1: // "S" <foo@bar.com>
1142				if ((p1 = strchr(start, '\"')) != NULL)
1143					p2 = strchr(++p1, '\"');
1144				break;
1145			case 2: // S <foo@bar.com>
1146				p1 = start;
1147				if (name.Length() == 0)
1148					p2 = strchr(start, '<');
1149				break;
1150			case 3: // S
1151				p1 = start;
1152				if (name.Length() == 0)
1153					p2 = stop;
1154				break;
1155		}
1156
1157		// Remove leading and trailing space-like characters and save the
1158		// result if it is longer than any other likely names found.
1159		if (p2 != NULL) {
1160			while (p1 < p2 && (isspace (*p1)))
1161				++p1;
1162
1163			while (p1 < p2 && (isspace (p2[-1])))
1164				--p2;
1165
1166			int newLength = p2 - p1;
1167			if (name.Length() < newLength)
1168				name.SetTo(p1, newLength);
1169		}
1170	}
1171
1172	int32 lessIndex = name.FindFirst('<');
1173	int32 greaterIndex = name.FindLast('>');
1174
1175	if (lessIndex == 0) {
1176		// Have an address of the form <address> and nothing else, so remove
1177		// the greater and less than signs, if any.
1178		if (greaterIndex > 0)
1179			name.Remove(greaterIndex, 1);
1180		name.Remove(lessIndex, 1);
1181	} else if (lessIndex > 0 && lessIndex < greaterIndex) {
1182		// Yahoo stupidly inserts the e-mail address into the name string, so
1183		// this bit of code fixes: "Joe <joe@yahoo.com>" <joe@yahoo.com>
1184		name.Remove(lessIndex, greaterIndex - lessIndex + 1);
1185	}
1186
1187	trim_white_space(name);
1188	header = name;
1189}
1190
1191
1192/*!	Given a subject in a BString, remove the extraneous RE: re: and other stuff
1193	to get down to the core subject string, which should be identical for all
1194	messages posted about a topic.  The input string is modified in place to
1195	become the output core subject string.
1196*/
1197void
1198SubjectToThread (BString &string)
1199{
1200// a regex that matches a non-ASCII UTF8 character:
1201#define U8C \
1202	"[\302-\337][\200-\277]" \
1203	"|\340[\302-\337][\200-\277]" \
1204	"|[\341-\357][\200-\277][\200-\277]" \
1205	"|\360[\220-\277][\200-\277][\200-\277]" \
1206	"|[\361-\367][\200-\277][\200-\277][\200-\277]" \
1207	"|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \
1208	"|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \
1209	"|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \
1210	"|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]"
1211
1212#define PATTERN \
1213	"^ +" \
1214	"|^(\\[[^]]*\\])(\\<|  +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
1215	"|^(  +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
1216	"| *\\(fwd\\) *$"
1217
1218	if (gRebuf == NULL && atomic_add(&gLocker, 1) == 0) {
1219		// the idea is to compile the regexp once to speed up testing
1220
1221		for (int i=0; i<256; ++i) gTranslation[i]=i;
1222		for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i);
1223
1224		gRe.translate = gTranslation;
1225		gRe.regs_allocated = REGS_FIXED;
1226		re_syntax_options = RE_SYNTAX_POSIX_EXTENDED;
1227
1228		const char *pattern = PATTERN;
1229		// count subexpressions in PATTERN
1230		for (unsigned int i=0; pattern[i] != 0; ++i)
1231		{
1232			if (pattern[i] == '\\')
1233				++i;
1234			else if (pattern[i] == '(')
1235				++gNsub;
1236		}
1237
1238		const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe);
1239		if (err == NULL)
1240			gRebuf = &gRe;
1241		else
1242			fprintf(stderr, "Failed to compile the regex: %s\n", err);
1243	} else {
1244		int32 tries = 200;
1245		while (gRebuf == NULL && tries-- > 0)
1246			snooze(10000);
1247	}
1248
1249	if (gRebuf) {
1250		struct re_registers regs;
1251		// can't be static if this function is to be thread-safe
1252
1253		regs.num_regs = gNsub;
1254		regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t));
1255		regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t));
1256
1257		for (int start = 0; (start = re_search(gRebuf, string.String(),
1258				string.Length(), 0, string.Length(), &regs)) >= 0;) {
1259			//
1260			// we found something
1261			//
1262
1263			// don't delete [bemaildaemon]...
1264			if (start == regs.start[1])
1265				start = regs.start[2];
1266
1267			string.Remove(start,regs.end[0]-start);
1268			if (start)
1269				string.Insert(' ',1,start);
1270
1271			// TODO: for some subjects this results in an endless loop, check
1272			// why this happen.
1273			if (regs.end[0] - start <= 1)
1274				break;
1275		}
1276
1277		free(regs.start);
1278		free(regs.end);
1279	}
1280
1281	// Finally remove leading and trailing space.  Some software, like
1282	// tm-edit 1.8, appends a space to the subject, which would break
1283	// threading if we left it in.
1284	trim_white_space(string);
1285}
1286
1287
1288/*!	Converts a date to a time.  Handles numeric time zones too, unlike
1289	parsedate().  Returns -1 if it fails.
1290*/
1291time_t
1292ParseDateWithTimeZone(const char *DateString)
1293{
1294	time_t currentTime;
1295	time_t dateAsTime;
1296	char tempDateString[80];
1297	char tempZoneString[6];
1298	time_t zoneDeltaTime;
1299	int zoneIndex;
1300	char *zonePntr;
1301
1302	// See if we can remove the time zone portion.  parsedate understands time
1303	// zone 3 letter names, but doesn't understand the numeric +9999 time zone
1304	// format.  To do: see if a newer parsedate exists.
1305
1306	strncpy (tempDateString, DateString, sizeof (tempDateString));
1307	tempDateString[sizeof (tempDateString) - 1] = 0;
1308
1309	// Remove trailing spaces.
1310	zonePntr = tempDateString + strlen (tempDateString) - 1;
1311	while (zonePntr >= tempDateString && isspace (*zonePntr))
1312		*zonePntr-- = 0;
1313	if (zonePntr < tempDateString)
1314		return -1; // Empty string.
1315
1316	// Remove the trailing time zone in round brackets, like in
1317	// Fri, 22 Feb 2002 15:22:42 EST (-0500)
1318	// Thu, 25 Apr 1996 11:44:19 -0400 (EDT)
1319	if (tempDateString[strlen(tempDateString)-1] == ')')
1320	{
1321		zonePntr = strrchr (tempDateString, '(');
1322		if (zonePntr != NULL)
1323		{
1324			*zonePntr-- = 0; // Zap the '(', then remove trailing spaces.
1325			while (zonePntr >= tempDateString && isspace (*zonePntr))
1326				*zonePntr-- = 0;
1327			if (zonePntr < tempDateString)
1328				return -1; // Empty string.
1329		}
1330	}
1331
1332	// Look for a numeric time zone like  Tue, 30 Dec 2003 05:01:40 +0000
1333	for (zoneIndex = strlen (tempDateString); zoneIndex >= 0; zoneIndex--)
1334	{
1335		zonePntr = tempDateString + zoneIndex;
1336		if (zonePntr[0] == '+' || zonePntr[0] == '-')
1337		{
1338			if (zonePntr[1] >= '0' && zonePntr[1] <= '9' &&
1339				zonePntr[2] >= '0' && zonePntr[2] <= '9' &&
1340				zonePntr[3] >= '0' && zonePntr[3] <= '9' &&
1341				zonePntr[4] >= '0' && zonePntr[4] <= '9')
1342				break;
1343		}
1344	}
1345	if (zoneIndex >= 0)
1346	{
1347		// Remove the zone from the date string and any following time zone
1348		// letter codes.  Also put in GMT so that the date gets parsed as GMT.
1349		memcpy (tempZoneString, zonePntr, 5);
1350		tempZoneString [5] = 0;
1351		strcpy (zonePntr, "GMT");
1352	}
1353	else // No numeric time zone found.
1354		strcpy (tempZoneString, "+0000");
1355
1356	time (&currentTime);
1357	dateAsTime = parsedate (tempDateString, currentTime);
1358	if (dateAsTime == (time_t) -1)
1359		return -1; // Failure.
1360
1361	zoneDeltaTime = 60 * atol (tempZoneString + 3); // Get the last two digits - minutes.
1362	tempZoneString[3] = 0;
1363	zoneDeltaTime += atol (tempZoneString + 1) * 60 * 60; // Get the first two digits - hours.
1364	if (tempZoneString[0] == '+')
1365		zoneDeltaTime = 0 - zoneDeltaTime;
1366	dateAsTime += zoneDeltaTime;
1367
1368	return dateAsTime;
1369}
1370
1371
1372/*! Parses a mail header and fills the headers BMessage
1373*/
1374status_t
1375parse_header(BMessage &headers, BPositionIO &input)
1376{
1377	char *buffer = NULL;
1378	size_t bufferSize = 0;
1379	int32 length;
1380
1381	while ((length = readfoldedline(input, &buffer, &bufferSize)) >= 2) {
1382		--length;
1383			// Don't include the \n at the end of the buffer.
1384
1385		// convert to UTF-8 and null-terminate the buffer
1386		length = rfc2047_to_utf8(&buffer, &bufferSize, length);
1387		buffer[length] = '\0';
1388
1389		const char *delimiter = strstr(buffer, ":");
1390		if (delimiter == NULL)
1391			continue;
1392
1393		BString header(buffer, delimiter - buffer);
1394		header.CapitalizeEachWord();
1395			// unified case for later fetch
1396
1397		delimiter++; // Skip the colon.
1398		// Skip over leading white space and tabs.
1399		// TODO: (comments in brackets).
1400		while (isspace(*delimiter))
1401			delimiter++;
1402
1403		// TODO: implement joining of multiple header tags (i.e. multiple "Cc:"s)
1404		headers.AddString(header.String(), delimiter);
1405	}
1406	free(buffer);
1407
1408	return B_OK;
1409}
1410
1411
1412status_t
1413extract_from_header(const BString& header, const BString& field,
1414	BString& target)
1415{
1416	int32 headerLength = header.Length();
1417	int32 fieldEndPos = 0;
1418	while (true) {
1419		int32 pos = header.IFindFirst(field, fieldEndPos);
1420		if (pos < 0)
1421			return B_BAD_VALUE;
1422		fieldEndPos = pos + field.Length();
1423
1424		if (pos != 0 && header.ByteAt(pos - 1) != '\n')
1425			continue;
1426		if (header.ByteAt(fieldEndPos) == ':')
1427			break;
1428	}
1429	fieldEndPos++;
1430
1431	int32 crPos = fieldEndPos;
1432	while (true) {
1433		fieldEndPos = crPos;
1434		crPos = header.FindFirst('\n', crPos);
1435		if (crPos < 0)
1436			crPos = headerLength;
1437		BString temp;
1438		header.CopyInto(temp, fieldEndPos, crPos - fieldEndPos);
1439		if (header.ByteAt(crPos - 1) == '\r') {
1440			temp.Truncate(temp.Length() - 1);
1441			temp += " ";
1442		}
1443		target += temp;
1444		crPos++;
1445		if (crPos >= headerLength)
1446			break;
1447		char nextByte = header.ByteAt(crPos);
1448		if (nextByte != ' ' && nextByte != '\t')
1449			break;
1450		crPos++;
1451	}
1452
1453	size_t bufferSize = target.Length();
1454	char* buffer = target.LockBuffer(bufferSize);
1455	size_t length = rfc2047_to_utf8(&buffer, &bufferSize, bufferSize);
1456	target.UnlockBuffer(length);
1457
1458	trim_white_space(target);
1459
1460	return B_OK;
1461}
1462
1463
1464void
1465extract_address(BString &address)
1466{
1467	const char *string = address.String();
1468	int32 first;
1469
1470	// first, remove all quoted text
1471
1472	if ((first = address.FindFirst('"')) >= 0) {
1473		int32 last = first + 1;
1474		while (string[last] && string[last] != '"')
1475			last++;
1476
1477		if (string[last] == '"')
1478			address.Remove(first, last + 1 - first);
1479	}
1480
1481	// try to extract the address now
1482
1483	if ((first = address.FindFirst('<')) >= 0) {
1484		// the world likes us and we can just get the address the easy way...
1485		int32 last = address.FindFirst('>');
1486		if (last >= 0) {
1487			address.Truncate(last);
1488			address.Remove(0, first + 1);
1489
1490			return;
1491		}
1492	}
1493
1494	// then, see if there is anything in parenthesis to throw away
1495
1496	if ((first = address.FindFirst('(')) >= 0) {
1497		int32 last = first + 1;
1498		while (string[last] && string[last] != ')')
1499			last++;
1500
1501		if (string[last] == ')')
1502			address.Remove(first, last + 1 - first);
1503	}
1504
1505	// now, there shouldn't be much else left
1506
1507	trim_white_space(address);
1508}
1509
1510
1511void
1512get_address_list(BList &list, const char *string,
1513	void (*cleanupFunc)(BString &))
1514{
1515	if (string == NULL || !string[0])
1516		return;
1517
1518	const char *start = string;
1519
1520	while (true) {
1521		if (string[0] == '"') {
1522			const char *quoteEnd = ++string;
1523
1524			while (quoteEnd[0] && quoteEnd[0] != '"')
1525				quoteEnd++;
1526
1527			if (!quoteEnd[0])	// string exceeds line!
1528				quoteEnd = string;
1529
1530			string = quoteEnd + 1;
1531		}
1532
1533		if (string[0] == ',' || string[0] == '\0') {
1534			BString address(start, string - start);
1535			trim_white_space(address);
1536
1537			if (cleanupFunc)
1538				cleanupFunc(address);
1539
1540			list.AddItem(strdup(address.String()));
1541
1542			start = string + 1;
1543		}
1544
1545		if (!string[0])
1546			break;
1547
1548		string++;
1549	}
1550}
1551
1552
1553status_t
1554CopyMailFolderAttributes(const char* targetPath)
1555{
1556	BPath path;
1557	status_t status = find_directory(B_USER_SETTINGS_DIRECTORY, &path);
1558	if (status != B_OK)
1559		return status;
1560
1561	path.Append("Tracker");
1562	path.Append("DefaultQueryTemplates");
1563	path.Append("text_x-email");
1564
1565	BNode source(path.Path());
1566	BNode target(targetPath);
1567	return BPrivate::CopyAttributes(source, target);
1568}
1569