1/* (Text)Component - message component base class and plain text
2**
3** Copyright 2001 Dr. Zoidberg Enterprises. All rights reserved.
4*/
5
6
7#include <String.h>
8#include <Mime.h>
9
10#include <ctype.h>
11#include <stdlib.h>
12#include <strings.h>
13
14class _EXPORT BMailComponent;
15class _EXPORT BTextMailComponent;
16
17#include <MailComponent.h>
18#include <MailAttachment.h>
19#include <MailContainer.h>
20#include <mail_util.h>
21
22#include <CharacterSet.h>
23#include <CharacterSetRoster.h>
24
25using namespace BPrivate ;
26
// One entry of the mail character set table: pairs a character set name
// with the numeric conversion constant ("flavor") that identifies it.
struct CharsetConversionEntry
{
	const char* charset;	// character set name; NULL marks the end of the table
	uint32 flavor;			// conversion constant compared against a component's charset
};

// Table of character sets known to the mail kit, defined elsewhere.  It is
// scanned in this file until an entry with a NULL charset name is reached.
extern const CharsetConversionEntry mail_charsets[];
34
35
// Special field names stored in the header BMessage which specify the
// character set (int32) and encoding (int8) to use when converting the
// headers from UTF-8 to the output e-mail format (rfc2047).  Since they are
// stored as numbers, not strings, these extra fields won't be written out
// along with the real headers.
const char* kHeaderCharsetString = "header-charset";
const char* kHeaderEncodingString = "header-encoding";
42
43
// Creates a component with no headers.  defaultCharSet is remembered as the
// character set to use when decoding text; it is also handed on to the
// components created by WhatIsThis().
BMailComponent::BMailComponent(uint32 defaultCharSet)
	: _charSetForTextDecoding (defaultCharSet)
{
}
48
49
// Nothing to release explicitly; members clean up after themselves.
BMailComponent::~BMailComponent()
{
}
53
54
55uint32
56BMailComponent::ComponentType()
57{
58	if (NULL != dynamic_cast<BAttributedMailAttachment*> (this))
59		return B_MAIL_ATTRIBUTED_ATTACHMENT;
60
61	BMimeType type, super;
62	MIMEType(&type);
63	type.GetSupertype(&super);
64
65	//---------ATT-This code *desperately* needs to be improved
66	if (super == "multipart") {
67		if (type == "multipart/x-bfile") // Not likely, they have the MIME
68			return B_MAIL_ATTRIBUTED_ATTACHMENT; // of their data contents.
69		else
70			return B_MAIL_MULTIPART_CONTAINER;
71	} else if (!IsAttachment() && (super == "text" || type.Type() == NULL))
72		return B_MAIL_PLAIN_TEXT_BODY;
73	else
74		return B_MAIL_SIMPLE_ATTACHMENT;
75}
76
77
78BMailComponent*
79BMailComponent::WhatIsThis()
80{
81	switch (ComponentType()) {
82		case B_MAIL_SIMPLE_ATTACHMENT:
83			return new BSimpleMailAttachment;
84		case B_MAIL_ATTRIBUTED_ATTACHMENT:
85			return new BAttributedMailAttachment;
86		case B_MAIL_MULTIPART_CONTAINER:
87			return new BMIMEMultipartMailContainer (NULL, NULL, _charSetForTextDecoding);
88		case B_MAIL_PLAIN_TEXT_BODY:
89		default:
90			return new BTextMailComponent (NULL, _charSetForTextDecoding);
91	}
92}
93
94
95bool
96BMailComponent::IsAttachment()
97{
98	const char* disposition = HeaderField("Content-Disposition");
99	if ((disposition != NULL)
100		&& (strncasecmp(disposition, "Attachment", strlen("Attachment")) == 0))
101		return true;
102
103	BMessage header;
104	HeaderField("Content-Type", &header);
105	if (header.HasString("name"))
106		return true;
107
108	if (HeaderField("Content-Location", &header) == B_OK)
109		return true;
110
111	BMimeType type;
112	MIMEType(&type);
113	if (type == "multipart/x-bfile")
114		return true;
115
116	return false;
117}
118
119
120void
121BMailComponent::SetHeaderField(const char* key, const char* value,
122	uint32 charset, mail_encoding encoding, bool replace_existing)
123{
124	if (replace_existing)
125		headers.RemoveName(key);
126	if (value != NULL && value[0] != 0) // Empty or NULL strings mean delete header.
127		headers.AddString(key, value);
128
129	// Latest setting of the character set and encoding to use when outputting
130	// the headers is the one which affects all the headers.  There used to be
131	// separate settings for each item in the headers, but it never actually
132	// worked (can't store multiple items of different types in a BMessage).
133	if (charset != B_MAIL_NULL_CONVERSION
134		&& headers.ReplaceInt32 (kHeaderCharsetString, charset) != B_OK)
135		headers.AddInt32(kHeaderCharsetString, charset);
136	if (encoding != null_encoding
137		&& headers.ReplaceInt8 (kHeaderEncodingString, encoding) != B_OK)
138		headers.AddInt8(kHeaderEncodingString, encoding);
139}
140
141
142void
143BMailComponent::SetHeaderField(const char* key, BMessage* structure,
144	bool replace_existing)
145{
146	int32 charset = B_MAIL_NULL_CONVERSION;
147	int8 encoding = null_encoding;
148	const char* unlabeled = "unlabeled";
149
150	if (replace_existing)
151		headers.RemoveName(key);
152
153	BString value;
154	if (structure->HasString(unlabeled))
155		value << structure->FindString(unlabeled) << "; ";
156
157	const char* name;
158	const char* sub_val;
159	type_code type;
160	for (int32 i = 0; structure->GetInfo(B_STRING_TYPE, i,
161#if !defined(HAIKU_TARGET_PLATFORM_DANO)
162		(char**)
163#endif
164		&name, &type) == B_OK; i++) {
165
166		if (strcasecmp(name, unlabeled) == 0)
167			continue;
168
169		structure->FindString(name, &sub_val);
170		value << name << '=';
171		if (BString(sub_val).FindFirst(' ') > 0)
172			value << '\"' << sub_val << "\"; ";
173		else
174			value << sub_val << "; ";
175	}
176
177	value.Truncate(value.Length() - 2); //-----Remove the last "; "
178
179	if (structure->HasInt32(kHeaderCharsetString))
180		structure->FindInt32(kHeaderCharsetString, &charset);
181	if (structure->HasInt8(kHeaderEncodingString))
182		structure->FindInt8(kHeaderEncodingString, &encoding);
183
184	SetHeaderField(key, value.String(), (uint32) charset, (mail_encoding) encoding);
185}
186
187
188const char*
189BMailComponent::HeaderField(const char* key, int32 index) const
190{
191	const char* string = NULL;
192
193	headers.FindString(key, index, &string);
194	return string;
195}
196
197
198status_t
199BMailComponent::HeaderField(const char* key, BMessage* structure,
200	int32 index) const
201{
202	BString string = HeaderField(key, index);
203	if (string == "")
204		return B_NAME_NOT_FOUND;
205
206	BString sub_cat;
207	BString end_piece;
208	int32 i = 0;
209	int32 end = 0;
210
211	// Break the header into parts, they're separated by semicolons, like this:
212	// Content-Type: multipart/mixed;boundary= "----=_NextPart_000_00AA_354DB459.5977A1CA"
213	// There's also white space and quotes to be removed, and even comments in
214	// parenthesis like this, which can appear anywhere white space is: (header comment)
215
216	while (end < string.Length()) {
217		end = string.FindFirst(';', i);
218		if (end < 0)
219			end = string.Length();
220
221		string.CopyInto(sub_cat, i, end - i);
222		i = end + 1;
223
224		//-------Trim spaces off of beginning and end of text
225		for (int32 h = 0; h < sub_cat.Length(); h++) {
226			if (!isspace(sub_cat.ByteAt(h))) {
227				sub_cat.Remove(0, h);
228				break;
229			}
230		}
231		for (int32 h = sub_cat.Length() - 1; h >= 0; h--) {
232			if (!isspace(sub_cat.ByteAt(h))) {
233				sub_cat.Truncate(h + 1);
234				break;
235			}
236		}
237		//--------Split along '='
238		int32 first_equal = sub_cat.FindFirst('=');
239		if (first_equal >= 0) {
240			sub_cat.CopyInto(end_piece, first_equal + 1, sub_cat.Length() - first_equal - 1);
241			sub_cat.Truncate(first_equal);
242			// Remove leading spaces from part after the equals sign.
243			while (isspace (end_piece.ByteAt(0)))
244				end_piece.Remove (0 /* index */, 1 /* number of chars */);
245			// Remove quote marks.
246			if (end_piece.ByteAt(0) == '\"') {
247				end_piece.Remove(0, 1);
248				end_piece.Truncate(end_piece.Length() - 1);
249			}
250			sub_cat.ToLower();
251			structure->AddString(sub_cat.String(), end_piece.String());
252		} else {
253			structure->AddString("unlabeled", sub_cat.String());
254		}
255	}
256
257	return B_OK;
258}
259
260
// Removes every value stored under key from the headers and returns the
// status reported by the underlying BMessage::RemoveName() call.
status_t
BMailComponent::RemoveHeader(const char* key)
{
	return headers.RemoveName(key);
}
266
267
268const char*
269BMailComponent::HeaderAt(int32 index) const
270{
271#if defined(HAIKU_TARGET_PLATFORM_DANO)
272	const
273#endif
274	char* name = NULL;
275	type_code type;
276
277	headers.GetInfo(B_STRING_TYPE, index, &name, &type);
278	return name;
279}
280
281
// Base class implementation: writes nothing and reports success.
status_t
BMailComponent::GetDecodedData(BPositionIO*)
{
	return B_OK;
}
287
288
// Base class implementation: ignores the data and reports success.
status_t
BMailComponent::SetDecodedData(BPositionIO*)
{
	return B_OK;
}
294
295
296status_t
297BMailComponent::SetToRFC822(BPositionIO* data, size_t /*length*/, bool /*parse_now*/)
298{
299	headers.MakeEmpty();
300
301	// Only parse the header here
302	return parse_header(headers, *data);
303}
304
305
306status_t
307BMailComponent::RenderToRFC822(BPositionIO* render_to)
308{
309	int32 charset = B_ISO15_CONVERSION;
310	int8 encoding = quoted_printable;
311	const char* key;
312	const char* value;
313	char* allocd;
314	ssize_t amountWritten;
315	BString concat;
316	type_code stupidity_personified = B_STRING_TYPE;
317	int32 count = 0;
318
319	if (headers.HasInt32(kHeaderCharsetString))
320		headers.FindInt32(kHeaderCharsetString, &charset);
321	if (headers.HasInt8(kHeaderEncodingString))
322		headers.FindInt8(kHeaderEncodingString, &encoding);
323
324	for (int32 index = 0; headers.GetInfo(B_STRING_TYPE, index,
325#if !defined(HAIKU_TARGET_PLATFORM_DANO)
326			(char**)
327#endif
328			&key, &stupidity_personified, &count) == B_OK; index++) {
329		for (int32 g = 0; g < count; g++) {
330			headers.FindString(key, g, (const char**)&value);
331			allocd = (char*)malloc(strlen(value) + 1);
332			strcpy(allocd, value);
333
334			concat << key << ": ";
335			concat.CapitalizeEachWord();
336
337			concat.Append(allocd, utf8_to_rfc2047(&allocd, strlen(value),
338				charset, encoding));
339			free(allocd);
340			FoldLineAtWhiteSpaceAndAddCRLF(concat);
341
342			amountWritten = render_to->Write(concat.String(), concat.Length());
343			if (amountWritten < 0)
344				return amountWritten; // IO error happened, usually disk full.
345			concat = "";
346		}
347	}
348
349	render_to->Write("\r\n", 2);
350
351	return B_OK;
352}
353
354
355status_t
356BMailComponent::MIMEType(BMimeType* mime)
357{
358	bool foundBestHeader;
359	const char* boundaryString;
360	unsigned int i;
361	BMessage msg;
362	const char* typeAsString = NULL;
363	char typeAsLowerCaseString[B_MIME_TYPE_LENGTH];
364
365	// Find the best Content-Type header to use.  There should really be just
366	// one, but evil spammers sneakily insert one for multipart (with no
367	// boundary string), then one for text/plain.  We'll scan through them and
368	// only use the multipart one if there are no others, and it has a
369	// boundary.
370
371	foundBestHeader = false;
372	for (i = 0; msg.MakeEmpty(), HeaderField("Content-Type", &msg, i) == B_OK; i++) {
373		typeAsString = msg.FindString("unlabeled");
374		if (typeAsString != NULL && strncasecmp(typeAsString, "multipart", 9) != 0) {
375			foundBestHeader = true;
376			break;
377		}
378	}
379	if (!foundBestHeader) {
380		for (i = 0; msg.MakeEmpty(), HeaderField("Content-Type", &msg, i) == B_OK; i++) {
381			typeAsString = msg.FindString("unlabeled");
382			if (typeAsString != NULL && strncasecmp(typeAsString, "multipart", 9) == 0) {
383				boundaryString = msg.FindString("boundary");
384				if (boundaryString != NULL && strlen(boundaryString) > 0) {
385					foundBestHeader = true;
386					break;
387				}
388			}
389		}
390	}
391	// At this point we have the good MIME type in typeAsString, but only if
392	// foundBestHeader is true.
393
394	if (!foundBestHeader) {
395		strcpy(typeAsLowerCaseString, "text/plain"); // Hope this is an OK default.
396	} else {
397		// Some extra processing to convert mixed or upper case MIME types into
398		// lower case, since the BeOS R5 BMimeType is case sensitive (but OpenBeOS
399		// isn't).  Also truncate the string if it is too long.
400		for (i = 0; i < sizeof(typeAsLowerCaseString) - 1
401			&& typeAsString[i] != 0; i++)
402			typeAsLowerCaseString[i] = tolower(typeAsString[i]);
403		typeAsLowerCaseString[i] = 0;
404
405		// Some old e-mail programs saved the type as just "TEXT", which we need to
406		// convert to "text/plain" since the rest of the code looks for that.
407		if (strcmp(typeAsLowerCaseString, "text") == 0)
408			strcpy(typeAsLowerCaseString, "text/plain");
409	}
410	mime->SetTo(typeAsLowerCaseString);
411	return B_OK;
412}
413
414
// Empty placeholder methods; they intentionally do nothing.
void BMailComponent::_ReservedComponent1() {}
void BMailComponent::_ReservedComponent2() {}
void BMailComponent::_ReservedComponent3() {}
void BMailComponent::_ReservedComponent4() {}
void BMailComponent::_ReservedComponent5() {}
420
421
422//-------------------------------------------------------------------------
423//	#pragma mark -
424
425
// Creates a text component, optionally initialized with text (NULL means no
// initial text).  The component starts out with quoted-printable encoding,
// the B_ISO15_CONVERSION character set and no pending raw data, and always
// gets a "MIME-Version: 1.0" header.
BTextMailComponent::BTextMailComponent(const char* text, uint32 defaultCharSet)
	: BMailComponent(defaultCharSet),
	encoding(quoted_printable),
	charset(B_ISO15_CONVERSION),
	raw_data(NULL)
{
	if (text != NULL)
		SetText(text);

	SetHeaderField("MIME-Version", "1.0");
}
437
438
// Nothing to release explicitly here; raw_data is not deleted.
BTextMailComponent::~BTextMailComponent()
{
}
442
443
444void
445BTextMailComponent::SetEncoding(mail_encoding encoding, int32 charset)
446{
447	this->encoding = encoding;
448	this->charset = charset;
449}
450
451
452void
453BTextMailComponent::SetText(const char* text)
454{
455	this->text.SetTo(text);
456
457	raw_data = NULL;
458}
459
460
461void
462BTextMailComponent::AppendText(const char* text)
463{
464	ParseRaw();
465
466	this->text << text;
467}
468
469
470const char*
471BTextMailComponent::Text()
472{
473	ParseRaw();
474
475	return text.String();
476}
477
478
479BString*
480BTextMailComponent::BStringText()
481{
482	ParseRaw();
483
484	return &text;
485}
486
487
488void
489BTextMailComponent::Quote(const char* message, const char* quote_style)
490{
491	ParseRaw();
492
493	BString string;
494	string << '\n' << quote_style;
495	text.ReplaceAll("\n",string.String());
496
497	string = message;
498	string << '\n';
499	text.Prepend(string.String());
500}
501
502
503status_t
504BTextMailComponent::GetDecodedData(BPositionIO* data)
505{
506	ParseRaw();
507
508	if (data == NULL)
509		return B_IO_ERROR;
510
511	BMimeType type;
512	BMimeType textAny("text");
513	ssize_t written;
514	if (MIMEType(&type) == B_OK && textAny.Contains(&type))
515		// Write out the string which has been both decoded from quoted
516		// printable or base64 etc, and then converted to UTF-8 from whatever
517		// character set the message specified.  Do it for text/html,
518		// text/plain and all other text datatypes.  Of course, if the message
519		// is HTML and specifies a META tag for a character set, it will now be
520		// wrong.  But then we don't display HTML in BeMail, yet.
521		written = data->Write(text.String(), text.Length());
522	else
523		// Just write out whatever the binary contents are, only decoded from
524		// the quoted printable etc format.
525		written = data->Write(decoded.String(), decoded.Length());
526
527	return written >= 0 ? B_OK : written;
528}
529
530
531status_t
532BTextMailComponent::SetDecodedData(BPositionIO* data)
533{
534	char buffer[255];
535	size_t buf_len;
536
537	while ((buf_len = data->Read(buffer, 254)) > 0) {
538		buffer[buf_len] = 0;
539		this->text << buffer;
540	}
541
542	raw_data = NULL;
543
544	return B_OK;
545}
546
547
548status_t
549BTextMailComponent::SetToRFC822(BPositionIO* data, size_t length, bool parseNow)
550{
551	off_t position = data->Position();
552	BMailComponent::SetToRFC822(data, length);
553
554	// Some malformed MIME headers can have the header running into the
555	// boundary of the next MIME chunk, resulting in a negative length.
556	length -= data->Position() - position;
557	if ((ssize_t) length < 0)
558	  length = 0;
559
560	raw_data = data;
561	raw_length = length;
562	raw_offset = data->Position();
563
564	if (parseNow) {
565		// copies the data stream and sets the raw_data variable to NULL
566		return ParseRaw();
567	}
568
569	return B_OK;
570}
571
572
573status_t
574BTextMailComponent::ParseRaw()
575{
576	if (raw_data == NULL)
577		return B_OK;
578
579	raw_data->Seek(raw_offset, SEEK_SET);
580
581	BMessage content_type;
582	HeaderField("Content-Type", &content_type);
583
584	charset = _charSetForTextDecoding;
585	if (charset == B_MAIL_NULL_CONVERSION && content_type.HasString("charset")) {
586		const char* charset_string = content_type.FindString("charset");
587		if (strcasecmp(charset_string, "us-ascii") == 0) {
588			charset = B_MAIL_US_ASCII_CONVERSION;
589		} else if (strcasecmp(charset_string, "utf-8") == 0) {
590			charset = B_MAIL_UTF8_CONVERSION;
591		} else {
592			const BCharacterSet* cs = BCharacterSetRoster::FindCharacterSetByName(charset_string);
593			if (cs != NULL) {
594				charset = cs->GetConversionID();
595			}
596		}
597	}
598
599	encoding = encoding_for_cte(HeaderField("Content-Transfer-Encoding"));
600
601	char* buffer = (char*)malloc(raw_length + 1);
602	if (buffer == NULL)
603		return B_NO_MEMORY;
604
605	int32 bytes;
606	if ((bytes = raw_data->Read(buffer, raw_length)) < 0)
607		return B_IO_ERROR;
608
609	char* string = decoded.LockBuffer(bytes + 1);
610	bytes = decode(encoding, string, buffer, bytes, 0);
611	free(buffer);
612	buffer = NULL;
613
614	// Change line ends from \r\n to just \n.  Though this won't work properly
615	// for UTF-16 because \r takes up two bytes rather than one.
616	char* dest;
617	char* src;
618	char* end = string + bytes;
619	for (dest = src = string; src < end; src++) {
620	 	if (*src != '\r')
621	 		*dest++ = *src;
622	}
623	decoded.UnlockBuffer(dest - string);
624	bytes = decoded.Length(); // Might have shrunk a bit.
625
626	// If the character set wasn't specified, try to guess.  ISO-2022-JP
627	// contains the escape sequences ESC $ B or ESC $ @ to turn on 2 byte
628	// Japanese, and ESC ( J to switch to Roman, or sometimes ESC ( B for
629	// ASCII.  We'll just try looking for the two switch to Japanese sequences.
630
631	if (charset == B_MAIL_NULL_CONVERSION) {
632		if (decoded.FindFirst ("\e$B") >= 0 || decoded.FindFirst ("\e$@") >= 0)
633			charset = B_JIS_CONVERSION;
634		else // Just assume the usual Latin-9 character set.
635			charset = B_ISO15_CONVERSION;
636	}
637
638	int32 state = 0;
639	int32 destLength = bytes * 3 /* in case it grows */ + 1 /* +1 so it isn't zero which crashes */;
640	string = text.LockBuffer(destLength);
641	mail_convert_to_utf8(charset, decoded.String(), &bytes, string,
642		&destLength, &state);
643	if (destLength > 0)
644		text.UnlockBuffer(destLength);
645	else {
646		text.UnlockBuffer(0);
647		text.SetTo(decoded);
648	}
649
650	raw_data = NULL;
651	return B_OK;
652}
653
654
655status_t
656BTextMailComponent::RenderToRFC822(BPositionIO* render_to)
657{
658	status_t status = ParseRaw();
659	if (status < B_OK)
660		return status;
661
662	BMimeType type;
663	MIMEType(&type);
664	BString content_type;
665	content_type << type.Type(); // Preserve MIME type (e.g. text/html
666
667	for (uint32 i = 0; mail_charsets[i].charset != NULL; i++) {
668		if (mail_charsets[i].flavor == charset) {
669			content_type << "; charset=\"" << mail_charsets[i].charset << "\"";
670			break;
671		}
672	}
673
674	SetHeaderField("Content-Type", content_type.String());
675
676	const char* transfer_encoding = NULL;
677	switch (encoding) {
678		case base64:
679			transfer_encoding = "base64";
680			break;
681		case quoted_printable:
682			transfer_encoding = "quoted-printable";
683			break;
684		case eight_bit:
685			transfer_encoding = "8bit";
686			break;
687		case seven_bit:
688		default:
689			transfer_encoding = "7bit";
690			break;
691	}
692
693	SetHeaderField("Content-Transfer-Encoding", transfer_encoding);
694
695	BMailComponent::RenderToRFC822(render_to);
696
697	BString modified = this->text;
698	BString alt;
699
700	int32 len = this->text.Length();
701	if (len > 0) {
702		int32 dest_len = len * 5;
703		// Shift-JIS can have a 3 byte escape sequence and a 2 byte code for
704		// each character (which could just be 2 bytes in UTF-8, or even 1 byte
705		// if it's regular ASCII), so it can get quite a bit larger than the
706		// original text.  Multiplying by 5 should make more than enough space.
707		char* raw = alt.LockBuffer(dest_len);
708		int32 state = 0;
709		mail_convert_from_utf8(charset, this->text.String(), &len, raw,
710			&dest_len, &state);
711		alt.UnlockBuffer(dest_len);
712
713		raw = modified.LockBuffer((alt.Length() * 3) + 1);
714		switch (encoding) {
715			case base64:
716				len = encode_base64(raw, alt.String(), alt.Length(), false);
717				raw[len] = 0;
718				break;
719			case quoted_printable:
720				len = encode_qp(raw, alt.String(), alt.Length(), false);
721				raw[len] = 0;
722				break;
723			case eight_bit:
724			case seven_bit:
725			default:
726				len = alt.Length();
727				strcpy(raw, alt.String());
728		}
729		modified.UnlockBuffer(len);
730
731		if (encoding != base64) // encode_base64 already does CRLF line endings.
732			modified.ReplaceAll("\n","\r\n");
733
734		// There seem to be a possibility of NULL bytes in the text, so lets
735		// filter them out, shouldn't be any after the encoding stage.
736
737		char* string = modified.LockBuffer(modified.Length());
738		for (int32 i = modified.Length(); i-- > 0;) {
739			if (string[i] != '\0')
740				continue;
741
742			puts("BTextMailComponent::RenderToRFC822: NULL byte in text!!");
743			string[i] = ' ';
744		}
745		modified.UnlockBuffer();
746
747		// word wrapping is already done by BeMail (user-configurable)
748		// and it does it *MUCH* nicer.
749
750//		//------Desperate bid to wrap lines
751//		int32 curr_line_length = 0;
752//		int32 last_space = 0;
753//
754//		for (int32 i = 0; i < modified.Length(); i++) {
755//			if (isspace(modified.ByteAt(i)))
756//				last_space = i;
757//
758//			if ((modified.ByteAt(i) == '\r') && (modified.ByteAt(i+1) == '\n'))
759//				curr_line_length = 0;
760//			else
761//				curr_line_length++;
762//
763//			if (curr_line_length > 80) {
764//				if (last_space >= 0) {
765//					modified.Insert("\r\n",last_space);
766//					last_space = -1;
767//					curr_line_length = 0;
768//				}
769//			}
770//		}
771	}
772	modified << "\r\n";
773
774	render_to->Write(modified.String(), modified.Length());
775
776	return B_OK;
777}
778
779
// Empty placeholder methods; they intentionally do nothing.
void BTextMailComponent::_ReservedText1() {}
void BTextMailComponent::_ReservedText2() {}
782