Changeset 6192
- Timestamp:
- 01/30/10 10:24:04 (3 years ago)
- Location:
- trunk/MailInContrib
- Files:
-
- 1 added
- 4 edited
-
data/System/MailInContrib.txt (modified) (2 diffs)
-
lib/Foswiki/Contrib/MailInContrib.pm (modified) (5 diffs)
-
lib/Foswiki/Contrib/MailInContrib/Config.spec (modified) (1 diff)
-
test/unit/MailInContrib/MailInContribMimeTests.pm (added)
-
test/unit/MailInContrib/MailInContribSuite.pm (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
trunk/MailInContrib/data/System/MailInContrib.txt
r6085 r6192 62 62 !MailInContribUserTemplate and edit to taste. 63 63 64 ---+ Which part of the mail is added 65 66 By default, the module extracts the plain-text portion of the mail; the HTML portion (if present) is discarded. 67 When the module extracts the plain-text portion, it also discards inline images associated with the HTML (because, whilst they are part of the mail, they are not _attachments_). 68 69 %X% *Experimental:* 70 You can configure the module to extract the HTML portion instead, in which case the plain-text portion is discarded. The module then also extracts and attaches inline images. 71 64 72 ---+ How the contributor is identified 65 73 The user identity is used for access control checks on the target topic, so you can use Foswiki access controls to protect target topics. … … 104 112 | Release: | %$RELEASE% | 105 113 | Change History: | | 114 | _not yet released_ | Foswikitask:Item8384: Support HTML email | 106 115 | 3 Feb 2009 | Foswikitask:Item456: ported Foswikitask:Item4955: config options moved to 'Mail and Proxies' Foswikitask:Item5829: force new revision on each submission Foswikitask:Item5843: extract target topic from CC: field - general improvement to rules for extracting the target topic | 107 116 | 10 Mar 2005 | 1.000 Initial version | -
trunk/MailInContrib/lib/Foswiki/Contrib/MailInContrib.pm
r6085 r6192 147 147 $box->{onSuccess} ||= 'log'; 148 148 149 # Copy the valid domain pattern for external resource URLs (img, script, style) 150 $this->{validUrlPattern} = $box->{validUrlPattern}; 151 149 152 # Load the mail templates 150 153 Foswiki::Func::loadTemplate('MailInContrib'); … … 296 299 my $body = ''; 297 300 298 _extract( $mail, \$body, \@attachments ); 301 unless ($this->{validUrlPattern}) { 302 # Default to files attached to this wiki and files in the mail message 303 my $puburl = Foswiki::Func::expandCommonVariables( 304 '%PUBURL%', 305 $topic, $web 306 ); 307 $this->{validUrlPattern} = qr/cid:|\Q$puburl/; 308 } 309 310 $this->_extract( $mail, \$body, \@attachments, $box ); 299 311 300 312 print "Received mail from $sender for $web.$topic\n"; … … 337 349 matching => sub { 338 350 my $test = shift; 339 if ( defined $kill{ $test->header('Message-ID') } ) { 340 print STDERR "Delete ", $test->header('Message-ID'), "\n" 351 my $message_id = $test->header('Message-ID'); 352 if ( defined $message_id and defined $kill{ $message_id } ) { 353 print STDERR "Delete $message_id\n" 341 354 if $this->{debug}; 342 355 return 1; … … 368 381 } 369 382 383 sub _extract { 384 my ( $this, $mime, $text, $attach, $box ) = @_; 385 $box->{content}{type} ||= ''; 386 if ($box->{content}{type} =~ /html/) { 387 $this->_extractHtmlAndAttachments($mime, $text, $attach, $box->{content}); 388 } 389 else { 390 _extractPlainTextAndAttachments($mime, $text, $attach); 391 } 392 } 393 394 sub _extractHtmlAndAttachments { 395 my ( $this, $mime, $text, $attach, $options ) = @_; 396 my $ct = $mime->content_type || 'text/plain'; 397 my $dp = $mime->header('Content-Disposition') || 'inline'; 398 print STDERR "\nContent-type: $ct\n" if $this->{debug}; 399 if ($ct =~ m[multipart/mixed]) { 400 $this->_extractMultipartMixed($mime, $text, $attach, $options); 401 } 402 elsif ($ct =~ m[multipart/alternative]) { 403 $this->_extractMultipartAlternative($mime, $text, $attach, $options); 404 } 405 
elsif ( $ct =~ m[multipart/related] ) { 406 my $found; 407 $found = _extractMultipartHtml($mime, $text, $attach, $options); 408 print STDERR "Found multipart/related HTML\n" if $found and $this->{debug}; 409 if (not $found) 410 { 411 print STDERR "Cannot find HTML. Extracting plain text\n" if $this->{debug}; 412 _extractPlainTextAndAttachments($mime, $text, $attach); 413 } 414 } 415 elsif ( $ct =~ m[text/html] and $dp =~ /inline/ ) { 416 print STDERR "Extracting text/html\n" if $this->{debug}; 417 _extractPlainHtml($mime, $text, $options); 418 } 419 else { 420 print STDERR "Extracting plain text and attachments\n" if $this->{debug}; 421 _extractPlainTextAndAttachments($mime, $text, $attach); 422 } 423 } 424 425 sub _extractMultipartMixed { 426 my ( $this, $mime, $text, $attach, $options ) = @_; 427 foreach my $part ( grep { $_ != $mime } $mime->parts() ) { 428 print STDERR "Multipart/mixed: Recursing\n" if $this->{debug}; 429 $this->_extractHtmlAndAttachments($part, $text, $attach, $options); 430 } 431 } 432 433 sub _extractMultipartAlternative { 434 my ( $this, $mime, $text, $attach, $options ) = @_; 435 436 print STDERR "Multipart/alternative\n" if $this->{debug}; 437 # See what alternatives are available 438 my @alternates = map +{ 439 mime => $_, 440 ct => $_->content_type || 'text/plain', 441 }, grep { $_ != $mime } $mime->parts(); 442 443 my ($multipartRelatedAlternate) = grep { $_->{ct} =~ m[multipart/related] } @alternates; 444 my ($htmlAlternate) = grep { $_->{ct} =~ m[text/html] } @alternates; 445 446 # Pick one 447 my $found; 448 if ($multipartRelatedAlternate and $options->{type} !~ /plain/) { 449 $found = $this->_extractMultipartHtml($multipartRelatedAlternate->{mime}, $text, $attach, $options); 450 print STDERR "Found multipart/related HTML\n" if $found and $this->{debug}; 451 } 452 if ($htmlAlternate and not $found) { 453 $found = $this->_extractPlainHtml($htmlAlternate->{mime}, $text, $options); 454 print STDERR "Found text/html\n" if $found and 
$this->{debug}; 455 } 456 if (not $found) 457 { 458 print STDERR "Cannot find HTML - Extracting plain text\n" if $this->{debug}; 459 _extractPlainTextAndAttachments($mime, $text, $attach); 460 } 461 } 462 463 sub _extractMultipartHtml { 464 my ( $this, $mime, $text, $attach, $options ) = @_; 465 my @bits = map +{ 466 mime => $_, 467 ct => $_->content_type || 'text/plain', 468 dp => $_->header('Content-Disposition') || 'inline' 469 }, grep { $_ != $mime } $mime->parts(); 470 my ($htmlBit) = grep { $_->{ct} =~ m[text/html] and $_->{dp} =~ /inline/ } @bits; 471 return unless $htmlBit; # Not found 472 473 my $html = $this->_extractAndTrimHtml($htmlBit->{mime}); 474 return unless $html; 475 for my $bit (grep { $_ != $htmlBit } @bits) 476 { 477 my $filename = $bit->{mime}->filename(); 478 ($filename) = Foswiki::Sandbox::sanitizeAttachmentName( $bit->{mime}->filename() ) if defined $filename; 479 my $cid = $bit->{mime}->header('Content-ID') || ''; 480 my $cid_used = ''; 481 print STDERR "cid:[$cid]\n" if $cid and $this->{debug}; 482 if ($cid =~ /^\s*<?((.*?)\@.*?)>?\s*$/) { 483 $cid = $1; 484 ($filename) = Foswiki::Sandbox::sanitizeAttachmentName($2); 485 $cid_used = ($html =~ s{"cid:\Q$cid\E"}{"%ATTACHURLPATH%/$filename"}); 486 } 487 if ( $filename and ($bit->{dp} !~ /inline/ or ($cid and $cid_used) ) ) { 488 push( 489 @$attach, 490 { 491 payload => $bit->{mime}->body(), 492 filename => $filename 493 } 494 ); 495 } 496 } 497 $$text .= "<literal><div class=\"foswikiMailInContribHtml\">$html</div></literal>\n"; 498 return 1; 499 } 500 501 sub _extractPlainHtml { 502 my ( $this, $mime, $text, $options ) = @_; 503 my $html = $this->_extractAndTrimHtml($mime); 504 return unless $html; 505 $$text .= "<literal><div class=\"foswikiMailInContribPlainHtml\">$html</div></literal>\n"; 506 return 1; 507 } 508 509 sub _extractAndTrimHtml { 510 my ($this, $mime) = @_; 511 return unless $mime; 512 my $html = $mime->body(); 513 return unless $html; 514 515 # Remove anything outside the 
body tag, and change the body tag into a div tag 516 # It is better to keep the body tag as a tag (and not just discard it altogether) 517 # because that tag sometimes has attributes that should be retained. 518 $html =~ s{.*<body([^>]*>.*)</body>.*}{<div$1</div>}is; 519 520 # Remove tags that point to external sites 521 my $validUrlPattern = $this->{validUrlPattern}; 522 $html =~ s{<(script|style|img) # opening tag 523 [^>]+ # whitespace or attributes 524 \bsrc= # attribute that contains a URL that could be used as e.g. a webbug 525 (['"]) # opening quote 526 (?!$validUrlPattern) # Zero-width negative lookahead for valid URLs 527 # URLs that don't match this pattern might be evil 528 [^>]+? # the URL itself 529 \2 # closing quote that matches the opening quote 530 [^>]* # Any other attributes or whitespace 531 (?: 532 >.*?</\1> # End of tag, content, and closing tag 533 | # or 534 /> # End of tag, and tag does not have content 535 ) 536 }{<em>External link removed</em>}isgx if $validUrlPattern; 537 538 return unless $html =~ /\S/; 539 return $html; 540 } 541 542 370 543 # Extract plain text and attachments from the MIME 371 sub _extract {544 sub _extractPlainTextAndAttachments { 372 545 my ( $mime, $text, $attach ) = @_; 373 546 … … 388 561 } 389 562 elsif ( $part != $mime ) { 390 _extract ( $part, $text, $attach );563 _extractPlainTextAndAttachments( $part, $text, $attach ); 391 564 } 392 565 } -
trunk/MailInContrib/lib/Foswiki/Contrib/MailInContrib/Config.spec
r6116 r6192 56 56 # <li> error - treat this as an error (overrides all other options)</li> 57 57 # <li> spam - save the mail in the spambox topic. 58 # Note: if you clear this, then Foswiki will simply ignore the mail. .</li>58 # Note: if you clear this, then Foswiki will simply ignore the mail.</li> 59 59 # </ul> 60 60 # </li> 61 # <li> spambox - optional required of onNoTopic = spam. Name of the topic61 # <li> spambox - optional, required if onNoTopic = spam. Name of the topic 62 62 # where you want to save mails that don't have a valid web.topic. You must 63 63 # specify a full web.topicname 64 # </li> 65 # <li> content - optional, defaults to "extract plain text". 66 # Specifies what part of the mail to extract and how to process it. 67 # It takes a number of fields: 68 # <ul> 69 # <li> type - specifies type of content to extract. 70 # Available options: 71 # <ul> 72 # <li> text - extract the plain-text portion </li> 73 # <li> html - extract the HTML portion, by preference 74 # - reverts to the plain-text if the mail does not contain HTML 75 # </li> 76 # </ul> 77 # </li> 78 # </ul> 64 79 # </li> 65 80 # </ul> -
trunk/MailInContrib/test/unit/MailInContrib/MailInContribSuite.pm
r6149 r6192 8 8 sub name { 'MailInContribSuite' } 9 9 10 sub include_tests { qw(MailInContribTests ) }10 sub include_tests { qw(MailInContribTests MailInContribMimeTests) } 11 11 12 12 1;
Note: See TracChangeset
for help on using the changeset viewer.
