For lidt over et aar siden lavede jeg den her, som selv forsoeger at gaette charset.
<?php
/**
 * Small wrapper around mail() that guesses the content type and the character
 * set of a message when the caller does not state them, and MIME-encodes the
 * headers and the body accordingly.
 *
 * Only US-ASCII, ISO-8859-1 and UTF-8 are supported. A text that mixes
 * ISO-8859-1 and UTF-8 bytes cannot be labelled correctly by any guess.
 */
class SmartMailer {
    // content types
    const TYPE_UNKNOWN = 100; // guess the content type from the body
    const TYPE_TEXT = 101;
    const TYPE_HTML = 102;
    // character sets
    const CS_UNKNOWN = 200; // guess the character set from the body
    const CS_USASCII = 201;
    const CS_ISO88591 = 202;
    const CS_UTF8 = 203;

    // longest encoded-word allowed in a header by RFC 2047
    const MAX_ENCODED_WORD = 75;

    // default character set if 8 bit and not possible to guess
    private $default_cs;

    /**
     * Create a mailer whose default character set is UTF-8.
     */
    public function __construct() {
        $this->default_cs = self::CS_UTF8;
    }

    /**
     * Get the default character set.
     *
     * @return int one of the CS_* constants
     */
    public function get_default_cs() {
        return $this->default_cs;
    }

    /**
     * Set the default character set.
     *
     * @param int $cs one of CS_USASCII, CS_ISO88591 or CS_UTF8
     * @throws Exception if $cs is not a supported character set code
     */
    public function set_default_cs($cs) {
        // validate first: the lookup throws for unknown codes, so the
        // property is never left holding an unsupported value
        $this->cs_code_to_string($cs);
        $this->default_cs = $cs;
    }

    /**
     * Convert a content type code to the actual content type string.
     *
     * @param int $type TYPE_TEXT or TYPE_HTML
     * @return string MIME type
     * @throws Exception for any other code
     */
    private function type_code_to_string($type) {
        switch ($type) {
            case self::TYPE_TEXT:
                return "text/plain";
            case self::TYPE_HTML:
                return "text/html";
            default:
                throw new Exception('Unsupported content type code ' . $type);
        }
    }

    /**
     * Convert a character set code to the actual character set string.
     *
     * @param int $cs CS_USASCII, CS_ISO88591 or CS_UTF8
     * @return string charset name
     * @throws Exception for any other code
     */
    private function cs_code_to_string($cs) {
        switch ($cs) {
            case self::CS_USASCII:
                return "US-ASCII";
            case self::CS_ISO88591:
                return "ISO-8859-1";
            case self::CS_UTF8:
                return "UTF-8";
            default:
                throw new Exception('Unsupported character set code ' . $cs);
        }
    }

    /**
     * Attempt to guess the content type.
     *
     * The text counts as HTML when it contains an opening tag followed
     * somewhere later by a closing tag of the same name.
     *
     * @param string $s text to inspect
     * @return int TYPE_HTML or TYPE_TEXT
     */
    private function guess_type($s) {
        // "s" lets the tag pair span several lines, "i" accepts <P>...</p>;
        // the \b after each name stops "<bold>" from pairing with "</b>"
        if (preg_match('#<(\w+)\b.+</\1\b#si', $s) === 1) {
            return self::TYPE_HTML;
        }
        return self::TYPE_TEXT;
    }

    /**
     * Attempt to guess the character set.
     *
     * - no byte >= 0x80: US-ASCII
     * - 8-bit bytes that form well-formed UTF-8: UTF-8
     * - anything else must be a single-byte character set: the default
     *   character set if that is one, otherwise ISO-8859-1
     *
     * @param string $s text to inspect
     * @return int one of the CS_* constants (never CS_UNKNOWN)
     */
    private function guess_cs($s) {
        if (preg_match('/[\x80-\xFF]/', $s) !== 1) {
            return self::CS_USASCII;
        }
        // with the "u" modifier PCRE validates the subject as UTF-8 and
        // preg_match() returns false (not 0) when it is malformed
        if (preg_match('//u', $s) === 1) {
            return self::CS_UTF8;
        }
        // malformed UTF-8 can not be labelled UTF-8 or US-ASCII
        if ($this->default_cs == self::CS_UTF8 || $this->default_cs == self::CS_USASCII) {
            return self::CS_ISO88591;
        }
        return $this->default_cs;
    }

    /**
     * Q-encode a single character (one or more bytes) as defined by RFC 2047.
     *
     * @param string $ch the bytes of one character
     * @return string encoded form
     */
    private function q_encode_char($ch) {
        if ($ch === ' ') {
            return '_';
        }
        // only characters that are safe in every header context stay literal
        if (preg_match('#^[A-Za-z0-9!*+/-]$#D', $ch) === 1) {
            return $ch;
        }
        $encoded = '';
        for ($i = 0, $len = strlen($ch); $i < $len; $i++) {
            $encoded .= sprintf('=%02X', ord($ch[$i]));
        }
        return $encoded;
    }

    /**
     * Header encode.
     *
     * Pure US-ASCII values are returned unchanged. Other values are turned
     * into one or more RFC 2047 "Q" encoded-words of at most 75 characters
     * each, joined by a folding line break (CRLF + space).
     *
     * @param string $hdr raw header value
     * @return string value that is safe to put in a header
     */
    private function header_encode($hdr) {
        $hdrcs = $this->guess_cs($hdr);
        if ($hdrcs == self::CS_USASCII) {
            return $hdr;
        }
        $prefix = '=?' . $this->cs_code_to_string($hdrcs) . '?Q?';
        $suffix = '?=';
        $room = self::MAX_ENCODED_WORD - strlen($prefix) - strlen($suffix);

        // split into whole characters so that a multi-byte UTF-8 sequence
        // is never divided between two encoded-words
        $chars = false;
        if ($hdrcs == self::CS_UTF8) {
            $chars = preg_split('//u', $hdr, -1, PREG_SPLIT_NO_EMPTY);
        }
        if ($chars === false) {
            $chars = str_split($hdr);
        }

        $words = array();
        $current = '';
        foreach ($chars as $ch) {
            $piece = $this->q_encode_char($ch);
            if ($current !== '' && strlen($current) + strlen($piece) > $room) {
                $words[] = $prefix . $current . $suffix;
                $current = '';
            }
            $current .= $piece;
        }
        $words[] = $prefix . $current . $suffix;
        return implode("\r\n ", $words);
    }

    /**
     * Email address encode.
     *
     * For an address of the form "Display Name <user@host>" only the display
     * name is header encoded. Anything else is returned unchanged.
     *
     * @param string $addr a single email address
     * @return string address with an encoded display name
     */
    private function email_address_encode($addr) {
        // split on the last " <" so a display name containing " <" still works
        $pos = strrpos($addr, ' <');
        if ($pos === false) {
            return $addr;
        }
        return $this->header_encode(substr($addr, 0, $pos)) . substr($addr, $pos);
    }

    /**
     * Replace line breaks in a header value with a space, so the value can
     * not start a new header line (header injection).
     *
     * @param string $value raw header value
     * @return string value without CR and LF
     */
    private function strip_line_breaks($value) {
        return preg_replace('/[\r\n]+/', ' ', $value);
    }

    /**
     * Actual send.
     *
     * @param string $from    sender, "user@host" or "Name <user@host>"
     * @param string $to      recipient, same forms as $from
     * @param string $subject subject line
     * @param string $body    message text
     * @param int    $type    TYPE_TEXT, TYPE_HTML, or TYPE_UNKNOWN to guess from $body
     * @param int    $cs      CS_* code of $body, or CS_UNKNOWN to guess from $body
     * @return bool the result of mail()
     * @throws Exception if $type or $cs is not a supported code
     */
    public function send_email($from, $to, $subject, $body, $type = self::TYPE_UNKNOWN, $cs = self::CS_UNKNOWN) {
        $from = $this->email_address_encode($this->strip_line_breaks($from));
        $to = $this->email_address_encode($this->strip_line_breaks($to));
        $subject = $this->header_encode($this->strip_line_breaks($subject));

        if ($type == self::TYPE_UNKNOWN) {
            $type = $this->guess_type($body);
        }
        $bodycs = ($cs == self::CS_UNKNOWN) ? $this->guess_cs($body) : $cs;
        $contenttype = $this->type_code_to_string($type) . '; charset=' . $this->cs_code_to_string($bodycs);

        if ($bodycs == self::CS_USASCII) {
            $contentencoding = '7bit';
        } else {
            $contentencoding = 'quoted-printable';
            $body = quoted_printable_encode($body);
        }

        $headers = "From: $from\r\n" .
            "Content-Type: $contenttype\r\n" .
            "MIME-Version: 1.0\r\n" .
            "Content-Transfer-Encoding: $contentencoding";
        return mail($to, $subject, $body, $headers);
    }
}
?>
Indeholder emailen et mix af ISO-8859-1 og UTF-8, fordi man har et charset fra PHP-filen og et andet fra databasen, saa er man stadig totalt fubar.