<?php
/*
 * $RCSfile: GalleryCharsetHelper_simple.class,v $
 *
 * Gallery - a web based photo album viewer and editor
 * Copyright (C) 2000-2005 Bharat Mediratta
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA  02110-1301, USA.
 */
/**
 * @version $Revision: 1.14 $ $Date: 2005/08/23 03:49:04 $
 * @package GalleryCore
 * @author Bharat Mediratta <bharat@menalto.com>
 */

/**
 * A collection of useful charset related utilities
 *
 * @package GalleryCore
 * @subpackage Classes
 */

class GalleryCharsetHelper_simple {

    /**
     * Detect the standard charset for this system
     *
     * The charset is looked up, in order, from the 'systemCharset' capability,
     * nl_langinfo(CODESET) and finally the codeset part of the LC_CTYPE locale as
     * reported by setlocale().  A successfully detected charset is stored in the
     * GalleryDataCache so the detection only runs once; a failed detection is not
     * cached.
     *
     * @return string an encoding (eg "UTF-8" or "Windows-1252"), or an empty value
     *                if the charset could not be determined
     */
    function detectSystemCharset() {
	static $cacheKey = 'GalleryCharsetHelper_simple::detectSystemCharset';
	if (GalleryDataCache::containsKey($cacheKey)) {
	    return GalleryDataCache::get($cacheKey);
	}

	global $gallery;
	$phpVm = $gallery->getPhpVm();
	$sourceEncoding = GalleryCapabilities::get('systemCharset');

	/* Determine character set of current locale */
	if (empty($sourceEncoding) && $phpVm->function_exists('nl_langinfo')) {
	    /* @suppress windows warning: "nl_langinfo() is not supported in this PHP build" */
	    $sourceEncoding = @$phpVm->nl_langinfo(CODESET);
	}
	if (empty($sourceEncoding)) {
	    $lcTable = explode(';', (string)$phpVm->setlocale(LC_ALL, '0'));
	    /*
	     * When the locale categories differ, setlocale() returns a string like this:
	     *    LC_CTYPE=en_US.UTF-8;LC_NUMERIC=C;LC_TIME=C;LC_COLLATE=C;LC_MONETARY=C;
	     *    LC_MESSAGES=C;LC_PAPER=C;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;
	     *    LC_MEASUREMENT=C;LC_IDENTIFICATION=C
	     * When all categories share the same locale it returns just the locale name:
	     *    en_US.UTF-8
	     */
	    $ctypeLocale = '';
	    foreach ($lcTable as $lc) {
		if (!strncmp($lc, 'LC_CTYPE', 8)) {
		    $ctypeLocale = $lc;
		    break;
		}
	    }
	    if ($ctypeLocale === '' && count($lcTable) == 1
		    && strpos($lcTable[0], '=') === false
		    && strpos($lcTable[0], '/') === false) {
		/*
		 * A single plain locale name applies to every category, LC_CTYPE included.
		 * Strings containing '/' are skipped: BSD style composite locales
		 * (eg "C/en_US.UTF-8/C/C/C/C") are not a single locale name.
		 */
		$ctypeLocale = $lcTable[0];
	    }

	    /* Locale names look like language[_territory][.codeset][@modifier] */
	    if (($i = strpos($ctypeLocale, '.')) !== false) {
		$sourceEncoding = substr($ctypeLocale, $i + 1);
		if (($i = strpos($sourceEncoding, '@')) !== false) {
		    /* Drop the modifier, eg "ISO-8859-15@euro" -> "ISO-8859-15" */
		    $sourceEncoding = substr($sourceEncoding, 0, $i);
		}
	    }

	    if (empty($sourceEncoding)) {
		/* Unable to determine source encoding - no conversion possible */
		return $sourceEncoding;
	    }
	}

	/*
	 * Remap some known charset issues.
	 * See http://cvsweb.xfree86.org/cvsweb/xc/nls/locale.alias?rev=1.44
	 */
	switch($sourceEncoding) {
	case '1252':   /* Windows XP has LC_CTYPE=English_United States.1252 */
	    $sourceEncoding = 'Windows-1252';
	    break;
	case '1251':   /* Windows XP has LC_CTYPE=Russian_Russia.1251 */
	    $sourceEncoding = 'CP1251';
	    break;
	}
	/* FreeBSD may return charset with missing hyphen from nl_langinfo */
	$sourceEncoding = str_replace('ISO8859', 'ISO-8859', $sourceEncoding);
	/* Gentoo may return ANSI_X3.4-1968 */
	if (preg_match('{^ANSI[_-]}', $sourceEncoding)) {
	    $sourceEncoding = 'ASCII';
	}

	GalleryDataCache::put($cacheKey, $sourceEncoding);
	return $sourceEncoding;
    }

    /**
     * Convert the string from the source encoding to UTF8
     *
     * iconv is tried first, then mb_convert_encoding, then recode_string.  If none
     * of them is available we fall back to the charset tables from
     * GalleryCharsetHelper_medium; if there is no table for the source encoding
     * the string is returned unchanged.
     *
     * @param string the input string
     * @param string the source encoding (eg, 'ISO-8859-1'), defaults from current locale
     * @return string the result
     */
    function convertToUtf8($string, $sourceEncoding=null) {
	global $gallery;
	$phpVm = $gallery->getPhpVm();

	if (empty($sourceEncoding)) {
	    $sourceEncoding = GalleryCharsetHelper_simple::detectSystemCharset();
	}
	if (empty($sourceEncoding) || !strcmp($sourceEncoding, 'UTF-8')) {
	    return $string;
	}

	/* Iconv can return false, so try it first.  If it fails, continue */
	if ($phpVm->function_exists('iconv')) {
	    if (($result = $phpVm->iconv($sourceEncoding, 'UTF-8', $string)) !== false) {
		return $result;
	    }
	}

	if ($phpVm->function_exists('mb_convert_encoding')) {
	    return $phpVm->mb_convert_encoding($string, 'UTF-8', $sourceEncoding);
	} else if ($phpVm->function_exists('recode_string')) {
	    return $phpVm->recode_string($sourceEncoding . '..UTF-8', $string);
	} else {
	    GalleryCoreApi::relativeRequireOnce(
		'modules/core/classes/helpers/GalleryCharsetHelper_medium.class');
	    $charset =& GalleryCharsetHelper_medium::getCharsetTable($sourceEncoding);
	    if (isset($charset)) {
		/*
		 * Translate every high byte (0x80-0xFF) through the charset table.
		 * We use strtr() rather than preg_replace() with the /e modifier, which
		 * is no longer available as of PHP 7.  Bytes without a table entry are
		 * dropped, just like the evaluated table lookup used to do.
		 */
		$byteMap = array();
		for ($byte = 0x80; $byte <= 0xFF; $byte++) {
		    $byteMap[chr($byte)] = isset($charset[$byte]) ? $charset[$byte] : '';
		}
		return strtr($string, $byteMap);
	    }
	}
	return $string;
    }

    /**
     * Convert the string from the internal encoding (UTF-8) to target encoding
     * Target encoding defaults to the system charset
     *
     * Warning: If neither iconv, mb_convert_encoding, or recode_string is available
     *          we will use a manual mapping from UTF-8 to the target encoding which is
     *          INCOMPLETE. That is, we only convert the characters that are in our
     *          charset tables from GalleryCharsetHelper_medium.
     *
     * @param string the input string
     * @param string the target encoding (eg, 'ISO-8859-1'), defaults to system charset
     * @return string the result
     */
    function convertFromUtf8($string, $targetEncoding=null) {
	global $gallery;
	$phpVm = $gallery->getPhpVm();

	if (empty($targetEncoding)) {
	    $targetEncoding = GalleryCharsetHelper_simple::detectSystemCharset();
	}
	if (empty($targetEncoding) || !strcmp($targetEncoding, 'UTF-8')) {
	    return $string;
	}

	/* Iconv can return false, so try it first.  If it fails, continue */
	if ($phpVm->function_exists('iconv')) {
	    if (($result =
		     $phpVm->iconv('UTF-8', $targetEncoding . '//IGNORE', $string)) !== false) {
		return $result;
	    }
	}

	if ($phpVm->function_exists('mb_convert_encoding')) {
	    return $phpVm->mb_convert_encoding($string, $targetEncoding, 'UTF-8');
	} else if ($phpVm->function_exists('recode_string')) {
	    return $phpVm->recode_string('UTF-8..' . $targetEncoding, $string);
	}  else {
	    GalleryCoreApi::relativeRequireOnce(
		'modules/core/classes/helpers/GalleryCharsetHelper_medium.class');
	    $sourceCharset =& GalleryCharsetHelper_medium::getCharsetTable($targetEncoding);
	    if (isset($sourceCharset)) {
		/*
		 * The charset table is organized for sourceEncoding -> UTF-8 conversion,
		 * we need the inverse charset table, UTF-8 -> targetEncoding
		 */
		$inverseMap = array();
		foreach ($sourceCharset as $key => $value) {
		    if (empty($value)) {
			continue;
		    }
		    /* If several bytes share a UTF-8 sequence, the first one wins */
		    if (!isset($inverseMap[$value])) {
			/* Convert decimal value 128-255 to character */
			$inverseMap[$value] = chr($key);
		    }
		}
		if (empty($inverseMap)) {
		    return $string;
		}
		/*
		 * strtr() translates in a single pass and never rescans its own output.
		 * str_replace() applies the pairs one after another, so bytes produced
		 * by an earlier replacement could be combined into a later search
		 * sequence and get converted a second time.
		 */
		return strtr($string, $inverseMap);
	    }
	}
	return $string;
    }
}
?>
