/*
 * Revision: 1.16, Fri Mar 25 00:34:45 2005 UTC (3 months ago) by jgdavidson
 * Branch: MAIN
 * CVS Tags: HEAD
 * Changes since 1.15: +214 -130 lines
 *
 * Added Ns_FindCharset and Ns_GetCharsetEncodingEx and simplified much
 * of the charset/encoding code.
 */
/*
 * The contents of this file are subject to the AOLserver Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://aolserver.com/.
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is AOLserver Code and related documentation
 * distributed by AOL.
 * 
 * The Initial Developer of the Original Code is America Online,
 * Inc. Portions created by AOL are Copyright (C) 1999 America Online,
 * Inc. All Rights Reserved.
 *
 * Alternatively, the contents of this file may be used under the terms
 * of the GNU General Public License (the "GPL"), in which case the
 * provisions of GPL are applicable instead of those above.  If you wish
 * to allow use of your version of this file only under the terms of the
 * GPL and not to allow others to use your version of this file under the
 * License, indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by the GPL.
 * If you do not delete the provisions above, a recipient may use your
 * version of this file under either the License or the GPL.
 */

/*
 * encoding.c --
 *
 *	Defines standard default charset to encoding mappings.
 */

static const char *RCSID = "@(#) $Header: /cvsroot/aolserver/aolserver/nsd/encoding.c,v 1.16 2005/03/25 00:34:45 jgdavidson Exp $, compiled: " __DATE__ " " __TIME__;

#include "nsd.h"

/*
 * Local functions defined in this file.  Both take the lookup key
 * first (file extension or charset) and the Tcl encoding name to
 * store for that key second, matching their definitions.
 */

static void AddExtension(char *ext, char *name);
static void AddCharset(char *charset, char *name);

/*
 * Static variables defined in this file.
 */

/* Id returned by Ns_UrlSpecificAlloc(); used to store and fetch the
 * per-method/URL input encoding. */
static int		eid;
/* Encoding name -> Tcl_Encoding (or ENC_LOCKED while being loaded). */
static Tcl_HashTable    encodings;
/* Lower-cased charset alias -> Tcl encoding name. */
static Tcl_HashTable    charsets;
/* File extension (including the leading '.') -> Tcl encoding name. */
static Tcl_HashTable    extensions;
/* Held while accessing the encodings table in Ns_GetEncoding() and while
 * listing the charsets table in NsTclCharsetsCmd(). */
static Ns_Mutex		lock;
/* Broadcast by Ns_GetEncoding() once a pending load has been stored. */
static Ns_Cond		cond;
/* Placeholder hash value marking an encoding whose load is in progress. */
#define ENC_LOCKED	((Tcl_Encoding) (-1))

/*
 * The default table maps file extensions to Tcl encoding names.  The
 * extension keys include the leading '.', which is the form
 * Ns_GetFileEncoding() looks up.  The table is terminated by a
 * {NULL, NULL} entry and is loaded by NsInitEncodings().
 */

static struct {
    char	   *extension;
    char	   *name;
} builtinExt[] = {
    {".txt", "ascii"},
    {".htm", "iso8859-1"},
    {".html", "iso8859-1"},
    {".adp", "iso8859-1"},
    {NULL, NULL}
};

/*
 * The following table provides charset aliases for Tcl encodings.
 * Each entry maps a (lower-case) charset name to the name of the Tcl
 * encoding that implements it.  The table is terminated by a
 * {NULL, NULL} entry and is loaded by NsInitEncodings().
 *
 * The "...centraleupore" keys are a historical misspelling of
 * "...centraleurope".  The correctly spelled aliases have been added;
 * the misspelled ones are retained so that any existing configuration
 * relying on them keeps working.
 */

static struct {
    char           *charset;
    char           *name;
} builtinChar[] = {
    { "iso-2022-jp", "iso2022-jp" },
    { "iso-2022-kr", "iso2022-kr" },
    { "iso-8859-1", "iso8859-1" },
    { "iso-8859-2", "iso8859-2" },
    { "iso-8859-3", "iso8859-3" },
    { "iso-8859-4", "iso8859-4" },
    { "iso-8859-5", "iso8859-5" },
    { "iso-8859-6", "iso8859-6" },
    { "iso-8859-7", "iso8859-7" },
    { "iso-8859-8", "iso8859-8" },
    { "iso-8859-9", "iso8859-9" },
    { "korean", "ksc5601" },
    { "ksc_5601", "ksc5601" },
    { "mac", "macRoman" },
    { "mac-centeuro", "macCentEuro" },
    { "mac-centraleupore", "macCentEuro" },     /* legacy misspelling */
    { "mac-centraleurope", "macCentEuro" },
    { "mac-croatian", "macCroatian" },
    { "mac-cyrillic", "macCyrillic" },
    { "mac-greek", "macGreek" },
    { "mac-iceland", "macIceland" },
    { "mac-japan", "macJapan" },
    { "mac-roman", "macRoman" },
    { "mac-romania", "macRomania" },
    { "mac-thai", "macThai" },
    { "mac-turkish", "macTurkish" },
    { "mac-ukraine", "macUkraine" },
    { "maccenteuro", "macCentEuro" },
    { "maccentraleupore", "macCentEuro" },      /* legacy misspelling */
    { "maccentraleurope", "macCentEuro" },
    { "maccroatian", "macCroatian" },
    { "maccyrillic", "macCyrillic" },
    { "macgreek", "macGreek" },
    { "maciceland", "macIceland" },
    { "macintosh", "macRoman" },
    { "macjapan", "macJapan" },
    { "macroman", "macRoman" },
    { "macromania", "macRomania" },
    { "macthai", "macThai" },
    { "macturkish", "macTurkish" },
    { "macukraine", "macUkraine" },
    { "shift_jis", "shiftjis" },
    { "us-ascii", "ascii" },
    { "windows-1250", "cp1250" },
    { "windows-1251", "cp1251" },
    { "windows-1252", "cp1252" },
    { "windows-1253", "cp1253" },
    { "windows-1254", "cp1254" },
    { "windows-1255", "cp1255" },
    { "windows-1256", "cp1256" },
    { "windows-1257", "cp1257" },
    { "windows-1258", "cp1258" },
    { "x-mac", "macRoman" },
    { "x-mac-centeuro", "macCentEuro" },
    { "x-mac-centraleupore", "macCentEuro" },   /* legacy misspelling */
    { "x-mac-centraleurope", "macCentEuro" },
    { "x-mac-croatian", "macCroatian" },
    { "x-mac-cyrillic", "macCyrillic" },
    { "x-mac-greek", "macGreek" },
    { "x-mac-iceland", "macIceland" },
    { "x-mac-japan", "macJapan" },
    { "x-mac-roman", "macRoman" },
    { "x-mac-romania", "macRomania" },
    { "x-mac-thai", "macThai" },
    { "x-mac-turkish", "macTurkish" },
    { "x-mac-ukraine", "macUkraine" },
    { "x-macintosh", "macRoman" },
    { NULL,     NULL }
};


/*
 *----------------------------------------------------------------------
 *
 * NsInitEncodings --
 *
 *	One-time setup of the encoding module: load the compiled-in
 *	charset aliases and file extension mappings.
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	Allocates the URL-specific id used for input encodings, names
 *	the module mutex, and initializes and populates the module's
 *	hash tables.
 *
 *----------------------------------------------------------------------
 */

void
NsInitEncodings(void)
{
    int idx;

    /*
     * Reserve the URL-specific slot under which per-method/URL
     * input encodings are stored.
     */

    eid = Ns_UrlSpecificAlloc();

    /*
     * Prepare the lock and the three string-keyed lookup tables.
     */

    Ns_MutexSetName(&lock, "ns:encodings");
    Tcl_InitHashTable(&encodings, TCL_STRING_KEYS);
    Tcl_InitHashTable(&charsets, TCL_STRING_KEYS);
    Tcl_InitHashTable(&extensions, TCL_STRING_KEYS);

    /*
     * Walk each NULL-terminated builtin table, charsets first and
     * then file extensions.
     */

    idx = 0;
    while (builtinChar[idx].charset != NULL) {
        AddCharset(builtinChar[idx].charset, builtinChar[idx].name);
        ++idx;
    }

    idx = 0;
    while (builtinExt[idx].extension != NULL) {
        AddExtension(builtinExt[idx].extension, builtinExt[idx].name);
        ++idx;
    }
}


/*
 *----------------------------------------------------------------------
 *
 * NsUpdateEncodings --
 *
 *	Add the charset aliases and file extension mappings found in
 *	the "ns/charsets" and "ns/encodings" config sections.  A
 *	missing section is skipped.
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	Entries are added to (or replaced in) the charsets and
 *	extensions hash tables.
 *
 *----------------------------------------------------------------------
 */

void
NsUpdateEncodings(void)
{
    Ns_Set *section;
    int     n;

    /*
     * Each key/value pair in ns/charsets is a charset alias and the
     * Tcl encoding name it maps to.
     */

    section = Ns_ConfigGetSection("ns/charsets");
    if (section != NULL) {
        for (n = 0; n < Ns_SetSize(section); ++n) {
            AddCharset(Ns_SetKey(section, n), Ns_SetValue(section, n));
        }
    }

    /*
     * Each key/value pair in ns/encodings is a file extension and
     * the Tcl encoding name it maps to.
     */

    section = Ns_ConfigGetSection("ns/encodings");
    if (section != NULL) {
        for (n = 0; n < Ns_SetSize(section); ++n) {
            AddExtension(Ns_SetKey(section, n), Ns_SetValue(section, n));
        }
    }
}


/*
 *----------------------------------------------------------------------
 *
 * Ns_GetFileEncoding --
 *
 *	Return the Tcl_Encoding registered for the extension of the
 *	given file name.  The extension is everything from the last
 *	'.' in the name onward.  Note this may not be the same as the
 *	encoding for the charset of the file's mimetype.
 *
 * Results:
 *	Tcl_Encoding, or NULL if the name has no '.', the extension
 *	has no mapping, or the mapped encoding could not be obtained.
 *
 * Side effects:
 *	See Ns_GetEncoding().
 *
 *----------------------------------------------------------------------
 */

Tcl_Encoding
Ns_GetFileEncoding(char *file)
{
    Tcl_HashEntry *entry;
    char          *dot;

    dot = strrchr(file, '.');
    if (dot == NULL) {
        return NULL;
    }
    entry = Tcl_FindHashEntry(&extensions, dot);
    if (entry == NULL) {
        return NULL;
    }
    return Ns_GetEncoding((char *) Tcl_GetHashValue(entry));
}


/*
 *----------------------------------------------------------------------
 *
 * Ns_GetTypeEncoding --
 *
 *	Return the Tcl_Encoding for the given Content-type header,
 *	e.g., "text/html; charset=iso-8859-1" returns Tcl_Encoding
 *	for iso8859-1.  The charset is located with Ns_FindCharset()
 *	and resolved with Ns_GetCharsetEncodingEx(); no default
 *	charset is applied here when the type carries none.
 *
 * Results:
 *	Tcl_Encoding, or NULL if the type has no charset or no
 *	encoding is found for it.
 *
 * Side effects:
 *	See Ns_GetEncoding().
 *
 *----------------------------------------------------------------------
 */

Tcl_Encoding
Ns_GetTypeEncoding(char *type)
{
    char *start;
    int   length;

    start = Ns_FindCharset(type, &length);
    if (start == NULL) {
        return NULL;
    }
    return Ns_GetCharsetEncodingEx(start, length);
}


/*
 *----------------------------------------------------------------------
 *
 * Ns_GetCharsetEncodingEx, Ns_GetCharsetEncoding --
 *
 *	Return the Tcl_Encoding for the given charset, e.g.,
 *	"iso-8859-1" returns Tcl_Encoding for iso8859-1.  The Ex
 *	variant takes an explicit length for the charset text;
 *	Ns_GetCharsetEncoding passes -1 as that length.
 *
 * Results:
 *	Tcl_Encoding or NULL if not found.
 *
 * Side effects:
 *	See Ns_GetEncoding().
 *
 *----------------------------------------------------------------------
 */

Tcl_Encoding
Ns_GetCharsetEncoding(char *charset)
{
    Tcl_Encoding result;

    result = Ns_GetCharsetEncodingEx(charset, -1);
    return result;
}

/*
 * Resolve the first len bytes of charset to a Tcl_Encoding.  The name
 * is copied, lower-cased and trimmed; if the cleaned name is a known
 * alias its Tcl encoding name is used, otherwise the cleaned name
 * itself is tried as the Tcl encoding name (e.g., big5).
 */

Tcl_Encoding
Ns_GetCharsetEncodingEx(char *charset, int len)
{
    Ns_DString     buf;
    Tcl_HashEntry *alias;
    Tcl_Encoding   result;
    char          *key;

    /*
     * Work on a private copy so the caller's string is not modified.
     */

    Ns_DStringInit(&buf);
    Ns_DStringNAppend(&buf, charset, len);
    key = Ns_StrTrim(Ns_StrToLower(buf.string));

    alias = Tcl_FindHashEntry(&charsets, key);
    if (alias != NULL) {
        key = Tcl_GetHashValue(alias);
    }

    /*
     * The lookup must happen before the buffer is released because
     * key may still point into it.
     */

    result = Ns_GetEncoding(key);
    Ns_DStringFree(&buf);
    return result;
}


/*
 *----------------------------------------------------------------------
 *
 * Ns_GetEncoding --
 *
 *	Return the Tcl_Encoding for the given Tcl encoding name,
 *	loading it via Tcl_GetEncoding() the first time the name is
 *	requested and returning the stored value afterwards.
 *
 * Results:
 *	Tcl_Encoding or NULL if not found.
 *
 * Side effects:
 *	An entry for the name is created in the encodings table on
 *	first use and the load outcome is logged.  The stored value
 *	may be NULL, in which case later calls return NULL without
 *	calling Tcl_GetEncoding() again.  A caller may wait on the
 *	condition variable while another thread is loading the same
 *	name.
 *
 *----------------------------------------------------------------------
 */

Tcl_Encoding
Ns_GetEncoding(char *name)
{
    Tcl_Encoding encoding;
    Tcl_HashEntry *hPtr;
    int new;

    Ns_MutexLock(&lock);
    hPtr = Tcl_CreateHashEntry(&encodings, name, &new);
    if (!new) {
	/*
	 * Entry already exists: wait until it no longer holds the
	 * ENC_LOCKED placeholder, then use the stored value.
	 */
	while ((encoding = Tcl_GetHashValue(hPtr)) == ENC_LOCKED) {
	    Ns_CondWait(&cond, &lock);
	}
    } else {
	/*
	 * First request for this name: mark the entry as in progress
	 * and drop the lock so the load itself runs unlocked.
	 */
	Tcl_SetHashValue(hPtr, ENC_LOCKED);
	Ns_MutexUnlock(&lock);
	encoding = Tcl_GetEncoding(NULL, name);
	if (encoding == NULL) {
	    Ns_Log(Warning, "encoding: could not load: %s", name);
	} else {
	    Ns_Log(Notice, "encoding: loaded: %s", name);
	}
	/*
	 * Publish the result (NULL on failure) and wake any waiters.
	 */
	Ns_MutexLock(&lock);
	Tcl_SetHashValue(hPtr, encoding);
	Ns_CondBroadcast(&cond);
    }
    Ns_MutexUnlock(&lock);
    return encoding;
}


/*
 *----------------------------------------------------------------------
 *
 * Ns_FindCharset --
 *
 *	Find the start and length of the charset value within a type
 *	string, e.g., for "text/html; charset=iso-8859-1" the result
 *	points at "iso-8859-1" with length 10.  Spaces are allowed
 *	around the '='.  The value ends at the first whitespace
 *	character, at a ';' (which separates media type parameters),
 *	or at the end of the string.
 *
 * Results:
 *	Pointer to start of charset within the given string (not a
 *	copy and not NUL terminated at the charset's end), or NULL if
 *	"charset" followed by '=' is not found.
 *
 * Side effects:
 *	Will update lenPtr with length of charset if found; lenPtr is
 *	left untouched when NULL is returned.
 *
 *----------------------------------------------------------------------
 */

char *
Ns_FindCharset(char *type, int *lenPtr)
{
    char *start, *end;

    start = Ns_StrCaseFind(type, "charset");
    if (start == NULL) {
        return NULL;
    }

    /*
     * Step over the attribute name and any spaces before the '='.
     */

    start += sizeof("charset") - 1;
    start += strspn(start, " ");
    if (*start != '=') {
        return NULL;
    }
    ++start;
    start += strspn(start, " ");

    /*
     * The value runs up to whitespace, a parameter separator, or the
     * end of the string.  Stopping at ';' keeps a following parameter
     * (e.g., "charset=utf-8;format=flowed") out of the charset name.
     */

    end = start;
    while (*end != '\0' && *end != ';' && !isspace(UCHAR(*end))) {
        ++end;
    }
    *lenPtr = end - start;
    return start;
}


/*
 *----------------------------------------------------------------------
 *
 * NsTclRegisterEncodingObjCmd --
 *
 *	Implements ns_register_encoding command to register an input
 *	encoding for a given method/URL:
 *
 *	    ns_register_encoding ?-noinherit? method url charset
 *
 *	Exactly three arguments are required after the optional
 *	-noinherit flag; any other argument count is rejected.
 *
 * Results:
 *	Standard Tcl result.  TCL_ERROR is returned, with a message in
 *	the interp result, on a wrong argument count or when no
 *	encoding is found for the charset.
 *
 * Side effects:
 *	May register a new encoding for given method/URL.
 *
 *----------------------------------------------------------------------
 */

int
NsTclRegisterEncodingObjCmd(ClientData data, Tcl_Interp *interp, int objc,
			   Tcl_Obj **objv)
{
    NsInterp   *itPtr = data;
    int         flags, idx, noinherit;
    Tcl_Encoding encoding;
    char       *server, *method, *url, *charset;

    /*
     * Bound objc first so that objv[1] is known to exist, then
     * require the count to match the presence of the flag: five
     * words with -noinherit, four without.
     */

    if (objc != 4 && objc != 5) {
        goto badargs;
    }
    noinherit = STREQ(Tcl_GetString(objv[1]), "-noinherit");
    if (objc != (noinherit ? 5 : 4)) {
        goto badargs;
    }
    flags = noinherit ? NS_OP_NOINHERIT : 0;
    idx = noinherit ? 2 : 1;

    server = itPtr->servPtr->server;
    method = Tcl_GetString(objv[idx++]);
    url = Tcl_GetString(objv[idx++]);
    charset = Tcl_GetString(objv[idx++]);
    encoding = Ns_GetCharsetEncoding(charset);
    if (encoding == NULL) {
        Tcl_AppendResult(interp, "no encoding for charset: ", charset, NULL);
        return TCL_ERROR;
    }
    Ns_UrlSpecificSet(server, method, url, eid, encoding, flags, NULL);
    return TCL_OK;

badargs:
    Tcl_WrongNumArgs(interp, 1, objv, "?-noinherit? method url charset");
    return TCL_ERROR;
}


/*
 *----------------------------------------------------------------------
 *
 * NsTclCharsetsCmd --
 *
 *      Tcl command to list the charset names known to the charset
 *      alias table.  Arguments to the command are not examined.
 *
 * Results:
 *	TCL_OK
 *
 * Side effects:
 *	Appends one list element per charset to the Tcl interpreter
 *	result.
 *
 *----------------------------------------------------------------------
 */

int
NsTclCharsetsCmd(ClientData dummy, Tcl_Interp *interp, int argc,
    char *argv[])
{
    Tcl_HashSearch  walk;
    Tcl_HashEntry  *cur;

    Ns_MutexLock(&lock);
    for (cur = Tcl_FirstHashEntry(&charsets, &walk);
         cur != NULL;
         cur = Tcl_NextHashEntry(&walk)) {
        Tcl_AppendElement(interp, (char *) Tcl_GetHashKey(&charsets, cur));
    }
    Ns_MutexUnlock(&lock);
    return TCL_OK;
}


/*
 *----------------------------------------------------------------------
 *
 * NsTclEncodingForCharsetCmd --
 *
 *      Tcl command returning the name of the Tcl encoding for the
 *      charset given as its single argument.
 *
 * Results:
 *	TCL_ERROR with a usage message on a wrong argument count;
 *	otherwise TCL_OK with the encoding name as the result, or
 *	with the result left unset when no encoding is found.
 *
 * Side effects:
 *	See Ns_GetEncoding().
 *
 *----------------------------------------------------------------------
 */

int
NsTclEncodingForCharsetCmd(ClientData dummy, Tcl_Interp *interp, int argc,
                           char **argv)
{
    Tcl_Encoding found;

    if (argc != 2) {
        Tcl_AppendResult(interp, "usage: ", argv[0], " charset", NULL);
        return TCL_ERROR;
    }

    found = Ns_GetCharsetEncoding(argv[1]);
    if (found != NULL) {
        Tcl_SetResult(interp, (char*)Tcl_GetEncodingName(found), TCL_VOLATILE);
    }
    return TCL_OK;
}


/*
 *----------------------------------------------------------------------
 *
 * NsGetInputEncoding --
 *
 *	Return the input encoding registered for the connection's
 *	method/url, falling back to the server's inputEncoding when
 *	nothing is registered.
 *
 * Results:
 *	Tcl_Encoding; NULL only if nothing is registered and the
 *	server's inputEncoding is itself NULL.
 *
 * Side effects:
 *	None.
 *
 *----------------------------------------------------------------------
 */

Tcl_Encoding
NsGetInputEncoding(Conn *connPtr)
{
    Tcl_Encoding registered;

    registered = Ns_UrlSpecificGet(connPtr->server,
                                   connPtr->request->method,
                                   connPtr->request->url, eid);
    if (registered != NULL) {
        return registered;
    }
    return connPtr->servPtr->inputEncoding;
}


/*
 *----------------------------------------------------------------------
 *
 * AddCharset, AddExtension --
 *
 *	Add charsets and extensions to hash tables.  AddCharset
 *	lower-cases the charset before using it as the key;
 *	AddExtension uses the extension exactly as given.
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	An existing entry for the same key has its value replaced.
 *	The name pointer is stored as the entry's value without being
 *	copied.
 *
 *----------------------------------------------------------------------
 */

static void
AddExtension(char *ext, char *name)
{
    Tcl_HashEntry *entry;
    int            isNew;

    /*
     * Whether the entry is new is not needed: the value is set
     * either way.
     */

    entry = Tcl_CreateHashEntry(&extensions, ext, &isNew);
    Tcl_SetHashValue(entry, name);
}

/*
 * Map a charset alias to a Tcl encoding name.  The alias is folded to
 * lower case in a scratch buffer before being used as the hash key;
 * the name pointer is stored as the value without being copied.
 */

static void
AddCharset(char *charset, char *name)
{
    Ns_DString     scratch;
    Tcl_HashEntry *entry;
    char          *key;
    int            isNew;

    Ns_DStringInit(&scratch);
    key = Ns_DStringAppend(&scratch, charset);
    key = Ns_StrToLower(key);
    entry = Tcl_CreateHashEntry(&charsets, key, &isNew);
    Tcl_SetHashValue(entry, name);

    /*
     * The scratch copy of the key is released here; this relies on
     * the string-keyed table keeping its own copy of the key.
     */

    Ns_DStringFree(&scratch);
}


/*
 *----------------------------------------------------------------------
 *
 * NsGetOutputEncoding --
 *
 *	Determine the output encoding for a connection from the mime
 *	type of its request URL.  Only "text/" types are considered;
 *	the charset named in the type is used, or the server's
 *	defcharset when the type names none.
 *
 * Results:
 *	Tcl_Encoding, or NULL if the type is not text/, no charset is
 *	available, or no encoding is found for the charset.
 *
 * Side effects:
 *	See Ns_GetEncoding().
 *
 *----------------------------------------------------------------------
 */

Tcl_Encoding
NsGetOutputEncoding(Conn *connPtr)
{
    char *mime, *cs;
    int   cslen;

    mime = Ns_GetMimeType(connPtr->request->url);
    if (mime == NULL || strncmp(mime, "text/", 5) != 0) {
        return NULL;
    }

    /*
     * Prefer a charset given in the mime type itself; otherwise use
     * the server default, passed with length -1.
     */

    cs = Ns_FindCharset(mime, &cslen);
    if (cs == NULL) {
        cs = connPtr->servPtr->defcharset;
        cslen = -1;
    }
    if (cs == NULL) {
        return NULL;
    }
    return Ns_GetCharsetEncodingEx(cs, cslen);
}

/*
 * Back to SourceForge.net
 *
 * Powered by ViewCVS 1.0-dev
 */