Revision: 1.17, Fri Mar 25 00:39:47 2005 UTC (3 months ago) by jgdavidson
Branch: MAIN
CVS Tags: HEAD
Changes since 1.16: +59 -119 lines
Simplified much of the code based on more basic configuration of default charsets and cooresponding encodings.
/*
 * The contents of this file are subject to the AOLserver Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://aolserver.com/.
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is AOLserver Code and related documentation
 * distributed by AOL.
 * 
 * The Initial Developer of the Original Code is America Online,
 * Inc. Portions created by AOL are Copyright (C) 1999 America Online,
 * Inc. All Rights Reserved.
 *
 * Alternatively, the contents of this file may be used under the terms
 * of the GNU General Public License (the "GPL"), in which case the
 * provisions of GPL are applicable instead of those above.  If you wish
 * to allow use of your version of this file only under the terms of the
 * GPL and not to allow others to use your version of this file under the
 * License, indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by the GPL.
 * If you do not delete the provisions above, a recipient may use your
 * version of this file under either the License or the GPL.
 */


/* 
 * urlencode.c --
 *
 *	Encode and decode URLs, as described in RFC 1738.
 */

static const char *RCSID = "@(#) $Header: /cvsroot/aolserver/aolserver/nsd/urlencode.c,v 1.17 2005/03/25 00:39:47 jgdavidson Exp $, compiled: " __DATE__ " " __TIME__;

#include "nsd.h"

/*
 * Static functions defined in this file.
 */

static Tcl_Encoding  GetUrlEncoding(char *charset);

/*
 * The following table is used for URL encoding and decoding
 * all 256 characters.
 */

struct {
    int   hex;	    /* Valid hex value or -1. */
    int   len;	    /* Length required to encode string. */
    char *str;	    /* String for multibyte encoded character. */
} enc[] = {
    {-1, 3, "00"}, {-1, 3, "01"}, {-1, 3, "02"}, {-1, 3, "03"}, 
    {-1, 3, "04"}, {-1, 3, "05"}, {-1, 3, "06"}, {-1, 3, "07"}, 
    {-1, 3, "08"}, {-1, 3, "09"}, {-1, 3, "0a"}, {-1, 3, "0b"}, 
    {-1, 3, "0c"}, {-1, 3, "0d"}, {-1, 3, "0e"}, {-1, 3, "0f"}, 
    {-1, 3, "10"}, {-1, 3, "11"}, {-1, 3, "12"}, {-1, 3, "13"}, 
    {-1, 3, "14"}, {-1, 3, "15"}, {-1, 3, "16"}, {-1, 3, "17"}, 
    {-1, 3, "18"}, {-1, 3, "19"}, {-1, 3, "1a"}, {-1, 3, "1b"}, 
    {-1, 3, "1c"}, {-1, 3, "1d"}, {-1, 3, "1e"}, {-1, 3, "1f"}, 
    {-1, 1, NULL}, {-1, 3, "21"}, {-1, 3, "22"}, {-1, 3, "23"}, 
    {-1, 3, "24"}, {-1, 3, "25"}, {-1, 3, "26"}, {-1, 3, "27"}, 
    {-1, 3, "28"}, {-1, 3, "29"}, {-1, 3, "2a"}, {-1, 3, "2b"}, 
    {-1, 3, "2c"}, {-1, 3, "2d"}, {-1, 3, "2e"}, {-1, 3, "2f"}, 
    { 0, 1, NULL}, { 1, 1, NULL}, { 2, 1, NULL}, { 3, 1, NULL}, 
    { 4, 1, NULL}, { 5, 1, NULL}, { 6, 1, NULL}, { 7, 1, NULL}, 
    { 8, 1, NULL}, { 9, 1, NULL}, {-1, 3, "3a"}, {-1, 3, "3b"}, 
    {-1, 3, "3c"}, {-1, 3, "3d"}, {-1, 3, "3e"}, {-1, 3, "3f"}, 
    {-1, 3, "40"}, {10, 1, NULL}, {11, 1, NULL}, {12, 1, NULL}, 
    {13, 1, NULL}, {14, 1, NULL}, {15, 1, NULL}, {-1, 1, NULL}, 
    {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, 
    {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, 
    {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, 
    {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, 
    {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, {-1, 3, "5b"}, 
    {-1, 3, "5c"}, {-1, 3, "5d"}, {-1, 3, "5e"}, {-1, 3, "5f"}, 
    {-1, 3, "60"}, {10, 1, NULL}, {11, 1, NULL}, {12, 1, NULL}, 
    {13, 1, NULL}, {14, 1, NULL}, {15, 1, NULL}, {-1, 1, NULL}, 
    {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, 
    {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, 
    {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, 
    {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, 
    {-1, 1, NULL}, {-1, 1, NULL}, {-1, 1, NULL}, {-1, 3, "7b"}, 
    {-1, 3, "7c"}, {-1, 3, "7d"}, {-1, 3, "7e"}, {-1, 3, "7f"}, 
    {-1, 3, "80"}, {-1, 3, "81"}, {-1, 3, "82"}, {-1, 3, "83"}, 
    {-1, 3, "84"}, {-1, 3, "85"}, {-1, 3, "86"}, {-1, 3, "87"}, 
    {-1, 3, "88"}, {-1, 3, "89"}, {-1, 3, "8a"}, {-1, 3, "8b"}, 
    {-1, 3, "8c"}, {-1, 3, "8d"}, {-1, 3, "8e"}, {-1, 3, "8f"}, 
    {-1, 3, "90"}, {-1, 3, "91"}, {-1, 3, "92"}, {-1, 3, "93"}, 
    {-1, 3, "94"}, {-1, 3, "95"}, {-1, 3, "96"}, {-1, 3, "97"}, 
    {-1, 3, "98"}, {-1, 3, "99"}, {-1, 3, "9a"}, {-1, 3, "9b"}, 
    {-1, 3, "9c"}, {-1, 3, "9d"}, {-1, 3, "9e"}, {-1, 3, "9f"}, 
    {-1, 3, "a0"}, {-1, 3, "a1"}, {-1, 3, "a2"}, {-1, 3, "a3"}, 
    {-1, 3, "a4"}, {-1, 3, "a5"}, {-1, 3, "a6"}, {-1, 3, "a7"}, 
    {-1, 3, "a8"}, {-1, 3, "a9"}, {-1, 3, "aa"}, {-1, 3, "ab"}, 
    {-1, 3, "ac"}, {-1, 3, "ad"}, {-1, 3, "ae"}, {-1, 3, "af"}, 
    {-1, 3, "b0"}, {-1, 3, "b1"}, {-1, 3, "b2"}, {-1, 3, "b3"}, 
    {-1, 3, "b4"}, {-1, 3, "b5"}, {-1, 3, "b6"}, {-1, 3, "b7"}, 
    {-1, 3, "b8"}, {-1, 3, "b9"}, {-1, 3, "ba"}, {-1, 3, "bb"}, 
    {-1, 3, "bc"}, {-1, 3, "bd"}, {-1, 3, "be"}, {-1, 3, "bf"}, 
    {-1, 3, "c0"}, {-1, 3, "c1"}, {-1, 3, "c2"}, {-1, 3, "c3"}, 
    {-1, 3, "c4"}, {-1, 3, "c5"}, {-1, 3, "c6"}, {-1, 3, "c7"}, 
    {-1, 3, "c8"}, {-1, 3, "c9"}, {-1, 3, "ca"}, {-1, 3, "cb"}, 
    {-1, 3, "cc"}, {-1, 3, "cd"}, {-1, 3, "ce"}, {-1, 3, "cf"}, 
    {-1, 3, "d0"}, {-1, 3, "d1"}, {-1, 3, "d2"}, {-1, 3, "d3"}, 
    {-1, 3, "d4"}, {-1, 3, "d5"}, {-1, 3, "d6"}, {-1, 3, "d7"}, 
    {-1, 3, "d8"}, {-1, 3, "d9"}, {-1, 3, "da"}, {-1, 3, "db"}, 
    {-1, 3, "dc"}, {-1, 3, "dd"}, {-1, 3, "de"}, {-1, 3, "df"}, 
    {-1, 3, "e0"}, {-1, 3, "e1"}, {-1, 3, "e2"}, {-1, 3, "e3"}, 
    {-1, 3, "e4"}, {-1, 3, "e5"}, {-1, 3, "e6"}, {-1, 3, "e7"}, 
    {-1, 3, "e8"}, {-1, 3, "e9"}, {-1, 3, "ea"}, {-1, 3, "eb"}, 
    {-1, 3, "ec"}, {-1, 3, "ed"}, {-1, 3, "ee"}, {-1, 3, "ef"}, 
    {-1, 3, "f0"}, {-1, 3, "f1"}, {-1, 3, "f2"}, {-1, 3, "f3"}, 
    {-1, 3, "f4"}, {-1, 3, "f5"}, {-1, 3, "f6"}, {-1, 3, "f7"}, 
    {-1, 3, "f8"}, {-1, 3, "f9"}, {-1, 3, "fa"}, {-1, 3, "fb"}, 
    {-1, 3, "fc"}, {-1, 3, "fd"}, {-1, 3, "fe"}, {-1, 3, "ff"}
};


/*
 *----------------------------------------------------------------------
 *
 * Ns_EncodeUrlWithEncoding --
 *
 *	Take a URL and encode any non-alphanumeric characters into 
 *	%hexcode 
 *
 * Results:
 *	A pointer to the encoded string (which is part of the 
 *	passed-in DString's memory) 
 *
 * Side effects:
 *	Encoded URL will be copied to given dstring.
 *
 *----------------------------------------------------------------------
 */

char *
Ns_EncodeUrlWithEncoding(Ns_DString *dsPtr, char *string, Tcl_Encoding encoding)
{
    register int   i, n;
    register char *p, *q;
    Tcl_DString  ds;

    Tcl_DStringInit(&ds);
    if (encoding != NULL) {
        string = Tcl_UtfToExternalDString(encoding, string, -1, &ds);
    }

    /*
     * Determine and set the required dstring length.
     */

    p = string;
    n = 0;
    while ((i = UCHAR(*p)) != 0) {
	n += enc[i].len;
	++p;
    }
    i = dsPtr->length;
    Ns_DStringSetLength(dsPtr, dsPtr->length + n);

    /*
     * Copy the result directly to the pre-sized dstring.
     */

    q = dsPtr->string + i;
    p = string;
    while ((i = UCHAR(*p)) != 0) {
	if (UCHAR(*p) == ' ') {
	    *q++ = '+';
	} else if (enc[i].str == NULL) {
	    *q++ = *p;
	} else {
	    *q++ = '%';
	    *q++ = enc[i].str[0];
	    *q++ = enc[i].str[1];
	}
	++p;
    }
    Tcl_DStringFree(&ds);
    return dsPtr->string;
}



/*
 *----------------------------------------------------------------------
 *
 * Ns_EncodeUrlCharset --
 *
 *	Take a URL and encode any non-alphanumeric characters into 
 *	%hexcode 
 *
 * Results:
 *	A pointer to the encoded string (which is part of the 
 *	passed-in DString's memory) 
 *
 * Side effects:
 *	Encoded URL will be copied to given dstring.
 *
 *----------------------------------------------------------------------
 */

char *
Ns_EncodeUrlCharset(Ns_DString *dsPtr, char *string, char *charset)
{
    Tcl_Encoding encoding = GetUrlEncoding(charset);

    return Ns_EncodeUrlWithEncoding(dsPtr, string, encoding);

}

/*
 *----------------------------------------------------------------------
 *
 * Ns_DecodeUrlCharset --
 *
 *	Decode an encoded URL (with %hexcode, etc.).
 *
 * Results:
 *	A pointer to the dstring's value, containing the decoded 
 *	URL.
 *
 * Side effects:
 *	Decoded URL will be copied to given dstring.
 *
 *----------------------------------------------------------------------
 */

char *
Ns_DecodeUrlCharset(Ns_DString *dsPtr, char *string, char *charset)
{
    Tcl_Encoding  encoding = GetUrlEncoding(charset);

    return Ns_DecodeUrlWithEncoding(dsPtr, string, encoding);
}



/*
 *----------------------------------------------------------------------
 *
 * Ns_DecodeUrlWithEncoding --
 *
 *	Decode an encoded URL (with %hexcode, etc.).
 *
 * Results:
 *	A pointer to the dstring's value, containing the decoded 
 *	URL.
 *
 * Side effects:
 *	Decoded URL will be copied to given dstring.
 *
 *----------------------------------------------------------------------
 */

char *
Ns_DecodeUrlWithEncoding(Ns_DString *dsPtr, char *string, Tcl_Encoding encoding)
{
    register int i, j, n;
    register char *p, *q;
    char c;
    Ns_DString	 *outPtr, out;
    Tcl_DString   ds;

    /*
     * If using an encoding, copy output to a scratch buffer instead
     * of directly to given dstring.
     */

    if (encoding == NULL) {
	outPtr = dsPtr;
    } else {
    	Ns_DStringInit(&out);
	outPtr = &out;
    }

    /*
     * Expand the output to the length of the input
     * string which will be the largest size required.
     */

    n = strlen(string);
    i = dsPtr->length;
    Ns_DStringSetLength(outPtr, i + n);
    q = outPtr->string + i;
    p = string;
    while ((c = UCHAR(*p)) != '\0') {
	if (c == '%' && n > 2 &&
	    (i = enc[UCHAR(p[1])].hex) >= 0 &&
	    (j = enc[UCHAR(p[2])].hex) >= 0) {
	    *q = (unsigned char) ((i << 4) + j);
	    n -= 2;
	    p += 2;
	} else if (c == '+') {
	    *q = ' ';
	} else {
	    *q = c;
	}
	--n, ++q, ++p;
    }

    /*
     * Terminate the dstring, decoding to utf8 if necessary.
     */

    n = q - outPtr->string;
    if (outPtr == dsPtr) {
        Ns_DStringSetLength(dsPtr, n);
    } else {
        Tcl_ExternalToUtfDString(encoding, outPtr->string, n, &ds);
        Ns_DStringAppend(dsPtr, ds.string);
        Tcl_DStringFree(&ds);
    	Ns_DStringFree(outPtr);
    }
    return dsPtr->string;
}


/*
 *----------------------------------------------------------------------
 *
 * NsTclUrlEncodeObjCmd, NsTclUrlDecodeObjCmd --
 *
 *	Implements ns_urlencode and ns_urldecode.
 *
 * Results:
 *	Tcl result. 
 *
 * Side effects:
 *	See docs. 
 *
 *----------------------------------------------------------------------
 */

static int
EncodeObjCmd(Tcl_Interp *interp, int objc, Tcl_Obj *CONST objv[], int encode)
{
    Tcl_DString ds;
    char        *charset;
    char        *data;

    if (objc == 2) {
        charset = NULL;
        data = Tcl_GetString(objv[1]);
    } else if (objc == 4 && STREQ(Tcl_GetString(objv[1]), "-charset")) {
        charset = Tcl_GetString(objv[2]);
        data = Tcl_GetString(objv[3]);
    } else {
	Tcl_WrongNumArgs(interp, 1, objv, "?-charset charset? data");
        return TCL_ERROR;
    }
    Tcl_DStringInit(&ds);
    if (encode) {
	Ns_EncodeUrlCharset(&ds, data, charset);
    } else {
	Ns_DecodeUrlCharset(&ds, data, charset);
    }
    Tcl_DStringResult(interp, &ds);
    Tcl_DStringFree(&ds);
    return TCL_OK;
}

int
NsTclUrlDecodeObjCmd(ClientData arg, Tcl_Interp *interp, int objc,
		     Tcl_Obj *CONST objv[])
{
    return EncodeObjCmd(interp, objc, objv, 0);
}

int
NsTclUrlEncodeObjCmd(ClientData arg, Tcl_Interp *interp, int objc,
		     Tcl_Obj *CONST objv[])
{
    return EncodeObjCmd(interp, objc, objv, 1);
}


/*
 *----------------------------------------------------------------------
 *
 * GetUrlEncoding --
 *
 *	Get the encoding to use in Ns_EncodeUrl/Ns_DecodeUrl,
 *	utilizing server defaults for current connection if
 *	necessary.
 *
 * Results:
 *	A Tcl_Encoding.
 *
 * Side effects:
 *	None. 
 *
 *----------------------------------------------------------------------
 */

static Tcl_Encoding
GetUrlEncoding(char *charset)
{
    Tcl_Encoding  encoding = NULL;
    Ns_Conn *conn;

    if (charset != NULL) {
	encoding = Ns_GetCharsetEncoding(charset);
    }

    /*
     * Use server default for current connection, if any.
     */
 
    if (encoding == NULL && (conn = Ns_GetConn()) != NULL) {
	encoding = Ns_ConnGetUrlEncoding(conn);
    }
    return encoding;
}

Back to SourceForge.net

Powered by ViewCVS 1.0-dev