This is a Python module that implements Double URL encoding, as per the description of 'getfields' in the Google 'Search Protocol Reference' documentation at http://code.google.com/enterprise/documentation/xml_reference.html#request_parameters. Built-in test cases are the same ones that Google uses in their documentation.
The module also provides single_urlencode, which behaves like the url_encode functions found in other languages: it returns only the encoded value, omitting the 'variablename=' prefix that Python's urlencode puts in front of it.
from urllib import urlencode def testcase(text, result): """Test to see if the double_urlencode function's output for an input of 'text' matches 'result'.""" text = double_urlencode(text) if (result != text): print "Error: double_urlencode testcase failure :(" # Pretty up the problem for debugging purposes print text print result buf = "" for i in range(0, len(result)): try: if text[i] == result[i]: buf += " " else: buf += "^" except IndexError: buf += "*" print buf def double_urlencode(text): """double URL-encode a given 'text'. Do not return the 'variablename=' portion.""" text = single_urlencode(text) text = single_urlencode(text) return text def single_urlencode(text): """single URL-encode a given 'text'. Do not return the 'variablename=' portion.""" blah = urlencode({'blahblahblah':text}) #we know the length of the 'blahblahblah=' is equal to 13. This lets us avoid any messy string matches blah = blah[13:] return blah #examples are from 'http://code.google.com/enterprise/documentation/xml_reference.html#appendix_url_escaping' testcase("William Shakespeare", "William%2BShakespeare") testcase("admission form site:www.stanford.edu","admission%2Bform%2Bsite%253Awww.stanford.edu")