"""Double URL encoding, as per the description of 'getfields' in the Google
'Search Protocol Reference' documentation at
http://code.google.com/enterprise/documentation/xml_reference.html#request_parameters.
The built-in test cases are the same examples that Google uses in its
documentation.

This module also provides a url_encode-style function similar to those found
in other languages: it omits the 'variablename=' portion of the output.
"""
try:  # Python 2
    from urllib import urlencode
    from urllib import quote_plus
except ImportError:  # Python 3
    from urllib.parse import urlencode
    from urllib.parse import quote_plus
def testcase(text, result):
    """Check that double_urlencode(text) equals result; print a diff on failure.

    Nothing is printed when the output matches. On a mismatch, an error
    banner, the actual output, and the expected ``result`` are printed,
    followed by a marker line aligned under them: a space where the
    characters agree, ``^`` where they differ, and ``*`` for each position
    that exists in only one of the two strings.

    Args:
        text: The raw string to pass to double_urlencode.
        result: The expected double URL-encoded string.
    """
    actual = double_urlencode(text)
    if actual == result:
        return

    # Single-argument print(...) behaves the same whether print is a
    # statement (Python 2) or a function (Python 3).
    print("Error: double_urlencode testcase failure :(")
    # Pretty up the problem for debugging purposes
    print(actual)
    print(result)
    markers = "".join(
        " " if got == want else "^" for got, want in zip(actual, result)
    )
    # zip() stops at the shorter string; flag every leftover position so that
    # surplus output is reported as well as missing output.
    markers += "*" * abs(len(actual) - len(result))
    print(markers)
def double_urlencode(text):
    """Return 'text' URL-encoded twice, without any 'variablename=' prefix.

    The second pass re-encodes the escape characters produced by the first
    pass (for example, a space becomes '+' and then '%2B').
    """
    encoded = text
    for _ in range(2):
        encoded = single_urlencode(encoded)
    return encoded
def single_urlencode(text):
    """Single URL-encode 'text' and return only the encoded value.

    Spaces become '+' and reserved characters become '%XX' escapes. The
    'variablename=' portion that urlencode() would emit is not part of the
    result.

    Args:
        text: The value to encode. Values that are not already bytes are
            converted with str() before quoting.

    Returns:
        The URL-encoded string.
    """
    # urlencode() quotes each value with quote_plus(), stringifying anything
    # that is not already bytes. Calling quote_plus() directly yields the same
    # value without encoding a throwaway key and slicing it off by a
    # hand-counted length.
    value = text if isinstance(text, bytes) else str(text)
    return quote_plus(value)
# Built-in self-checks, run whenever this module is executed or imported.
# Examples are from 'http://code.google.com/enterprise/documentation/xml_reference.html#appendix_url_escaping'
for _query, _expected in (
    ("William Shakespeare", "William%2BShakespeare"),
    (
        "admission form site:www.stanford.edu",
        "admission%2Bform%2Bsite%253Awww.stanford.edu",
    ),
):
    testcase(_query, _expected)