Eli Fulkerson .com > Home > Projects > Python-double-urlencode

Double URL-encode


This is a Python module that implements Double URL encoding, as per the description of 'getfields' in the Google 'Search Protocol Reference' documentation at http://code.google.com/enterprise/documentation/xml_reference.html#request_parameters. Built-in test cases are the same ones that Google uses in their documentation.

The module also provides single_urlencode, which behaves more like the URL-encode functions found in other languages: it returns only the encoded value, omitting the 'variablename=' portion that Python's urlencode puts in front of it.

from urllib import urlencode

def testcase(text, result):
   """Check that double_urlencode(text) produces the expected 'result'.

   Prints nothing when the output matches.  On a mismatch it prints an error
   banner, the actual output, the expected output, and a marker line with one
   character per position of 'result':

      ' '  the actual and expected characters agree
      '^'  the characters differ
      '*'  the actual output is too short to have a character here

   Returns None in both cases.
   """
   actual = double_urlencode(text)

   if actual == result:
      return

   # Single-argument print(...) behaves the same under Python 2's print
   # statement and Python 3's print function.
   print("Error: double_urlencode testcase failure :(")

   # Pretty up the problem for debugging purposes
   print(actual)
   print(result)

   markers = []
   for position, expected_char in enumerate(result):
      if position >= len(actual):
         # Actual output ran out before the expected output did.
         markers.append("*")
      elif actual[position] == expected_char:
         markers.append(" ")
      else:
         markers.append("^")

   print("".join(markers))

def double_urlencode(text):
   """URL-encode 'text' twice in a row and return the result.

   Only the encoded value is returned; there is no 'variablename=' prefix.
   """
   # The inner call encodes the raw text; the outer call encodes that output again.
   return single_urlencode(single_urlencode(text))

def single_urlencode(text):
   """URL-encode 'text' once and return only the encoded value.

   urlencode() always emits 'name=value' pairs, so the text is encoded under
   a throwaway name, and that name plus the '=' sign is cut off the front.
   """
   placeholder = 'blahblahblah'
   pair = urlencode({placeholder: text})

   # The prefix to drop is the placeholder name followed by '=', so its length
   # is known up front and no string searching is needed.
   prefix_length = len(placeholder) + 1
   return pair[prefix_length:]

# Built-in self-tests, run whenever the module is loaded.  Each pair is
# (input text, expected double-encoded output); the examples are taken from
# 'http://code.google.com/enterprise/documentation/xml_reference.html#appendix_url_escaping'
for _sample, _expected in (
   ("William Shakespeare", "William%2BShakespeare"),
   ("admission form site:www.stanford.edu", "admission%2Bform%2Bsite%253Awww.stanford.edu"),
):
   testcase(_sample, _expected)