Добавил:
Опубликованный материал нарушает ваши авторские права? Сообщите нам.
Вуз: Предмет: Файл:
C++ Timesaving Techniques (2005) [eng].pdf
Скачиваний:
65
Добавлен:
16.08.2013
Размер:
8.35 Mб
Скачать

338 Technique 56: Encoding and Decoding Data for the Web

Creating the URL Codec Class

In the technical world, a “codec” is a compressor/decompressor, normally used for compressing audio or video formats into a smaller size. However, the concept is very applicable to what we are doing with text because we are working with streams of data that are similar to video and audio formats. For the purposes of this technique, we are going to create a simple class that understands how to encode a string so that it can be used with existing Web browsers. Each character in the string will be examined, and if it is not in a valid format for the Web, it will be encoded to use the proper syntax. Here’s how it works:

1. In the code editor of your choice, create a new file to hold the code for the source file of the technique.

In this example, the file is named ch56.cpp, although you can use whatever you choose. This file will contain the definition of the URLCodec class.

2. Type the code in Listing 56-1 into your file.

Better yet, copy the code from the source file on this book’s companion Web site.

LISTING 56-1: DATA ENCODING AND DECODING

#include <cctype>
#include <iostream>
#include <string>

using namespace std;

// URLCodec stores a URL string and produces percent-encoded ("%XX") and
// decoded copies of it. encode(), encode_no_xml() and decode() never modify
// the stored string; they return a new string.
class URLCodec
{
    std::string _url;

    // Return a copy of _url in which every character that is neither
    // alphanumeric nor listed in `keep` is replaced by '%' followed by its
    // two-digit uppercase hexadecimal code.
    std::string percentEncode (const std::string& keep) const
    {
        // Make a copy of the string.
        std::string encoded = _url;

        // Scan the input string backward, so that expanding one character
        // into three never shifts the characters still to be examined.
        std::string::size_type index = encoded.length();
        while (index--)
        {
            const unsigned char special =
                static_cast<unsigned char>(encoded[index]);

            // Check for special characters.                          // (2)
            if (!std::isalnum (special) &&
                keep.find (encoded[index]) == std::string::npos)
            {
                const char escape[3] =
                    { '%', htoa (special / 16), htoa (special % 16) };
                encoded.replace (index, 1, escape, 3);
            }
        }
        return encoded;
    }

protected:
    // Convert a value in the range 0..15 to its uppercase hex digit.
    // Returns 'X' for any value outside that range.
    static char htoa (int number)
    {
        if ((number >= 0) && (number <= 9))
            return static_cast<char>('0' + number);
        else if ((number >= 10) && (number <= 15))
            return static_cast<char>('A' + number - 10);
        else
            return 'X';
    }

    // Convert a hex digit character ('0'-'9', 'A'-'F', 'a'-'f') to its
    // numeric value 0..15. Returns 0 for any other character.
    static char atoh (unsigned char character)
    {
        if ((character >= '0') && (character <= '9'))
            return static_cast<char>(character - '0');
        else if ((character >= 'A') && (character <= 'F'))
            return static_cast<char>(character - 'A' + 10);
        else if ((character >= 'a') && (character <= 'f'))
            return static_cast<char>(character - 'a' + 10);
        else
            return 0;
    }

public:
    // Create a codec holding an empty URL.
    URLCodec( void )
    {
    }

    // Create a codec holding a copy of strIn. A null pointer is treated as
    // an empty string (assigning a null pointer to std::string is undefined).
    URLCodec( const char *strIn )
        : _url( strIn ? strIn : "" )
    {
    }

    URLCodec( const URLCodec& aCopy )
        : _url( aCopy._url )
    {
    }

    // Returns a reference to *this so that chained assignments operate on
    // this object rather than on a temporary copy.
    URLCodec& operator=( const URLCodec& aCopy )
    {
        _url = aCopy._url;
        return *this;
    }

    // Replace the stored URL. A null pointer is treated as an empty string.
    void setURL ( const char *strIn )
    {
        _url = strIn ? strIn : "";
    }

    // Replace the stored URL.
    void setURL ( const std::string& sIn )
    {
        _url = sIn;
    }

    // Return a copy of the stored URL, exactly as it was set.
    std::string getURL ( void ) const
    {
        return _url;
    }

    // Return the stored URL with every non-alphanumeric character
    // percent-encoded.                                                // (1)
    std::string encode() const
    {
        return percentEncode (std::string());
    }

    // Same as encode(), but leaves space, '<', '>', '_', newline, '/',
    // '"' and '\'' untouched.                                         // (3)
    std::string encode_no_xml() const
    {
        return percentEncode (" <>_\n/\"'");
    }

    // Return the stored URL with every "%XX" escape replaced by the
    // character whose code is the hex value XX. A '%' that is not followed
    // by two hex digits (including one at the very end of the string) is
    // copied through unchanged instead of reading past the end of the
    // string.                                                         // (4)
    std::string decode() const
    {
        // Make a copy of the string.
        std::string decoded = _url;

        // Scan the input string forward. After a replacement the loop moves
        // past the decoded character, so a decoded '%' is never reinterpreted.
        for (std::string::size_type index = 0;
             index < decoded.length(); ++index)
        {
            // Check for encoded characters.
            if (decoded[index] != '%' || decoded.length() - index < 3)
                continue;

            const unsigned char high =
                static_cast<unsigned char>(decoded[index + 1]);
            const unsigned char low =
                static_cast<unsigned char>(decoded[index + 2]);
            if (!std::isxdigit (high) || !std::isxdigit (low))
                continue;

            const char special =
                static_cast<char>(atoh (high) * 16 + atoh (low));
            decoded.replace (index, 3, 1, special);
        }
        return decoded;
    }
};

This class will handle the encoding and decoding of URLs, as well as storing a generic URL string. Each character in the string is examined, starting at the rear of the string and working backwards, so that we can properly interpret characters as we need to.

There are two forms of the encode method shown here:

The first, shown at the line marked 1, encodes all characters for the string in standard URL format. This is done at the loop, shown by the line marked 2. Each character is checked to see whether it is alphanumeric, and if not, it is replaced by a percent sign followed by its two-digit hex equivalent.

The second, shown at the line marked with 3, does the same thing, but does not encode the XML characters (space, <, >, _, newline, /, and the single and double quotation marks) that some applications for the Web will need.

If you are working with standard URLs for the Web, use the first version. If you are working with Java applets or .Net applications running on the Web that are expecting valid XML characters, use the second. In any case, you may use the decode method, shown at 4, to decode the characters into a human-readable string.

3. Save the source code in the code editor.

Testing the URL Codec Class

After you create a class, you should create a test driver that not only ensures that your code is correct, but also shows people how to use your code.

The following steps show you how to create a test driver that illustrates various kinds of input from the user, and shows how the class is intended to be used.

1. In the code editor of your choice, reopen the source file to hold the code for your test program.

In this example, I named the test program ch56.cpp.

2. Type the code from Listing 56-2 into your file.

Better yet, copy the code from the source file on this book’s companion Web site.

LISTING 56-2: THE URL CODEC TEST DRIVER

int main(int argc, char **argv)

{

if ( argc < 2 )

{

cout << “Usage: ch7_1 url1 [url2 url3]” << endl;

cout << “Where: url[n] is the url you wish to see encoded/decoded” << endl;

return -1;

}

Testing the URL Codec Class 341

for ( int i=1;

i<argc; ++i )

 

 

{

 

 

 

 

URLCodec url( argv[i] );

 

 

// First, try decoding it.

 

5

string enc

= url.encode();

 

string dec

= url.decode();

 

cout

<< “Input String: “ << argv[i]

<<

endl;

 

 

 

cout

<< “Encoded: “ << enc.c_str()

 

<<

endl;

 

 

 

cout

<< “Decoded: “ << dec.c_str()

 

<<endl;

//Now try decoding the result. URLCodec enc_url( enc.c_str() ); string enc1 = url.encode(); string dec1 = url.decode();

cout << “Input String: “ << enc_url.getURL().c_str() << endl;

cout << “Encoded: “ << enc1.c_str()

<<endl;

cout << “Decoded: “ << dec1.c_str() << endl;

}

return 0;

}

The test driver code above simply allows you to test out the functionality of the encode and decode methods of the URLCodec class. If you enter a string from the command line to the application, it will print out the encoded and decoded versions of the string. There is nothing really magical about this application. As you can see from the listing, the code first tries to encode the string you give it (shown at 5) and then decodes the result of that encoding to see if they are the same. The second block of code then encodes the result and decodes it to ensure that the code is working properly. When all is said and done, you should see the same input and output to the console.

3. Save the source-code file in the editor and close the editor application.

4. Compile the source file with your favorite compiler, on your favorite operating system.

5. Run the application on your favorite operating system.

If you have done everything right, you can produce a session similar to the one shown in Listing 56-3 on your console window.

LISTING 56-3: OUTPUT FROM THE TEST DRIVER

$ ./a.exe "http://this is a bad url" "http://localhost/c:/x*.xml"
Input String: http://this is a bad url
Encoded: http%3A%2F%2Fthis%20is%20a%20bad%20url
Decoded: http://this is a bad url
Input String: http%3A%2F%2Fthis%20is%20a%20bad%20url
Encoded: http%3A%2F%2Fthis%20is%20a%20bad%20url
Decoded: http://this is a bad url
Input String: http://localhost/c:/x*.xml
Encoded: http%3A%2F%2Flocalhost%2Fc%3A%2Fx%2A%2Exml
Decoded: http://localhost/c:/x*.xml
Input String: http%3A%2F%2Flocalhost%2Fc%3A%2Fx%2A%2Exml
Encoded: http%3A%2F%2Flocalhost%2Fc%3A%2Fx%2A%2Exml
Decoded: http://localhost/c:/x*.xml

Note that input strings on the command line must be enclosed in quotes; otherwise, they will be parsed into separate words on the space breaks.

As you can see, the input is properly converted into the encoded version of the URL string that can be used by Web browsers or servers. The decoded version is what you would expect it to be, in a form that can be used by any application.

342 Technique 56: Encoding and Decoding Data for the Web

Whenever you are exchanging data with a Web-based application, encode the data you send; expect the data you get back to be encoded from the application, too. Prepare your code to deal with encoding and decoding this information. If it turns out that the data

does not need to be encoded or decoded, you will have wasted a small amount of time. But if the data does need encoding/decoding, you will have saved a lot of time that would otherwise be spent figuring out why your data looks strange and breaks things.

Technique 57: Encrypting and Decrypting Strings

Save Time By

Protecting data with encryption

Understanding and implementing the Rot13 algorithm

Understanding and implementing the XOR algorithm

Interpreting output

It would be very nice if we could all trust everyone around us not to view or access our private information. Unfortunately, not everyone is quite as trustworthy as you or I. The fact of the matter is that sensitive information, such as passwords, user names, and credit card numbers, simply should not be stored in a readily readable fashion. If we fail to hide the information in some way, we can be very sure that the information will find its way to every cracker on the Internet and be used in all sorts of evil and insidious ways. Hiding information is a task normally accomplished by encryption — translating data from a human-readable format to a non-human-readable format. There are almost as many ways to encrypt data as there are to create it in the first place. Serious encryption methods, such as the RSA or Blowfish encryption algorithms, are very complex; they would take pages and pages to explain (and in the end, they’d still be about as hard to understand).

This technique looks at two very simple — but effective — methods of encrypting data from prying eyes: the Rot13 algorithm and the XOR algorithm (XOR stands for “Exclusive Or”). Both methods can defeat casual snoopers, but they’re not foolproof; I wouldn’t recommend using either method for industrial-strength applications. It is difficult, if not impossible, to add encryption to an application after it’s been written. In order to make a secure system, encryption should be included as early in the process as possible. By adding these algorithms at the design phase, you will save time and effort and create a more secure system.

Selecting an encryption method is almost as sensitive an issue as selecting a programmer’s editor or compiler. You can save a lot of time by selecting a standard algorithm that provides the level of security your system needs. If you are writing a simple in-house application, XOR encryption is probably more than secure enough. On the other hand, if you are writing a medical-storage application (that is, one that allows access to a database of patient information) that allows access via the Internet, choose a much stronger method, such as the Blowfish algorithm.

344 Technique 57: Encrypting and Decrypting Strings

Implementing the Rot13

Algorithm

The Rot13 algorithm is really a very simple way of encoding data that makes that data difficult to read, but is almost trivial to decode. The algorithm, as the name suggests, simply rotates characters 13 places in the alphabet. Therefore, an A becomes an N and so forth. The algorithm wraps around, so anything past Z goes back to A. The following steps show you how to create a simple class that can both encode and decode Rot13 strings. This class is certainly not industrial-strength encryption, but it will make it difficult for the average person to read your strings.

1. In the code editor of your choice, create a new file to hold the code for the source file of the technique.

In this example, the file is named ch57.cpp, although you can use whatever you choose. This file will contain the definition of the Rot13Encryption class.

2. Type the code from Listing 57-1 into your file.

Better yet, copy the code from the source file on this book’s companion Web site.

LISTING 57-1: THE ROT13 ALGORITHM CODE

#include <string> #include <iostream>

using namespace std;

class Rot13Encryption

{

private:

string _encrypt; protected:

string rot13(const string& strIn)

{

string sOut = “”;

for ( int i=0; i<(int)strIn.size(); ++i )

{

char ch = strIn[i];

 

 

// the following assumes that

 

‘a’ +

25

== ‘z’ and

 

1

‘A’ +

25

== ‘Z’, etc.

 

 

 

 

 

if( (ch >= ‘N’ && ch <= ‘Z’) ||

 

(ch >=

‘n’ && ch <= ‘z’) )

 

 

ch -= 13;

 

 

else if(

(ch >= ‘A’ && ch <=

 

 

‘M’) || (ch >= ‘a’ && ch <=

 

‘m’) )

 

 

 

 

ch += 13;

 

 

sOut += ch;

 

 

 

}

return sOut;

}

public:

Rot13Encryption(void)

{

}

Rot13Encryption( const char *strIn )

{

if ( strIn )

{

_encrypt = rot13( strIn );

}

}

Rot13Encryption( const Rot13Encryption& aCopy )

{

_encrypt = aCopy._encrypt;

}

Rot13Encryption operator=( const Rot13Encryption& aCopy )

{

_encrypt = aCopy._encrypt; return *this;

}

string operator=( const char *strIn )

{

if ( strIn )

{

_encrypt = rot13( strIn );

}

return _encrypt;

}

const char *operator<<( const char *strIn )

{