root / branches / tbeta / ccv_ofx_0061 / Windows / addons / ofxXmlSettings / libs / tinyxmlparser.cpp @ 214

View | Annotate | Download (38.9 KB)

1
/*
2
www.sourceforge.net/projects/tinyxml
3
Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
4
5
This software is provided 'as-is', without any express or implied 
6
warranty. In no event will the authors be held liable for any 
7
damages arising from the use of this software.
8
9
Permission is granted to anyone to use this software for any 
10
purpose, including commercial applications, and to alter it and 
11
redistribute it freely, subject to the following restrictions:
12
13
1. The origin of this software must not be misrepresented; you must 
14
not claim that you wrote the original software. If you use this
15
software in a product, an acknowledgment in the product documentation
16
would be appreciated but is not required.
17
18
2. Altered source versions must be plainly marked as such, and 
19
must not be misrepresented as being the original software.
20
21
3. This notice may not be removed or altered from any source 
22
distribution.
23
*/
24
25
#include <ctype.h>
26
#include <stddef.h>
27
28
#include "tinyxml.h"
29
30
//#define DEBUG_PARSER
31
#if defined( DEBUG_PARSER )
32
#        if defined( DEBUG ) && defined( _MSC_VER )
33
#                include <windows.h>
34
#                define TIXML_LOG OutputDebugString
35
#        else
36
#                define TIXML_LOG printf
37
#        endif
38
#endif
39
40
// Note tha "PutString" hardcodes the same list. This
41
// is less flexible than it appears. Changing the entries
42
// or order will break putstring.        
43
TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] = 
44
{
45
        { "&amp;",  5, '&' },
46
        { "&lt;",   4, '<' },
47
        { "&gt;",   4, '>' },
48
        { "&quot;", 6, '\"' },
49
        { "&apos;", 6, '\'' }
50
};
51
52
// Bunch of unicode info at:
53
//                http://www.unicode.org/faq/utf_bom.html
54
// Including the basic of this table, which determines the #bytes in the
55
// sequence from the lead byte. 1 placed for invalid sequences --
56
// although the result will be junk, pass it through as much as possible.
57
// Beware of the non-characters in UTF-8:        
58
//                                ef bb bf (Microsoft "lead bytes")
59
//                                ef bf be
60
//                                ef bf bf 
61
62
const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
63
const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
64
const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
65
66
const int TiXmlBase::utf8ByteTable[256] = 
67
{
68
        //        0        1        2        3        4        5        6        7        8        9        a        b        c        d        e        f
69
                1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        // 0x00
70
                1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        // 0x10
71
                1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        // 0x20
72
                1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        // 0x30
73
                1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        // 0x40
74
                1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        // 0x50
75
                1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        // 0x60
76
                1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        // 0x70        End of ASCII range
77
                1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        // 0x80 0x80 to 0xc1 invalid
78
                1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        // 0x90 
79
                1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        // 0xa0 
80
                1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        // 0xb0 
81
                1,        1,        2,        2,        2,        2,        2,        2,        2,        2,        2,        2,        2,        2,        2,        2,        // 0xc0 0xc2 to 0xdf 2 byte
82
                2,        2,        2,        2,        2,        2,        2,        2,        2,        2,        2,        2,        2,        2,        2,        2,        // 0xd0
83
                3,        3,        3,        3,        3,        3,        3,        3,        3,        3,        3,        3,        3,        3,        3,        3,        // 0xe0 0xe0 to 0xef 3 byte
84
                4,        4,        4,        4,        4,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1,        1        // 0xf0 0xf0 to 0xf4 4 byte, 0xf5 and higher invalid
85
};
86
87
88
// Encodes the code point `input` as UTF-8 into `output` and stores the number
// of bytes written (1 to 4) in `*length`. Values of 0x200000 and above are not
// encoded: `*length` is set to 0 and `output` is left untouched. No terminator
// is written, so `output` needs room for up to four bytes.
void TiXmlBase::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
{
	const unsigned long TRAIL_MASK = 0xBF;	// clears bit 6 of a trailing byte
	const unsigned long TRAIL_MARK = 0x80;	// sets bit 7 of a trailing byte
	// Prefix bits of the first byte, indexed by the total sequence length.
	const unsigned long LEAD_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

	int byteCount;
	if ( input < 0x80 )
	{
		byteCount = 1;
	}
	else if ( input < 0x800 )
	{
		byteCount = 2;
	}
	else if ( input < 0x10000 )
	{
		byteCount = 3;
	}
	else if ( input < 0x200000 )
	{
		byteCount = 4;
	}
	else
	{
		// This code won't convert this correctly anyway.
		*length = 0;
		return;
	}
	*length = byteCount;

	// Emit the trailing bytes from last to first, six payload bits at a time...
	for ( int index = byteCount - 1; index > 0; --index )
	{
		output[index] = (char)((input | TRAIL_MARK) & TRAIL_MASK);
		input >>= 6;
	}

	// ...then the lead byte, which carries the remaining bits plus the length prefix.
	output[0] = (char)(input | LEAD_MARK[byteCount]);
}
127
128
129
// Returns non-zero if `anyByte` should be treated as a letter.
//
// Only bytes below 127 are actually classified (via isalpha); every other
// byte is assumed to be a valid letter. It is tricky to decide alphabetical
// vs. not across encodings, so this is deliberately generous. The encoding
// argument is currently ignored.
/*static*/ int TiXmlBase::IsAlpha( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
{
	// What else to do? The unicode set is huge...get the english ones right.
	return ( anyByte < 127 ) ? isalpha( anyByte ) : 1;
}
148
149
150
// Returns non-zero if `anyByte` should be treated as a letter or digit.
//
// Only bytes below 127 are actually classified (via isalnum); every other
// byte is assumed to be valid. It is tricky to decide alphabetical vs. not
// across encodings, so this is deliberately generous. The encoding argument
// is currently ignored.
/*static*/ int TiXmlBase::IsAlphaNum( unsigned char anyByte, TiXmlEncoding /*encoding*/ )
{
	// What else to do? The unicode set is huge...get the english ones right.
	return ( anyByte < 127 ) ? isalnum( anyByte ) : 1;
}
169
170
171
class TiXmlParsingData
172
{
173
        friend class TiXmlDocument;
174
  public:
175
        void Stamp( const char* now, TiXmlEncoding encoding );
176
177
        const TiXmlCursor& Cursor()        { return cursor; }
178
179
  private:
180
        // Only used by the document!
181
        TiXmlParsingData( const char* start, int _tabsize, int row, int col )
182
        {
183
                assert( start );
184
                stamp = start;
185
                tabsize = _tabsize;
186
                cursor.row = row;
187
                cursor.col = col;
188
        }
189
190
        TiXmlCursor                cursor;
191
        const char*                stamp;
192
        int                                tabsize;
193
};
194
195
196
void TiXmlParsingData::Stamp( const char* now, TiXmlEncoding encoding )
197
{
198
        assert( now );
199
200
        // Do nothing if the tabsize is 0.
201
        if ( tabsize < 1 )
202
        {
203
                return;
204
        }
205
206
        // Get the current row, column.
207
        int row = cursor.row;
208
        int col = cursor.col;
209
        const char* p = stamp;
210
        assert( p );
211
212
        while ( p < now )
213
        {
214
                // Treat p as unsigned, so we have a happy compiler.
215
                const unsigned char* pU = (const unsigned char*)p;
216
217
                // Code contributed by Fletcher Dunn: (modified by lee)
218
                switch (*pU) {
219
                        case 0:
220
                                // We *should* never get here, but in case we do, don't
221
                                // advance past the terminating null character, ever
222
                                return;
223
224
                        case '\r':
225
                                // bump down to the next line
226
                                ++row;
227
                                col = 0;                                
228
                                // Eat the character
229
                                ++p;
230
231
                                // Check for \r\n sequence, and treat this as a single character
232
                                if (*p == '\n') {
233
                                        ++p;
234
                                }
235
                                break;
236
237
                        case '\n':
238
                                // bump down to the next line
239
                                ++row;
240
                                col = 0;
241
242
                                // Eat the character
243
                                ++p;
244
245
                                // Check for \n\r sequence, and treat this as a single
246
                                // character.  (Yes, this bizarre thing does occur still
247
                                // on some arcane platforms...)
248
                                if (*p == '\r') {
249
                                        ++p;
250
                                }
251
                                break;
252
253
                        case '\t':
254
                                // Eat the character
255
                                ++p;
256
257
                                // Skip to next tab stop
258
                                col = (col / tabsize + 1) * tabsize;
259
                                break;
260
261
                        case TIXML_UTF_LEAD_0:
262
                                if ( encoding == TIXML_ENCODING_UTF8 )
263
                                {
264
                                        if ( *(p+1) && *(p+2) )
265
                                        {
266
                                                // In these cases, don't advance the column. These are
267
                                                // 0-width spaces.
268
                                                if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
269
                                                        p += 3;        
270
                                                else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
271
                                                        p += 3;        
272
                                                else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
273
                                                        p += 3;        
274
                                                else
275
                                                        { p +=3; ++col; }        // A normal character.
276
                                        }
277
                                }
278
                                else
279
                                {
280
                                        ++p;
281
                                        ++col;
282
                                }
283
                                break;
284
285
                        default:
286
                                if ( encoding == TIXML_ENCODING_UTF8 )
287
                                {
288
                                        // Eat the 1 to 4 byte utf8 character.
289
                                        int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
290
                                        if ( step == 0 )
291
                                                step = 1;                // Error case from bad encoding, but handle gracefully.
292
                                        p += step;
293
294
                                        // Just advance one column, of course.
295
                                        ++col;
296
                                }
297
                                else
298
                                {
299
                                        ++p;
300
                                        ++col;
301
                                }
302
                                break;
303
                }
304
        }
305
        cursor.row = row;
306
        cursor.col = col;
307
        assert( cursor.row >= -1 );
308
        assert( cursor.col >= -1 );
309
        stamp = p;
310
        assert( stamp );
311
}
312
313
314
const char* TiXmlBase::SkipWhiteSpace( const char* p, TiXmlEncoding encoding )
315
{
316
        if ( !p || !*p )
317
        {
318
                return 0;
319
        }
320
        if ( encoding == TIXML_ENCODING_UTF8 )
321
        {
322
                while ( *p )
323
                {
324
                        const unsigned char* pU = (const unsigned char*)p;
325
                        
326
                        // Skip the stupid Microsoft UTF-8 Byte order marks
327
                        if (        *(pU+0)==TIXML_UTF_LEAD_0
328
                                 && *(pU+1)==TIXML_UTF_LEAD_1 
329
                                 && *(pU+2)==TIXML_UTF_LEAD_2 )
330
                        {
331
                                p += 3;
332
                                continue;
333
                        }
334
                        else if(*(pU+0)==TIXML_UTF_LEAD_0
335
                                 && *(pU+1)==0xbfU
336
                                 && *(pU+2)==0xbeU )
337
                        {
338
                                p += 3;
339
                                continue;
340
                        }
341
                        else if(*(pU+0)==TIXML_UTF_LEAD_0
342
                                 && *(pU+1)==0xbfU
343
                                 && *(pU+2)==0xbfU )
344
                        {
345
                                p += 3;
346
                                continue;
347
                        }
348
349
                        if ( IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )                // Still using old rules for white space.
350
                                ++p;
351
                        else
352
                                break;
353
                }
354
        }
355
        else
356
        {
357
                while ( *p && IsWhiteSpace( *p ) || *p == '\n' || *p =='\r' )
358
                        ++p;
359
        }
360
361
        return p;
362
}
363
364
#ifdef TIXML_USE_STL
365
/*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
366
{
367
        for( ;; )
368
        {
369
                if ( !in->good() ) return false;
370
371
                int c = in->peek();
372
                // At this scope, we can't get to a document. So fail silently.
373
                if ( !IsWhiteSpace( c ) || c <= 0 )
374
                        return true;
375
376
                *tag += (char) in->get();
377
        }
378
}
379
380
// Copies characters from the stream `in` onto the end of `*tag` until the
// next character to be read equals `character`. That character is left in the
// stream. Returns true when it is found; returns false if the stream stops
// being good() or peek() yields a value <= 0 first.
/*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
{
	//assert( character > 0 && character < 128 );	// else it won't work in utf-8
	for ( ;; )
	{
		if ( !in->good() )
		{
			return false;
		}

		const int next = in->peek();
		if ( next == character )
		{
			return true;
		}
		if ( next <= 0 )
		{
			// Silent failure: can't get document at this scope
			return false;
		}

		in->get();
		*tag += (char) next;
	}
}
396
#endif
397
398
// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
399
// "assign" optimization removes over 10% of the execution time.
400
//
401
const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
402
{
403
        // Oddly, not supported on some comilers,
404
        //name->clear();
405
        // So use this:
406
        *name = "";
407
        assert( p );
408
409
        // Names start with letters or underscores.
410
        // Of course, in unicode, tinyxml has no idea what a letter *is*. The
411
        // algorithm is generous.
412
        //
413
        // After that, they can be letters, underscores, numbers,
414
        // hyphens, or colons. (Colons are valid ony for namespaces,
415
        // but tinyxml can't tell namespaces from names.)
416
        if (    p && *p 
417
                 && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
418
        {
419
                const char* start = p;
420
                while(                p && *p
421
                                &&        (                IsAlphaNum( (unsigned char ) *p, encoding ) 
422
                                                 || *p == '_'
423
                                                 || *p == '-'
424
                                                 || *p == '.'
425
                                                 || *p == ':' ) )
426
                {
427
                        //(*name) += *p; // expensive
428
                        ++p;
429
                }
430
                if ( p-start > 0 ) {
431
                        name->assign( start, p-start );
432
                }
433
                return p;
434
        }
435
        return 0;
436
}
437
438
const char* TiXmlBase::GetEntity( const char* p, char* value, int* length, TiXmlEncoding encoding )
439
{
440
        // Presume an entity, and pull it out.
441
    TIXML_STRING ent;
442
        int i;
443
        *length = 0;
444
445
        if ( *(p+1) && *(p+1) == '#' && *(p+2) )
446
        {
447
                unsigned long ucs = 0;
448
                ptrdiff_t delta = 0;
449
                unsigned mult = 1;
450
451
                if ( *(p+2) == 'x' )
452
                {
453
                        // Hexadecimal.
454
                        if ( !*(p+3) ) return 0;
455
456
                        const char* q = p+3;
457
                        q = strchr( q, ';' );
458
459
                        if ( !q || !*q ) return 0;
460
461
                        delta = q-p;
462
                        --q;
463
464
                        while ( *q != 'x' )
465
                        {
466
                                if ( *q >= '0' && *q <= '9' )
467
                                        ucs += mult * (*q - '0');
468
                                else if ( *q >= 'a' && *q <= 'f' )
469
                                        ucs += mult * (*q - 'a' + 10);
470
                                else if ( *q >= 'A' && *q <= 'F' )
471
                                        ucs += mult * (*q - 'A' + 10 );
472
                                else 
473
                                        return 0;
474
                                mult *= 16;
475
                                --q;
476
                        }
477
                }
478
                else
479
                {
480
                        // Decimal.
481
                        if ( !*(p+2) ) return 0;
482
483
                        const char* q = p+2;
484
                        q = strchr( q, ';' );
485
486
                        if ( !q || !*q ) return 0;
487
488
                        delta = q-p;
489
                        --q;
490
491
                        while ( *q != '#' )
492
                        {
493
                                if ( *q >= '0' && *q <= '9' )
494
                                        ucs += mult * (*q - '0');
495
                                else 
496
                                        return 0;
497
                                mult *= 10;
498
                                --q;
499
                        }
500
                }
501
                if ( encoding == TIXML_ENCODING_UTF8 )
502
                {
503
                        // convert the UCS to UTF-8
504
                        ConvertUTF32ToUTF8( ucs, value, length );
505
                }
506
                else
507
                {
508
                        *value = (char)ucs;
509
                        *length = 1;
510
                }
511
                return p + delta + 1;
512
        }
513
514
        // Now try to match it.
515
        for( i=0; i<NUM_ENTITY; ++i )
516
        {
517
                if ( strncmp( entity[i].str, p, entity[i].strLength ) == 0 )
518
                {
519
                        assert( strlen( entity[i].str ) == entity[i].strLength );
520
                        *value = entity[i].chr;
521
                        *length = 1;
522
                        return ( p + entity[i].strLength );
523
                }
524
        }
525
526
        // So it wasn't an entity, its unrecognized, or something like that.
527
        *value = *p;        // Don't put back the last one, since we return it!
528
        //*length = 1;        // Leave unrecognized entities - this doesn't really work.
529
                                        // Just writes strange XML.
530
        return p+1;
531
}
532
533
534
bool TiXmlBase::StringEqual( const char* p,
535
                                                         const char* tag,
536
                                                         bool ignoreCase,
537
                                                         TiXmlEncoding encoding )
538
{
539
        assert( p );
540
        assert( tag );
541
        if ( !p || !*p )
542
        {
543
                assert( 0 );
544
                return false;
545
        }
546
547
        const char* q = p;
548
549
        if ( ignoreCase )
550
        {
551
                while ( *q && *tag && ToLower( *q, encoding ) == ToLower( *tag, encoding ) )
552
                {
553
                        ++q;
554
                        ++tag;
555
                }
556
557
                if ( *tag == 0 )
558
                        return true;
559
        }
560
        else
561
        {
562
                while ( *q && *tag && *q == *tag )
563
                {
564
                        ++q;
565
                        ++tag;
566
                }
567
568
                if ( *tag == 0 )                // Have we found the end of the tag, and everything equal?
569
                        return true;
570
        }
571
        return false;
572
}
573
574
const char* TiXmlBase::ReadText(        const char* p, 
575
                                                                        TIXML_STRING * text, 
576
                                                                        bool trimWhiteSpace, 
577
                                                                        const char* endTag, 
578
                                                                        bool caseInsensitive,
579
                                                                        TiXmlEncoding encoding )
580
{
581
    *text = "";
582
        if (    !trimWhiteSpace                        // certain tags always keep whitespace
583
                 || !condenseWhiteSpace )        // if true, whitespace is always kept
584
        {
585
                // Keep all the white space.
586
                while (           p && *p
587
                                && !StringEqual( p, endTag, caseInsensitive, encoding )
588
                          )
589
                {
590
                        int len;
591
                        char cArr[4] = { 0, 0, 0, 0 };
592
                        p = GetChar( p, cArr, &len, encoding );
593
                        text->append( cArr, len );
594
                }
595
        }
596
        else
597
        {
598
                bool whitespace = false;
599
600
                // Remove leading white space:
601
                p = SkipWhiteSpace( p, encoding );
602
                while (           p && *p
603
                                && !StringEqual( p, endTag, caseInsensitive, encoding ) )
604
                {
605
                        if ( *p == '\r' || *p == '\n' )
606
                        {
607
                                whitespace = true;
608
                                ++p;
609
                        }
610
                        else if ( IsWhiteSpace( *p ) )
611
                        {
612
                                whitespace = true;
613
                                ++p;
614
                        }
615
                        else
616
                        {
617
                                // If we've found whitespace, add it before the
618
                                // new character. Any whitespace just becomes a space.
619
                                if ( whitespace )
620
                                {
621
                                        (*text) += ' ';
622
                                        whitespace = false;
623
                                }
624
                                int len;
625
                                char cArr[4] = { 0, 0, 0, 0 };
626
                                p = GetChar( p, cArr, &len, encoding );
627
                                if ( len == 1 )
628
                                        (*text) += cArr[0];        // more efficient
629
                                else
630
                                        text->append( cArr, len );
631
                        }
632
                }
633
        }
634
        if ( p ) 
635
                p += strlen( endTag );
636
        return p;
637
}
638
639
#ifdef TIXML_USE_STL
640
641
void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
642
{
643
        // The basic issue with a document is that we don't know what we're
644
        // streaming. Read something presumed to be a tag (and hope), then
645
        // identify it, and call the appropriate stream method on the tag.
646
        //
647
        // This "pre-streaming" will never read the closing ">" so the
648
        // sub-tag can orient itself.
649
650
        if ( !StreamTo( in, '<', tag ) ) 
651
        {
652
                SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
653
                return;
654
        }
655
656
        while ( in->good() )
657
        {
658
                int tagIndex = (int) tag->length();
659
                while ( in->good() && in->peek() != '>' )
660
                {
661
                        int c = in->get();
662
                        if ( c <= 0 )
663
                        {
664
                                SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
665
                                break;
666
                        }
667
                        (*tag) += (char) c;
668
                }
669
670
                if ( in->good() )
671
                {
672
                        // We now have something we presume to be a node of 
673
                        // some sort. Identify it, and call the node to
674
                        // continue streaming.
675
                        TiXmlNode* node = Identify( tag->c_str() + tagIndex, TIXML_DEFAULT_ENCODING );
676
677
                        if ( node )
678
                        {
679
                                node->StreamIn( in, tag );
680
                                bool isElement = node->ToElement() != 0;
681
                                delete node;
682
                                node = 0;
683
684
                                // If this is the root element, we're done. Parsing will be
685
                                // done by the >> operator.
686
                                if ( isElement )
687
                                {
688
                                        return;
689
                                }
690
                        }
691
                        else
692
                        {
693
                                SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
694
                                return;
695
                        }
696
                }
697
        }
698
        // We should have returned sooner.
699
        SetError( TIXML_ERROR, 0, 0, TIXML_ENCODING_UNKNOWN );
700
}
701
702
#endif
703
704
const char* TiXmlDocument::Parse( const char* p, TiXmlParsingData* prevData, TiXmlEncoding encoding )
705
{
706
        ClearError();
707
708
        // Parse away, at the document level. Since a document
709
        // contains nothing but other tags, most of what happens
710
        // here is skipping white space.
711
        if ( !p || !*p )
712
        {
713
                SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
714
                return 0;
715
        }
716
717
        // Note that, for a document, this needs to come
718
        // before the while space skip, so that parsing
719
        // starts from the pointer we are given.
720
        location.Clear();
721
        if ( prevData )
722
        {
723
                location.row = prevData->cursor.row;
724
                location.col = prevData->cursor.col;
725
        }
726
        else
727
        {
728
                location.row = 0;
729
                location.col = 0;
730
        }
731
        TiXmlParsingData data( p, TabSize(), location.row, location.col );
732
        location = data.Cursor();
733
734
        if ( encoding == TIXML_ENCODING_UNKNOWN )
735
        {
736
                // Check for the Microsoft UTF-8 lead bytes.
737
                const unsigned char* pU = (const unsigned char*)p;
738
                if (        *(pU+0) && *(pU+0) == TIXML_UTF_LEAD_0
739
                         && *(pU+1) && *(pU+1) == TIXML_UTF_LEAD_1
740
                         && *(pU+2) && *(pU+2) == TIXML_UTF_LEAD_2 )
741
                {
742
                        encoding = TIXML_ENCODING_UTF8;
743
                        useMicrosoftBOM = true;
744
                }
745
        }
746
747
    p = SkipWhiteSpace( p, encoding );
748
        if ( !p )
749
        {
750
                SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
751
                return 0;
752
        }
753
754
        while ( p && *p )
755
        {
756
                TiXmlNode* node = Identify( p, encoding );
757
                if ( node )
758
                {
759
                        p = node->Parse( p, &data, encoding );
760
                        LinkEndChild( node );
761
                }
762
                else
763
                {
764
                        break;
765
                }
766
767
                // Did we get encoding info?
768
                if (    encoding == TIXML_ENCODING_UNKNOWN
769
                         && node->ToDeclaration() )
770
                {
771
                        TiXmlDeclaration* dec = node->ToDeclaration();
772
                        const char* enc = dec->Encoding();
773
                        assert( enc );
774
775
                        if ( *enc == 0 )
776
                                encoding = TIXML_ENCODING_UTF8;
777
                        else if ( StringEqual( enc, "UTF-8", true, TIXML_ENCODING_UNKNOWN ) )
778
                                encoding = TIXML_ENCODING_UTF8;
779
                        else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
780
                                encoding = TIXML_ENCODING_UTF8;        // incorrect, but be nice
781
                        else 
782
                                encoding = TIXML_ENCODING_LEGACY;
783
                }
784
785
                p = SkipWhiteSpace( p, encoding );
786
        }
787
788
        // Was this empty?
789
        if ( !firstChild ) {
790
                SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, encoding );
791
                return 0;
792
        }
793
794
        // All is well.
795
        return p;
796
}
797
798
void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
799
{        
800
        // The first error in a chain is more accurate - don't set again!
801
        if ( error )
802
                return;
803
804
        assert( err > 0 && err < TIXML_ERROR_STRING_COUNT );
805
        error   = true;
806
        errorId = err;
807
        errorDesc = errorString[ errorId ];
808
809
        errorLocation.Clear();
810
        if ( pError && data )
811
        {
812
                data->Stamp( pError, encoding );
813
                errorLocation = data->Cursor();
814
        }
815
}
816
817
818
/*	Look at the markup starting at 'p' and create an (unparsed) node of
	the matching type:

	  "<?xml"               -> TiXmlDeclaration (compared ignoring case)
	  "<!--"                -> TiXmlComment
	  "<![CDATA["           -> TiXmlText flagged as CDATA
	  "<!"                  -> TiXmlUnknown
	  '<' + letter or '_'   -> TiXmlElement
	  anything else         -> TiXmlUnknown

	The new node gets 'this' as its parent so that it can report
	errors. Returns 0 when, after skipping white space, the text is
	missing, empty, or does not start with '<'.
*/
TiXmlNode* TiXmlNode::Identify( const char* p, TiXmlEncoding encoding )
{
	p = SkipWhiteSpace( p, encoding );
	if ( !p || !*p || *p != '<' )
		return 0;

	TiXmlDocument* doc = GetDocument();

	p = SkipWhiteSpace( p, encoding );
	if ( !p || !*p )
		return 0;

	// What is this thing?
	// - Elements start with a letter or underscore, but xml is reserved.
	// - Comments: <!--
	// - Declaration: <?xml
	// - Everything else is unknown to tinyxml.
	//
	// The order of the tests matters: the comment and CDATA headers
	// both begin with "<!", so they are tried before the plain "<!".
	TiXmlNode* created = 0;

	if ( StringEqual( p, "<?xml", true, encoding ) )
	{
		#ifdef DEBUG_PARSER
			TIXML_LOG( "XML parsing Declaration\n" );
		#endif
		created = new TiXmlDeclaration();
	}
	else if ( StringEqual( p, "<!--", false, encoding ) )
	{
		#ifdef DEBUG_PARSER
			TIXML_LOG( "XML parsing Comment\n" );
		#endif
		created = new TiXmlComment();
	}
	else if ( StringEqual( p, "<![CDATA[", false, encoding ) )
	{
		#ifdef DEBUG_PARSER
			TIXML_LOG( "XML parsing CDATA\n" );
		#endif
		TiXmlText* cdataText = new TiXmlText( "" );
		cdataText->SetCDATA( true );
		created = cdataText;
	}
	else if ( StringEqual( p, "<!", false, encoding ) )
	{
		#ifdef DEBUG_PARSER
			TIXML_LOG( "XML parsing Unknown(1)\n" );
		#endif
		created = new TiXmlUnknown();
	}
	else if ( IsAlpha( p[1], encoding ) || p[1] == '_' )
	{
		#ifdef DEBUG_PARSER
			TIXML_LOG( "XML parsing Element\n" );
		#endif
		created = new TiXmlElement( "" );
	}
	else
	{
		#ifdef DEBUG_PARSER
			TIXML_LOG( "XML parsing Unknown(2)\n" );
		#endif
		created = new TiXmlUnknown();
	}

	if ( !created )
	{
		// Allocation produced nothing; tell the document if there is one.
		if ( doc )
			doc->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, TIXML_ENCODING_UNKNOWN );
		return 0;
	}

	// Set the parent, so it can report errors
	created->parent = this;
	return created;
}
906
907
#ifdef TIXML_USE_STL
908
909
void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
910
{
911
        // We're called with some amount of pre-parsing. That is, some of "this"
912
        // element is in "tag". Go ahead and stream to the closing ">"
913
        while( in->good() )
914
        {
915
                int c = in->get();
916
                if ( c <= 0 )
917
                {
918
                        TiXmlDocument* document = GetDocument();
919
                        if ( document )
920
                                document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
921
                        return;
922
                }
923
                (*tag) += (char) c ;
924
                
925
                if ( c == '>' )
926
                        break;
927
        }
928
929
        if ( tag->length() < 3 ) return;
930
931
        // Okay...if we are a "/>" tag, then we're done. We've read a complete tag.
932
        // If not, identify and stream.
933
934
        if (    tag->at( tag->length() - 1 ) == '>' 
935
                 && tag->at( tag->length() - 2 ) == '/' )
936
        {
937
                // All good!
938
                return;
939
        }
940
        else if ( tag->at( tag->length() - 1 ) == '>' )
941
        {
942
                // There is more. Could be:
943
                //                text
944
                //                cdata text (which looks like another node)
945
                //                closing tag
946
                //                another node.
947
                for ( ;; )
948
                {
949
                        StreamWhiteSpace( in, tag );
950
951
                        // Do we have text?
952
                        if ( in->good() && in->peek() != '<' ) 
953
                        {
954
                                // Yep, text.
955
                                TiXmlText text( "" );
956
                                text.StreamIn( in, tag );
957
958
                                // What follows text is a closing tag or another node.
959
                                // Go around again and figure it out.
960
                                continue;
961
                        }
962
963
                        // We now have either a closing tag...or another node.
964
                        // We should be at a "<", regardless.
965
                        if ( !in->good() ) return;
966
                        assert( in->peek() == '<' );
967
                        int tagIndex = (int) tag->length();
968
969
                        bool closingTag = false;
970
                        bool firstCharFound = false;
971
972
                        for( ;; )
973
                        {
974
                                if ( !in->good() )
975
                                        return;
976
977
                                int c = in->peek();
978
                                if ( c <= 0 )
979
                                {
980
                                        TiXmlDocument* document = GetDocument();
981
                                        if ( document )
982
                                                document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
983
                                        return;
984
                                }
985
                                
986
                                if ( c == '>' )
987
                                        break;
988
989
                                *tag += (char) c;
990
                                in->get();
991
992
                                // Early out if we find the CDATA id.
993
                                if ( c == '[' && tag->size() >= 9 )
994
                                {
995
                                        size_t len = tag->size();
996
                                        const char* start = tag->c_str() + len - 9;
997
                                        if ( strcmp( start, "<![CDATA[" ) == 0 ) {
998
                                                assert( !closingTag );
999
                                                break;
1000
                                        }
1001
                                }
1002
1003
                                if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
1004
                                {
1005
                                        firstCharFound = true;
1006
                                        if ( c == '/' )
1007
                                                closingTag = true;
1008
                                }
1009
                        }
1010
                        // If it was a closing tag, then read in the closing '>' to clean up the input stream.
1011
                        // If it was not, the streaming will be done by the tag.
1012
                        if ( closingTag )
1013
                        {
1014
                                if ( !in->good() )
1015
                                        return;
1016
1017
                                int c = in->get();
1018
                                if ( c <= 0 )
1019
                                {
1020
                                        TiXmlDocument* document = GetDocument();
1021
                                        if ( document )
1022
                                                document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1023
                                        return;
1024
                                }
1025
                                assert( c == '>' );
1026
                                *tag += (char) c;
1027
1028
                                // We are done, once we've found our closing tag.
1029
                                return;
1030
                        }
1031
                        else
1032
                        {
1033
                                // If not a closing tag, id it, and stream.
1034
                                const char* tagloc = tag->c_str() + tagIndex;
1035
                                TiXmlNode* node = Identify( tagloc, TIXML_DEFAULT_ENCODING );
1036
                                if ( !node )
1037
                                        return;
1038
                                node->StreamIn( in, tag );
1039
                                delete node;
1040
                                node = 0;
1041
1042
                                // No return: go around from the beginning: text, closing tag, or node.
1043
                        }
1044
                }
1045
        }
1046
}
1047
#endif
1048
1049
const char* TiXmlElement::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1050
{
1051
        p = SkipWhiteSpace( p, encoding );
1052
        TiXmlDocument* document = GetDocument();
1053
1054
        if ( !p || !*p )
1055
        {
1056
                if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, 0, 0, encoding );
1057
                return 0;
1058
        }
1059
1060
        if ( data )
1061
        {
1062
                data->Stamp( p, encoding );
1063
                location = data->Cursor();
1064
        }
1065
1066
        if ( *p != '<' )
1067
        {
1068
                if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, p, data, encoding );
1069
                return 0;
1070
        }
1071
1072
        p = SkipWhiteSpace( p+1, encoding );
1073
1074
        // Read the name.
1075
        const char* pErr = p;
1076
1077
    p = ReadName( p, &value, encoding );
1078
        if ( !p || !*p )
1079
        {
1080
                if ( document )        document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME, pErr, data, encoding );
1081
                return 0;
1082
        }
1083
1084
    TIXML_STRING endTag ("</");
1085
        endTag += value;
1086
        endTag += ">";
1087
1088
        // Check for and read attributes. Also look for an empty
1089
        // tag or an end tag.
1090
        while ( p && *p )
1091
        {
1092
                pErr = p;
1093
                p = SkipWhiteSpace( p, encoding );
1094
                if ( !p || !*p )
1095
                {
1096
                        if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1097
                        return 0;
1098
                }
1099
                if ( *p == '/' )
1100
                {
1101
                        ++p;
1102
                        // Empty tag.
1103
                        if ( *p  != '>' )
1104
                        {
1105
                                if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );                
1106
                                return 0;
1107
                        }
1108
                        return (p+1);
1109
                }
1110
                else if ( *p == '>' )
1111
                {
1112
                        // Done with attributes (if there were any.)
1113
                        // Read the value -- which can include other
1114
                        // elements -- read the end tag, and return.
1115
                        ++p;
1116
                        p = ReadValue( p, data, encoding );                // Note this is an Element method, and will set the error if one happens.
1117
                        if ( !p || !*p ) {
1118
                                // We were looking for the end tag, but found nothing.
1119
                                // Fix for [ 1663758 ] Failure to report error on bad XML
1120
                                if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1121
                                return 0;
1122
                        }
1123
1124
                        // We should find the end tag now
1125
                        if ( StringEqual( p, endTag.c_str(), false, encoding ) )
1126
                        {
1127
                                p += endTag.length();
1128
                                return p;
1129
                        }
1130
                        else
1131
                        {
1132
                                if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
1133
                                return 0;
1134
                        }
1135
                }
1136
                else
1137
                {
1138
                        // Try to read an attribute:
1139
                        TiXmlAttribute* attrib = new TiXmlAttribute();
1140
                        if ( !attrib )
1141
                        {
1142
                                if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, pErr, data, encoding );
1143
                                return 0;
1144
                        }
1145
1146
                        attrib->SetDocument( document );
1147
                        pErr = p;
1148
                        p = attrib->Parse( p, data, encoding );
1149
1150
                        if ( !p || !*p )
1151
                        {
1152
                                if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT, pErr, data, encoding );
1153
                                delete attrib;
1154
                                return 0;
1155
                        }
1156
1157
                        // Handle the strange case of double attributes:
1158
                        #ifdef TIXML_USE_STL
1159
                        TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
1160
                        #else
1161
                        TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
1162
                        #endif
1163
                        if ( node )
1164
                        {
1165
                                node->SetValue( attrib->Value() );
1166
                                delete attrib;
1167
                                return 0;
1168
                        }
1169
1170
                        attributeSet.Add( attrib );
1171
                }
1172
        }
1173
        return p;
1174
}
1175
1176
1177
const char* TiXmlElement::ReadValue( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1178
{
1179
        TiXmlDocument* document = GetDocument();
1180
1181
        // Read in text and elements in any order.
1182
        const char* pWithWhiteSpace = p;
1183
        p = SkipWhiteSpace( p, encoding );
1184
1185
        while ( p && *p )
1186
        {
1187
                if ( *p != '<' )
1188
                {
1189
                        // Take what we have, make a text element.
1190
                        TiXmlText* textNode = new TiXmlText( "" );
1191
1192
                        if ( !textNode )
1193
                        {
1194
                                if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY, 0, 0, encoding );
1195
                                    return 0;
1196
                        }
1197
1198
                        if ( TiXmlBase::IsWhiteSpaceCondensed() )
1199
                        {
1200
                                p = textNode->Parse( p, data, encoding );
1201
                        }
1202
                        else
1203
                        {
1204
                                // Special case: we want to keep the white space
1205
                                // so that leading spaces aren't removed.
1206
                                p = textNode->Parse( pWithWhiteSpace, data, encoding );
1207
                        }
1208
1209
                        if ( !textNode->Blank() )
1210
                                LinkEndChild( textNode );
1211
                        else
1212
                                delete textNode;
1213
                } 
1214
                else 
1215
                {
1216
                        // We hit a '<'
1217
                        // Have we hit a new element or an end tag? This could also be
1218
                        // a TiXmlText in the "CDATA" style.
1219
                        if ( StringEqual( p, "</", false, encoding ) )
1220
                        {
1221
                                return p;
1222
                        }
1223
                        else
1224
                        {
1225
                                TiXmlNode* node = Identify( p, encoding );
1226
                                if ( node )
1227
                                {
1228
                                        p = node->Parse( p, data, encoding );
1229
                                        LinkEndChild( node );
1230
                                }                                
1231
                                else
1232
                                {
1233
                                        return 0;
1234
                                }
1235
                        }
1236
                }
1237
                pWithWhiteSpace = p;
1238
                p = SkipWhiteSpace( p, encoding );
1239
        }
1240
1241
        if ( !p )
1242
        {
1243
                if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
1244
        }        
1245
        return p;
1246
}
1247
1248
1249
#ifdef TIXML_USE_STL
1250
void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
1251
{
1252
        while ( in->good() )
1253
        {
1254
                int c = in->get();        
1255
                if ( c <= 0 )
1256
                {
1257
                        TiXmlDocument* document = GetDocument();
1258
                        if ( document )
1259
                                document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1260
                        return;
1261
                }
1262
                (*tag) += (char) c;
1263
1264
                if ( c == '>' )
1265
                {
1266
                        // All is well.
1267
                        return;                
1268
                }
1269
        }
1270
}
1271
#endif
1272
1273
1274
const char* TiXmlUnknown::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1275
{
1276
        TiXmlDocument* document = GetDocument();
1277
        p = SkipWhiteSpace( p, encoding );
1278
1279
        if ( data )
1280
        {
1281
                data->Stamp( p, encoding );
1282
                location = data->Cursor();
1283
        }
1284
        if ( !p || !*p || *p != '<' )
1285
        {
1286
                if ( document ) document->SetError( TIXML_ERROR_PARSING_UNKNOWN, p, data, encoding );
1287
                return 0;
1288
        }
1289
        ++p;
1290
    value = "";
1291
1292
        while ( p && *p && *p != '>' )
1293
        {
1294
                value += *p;
1295
                ++p;
1296
        }
1297
1298
        if ( !p )
1299
        {
1300
                if ( document )        document->SetError( TIXML_ERROR_PARSING_UNKNOWN, 0, 0, encoding );
1301
        }
1302
        if ( *p == '>' )
1303
                return p+1;
1304
        return p;
1305
}
1306
1307
#ifdef TIXML_USE_STL
1308
void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
1309
{
1310
        while ( in->good() )
1311
        {
1312
                int c = in->get();        
1313
                if ( c <= 0 )
1314
                {
1315
                        TiXmlDocument* document = GetDocument();
1316
                        if ( document )
1317
                                document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1318
                        return;
1319
                }
1320
1321
                (*tag) += (char) c;
1322
1323
                if ( c == '>' 
1324
                         && tag->at( tag->length() - 2 ) == '-'
1325
                         && tag->at( tag->length() - 3 ) == '-' )
1326
                {
1327
                        // All is well.
1328
                        return;                
1329
                }
1330
        }
1331
}
1332
#endif
1333
1334
1335
const char* TiXmlComment::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1336
{
1337
        TiXmlDocument* document = GetDocument();
1338
        value = "";
1339
1340
        p = SkipWhiteSpace( p, encoding );
1341
1342
        if ( data )
1343
        {
1344
                data->Stamp( p, encoding );
1345
                location = data->Cursor();
1346
        }
1347
        const char* startTag = "<!--";
1348
        const char* endTag   = "-->";
1349
1350
        if ( !StringEqual( p, startTag, false, encoding ) )
1351
        {
1352
                document->SetError( TIXML_ERROR_PARSING_COMMENT, p, data, encoding );
1353
                return 0;
1354
        }
1355
        p += strlen( startTag );
1356
1357
        // [ 1475201 ] TinyXML parses entities in comments
1358
        // Oops - ReadText doesn't work, because we don't want to parse the entities.
1359
        // p = ReadText( p, &value, false, endTag, false, encoding );
1360
        //
1361
        // from the XML spec:
1362
        /*
1363
         [Definition: Comments may appear anywhere in a document outside other markup; in addition, 
1364
                      they may appear within the document type declaration at places allowed by the grammar. 
1365
                                  They are not part of the document's character data; an XML processor MAY, but need not, 
1366
                                  make it possible for an application to retrieve the text of comments. For compatibility, 
1367
                                  the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity 
1368
                                  references MUST NOT be recognized within comments.
1369
1370
                                  An example of a comment:
1371
1372
                                  <!-- declarations for <head> & <body> -->
1373
        */
1374
1375
    value = "";
1376
        // Keep all the white space.
1377
        while (        p && *p && !StringEqual( p, endTag, false, encoding ) )
1378
        {
1379
                value.append( p, 1 );
1380
                ++p;
1381
        }
1382
        if ( p ) 
1383
                p += strlen( endTag );
1384
1385
        return p;
1386
}
1387
1388
1389
const char* TiXmlAttribute::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1390
{
1391
        p = SkipWhiteSpace( p, encoding );
1392
        if ( !p || !*p ) return 0;
1393
1394
//        int tabsize = 4;
1395
//        if ( document )
1396
//                tabsize = document->TabSize();
1397
1398
        if ( data )
1399
        {
1400
                data->Stamp( p, encoding );
1401
                location = data->Cursor();
1402
        }
1403
        // Read the name, the '=' and the value.
1404
        const char* pErr = p;
1405
        p = ReadName( p, &name, encoding );
1406
        if ( !p || !*p )
1407
        {
1408
                if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, pErr, data, encoding );
1409
                return 0;
1410
        }
1411
        p = SkipWhiteSpace( p, encoding );
1412
        if ( !p || !*p || *p != '=' )
1413
        {
1414
                if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1415
                return 0;
1416
        }
1417
1418
        ++p;        // skip '='
1419
        p = SkipWhiteSpace( p, encoding );
1420
        if ( !p || !*p )
1421
        {
1422
                if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1423
                return 0;
1424
        }
1425
        
1426
        const char* end;
1427
        const char SINGLE_QUOTE = '\'';
1428
        const char DOUBLE_QUOTE = '\"';
1429
1430
        if ( *p == SINGLE_QUOTE )
1431
        {
1432
                ++p;
1433
                end = "\'";                // single quote in string
1434
                p = ReadText( p, &value, false, end, false, encoding );
1435
        }
1436
        else if ( *p == DOUBLE_QUOTE )
1437
        {
1438
                ++p;
1439
                end = "\"";                // double quote in string
1440
                p = ReadText( p, &value, false, end, false, encoding );
1441
        }
1442
        else
1443
        {
1444
                // All attribute values should be in single or double quotes.
1445
                // But this is such a common error that the parser will try
1446
                // its best, even without them.
1447
                value = "";
1448
                while (    p && *p                                                                                        // existence
1449
                                && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'        // whitespace
1450
                                && *p != '/' && *p != '>' )                                                        // tag end
1451
                {
1452
                        if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
1453
                                // [ 1451649 ] Attribute values with trailing quotes not handled correctly
1454
                                // We did not have an opening quote but seem to have a 
1455
                                // closing one. Give up and throw an error.
1456
                                if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
1457
                                return 0;
1458
                        }
1459
                        value += *p;
1460
                        ++p;
1461
                }
1462
        }
1463
        return p;
1464
}
1465
1466
#ifdef TIXML_USE_STL
1467
void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
1468
{
1469
        while ( in->good() )
1470
        {
1471
                int c = in->peek();        
1472
                if ( !cdata && (c == '<' ) ) 
1473
                {
1474
                        return;
1475
                }
1476
                if ( c <= 0 )
1477
                {
1478
                        TiXmlDocument* document = GetDocument();
1479
                        if ( document )
1480
                                document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1481
                        return;
1482
                }
1483
1484
                (*tag) += (char) c;
1485
                in->get();        // "commits" the peek made above
1486
1487
                if ( cdata && c == '>' && tag->size() >= 3 ) {
1488
                        size_t len = tag->size();
1489
                        if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
1490
                                // terminator of cdata.
1491
                                return;
1492
                        }
1493
                }    
1494
        }
1495
}
1496
#endif
1497
1498
const char* TiXmlText::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding encoding )
1499
{
1500
        value = "";
1501
        TiXmlDocument* document = GetDocument();
1502
1503
        if ( data )
1504
        {
1505
                data->Stamp( p, encoding );
1506
                location = data->Cursor();
1507
        }
1508
1509
        const char* const startTag = "<![CDATA[";
1510
        const char* const endTag   = "]]>";
1511
1512
        if ( cdata || StringEqual( p, startTag, false, encoding ) )
1513
        {
1514
                cdata = true;
1515
1516
                if ( !StringEqual( p, startTag, false, encoding ) )
1517
                {
1518
                        document->SetError( TIXML_ERROR_PARSING_CDATA, p, data, encoding );
1519
                        return 0;
1520
                }
1521
                p += strlen( startTag );
1522
1523
                // Keep all the white space, ignore the encoding, etc.
1524
                while (           p && *p
1525
                                && !StringEqual( p, endTag, false, encoding )
1526
                          )
1527
                {
1528
                        value += *p;
1529
                        ++p;
1530
                }
1531
1532
                TIXML_STRING dummy; 
1533
                p = ReadText( p, &dummy, false, endTag, false, encoding );
1534
                return p;
1535
        }
1536
        else
1537
        {
1538
                bool ignoreWhite = true;
1539
1540
                const char* end = "<";
1541
                p = ReadText( p, &value, ignoreWhite, end, false, encoding );
1542
                if ( p )
1543
                        return p-1;        // don't truncate the '<'
1544
                return 0;
1545
        }
1546
}
1547
1548
#ifdef TIXML_USE_STL
1549
void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
1550
{
1551
        while ( in->good() )
1552
        {
1553
                int c = in->get();
1554
                if ( c <= 0 )
1555
                {
1556
                        TiXmlDocument* document = GetDocument();
1557
                        if ( document )
1558
                                document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
1559
                        return;
1560
                }
1561
                (*tag) += (char) c;
1562
1563
                if ( c == '>' )
1564
                {
1565
                        // All is well.
1566
                        return;
1567
                }
1568
        }
1569
}
1570
#endif
1571
1572
const char* TiXmlDeclaration::Parse( const char* p, TiXmlParsingData* data, TiXmlEncoding _encoding )
1573
{
1574
        p = SkipWhiteSpace( p, _encoding );
1575
        // Find the beginning, find the end, and look for
1576
        // the stuff in-between.
1577
        TiXmlDocument* document = GetDocument();
1578
        if ( !p || !*p || !StringEqual( p, "<?xml", true, _encoding ) )
1579
        {
1580
                if ( document ) document->SetError( TIXML_ERROR_PARSING_DECLARATION, 0, 0, _encoding );
1581
                return 0;
1582
        }
1583
        if ( data )
1584
        {
1585
                data->Stamp( p, _encoding );
1586
                location = data->Cursor();
1587
        }
1588
        p += 5;
1589
1590
        version = "";
1591
        encoding = "";
1592
        standalone = "";
1593
1594
        while ( p && *p )
1595
        {
1596
                if ( *p == '>' )
1597
                {
1598
                        ++p;
1599
                        return p;
1600
                }
1601
1602
                p = SkipWhiteSpace( p, _encoding );
1603
                if ( StringEqual( p, "version", true, _encoding ) )
1604
                {
1605
                        TiXmlAttribute attrib;
1606
                        p = attrib.Parse( p, data, _encoding );                
1607
                        version = attrib.Value();
1608
                }
1609
                else if ( StringEqual( p, "encoding", true, _encoding ) )
1610
                {
1611
                        TiXmlAttribute attrib;
1612
                        p = attrib.Parse( p, data, _encoding );                
1613
                        encoding = attrib.Value();
1614
                }
1615
                else if ( StringEqual( p, "standalone", true, _encoding ) )
1616
                {
1617
                        TiXmlAttribute attrib;
1618
                        p = attrib.Parse( p, data, _encoding );                
1619
                        standalone = attrib.Value();
1620
                }
1621
                else
1622
                {
1623
                        // Read over whatever it is.
1624
                        while( p && *p && *p != '>' && !IsWhiteSpace( *p ) )
1625
                                ++p;
1626
                }
1627
        }
1628
        return 0;
1629
}
1630
1631
bool TiXmlText::Blank() const
1632
{
1633
        for ( unsigned i=0; i<value.length(); i++ )
1634
                if ( !IsWhiteSpace( value[i] ) )
1635
                        return false;
1636
        return true;
1637
}
1638
1639
1640
bool TiXmlDocument::ReadFromMemory( const char* pBuf, size_t sz, TiXmlEncoding encoding)
1641
{
1642
    // Delete the existing data:
1643
    Clear();
1644
    location.Clear();
1645
1646
    // Get the file size, so we can pre-allocate the string. HUGE speed impact.
1647
    long length = (long) sz;
1648
1649
    // Strange case, but good to handle up front.
1650
    if ( length == 0 )
1651
    {
1652
        SetError( TIXML_ERROR_DOCUMENT_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
1653
        return false;
1654
    }
1655
1656
    // If we have a file, assume it is all one big XML file, and read it in.
1657
    // The document parser may decide the document ends sooner than the entire file, however.
1658
    TIXML_STRING data;
1659
    data.reserve( length );
1660
1661
1662
    char* buf = new char[ length+1 ];
1663
    memset(buf,0,length+1);
1664
1665
    memcpy(buf, pBuf, length);
1666
1667
    const char* lastPos = buf;
1668
    const char* p = buf;
1669
1670
    buf[length] = 0;
1671
    while( *p ) {
1672
        assert( p < (buf+length) );
1673
        if ( *p == 0xa ) {
1674
            // Newline character. No special rules for this. Append all the characters
1675
            // since the last string, and include the newline.
1676
            data.append( lastPos, (p-lastPos+1) );  // append, include the newline
1677
            ++p;                                    // move past the newline
1678
            lastPos = p;                            // and point to the new buffer (may be 0)
1679
            assert( p <= (buf+length) );
1680
        }
1681
        else if ( *p == 0xd ) {
1682
            // Carriage return. Append what we have so far, then
1683
            // handle moving forward in the buffer.
1684
            if ( (p-lastPos) > 0 ) {
1685
                data.append( lastPos, p-lastPos );  // do not add the CR
1686
            }
1687
            data += (char)0xa;                      // a proper newline
1688
1689
            if ( *(p+1) == 0xa ) {
1690
                // Carriage return - new line sequence
1691
                p += 2;
1692
                lastPos = p;
1693
                assert( p <= (buf+length) );
1694
            }
1695
            else {
1696
                // it was followed by something else...that is presumably characters again.
1697
                ++p;
1698
                lastPos = p;
1699
                assert( p <= (buf+length) );
1700
            }
1701
        }
1702
        else {
1703
            ++p;
1704
        }
1705
    }
1706
    // Handle any left over characters.
1707
    if ( p-lastPos ) {
1708
        data.append( lastPos, p-lastPos );
1709
    }
1710
    delete [] buf;
1711
    buf = 0;
1712
1713
    Parse( data.c_str(), 0, encoding );
1714
1715
    if (  Error() )
1716
                        return false;
1717
    else
1718
        return true;
1719
}