Scippy

SCIP

Solving Constraint Integer Programs

xmlparse.c
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright (C) 2002-2017 Konrad-Zuse-Zentrum */
7 /* fuer Informationstechnik Berlin */
8 /* */
9 /* SCIP is distributed under the terms of the ZIB Academic License. */
10 /* */
11 /* You should have received a copy of the ZIB Academic License */
12 /* along with SCIP; see the file COPYING. If not email to scip@zib.de. */
13 /* */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
16 /**@file xmlparse.c
17  * @brief XML parsing: implementation of the parser and of the node and attribute access functions
18  * @author Thorsten Koch
19  * @author Marc Pfetsch
20  *
21  * If SPEC_LIKE_SPACE_HANDLING is not defined, all LF,CR will be changed into spaces and from a
22  * sequence of spaces only one will be used.
23  *
24  * @todo Implement possibility to avoid the construction of parsing information for certain tags
25  * (and their children). For solution files this would avoid parsing the constraints section.
26  */
27 
28 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
29 
30 #include <blockmemshell/memory.h>
31 
32 #include "xml.h"
33 #include "xmldef.h"
34 
35 
36 #include <sys/types.h>
37 #ifdef WITH_ZLIB
38 #if defined(_WIN32) || defined(_WIN64)
39 #define R_OK _A_RDONLY
40 #define access _access
41 #include <io.h>
42 #else
43 #include <unistd.h>
44 #endif
45 #endif
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <assert.h>
49 #include <ctype.h>
50 #include <string.h>
51 
52 
53 #define NAME_EXT_SIZE 128
54 #define ATTR_EXT_SIZE 4096
55 #define DATA_EXT_SIZE 4096
56 #define LINE_BUF_SIZE 8192
57 
58 #define xmlError(a, b) xmlErrmsg(a, b, FALSE, __FILE__, __LINE__)
59 
60 
61 /* forward declarations */
62 typedef struct parse_stack_struct PSTACK;
63 typedef struct parse_pos_struct PPOS;
64 
65 /** state of the parser */
67 {
73 };
74 typedef enum parse_state_enum PSTATE;
75 
76 /** Stack as a (singly) linked list. The top element is the current node. */
78 {
81 };
82 
83 /** Store the current position in the file and the state of the parser. */
85 {
86  const char* filename;
88  char buf[LINE_BUF_SIZE];
89  int pos;
90  int lineno;
91  int nextsym;
92  int lastsym;
95 };
96 
97 
98 /** output error message with corresponding line and position */
99 static void xmlErrmsg(
100  PPOS* ppos,
101  const char* msg,
102  XML_Bool msg_only,
103  const char* file,
104  int line
105  )
106 {
107 #ifndef NDEBUG
108  int ret;
109  assert( ppos != NULL );
110 
111  if ( ! msg_only )
112  {
113  ret = fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
114  assert(ret >= 0);
115 
116  ret = fprintf(stderr, "%s", ppos->buf);
117  assert(ret >= 0);
118 
119  if ( strchr(ppos->buf, '\n') == NULL )
120  {
121  int retc;
122 
123  retc = fputc('\n', stderr);
124  assert(retc != EOF);
125  }
126 
127  ret = fprintf(stderr, "%*s\n", ppos->pos, "^");
128  assert(ret >= 0);
129  }
130  ret = fprintf(stderr, "%s\n\n", msg);
131  assert(ret >= 0);
132 
133 #else
134 
135  if ( ! msg_only )
136  {
137  (void) fprintf(stderr, "%s(%d) Error in file %s line %d\n", file, line, ppos->filename, ppos->lineno);
138 
139  (void) fprintf(stderr, "%s", ppos->buf);
140 
141  if ( strchr(ppos->buf, '\n') == NULL )
142  {
143  (void) fputc('\n', stderr);
144  }
145 
146  (void) fprintf(stderr, "%*s\n", ppos->pos, "^");
147  }
148  (void) fprintf(stderr, "%s\n\n", msg);
149 #endif
150 }
151 
152 
153 /** Push new element on the parse stack.
154  *
155  * TRUE if it worked, FAILURE otherwise.
156  */
157 static
159  PPOS* ppos,
160  XML_NODE* node
161  )
162 {
163  PSTACK* p;
164 
165  assert(ppos != NULL);
166  assert(node != NULL);
167 
168  debugMessage("Pushing %s\n", node->name);
169 
171  assert(p != NULL);
172 
173  p->node = node;
174  p->next = ppos->top;
175  ppos->top = p;
176 
177  return TRUE;
178 }
179 
180 /** returns top element on stack (which has to be present) */
182  const PPOS* ppos
183  )
184 {
185  assert(ppos != NULL);
186  assert(ppos->top != NULL);
187 
188  return ppos->top->node;
189 }
190 
191 /** remove top element from stack and deletes it
192  *
193  * TRUE if ok, FALSE otherwise
194  */
195 static
197  PPOS* ppos /**< input stream position */
198  )
199 {
200  PSTACK* p;
201  XML_Bool result;
202 
203  assert(ppos != NULL);
204 
205  if ( ppos->top == NULL )
206  {
207  xmlError(ppos, "Stack underflow");
208  result = FALSE;
209  }
210  else
211  {
212  result = TRUE;
213  p = ppos->top;
214  ppos->top = p->next;
215 
216  debugMessage("Poping %s\n", p->node->name);
217  BMSfreeMemory(&p);
218  }
219  return result;
220 }
221 
222 /** remove complete stack */
223 static
225  PPOS* ppos
226  )
227 {
228  assert(ppos != NULL);
229 
230  while ( ppos->top != NULL )
231  (void) popPstack(ppos);
232 }
233 
234 /** Returns the next character from the input buffer and fills the buffer if it is empty (similar to fgetc()). */
235 static
236 int mygetc(
237  PPOS* ppos
238  )
239 {
240  assert(ppos != NULL);
241  assert(ppos->fp != NULL);
242  assert(ppos->pos < LINE_BUF_SIZE);
243 
244  if ( ppos->buf[ppos->pos] == '\0' )
245  {
246 #ifdef SCIP_DISABLED_CODE
247  /* the low level function gzread/fread used below seem to be faster */
248  if ( NULL == FGETS(ppos->buf, sizeof(ppos->buf), ppos->fp) )
249  return EOF;
250 #else
251  size_t len = (size_t) FREAD(ppos->buf, sizeof(ppos->buf) - 1, ppos->fp); /*lint !e571 !e747*/
252 
253  if( len == 0 || len > sizeof(ppos->buf) - 1 )
254  return EOF;
255 
256  ppos->buf[len] = '\0';
257 #endif
258  ppos->pos = 0;
259  }
260  return (unsigned char)ppos->buf[ppos->pos++];
261 }
262 
263 
264 #ifdef SPEC_LIKE_SPACE_HANDLING
265 /** Read input from fp_in.
266  *
267  * If there is a LF, CR, CR/LF, or LF/CR it returns exactly on LF. Also counts the number of
268  * characters.
269  */
270 static
271 int getsymbol(
272  PPOS* ppos
273  )
274 {
275  int c;
276 
277  assert(ppos != NULL);
278 
279  if ( ppos->nextsym == 0 )
280  c = mygetc(ppos);
281  else
282  {
283  c = ppos->nextsym;
284  ppos->nextsym = 0;
285  }
286  assert(ppos->nextsym == 0);
287 
288  if (((c == '\n') && (ppos->lastsym == '\r')) || ((c == '\r') && (ppos->lastsym == '\n')))
289  c = mygetc(ppos);
290 
291  ppos->lastsym = c;
292 
293  if ( c == '\r' )
294  c = '\n';
295 
296  if ( c == '\n' )
297  ++ppos->lineno;
298 
299  return c;
300 }
301 #else
302 /** Read input from fp_in (variant).
303  *
304  * Here we convert all LF or CR into SPACE and return maximally one SPACE after the other.
305  *
306  * @note This function counts lines differently. On systems that have only one '\\r' as line feed
307  * (MAC) it does not count correctly.
308  */
309 static
311  PPOS* ppos
312  )
313 {
314  int c;
315 
316  assert(ppos != NULL);
317 
318  do
319  {
320  if ( ppos->nextsym == 0 )
321  c = mygetc(ppos);
322  else
323  {
324  c = ppos->nextsym;
325  ppos->nextsym = 0;
326  }
327  assert(ppos->nextsym == 0);
328 
329  if ( c == '\n' )
330  ++ppos->lineno;
331 
332  if ((c == '\n') || (c == '\r'))
333  c = ' ';
334  } while((c == ' ') && (ppos->lastsym == c));
335 
336  ppos->lastsym = c;
337 
338  debugMessage("[%c]\n", c);
339 
340  return c;
341 }
342 #endif
343 
344 /** Reinserts a character into the input stream */
345 static
347  PPOS* ppos,
348  int c
349  )
350 {
351  assert(ppos != NULL);
352  assert(ppos->nextsym == 0);
353 
354  ppos->nextsym = c;
355 }
356 
357 /** Skip all spaces and return the next non-space character or EOF */
358 static
360  PPOS* ppos
361  )
362 {
363  int c;
364 
365  assert(ppos != NULL);
366 
367  do
368  {
369  c = getsymbol(ppos);
370  }
371  while(isspace(c));
372 
373  return c;
374 }
375 
376 /** Get name of a TAG or attribute from the input stream.
377  *
378  * Either it returns a pointer to allocated memory which contains the name or it returns NULL if
379  * there is some error.
380  */
381 static
382 char* getName(
383  PPOS* ppos
384  )
385 {
386  char* name = NULL;
387  size_t size = 0;
388  size_t len = 0;
389  int c;
390 
391  assert(ppos != NULL);
392 
393  c = getsymbol(ppos);
394 
395  if ( ! isalpha(c) && (c != '_') && (c != ':') )
396  {
397  xmlError(ppos, "Name starting with illegal charater");
398  return NULL;
399  }
400 
401  /* The following is wrong: Here almost all characters that we casted to unicode are feasible */
402  while ( isalnum(c) || (c == '_') || (c == ':') || (c == '.') || (c == '-') )
403  {
404  if ( len + 1 >= size )
405  {
406  size += NAME_EXT_SIZE;
407 
408  if ( name == NULL )
409  {
410  ALLOC_ABORT( BMSallocMemoryArray(&name, size) );
411  }
412  else
413  {
414  ALLOC_ABORT( BMSreallocMemoryArray(&name, size) );
415  }
416  }
417  assert(name != NULL);
418  assert(size > len);
419 
420  name[len++] = (char)c;
421 
422  c = getsymbol(ppos);
423  }
424  if ( c != EOF )
425  ungetsymbol(ppos, c);
426 
427  assert(name != NULL);
428 
429  if ( len == 0 )
430  {
431  BMSfreeMemoryArray(&name);
432  name = NULL;
433  }
434  else
435  name[len] = '\0';
436 
437  return name;
438 }
439 
440 /** Read the value of an attribute from the input stream.
441  *
442  * The value has to be between two " or ' (the other character is then valid as well). The function
443  * returns a pointer to allocated memory containing the value or it returns NULL in case of an
444  * error.
445  */
446 static
448  PPOS* ppos
449  )
450 {
451  char* attr = NULL;
452  int c;
453  int stop;
454  size_t len = 0;
455  size_t size = 0;
456 
457  assert(ppos != NULL);
458 
459  /* The following is not allowed according to the specification (the value has to be directly
460  * after the equation sign). */
461  c = skipSpace(ppos);
462 
463  if ( (c != '"') && (c != '\'') )
464  {
465  xmlError(ppos, "Atribute value does not start with \" or \'");
466  return NULL;
467  }
468  stop = c;
469 
470  for(;;)
471  {
472  if ( len == size )
473  {
474  size += ATTR_EXT_SIZE;
475 
476  if ( attr == NULL )
477  {
478  ALLOC_ABORT( BMSallocMemoryArray(&attr, size) );
479  }
480  else
481  {
482  ALLOC_ABORT( BMSreallocMemoryArray(&attr, size) );
483  }
484  }
485  assert(attr != NULL);
486  assert(size > len);
487 
488  c = getsymbol(ppos);
489 
490  if ( (c == stop) || (c == EOF) )
491  break;
492 
493  attr[len++] = (char)c;
494  }
495 
496  if ( c != EOF )
497  attr[len] = '\0';
498  else
499  {
500  BMSfreeMemoryArray(&attr);
501  attr = NULL;
502  }
503  return attr;
504 }
505 
506 /** Skip comment
507  *
508  * Return FALSE if an error occurs.
509  */
510 static
512  PPOS* ppos
513  )
514 {
515  XML_Bool result = TRUE;
516  int c;
517  int state = 0;
518 
519  assert(ppos != NULL);
520 
521  for(;;)
522  {
523  c = getsymbol(ppos);
524 
525  if ( c == EOF )
526  break;
527 
528  if ( (c == '>') && (state >= 2) )
529  break;
530 
531  state = (c == '-') ? state + 1 : 0;
532  }
533  if ( c == EOF )
534  {
535  xmlError(ppos, "Unexpected EOF in comment");
536  result = FALSE;
537  }
538  return result;
539 }
540 
541 /** Handles a CDATA section.
542  *
543  * Returns a pointer to allocated memory containing the data of this section or NULL in case of an
544  * error.
545  */
546 static
547 char* doCdata(
548  PPOS* ppos
549  )
550 {
551  char* data = NULL;
552  size_t size = 0;
553  size_t len = 0;
554  int state = 0;
555  int c;
556 
557  assert(ppos != NULL);
558 
559  for(;;)
560  {
561  c = getsymbol(ppos);
562 
563  if ( c == EOF )
564  break;
565 
566  if ( c == ']' )
567  state++;
568  else
569  if ( (c == '>') && (state >= 2) )
570  break;
571  else
572  state = 0;
573 
574  if ( len == size )
575  {
576  size += DATA_EXT_SIZE;
577 
578  if ( data == NULL )
579  {
580  ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
581  }
582  else
583  {
584  ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
585  }
586  }
587  assert(data != NULL);
588  assert(size > len);
589 
590  data[len++] = (char)c;
591  }
592  assert(data != NULL);
593 
594  /*lint --e{527}*/
595  if ( c != EOF )
596  {
597  assert(len >= 2);
598  assert(data != NULL);
599 
600  data[len - 2] = '\0'; /*lint !e413*/
601  }
602  else
603  {
604  BMSfreeMemoryArray(&data);
605  data = NULL;
606  xmlError(ppos, "Unexpected EOF in CDATA");
607  }
608  return data;
609 }
610 
611 /** Handle processing instructions (skipping) */
612 static
613 void handlePi(
614  PPOS* ppos
615  )
616 {
617  int c;
618 
619  assert(ppos != NULL);
620  assert(ppos->state == XML_STATE_BEFORE);
621 
622  do
623  {
624  c = getsymbol(ppos);
625  }
626  while ( (c != EOF) && (c != '>') );
627 
628  if ( c != EOF )
629  ppos->state = XML_STATE_PCDATA;
630  else
631  {
632  xmlError(ppos, "Unexpected EOF in PI");
633  ppos->state = XML_STATE_ERROR;
634  }
635 }
636 
637 /** Handles declarations that start with a <!.
638  *
639  * This includes comments. Does currenlty not work very well, because of DTDs.
640  */
641 static
643  PPOS* ppos
644  )
645 {
646  enum XmlSection
647  {
648  IS_COMMENT,
649  IS_ATTLIST,
650  IS_DOCTYPE,
651  IS_ELEMENT,
652  IS_ENTITY,
653  IS_NOTATION,
654  IS_CDATA
655  };
656  typedef enum XmlSection XMLSECTION;
657 
658  static struct
659  {
660  const char* name;
661  XMLSECTION what;
662  } key[] =
663  {
664  { "--", IS_COMMENT },
665  { "ATTLIST", IS_ATTLIST },
666  { "DOCTYPE", IS_DOCTYPE },
667  { "ELEMENT", IS_ELEMENT },
668  { "ENTITY", IS_ENTITY },
669  { "NOTATION", IS_NOTATION },
670  { "[CDATA[", IS_CDATA }
671  };
672  XML_NODE* node;
673  char* data;
674  int c;
675  int k = 0;
676  int beg = 0;
677  int end;
678 
679  assert(ppos != NULL);
680  assert(ppos->state == XML_STATE_BEFORE);
681 
682  end = (int) (sizeof(key) / sizeof(key[0])) - 1;
683  do
684  {
685  c = getsymbol(ppos);
686 
687  for(; (beg <= end) && (c != key[beg].name[k]); beg++)
688  ;
689  for(; (end >= beg) && (c != key[end].name[k]); end--)
690  ;
691  k++;
692  } while(beg < end);
693 
694  if ( beg != end )
695  {
696  xmlError(ppos, "Unknown declaration");
697 
698  while ( (c != EOF) && (c != '>') )
699  c = getsymbol(ppos);
700  }
701  else
702  {
703  assert(beg == end);
704  assert(beg < (int)(sizeof(key) / sizeof(*key)));
705 
706  switch(key[beg].what)
707  {
708  case IS_COMMENT :
709  if ( ! doComment(ppos) )
710  ppos->state = XML_STATE_ERROR;
711  break;
712  case IS_CDATA :
713  if ( (data = doCdata(ppos)) == NULL )
714  ppos->state = XML_STATE_ERROR;
715  else
716  {
717  if ( NULL == (node = xmlNewNode("#CDATA", ppos->lineno)) )
718  {
719  xmlError(ppos, "Can't create new node");
720  ppos->state = XML_STATE_ERROR;
721  }
722  else
723  {
724  BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
725  BMSfreeMemoryArray(&data);
726  xmlAppendChild(topPstack(ppos), node);
727  }
728  }
729  break;
730  case IS_ATTLIST :
731  case IS_ELEMENT :
732  case IS_NOTATION :
733  case IS_ENTITY :
734  case IS_DOCTYPE :
735  break;
736  default :
737  abort();
738  }
739  }
740 }
741 
742 /** Handle end tag */
743 static
745  PPOS* ppos
746  )
747 {
748  char* name;
749  int c;
750 
751  assert(ppos != NULL);
752 
753  if ( (name = getName(ppos)) == NULL )
754  xmlError(ppos, "Missing name in endtag");
755  else
756  {
757  c = skipSpace(ppos);
758 
759  if ( c != '>' )
760  {
761  xmlError(ppos, "Missing '>' in endtag");
762  ppos->state = XML_STATE_ERROR;
763  }
764  else
765  {
766  if ( strcmp(name, topPstack(ppos)->name) )
767  {
768  xmlError(ppos, "Name of endtag does not match starttag");
769  ppos->state = XML_STATE_ERROR;
770  }
771  else
772  {
773  if ( popPstack(ppos) )
774  ppos->state = XML_STATE_PCDATA;
775  else
776  ppos->state = XML_STATE_ERROR;
777  }
778  }
779 
780  BMSfreeMemoryArray(&name);
781  }
782 }
783 
784 /** Handle start tag */
785 static
787  PPOS* ppos
788  )
789 {
790  XML_NODE* node;
791  char* name;
792 
793  assert(ppos != NULL);
794 
795  name = getName(ppos);
796  if ( name == NULL )
797  {
798  xmlError(ppos, "Missing name in tagstart");
799  ppos->state = XML_STATE_ERROR;
800  }
801  else
802  {
803  node = xmlNewNode(name, ppos->lineno);
804  if ( node == NULL )
805  {
806  xmlError(ppos, "Can't create new node");
807  ppos->state = XML_STATE_ERROR;
808  }
809  else
810  {
811  xmlAppendChild(topPstack(ppos), node);
812 
813  if ( pushPstack(ppos, node) )
814  ppos->state = XML_STATE_IN_TAG;
815  else
816  ppos->state = XML_STATE_ERROR;
817  }
818  BMSfreeMemoryArray(&name);
819  }
820 }
821 
822 /** Checks for next tag */
823 static
825  PPOS* ppos /**< input stream position */
826  )
827 {
828  int c;
829 
830  assert(ppos != NULL);
831  assert(ppos->state == XML_STATE_BEFORE);
832 
833  c = skipSpace(ppos);
834 
835  if ( c != '<' )
836  {
837  xmlError(ppos, "Expecting '<'");
838  ppos->state = XML_STATE_ERROR;
839  }
840  else
841  {
842  c = getsymbol(ppos);
843 
844  switch(c)
845  {
846  case EOF :
847  xmlError(ppos, "Unexpected EOF");
848  ppos->state = XML_STATE_ERROR;
849  break;
850  case '!' :
851  handleDecl(ppos);
852  break;
853  case '?' :
854  handlePi(ppos);
855  break;
856  case '/' :
857  handleEndtag(ppos);
858  break;
859  default :
860  ungetsymbol(ppos, c);
861  handleStarttag(ppos);
862  break;
863  }
864  }
865 }
866 
867 /** Process tag */
868 static
870  PPOS* ppos /**< input stream position */
871  )
872 {
873  XML_ATTR* attr;
874  int c;
875  XML_Bool empty = FALSE;
876  char* name;
877  char* value;
878 
879  assert(ppos != NULL);
880  assert(ppos->state == XML_STATE_IN_TAG);
881 
882  c = skipSpace(ppos);
883 
884  if ( (c == '/') || (c == '>') || (c == EOF) )
885  {
886  if ( c == '/' )
887  {
888  empty = TRUE;
889  c = getsymbol(ppos);
890  }
891 
892  if ( c == EOF )
893  {
894  xmlError(ppos, "Unexpected EOF while in a tag");
895  ppos->state = XML_STATE_ERROR;
896  }
897 
898  if ( c == '>' )
899  {
900  ppos->state = XML_STATE_PCDATA;
901 
902  if (empty && ! popPstack(ppos))
903  ppos->state = XML_STATE_ERROR;
904  }
905  else
906  {
907  xmlError(ppos, "Expected tag end marker '>'");
908  ppos->state = XML_STATE_ERROR;
909  }
910  }
911  else
912  {
913  ungetsymbol(ppos, c);
914 
915  name = getName(ppos);
916  if ( name == NULL )
917  {
918  xmlError(ppos, "No name for attribute");
919  ppos->state = XML_STATE_ERROR;
920  }
921  else
922  {
923  c = skipSpace(ppos);
924 
925  if ( (c != '=') || ((value = getAttrval(ppos)) == NULL) )
926  {
927  xmlError(ppos, "Missing attribute value");
928  ppos->state = XML_STATE_ERROR;
929  BMSfreeMemoryArray(&name);
930  }
931  else
932  {
933  attr = xmlNewAttr(name, value);
934  if ( attr == NULL )
935  {
936  xmlError(ppos, "Can't create new attribute");
937  ppos->state = XML_STATE_ERROR;
938  }
939  else
940  {
941  xmlAddAttr(topPstack(ppos), attr);
942  }
943  BMSfreeMemoryArray(&name);
944  BMSfreeMemoryArray(&value);
945  }
946  }
947  }
948 }
949 
950 /* Handles PCDATA */
951 static
953  PPOS* ppos /**< input stream position */
954  )
955 {
956  XML_NODE* node;
957  char* data = NULL;
958  size_t size = 0;
959  size_t len = 0;
960  int c;
961 
962  assert(ppos != NULL);
963  assert(ppos->state == XML_STATE_PCDATA);
964 
965 #ifndef SPEC_LIKE_SPACE_HANDLING
966  c = skipSpace(ppos);
967  if ( c != EOF )
968  ungetsymbol(ppos, c);
969 #endif
970  c = getsymbol(ppos);
971 
972  while ( (c != EOF) && (c != '<') )
973  {
974  if ( len + 1 >= size ) /* leave space for terminating '\0' */
975  {
976  size += DATA_EXT_SIZE;
977 
978  if ( data == NULL )
979  {
980  ALLOC_ABORT( BMSallocMemoryArray(&data, size) );
981  }
982  else
983  {
984  ALLOC_ABORT( BMSreallocMemoryArray(&data, size) );
985  }
986  }
987  assert(data != NULL);
988  assert(size > len + 1);
989 
990  data[len++] = (char)c;
991 
992  c = getsymbol(ppos);
993  }
994  if ( data == NULL )
995  {
996  if ( c == EOF )
997  ppos->state = XML_STATE_EOF;
998  else if ( c == '<' )
999  {
1000  ppos->state = XML_STATE_BEFORE;
1001  ungetsymbol(ppos, c);
1002  }
1003  else
1004  {
1005  ppos->state = XML_STATE_ERROR;
1006  }
1007  }
1008  else
1009  {
1010  assert(len < size);
1011  data[len] = '\0';
1012 
1013  if ( c == EOF )
1014  ppos->state = XML_STATE_ERROR;
1015  else
1016  {
1017  ungetsymbol(ppos, c);
1018 
1019  node = xmlNewNode("#PCDATA", ppos->lineno);
1020  if ( node == NULL )
1021  {
1022  xmlError(ppos, "Can't create new node");
1023  ppos->state = XML_STATE_ERROR;
1024  }
1025  else
1026  {
1027  BMSduplicateMemoryArray(&node->data, data, strlen(data)+1);
1028  xmlAppendChild(topPstack(ppos), node);
1029  ppos->state = XML_STATE_BEFORE;
1030  }
1031  }
1032 
1033  BMSfreeMemoryArray(&data);
1034  }
1035 }
1036 
1037 /** Parse input stream */
1038 static
1040  PPOS* ppos /**< input stream position */
1041  )
1042 {
1043  XML_Bool ok = TRUE;
1044 
1045  while (ok)
1046  {
1047  debugMessage("state=%d\n", ppos->state);
1048 
1049  switch (ppos->state)
1050  {
1051  case XML_STATE_BEFORE :
1052  procBefore(ppos);
1053  break;
1054  case XML_STATE_IN_TAG :
1055  procInTag(ppos);
1056  break;
1057  case XML_STATE_PCDATA :
1058  procPcdata(ppos);
1059  break;
1060  case XML_STATE_EOF :
1061  ok = FALSE;
1062  break;
1063  case XML_STATE_ERROR :
1064  ok = FALSE;
1065  break;
1066  default :
1067  xmlError(ppos, "Internal Error, illegal state");
1068  ok = FALSE;
1069  }
1070  }
1071  return (ppos->state == XML_STATE_EOF);
1072 }
1073 
1074 /** Parse file */
1076  const char* filename /**< XML file name */
1077  )
1078 {
1079  PPOS ppos;
1080  XML_NODE* node = NULL;
1081  XML_ATTR* attr;
1082  XML_Bool result = FALSE;
1083  char* myfilename;
1084  size_t filenamelen;
1085 
1086  /* allocate space and copy filename (possibly modified below) in two steps in order to satisfy valgrind */
1087  assert( filename != NULL );
1088  filenamelen = strlen(filename);
1089  if ( BMSallocMemoryArray(&myfilename, filenamelen + 5) == NULL )
1090  return NULL;
1091  BMScopyMemoryArray(myfilename, filename, filenamelen + 1);
1092 
1093 #ifdef WITH_ZLIB
1094  if ( access(filename, R_OK) != 0 )
1095  {
1096  strcat(myfilename, ".gz");
1097 
1098  /* If .gz also does not work, revert to the old name
1099  * to get a better error message.
1100  */
1101  if ( access(myfilename, R_OK) != 0 )
1102  strcpy(myfilename, filename);
1103  }
1104 #endif
1105  ppos.fp = FOPEN(myfilename, "r");
1106  if ( ppos.fp == NULL )
1107  perror(myfilename);
1108  else
1109  {
1110  ppos.filename = myfilename;
1111  ppos.buf[0] = '\0';
1112  ppos.pos = 0;
1113  ppos.lineno = 1;
1114  ppos.nextsym = 0;
1115  ppos.lastsym = 0;
1116  ppos.state = XML_STATE_BEFORE;
1117  ppos.top = NULL;
1118 
1119  node = xmlNewNode("#ROOT", ppos.lineno);
1120  if ( node == NULL )
1121  {
1122  xmlError(&ppos, "Can't create new node");
1123  }
1124  else
1125  {
1126  attr = xmlNewAttr("filename", myfilename);
1127  if ( attr == NULL )
1128  xmlError(&ppos, "Can't create new attribute");
1129  else
1130  {
1131  xmlAddAttr(node, attr);
1132 
1133  /* push root node on stack and start to process */
1134  if ( pushPstack(&ppos, node) )
1135  {
1136  result = xmlParse(&ppos);
1137 
1138  clearPstack(&ppos);
1139  }
1140  }
1141  }
1142 
1143  if ( ! result && (node != NULL) )
1144  {
1145  xmlErrmsg(&ppos, "Parsing error, processing stopped", TRUE, __FILE__, __LINE__);
1146  xmlFreeNode(node);
1147  node = NULL;
1148  }
1149  if ( FCLOSE(ppos.fp) )
1150  perror(myfilename);
1151  }
1152  BMSfreeMemoryArray(&myfilename);
1153 
1154  return node;
1155 }
1156 
1157 
1158 
1159 
1160 
1161 
1162 /*----------------------------------------------------------------------------------------------*/
1163 
1164 
1165 /** create new node */
1167  const char* name,
1168  int lineno
1169  )
1170 {
1171  XML_NODE* n = NULL;
1172 
1173  assert(name != NULL);
1174 
1175  if ( BMSallocMemory(&n) != NULL )
1176  {
1177  BMSclearMemory(n);
1178  BMSduplicateMemoryArray(&n->name, name, strlen(name)+1);
1179  n->lineno = lineno;
1180  }
1181  return n;
1182 }
1183 
1184 /** create new attribute */
1186  const char* name,
1187  const char* value
1188  )
1189 {
1190  XML_ATTR* a = NULL;
1191 
1192  assert(name != NULL);
1193  assert(value != NULL);
1194 
1195  if ( BMSallocMemory(&a) != NULL )
1196  {
1197  BMSclearMemory(a);
1198  BMSduplicateMemoryArray(&a->name, name, strlen(name)+1);
1199  BMSduplicateMemoryArray(&a->value, value, strlen(value)+1);
1200  }
1201  return a;
1202 }
1203 
1204 /** add attribute */
1206  XML_NODE* n,
1207  XML_ATTR* a
1208  )
1209 {
1210  assert(n != NULL);
1211  assert(a != NULL);
1212 
1213  a->next = n->attrlist;
1214  n->attrlist = a;
1215 }
1216 
1217 /** append child node */
1219  XML_NODE* parent,
1220  XML_NODE* child
1221  )
1222 {
1223  assert(parent != NULL);
1224  assert(child != NULL);
1225 
1226  child->parent = parent;
1227  child->prevsibl = parent->lastchild;
1228  child->nextsibl = NULL;
1229  parent->lastchild = child;
1230 
1231  if ( child->prevsibl != NULL )
1232  child->prevsibl->nextsibl = child;
1233 
1234  if ( parent->firstchild == NULL )
1235  parent->firstchild = child;
1236 }
1237 
1238 /** free attribute */
1239 static
1241  XML_ATTR* attr
1242  )
1243 {
1244  XML_ATTR* a;
1245 
1246  /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1247  * and might overflow the heap. */
1248  a = attr;
1249  while (a != NULL)
1250  {
1251  XML_ATTR* b;
1252  b = a->next;
1253 
1254  assert(a->name != NULL);
1255  assert(a->value != NULL);
1256 
1257  BMSfreeMemoryArray(&a->name);
1258  BMSfreeMemoryArray(&a->value);
1259  BMSfreeMemory(&a);
1260  a = b;
1261  }
1262 }
1263 
1264 /** free node */
1266  XML_NODE* node
1267  )
1268 {
1269  XML_NODE* n;
1270 
1271  if ( node == NULL )
1272  return;
1273 
1274  /* Free data from back to front (because free is faster this way). */
1275  /* Note: use an iterative implementation instead of a recursive one; the latter is much slower for large instances
1276  * and might overflow the heap. */
1277  n = node->lastchild;
1278  while ( n != NULL )
1279  {
1280  XML_NODE* m;
1281  m = n->prevsibl;
1282  xmlFreeNode(n);
1283  n = m;
1284  }
1285 
1286  xmlFreeAttr(node->attrlist);
1287 
1288  if ( node->data != NULL )
1289  {
1290  BMSfreeMemoryArray(&node->data);
1291  }
1292  assert(node->name != NULL);
1293 
1294  BMSfreeMemoryArray(&node->name);
1295  BMSfreeMemory(&node);
1296 }
1297 
1298 /** output node */
1300  const XML_NODE* root
1301  )
1302 {
1303  const XML_NODE* n;
1304  const XML_ATTR* a;
1305 
1306  assert(root != NULL);
1307 
1308  for (n = root; n != NULL; n = n->nextsibl)
1309  {
1310  infoMessage("Name: %s\n", n->name);
1311  infoMessage("Line: %d\n", n->lineno);
1312  infoMessage("Data: %s\n", (n->data != NULL) ? n->data : "***");
1313 
1314  for (a = n->attrlist; a != NULL; a = a->next)
1315  infoMessage("Attr: %s = [%s]\n", a->name, a->value);
1316 
1317  if ( n->firstchild != NULL )
1318  {
1319  infoMessage("->\n");
1320  xmlShowNode(n->firstchild);
1321  infoMessage("<-\n");
1322  }
1323  }
1324 }
1325 
1326 /** get attribute value */
1327 const char* xmlGetAttrval(
1328  const XML_NODE* node,
1329  const char* name
1330  )
1331 {
1332  XML_ATTR* a;
1333 
1334  assert(node != NULL);
1335  assert(name != NULL);
1336 
1337  for (a = node->attrlist; a != NULL; a = a->next)
1338  {
1339  if ( ! strcmp(name, a->name) )
1340  break;
1341  }
1342 
1343 #ifdef SCIP_DEBUG
1344  if (a == NULL)
1345  infoMessage("Error: Attribute %s in TAG <%s> not found\n", name, node->name);
1346 #endif
1347 
1348  return (a == NULL) ? NULL : a->value;
1349 }
1350 
1351 /** return first node */
1353  const XML_NODE* node,
1354  const char* name
1355  )
1356 {
1357  const XML_NODE* n;
1358 
1359  assert(node != NULL);
1360  assert(name != NULL);
1361 
1362  for (n = node; n != NULL; n = n->nextsibl)
1363  {
1364  if ( ! strcmp(name, n->name) )
1365  break;
1366  }
1367 
1368  return n;
1369 }
1370 
1371 /** return next node */
1373  const XML_NODE* node,
1374  const char* name
1375  )
1376 {
1377  assert(node != NULL);
1378  assert(name != NULL);
1379 
1380  return (node->nextsibl == NULL) ? NULL : xmlFirstNode(node->nextsibl, name);
1381 }
1382 
1383 /** find node */
1385  const XML_NODE* node,
1386  const char* name
1387  )
1388 {
1389  const XML_NODE* n;
1390  const XML_NODE* r;
1391 
1392  assert(node != NULL);
1393  assert(name != NULL);
1394 
1395  if ( ! strcmp(name, node->name) )
1396  return node;
1397 
1398  for (n = node->firstchild; n != NULL; n = n->nextsibl)
1399  {
1400  r = xmlFindNode(n, name);
1401  if ( r != NULL )
1402  return r;
1403  }
1404 
1405  return NULL;
1406 }
1407 
1408 /** find node with bound on the depth */
1410  const XML_NODE* node, /**< current node - use start node to begin */
1411  const char* name, /**< name of tag to search for */
1412  int depth, /**< current depth - start with 0 for root */
1413  int maxdepth /**< maximal depth */
1414  )
1415 {
1416  const XML_NODE* n;
1417  const XML_NODE* r;
1418 
1419  assert(node != NULL);
1420  assert(name != NULL);
1421 
1422  if ( ! strcmp(name, node->name) )
1423  return node;
1424 
1425  if ( depth < maxdepth )
1426  {
1427  for (n = node->firstchild; n != NULL; n = n->nextsibl)
1428  {
1429  r = xmlFindNodeMaxdepth(n, name, depth+1, maxdepth);
1430  if ( r != NULL )
1431  return r;
1432  }
1433  }
1434 
1435  return NULL;
1436 }
1437 
1438 /** return next sibling */
1440  const XML_NODE* node
1441  )
1442 {
1443  assert(node != NULL);
1444 
1445  return node->nextsibl;
1446 }
1447 
1448 /** return previous sibling */
1450  const XML_NODE* node
1451  )
1452 {
1453  assert(node != NULL);
1454 
1455  return node->prevsibl;
1456 }
1457 
1458 /** return first child */
1460  const XML_NODE* node
1461  )
1462 {
1463  assert(node != NULL);
1464 
1465  return node->firstchild;
1466 }
1467 
1468 /** return last child */
1470  const XML_NODE* node
1471  )
1472 {
1473  assert(node != NULL);
1474 
1475  return node->lastchild;
1476 }
1477 
1478 /** return name of node */
1479 const char* xmlGetName(
1480  const XML_NODE* node
1481  )
1482 {
1483  assert(node != NULL);
1484 
1485  return node->name;
1486 }
1487 
1488 /** get line number */
1490  const XML_NODE* node
1491  )
1492 {
1493  assert(node != NULL);
1494 
1495  return node->lineno;
1496 }
1497 
1498 /** get data */
1499 const char* xmlGetData(
1500  const XML_NODE* node
1501  )
1502 {
1503  assert(node != NULL);
1504 
1505  return node->data;
1506 }
1507 
1508 /** find PCDATA */
1509 const char* xmlFindPcdata(
1510  const XML_NODE* node,
1511  const char* name
1512  )
1513 {
1514  const XML_NODE* n;
1515 
1516  assert(node != NULL);
1517  assert(name != NULL);
1518 
1519  n = xmlFindNode(node, name);
1520  if ( n == NULL )
1521  return NULL;
1522 
1523  if ( ! strcmp(n->firstchild->name, "#PCDATA") )
1524  return n->firstchild->data;
1525 
1526  return NULL;
1527 }
#define XML_Bool
Definition: xmldef.h:33
#define LINE_BUF_SIZE
Definition: xmlparse.c:56
const XML_NODE * xmlFirstNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1352
static void handleDecl(PPOS *ppos)
Definition: xmlparse.c:642
PSTACK * next
Definition: xmlparse.c:80
const char * xmlFindPcdata(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1509
void xmlFreeNode(XML_NODE *node)
Definition: xmlparse.c:1265
#define FREAD(buf, len, fp)
Definition: xmldef.h:54
static int getsymbol(PPOS *ppos)
Definition: xmlparse.c:310
#define ALLOC_ABORT(x)
Definition: tclique_def.h:35
#define FCLOSE(fp)
Definition: xmldef.h:52
struct XML_ATTR_struct XML_ATTR
Definition: xml.h:32
static char * doCdata(PPOS *ppos)
Definition: xmlparse.c:547
static void xmlFreeAttr(XML_ATTR *attr)
Definition: xmlparse.c:1240
#define FALSE
Definition: def.h:64
XML_NODE * xmlNewNode(const char *name, int lineno)
Definition: xmlparse.c:1166
#define TRUE
Definition: def.h:63
const char * xmlGetName(const XML_NODE *node)
Definition: xmlparse.c:1479
#define BMSallocMemoryArray(ptr, num)
Definition: memory.h:82
#define DATA_EXT_SIZE
Definition: xmlparse.c:55
enum parse_state_enum PSTATE
Definition: xmlparse.c:74
#define ATTR_EXT_SIZE
Definition: xmlparse.c:54
XML_ATTR * xmlNewAttr(const char *name, const char *value)
Definition: xmlparse.c:1185
#define BMSfreeMemory(ptr)
Definition: memory.h:104
void xmlShowNode(const XML_NODE *root)
Definition: xmlparse.c:1299
static void procInTag(PPOS *ppos)
Definition: xmlparse.c:869
const char * xmlGetData(const XML_NODE *node)
Definition: xmlparse.c:1499
#define debugMessage
Definition: tclique_def.h:65
static void xmlErrmsg(PPOS *ppos, const char *msg, XML_Bool msg_only, const char *file, int line)
Definition: xmlparse.c:99
static void procPcdata(PPOS *ppos)
Definition: xmlparse.c:952
static int mygetc(PPOS *ppos)
Definition: xmlparse.c:236
PSTACK * top
Definition: xmlparse.c:94
static void ungetsymbol(PPOS *ppos, int c)
Definition: xmlparse.c:346
#define BMSfreeMemoryArray(ptr)
Definition: memory.h:106
parse_state_enum
Definition: xmlparse.c:66
static XML_Bool popPstack(PPOS *ppos)
Definition: xmlparse.c:196
struct XML_NODE_struct XML_NODE
Definition: xml.h:41
static XML_Bool doComment(PPOS *ppos)
Definition: xmlparse.c:511
#define ALLOC_FALSE(x)
Definition: tclique_def.h:47
#define NULL
Definition: lpi_spx1.cpp:137
const XML_NODE * xmlFindNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1384
const char * xmlGetAttrval(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1327
const XML_NODE * xmlNextNode(const XML_NODE *node, const char *name)
Definition: xmlparse.c:1372
static XML_Bool xmlParse(PPOS *ppos)
Definition: xmlparse.c:1039
#define BMSduplicateMemoryArray(ptr, source, num)
Definition: memory.h:102
const XML_NODE * xmlFirstChild(const XML_NODE *node)
Definition: xmlparse.c:1459
XML_NODE * xmlProcess(const char *filename)
Definition: xmlparse.c:1075
static int skipSpace(PPOS *ppos)
Definition: xmlparse.c:359
static XML_NODE * topPstack(const PPOS *ppos)
Definition: xmlparse.c:181
static char * getAttrval(PPOS *ppos)
Definition: xmlparse.c:447
void xmlAddAttr(XML_NODE *n, XML_ATTR *a)
Definition: xmlparse.c:1205
#define FGETS(buf, len, fp)
Definition: xmldef.h:53
#define BMScopyMemoryArray(ptr, source, num)
Definition: memory.h:93
#define FOPEN(file, mode)
Definition: xmldef.h:51
#define infoMessage
Definition: tclique_def.h:71
#define FPTYPE
Definition: xmldef.h:55
#define xmlError(a, b)
Definition: xmlparse.c:58
#define BMSclearMemory(ptr)
Definition: memory.h:88
static char * getName(PPOS *ppos)
Definition: xmlparse.c:382
XML_NODE * node
Definition: xmlparse.c:79
const XML_NODE * xmlNextSibl(const XML_NODE *node)
Definition: xmlparse.c:1439
char buf[LINE_BUF_SIZE]
Definition: xmlparse.c:88
const char * filename
Definition: xmlparse.c:86
static XML_Bool pushPstack(PPOS *ppos, XML_NODE *node)
Definition: xmlparse.c:158
#define BMSallocMemory(ptr)
Definition: memory.h:78
#define BMSreallocMemoryArray(ptr, num)
Definition: memory.h:86
declarations for XML parsing
#define NAME_EXT_SIZE
Definition: xmlparse.c:53
const XML_NODE * xmlPrevSibl(const XML_NODE *node)
Definition: xmlparse.c:1449
const XML_NODE * xmlLastChild(const XML_NODE *node)
Definition: xmlparse.c:1469
const XML_NODE * xmlFindNodeMaxdepth(const XML_NODE *node, const char *name, int depth, int maxdepth)
Definition: xmlparse.c:1409
static void handlePi(PPOS *ppos)
Definition: xmlparse.c:613
int xmlGetLine(const XML_NODE *node)
Definition: xmlparse.c:1489
void xmlAppendChild(XML_NODE *parent, XML_NODE *child)
Definition: xmlparse.c:1218
static void handleStarttag(PPOS *ppos)
Definition: xmlparse.c:786
PSTATE state
Definition: xmlparse.c:93
static void procBefore(PPOS *ppos)
Definition: xmlparse.c:824
static void handleEndtag(PPOS *ppos)
Definition: xmlparse.c:744
static void clearPstack(PPOS *ppos)
Definition: xmlparse.c:224
memory allocation routines
definitions for XML parsing