diff -urN -X /home/dajobe/dev/dontdiff -x raptor.rdf -x file1.txt -x xmlent1.rdf -x rapper -x rdfdiff raptor-1.4.21.orig/src/raptor.h raptor-1.4.21/src/raptor.h --- src/raptor.h 2010-01-29 15:54:42.000000000 -0800 +++ src/raptor.h 2012-02-04 15:29:56.000000000 -0800 @@ -407,6 +407,7 @@ * @RAPTOR_FEATURE_RSS_TRIPLES: Atom/RSS serializer writes extra RDF triples it finds (none, rdf-xml, atom-triples) * @RAPTOR_FEATURE_ATOM_ENTRY_URI: Atom entry URI. If given, generate an Atom Entry Document with the item having the given URI, otherwise generate an Atom Feed Document with any items found. * @RAPTOR_FEATURE_PREFIX_ELEMENTS: Integer. If set, generate Atom/RSS1.0 documents with prefixed elements, otherwise unprefixed. + * @RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: When reading XML, load external entities. * @RAPTOR_FEATURE_LAST: Internal * * Raptor parser, serializer or XML writer features. @@ -448,7 +449,8 @@ RAPTOR_FEATURE_RSS_TRIPLES, RAPTOR_FEATURE_ATOM_ENTRY_URI, RAPTOR_FEATURE_PREFIX_ELEMENTS, - RAPTOR_FEATURE_LAST = RAPTOR_FEATURE_PREFIX_ELEMENTS + RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES, + RAPTOR_FEATURE_LAST = RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES } raptor_feature; diff -urN -X /home/dajobe/dev/dontdiff -x raptor.rdf -x file1.txt -x xmlent1.rdf -x rapper -x rdfdiff raptor-1.4.21.orig/src/raptor_feature.c raptor-1.4.21/src/raptor_feature.c --- src/raptor_feature.c 2010-01-29 15:54:42.000000000 -0800 +++ src/raptor_feature.c 2012-02-04 15:29:56.000000000 -0800 @@ -93,7 +93,8 @@ { RAPTOR_FEATURE_JSON_EXTRA_DATA , 6, "jsonExtraData", "JSON serializer extra data" }, { RAPTOR_FEATURE_RSS_TRIPLES , 6, "rssTriples", "Atom/RSS serializer writes extra RDF triples" }, { RAPTOR_FEATURE_ATOM_ENTRY_URI , 6, "atomEntryUri", "Atom serializer Entry URI" }, - { RAPTOR_FEATURE_PREFIX_ELEMENTS , 2, "prefixElements", "Atom/RSS serializers write namespace-prefixed elements" } + { RAPTOR_FEATURE_PREFIX_ELEMENTS , 2, "prefixElements", "Atom/RSS serializers write namespace-prefixed elements" 
}, + { RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES, 1, "loadExternalEntities", "Load external XML entities." } }; diff -urN -X /home/dajobe/dev/dontdiff -x raptor.rdf -x file1.txt -x xmlent1.rdf -x rapper -x rdfdiff raptor-1.4.21.orig/src/raptor_internal.h raptor-1.4.21/src/raptor_internal.h --- src/raptor_internal.h 2010-01-29 15:54:42.000000000 -0800 +++ src/raptor_internal.h 2012-02-04 15:30:55.000000000 -0800 @@ -852,7 +852,6 @@ #ifdef RAPTOR_WWW_LIBCURL #include <curl/curl.h> -#include <curl/types.h> #include <curl/easy.h> #endif @@ -1060,6 +1059,14 @@ /* sax2 init failed - do not try to do anything with it */ int failed; + + /* call SAX2 handlers if non-0 */ + int enabled; + + /* FEATURE: + * non 0 if XML entities should be loaded + */ + int feature_load_external_entities; }; int raptor_sax2_init(raptor_world* world); diff -urN -X /home/dajobe/dev/dontdiff -x raptor.rdf -x file1.txt -x xmlent1.rdf -x rapper -x rdfdiff raptor-1.4.21.orig/src/raptor_libxml.c raptor-1.4.21/src/raptor_libxml.c --- src/raptor_libxml.c 2010-01-29 15:54:42.000000000 -0800 +++ src/raptor_libxml.c 2012-02-22 12:29:38.000000000 -0800 @@ -142,18 +142,120 @@ static xmlParserInputPtr raptor_libxml_resolveEntity(void* user_data, - const xmlChar *publicId, const xmlChar *systemId) { - raptor_sax2* sax2=(raptor_sax2*)user_data; - return libxml2_resolveEntity(sax2->xc, publicId, systemId); + const xmlChar *publicId, const xmlChar *systemId) +{ /* SAX resolveEntity hook: loads the entity only when feature_load_external_entities is set, otherwise returns NULL */ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + xmlParserCtxtPtr ctxt = sax2->xc; + const unsigned char *uri_string = NULL; + xmlParserInputPtr entity_input = NULL; /* must be initialised: returned unchanged when loading is refused by policy */ + int load_entity = 0; + + if(!ctxt) + return NULL; + + if(ctxt->input) + uri_string = (const unsigned char *)ctxt->input->filename; + + if(!uri_string) + uri_string = (const unsigned char *)ctxt->directory; + + load_entity = sax2->feature_load_external_entities; + + if(load_entity) { + entity_input = xmlLoadExternalEntity((const char*)uri_string, + (const char*)publicId, + ctxt); /* NOTE(review): the URL passed is the current input's filename/directory, not systemId - confirm this is intended */ + } else { + RAPTOR_DEBUG4("Not loading entity URI %s by policy 
for publicId '%s' systemId '%s'\n", uri_string, publicId, systemId); + } + + return entity_input; } static xmlEntityPtr -raptor_libxml_getEntity(void* user_data, const xmlChar *name) { - raptor_sax2* sax2=(raptor_sax2*)user_data; - return libxml2_getEntity(sax2->xc, name); -} +raptor_libxml_getEntity(void* user_data, const xmlChar *name) +{ /* SAX getEntity hook: looks the entity up in memory; an external general parsed entity is parsed only when feature_load_external_entities is set, otherwise it gets an empty text child */ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + xmlParserCtxtPtr xc = sax2->xc; + xmlEntityPtr ret = NULL; + + if(!xc) + return NULL; + if(!xc->inSubset) { + /* looks for hardcoded set of entity names - lt, gt etc. */ + ret = xmlGetPredefinedEntity(name); + if(ret) { + RAPTOR_DEBUG2("Entity '%s' found in predefined set\n", name); + return ret; + } + } + + /* This section uses xmlGetDocEntity which looks for entities in + * memory only, never from a file or URI + */ + if(xc->myDoc && (xc->myDoc->standalone == 1)) { + RAPTOR_DEBUG2("Entity '%s' document is standalone\n", name); + /* Document is standalone: no entities are required to interpret doc */ + if(xc->inSubset == 2) { + xc->myDoc->standalone = 0; + ret = xmlGetDocEntity(xc->myDoc, name); + xc->myDoc->standalone = 1; + } else { + ret = xmlGetDocEntity(xc->myDoc, name); + if(!ret) { + xc->myDoc->standalone = 0; + ret = xmlGetDocEntity(xc->myDoc, name); + xc->myDoc->standalone = 1; + } + } + } else { + ret = xmlGetDocEntity(xc->myDoc, name); + } + + if(ret && !ret->children && + (ret->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) { + /* Entity is an external general parsed entity. It may be in a + * catalog file, user file or user URI + */ + int val = 0; + xmlNodePtr children; + int load_entity = 0; + + load_entity = sax2->feature_load_external_entities; + + if(!load_entity) { + RAPTOR_DEBUG2("Not getting entity URI %s by policy\n", ret->URI); + children = xmlNewText((const xmlChar*)""); + } else { + /* Disable SAX2 handlers so that the SAX2 events do not all get + * sent to callbacks during dealing with the entity parsing. 
+ */ + sax2->enabled = 0; + val = xmlParseCtxtExternalEntity(xc, ret->URI, ret->ExternalID, &children); + sax2->enabled = 1; + } + + if(!val) { + xmlAddChildList((xmlNodePtr)ret, children); + } else { + xc->validate = 0; + return NULL; + } + + ret->owner = 1; + +#if LIBXML_VERSION >= 20627 + /* Mark this entity as having been checked - never do this again */ + if(!ret->checked) + ret->checked = 1; +#endif /* LIBXML_VERSION >= 20627 */ + } /* closes the external-entity if(); kept outside the #if so braces balance for every libxml2 version */ + + return ret; +} + static xmlEntityPtr raptor_libxml_getParameterEntity(void* user_data, const xmlChar *name) { diff -urN -X /home/dajobe/dev/dontdiff -x raptor.rdf -x file1.txt -x xmlent1.rdf -x rapper -x rdfdiff raptor-1.4.21.orig/src/raptor_parse.c raptor-1.4.21/src/raptor_parse.c --- src/raptor_parse.c 2010-01-29 15:54:42.000000000 -0800 +++ src/raptor_parse.c 2012-02-04 15:29:56.000000000 -0800 @@ -1443,6 +1443,7 @@ case RAPTOR_FEATURE_MICROFORMATS: case RAPTOR_FEATURE_HTML_LINK: case RAPTOR_FEATURE_WWW_TIMEOUT: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: parser->features[(int)feature]=value; break; @@ -1564,6 +1565,7 @@ case RAPTOR_FEATURE_MICROFORMATS: case RAPTOR_FEATURE_HTML_LINK: case RAPTOR_FEATURE_WWW_TIMEOUT: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: result = parser->features[(int)feature]; break; diff -urN -X /home/dajobe/dev/dontdiff -x raptor.rdf -x file1.txt -x xmlent1.rdf -x rapper -x rdfdiff raptor-1.4.21.orig/src/raptor_rdfxml.c raptor-1.4.21/src/raptor_rdfxml.c --- src/raptor_rdfxml.c 2010-01-29 15:54:42.000000000 -0800 +++ src/raptor_rdfxml.c 2012-02-04 15:29:56.000000000 -0800 @@ -1130,6 +1130,9 @@ raptor_sax2_set_feature(rdf_xml_parser->sax2, RAPTOR_FEATURE_NO_NET, rdf_parser->features[RAPTOR_FEATURE_NO_NET]); + raptor_sax2_set_feature(rdf_xml_parser->sax2, + RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES, + rdf_parser->features[RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES]); raptor_sax2_parse_start(rdf_xml_parser->sax2, uri); diff -urN -X /home/dajobe/dev/dontdiff -x raptor.rdf -x file1.txt -x xmlent1.rdf -x rapper -x rdfdiff 
raptor-1.4.21.orig/src/raptor_rss.c raptor-1.4.21/src/raptor_rss.c --- src/raptor_rss.c 2010-01-29 15:54:42.000000000 -0800 +++ src/raptor_rss.c 2012-02-04 15:29:56.000000000 -0800 @@ -247,6 +247,9 @@ raptor_sax2_set_feature(rss_parser->sax2, RAPTOR_FEATURE_NO_NET, rdf_parser->features[RAPTOR_FEATURE_NO_NET]); + raptor_sax2_set_feature(rss_parser->sax2, + RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES, + rdf_parser->features[RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES]); raptor_sax2_parse_start(rss_parser->sax2, uri); diff -urN -X /home/dajobe/dev/dontdiff -x raptor.rdf -x file1.txt -x xmlent1.rdf -x rapper -x rdfdiff raptor-1.4.21.orig/src/raptor_sax2.c raptor-1.4.21/src/raptor_sax2.c --- src/raptor_sax2.c 2010-01-29 15:54:42.000000000 -0800 +++ src/raptor_sax2.c 2012-02-04 15:29:56.000000000 -0800 @@ -106,6 +106,8 @@ sax2->user_data=user_data; + sax2->enabled = 1; + sax2->locator=error_handlers->locator; sax2->error_handlers=error_handlers; @@ -721,6 +723,10 @@ sax2->feature_no_net=value; break; + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: + sax2->feature_load_external_entities=value; + break; + case RAPTOR_FEATURE_SCANNING: case RAPTOR_FEATURE_ASSUME_IS_RDF: case RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES: @@ -802,7 +808,7 @@ unsigned char *xml_language=NULL; raptor_uri *xml_base=NULL; - if(sax2->failed) + if(sax2->failed || !sax2->enabled) return; #ifdef RAPTOR_XML_EXPAT @@ -1031,7 +1037,7 @@ raptor_sax2* sax2=(raptor_sax2*)user_data; raptor_xml_element* xml_element; - if(sax2->failed) + if(sax2->failed || !sax2->enabled) return; #ifdef RAPTOR_XML_EXPAT @@ -1069,7 +1075,11 @@ raptor_sax2_characters(void* user_data, const unsigned char *s, int len) { raptor_sax2* sax2=(raptor_sax2*)user_data; - if(!sax2->failed && sax2->characters_handler) + + if(sax2->failed || !sax2->enabled) + return; + + if(sax2->characters_handler) sax2->characters_handler(sax2->user_data, sax2->current_element, s, len); } @@ -1085,7 +1095,10 @@ #endif #endif - if(!sax2->failed && sax2->cdata_handler) + 
if(sax2->failed || !sax2->enabled) + return; + + if(sax2->cdata_handler) sax2->cdata_handler(sax2->user_data, sax2->current_element, s, len); } @@ -1095,7 +1108,11 @@ raptor_sax2_comment(void* user_data, const unsigned char *s) { raptor_sax2* sax2=(raptor_sax2*)user_data; - if(!sax2->failed && sax2->comment_handler) + + if(sax2->failed || !sax2->enabled) + return; + + if(sax2->comment_handler) sax2->comment_handler(sax2->user_data, sax2->current_element, s); } @@ -1110,7 +1127,11 @@ const unsigned char* notationName) { raptor_sax2* sax2=(raptor_sax2*)user_data; - if(!sax2->failed && sax2->unparsed_entity_decl_handler) + + if(sax2->failed || !sax2->enabled) + return; + + if(sax2->unparsed_entity_decl_handler) sax2->unparsed_entity_decl_handler(sax2->user_data, entityName, base, systemId, publicId, notationName); @@ -1127,7 +1148,7 @@ { raptor_sax2* sax2=(raptor_sax2*)user_data; - if(sax2->failed) + if(sax2->failed || !sax2->enabled) return 0; if(sax2->external_entity_ref_handler) diff -urN -X /home/dajobe/dev/dontdiff -x raptor.rdf -x file1.txt -x xmlent1.rdf -x rapper -x rdfdiff raptor-1.4.21.orig/src/raptor_serialize.c raptor-1.4.21/src/raptor_serialize.c --- src/raptor_serialize.c 2010-01-29 15:54:42.000000000 -0800 +++ src/raptor_serialize.c 2012-02-04 15:29:56.000000000 -0800 @@ -974,6 +974,7 @@ /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_WRITER_AUTO_INDENT: @@ -1081,6 +1082,7 @@ /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_WRITER_AUTO_INDENT: @@ -1222,6 +1224,7 @@ /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_WRITER_AUTO_INDENT: @@ -1324,6 +1327,7 @@ /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_WRITER_AUTO_INDENT: 
diff -urN -X /home/dajobe/dev/dontdiff -x raptor.rdf -x file1.txt -x xmlent1.rdf -x rapper -x rdfdiff raptor-1.4.21.orig/src/raptor_turtle_writer.c raptor-1.4.21/src/raptor_turtle_writer.c --- src/raptor_turtle_writer.c 2010-01-29 15:54:42.000000000 -0800 +++ src/raptor_turtle_writer.c 2012-02-04 15:29:56.000000000 -0800 @@ -740,6 +740,7 @@ /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_RELATIVE_URIS: @@ -854,6 +855,7 @@ /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_RELATIVE_URIS: diff -urN -X /home/dajobe/dev/dontdiff -x raptor.rdf -x file1.txt -x xmlent1.rdf -x rapper -x rdfdiff raptor-1.4.21.orig/src/raptor_xml_writer.c raptor-1.4.21/src/raptor_xml_writer.c --- src/raptor_xml_writer.c 2010-01-29 15:54:42.000000000 -0800 +++ src/raptor_xml_writer.c 2012-02-04 15:29:56.000000000 -0800 @@ -973,6 +973,7 @@ /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_RELATIVE_URIS: @@ -1094,6 +1095,7 @@ /* Shared */ case RAPTOR_FEATURE_NO_NET: + case RAPTOR_FEATURE_LOAD_EXTERNAL_ENTITIES: /* XML writer features */ case RAPTOR_FEATURE_RELATIVE_URIS: