From bee57e1f909a760ffd76dcdf6e6bc6efd5b5fce3 Mon Sep 17 00:00:00 2001 From: Tristan Penman Date: Sun, 14 Feb 2016 11:01:37 -0800 Subject: [PATCH] Update SchemaParser to fully support schema caching --- include/valijson/schema_parser.hpp | 360 ++++++++++++++++++++----- tests/test_fetch_document_callback.cpp | 4 +- 2 files changed, 288 insertions(+), 76 deletions(-) diff --git a/include/valijson/schema_parser.hpp b/include/valijson/schema_parser.hpp index 4649901..033cbe5 100644 --- a/include/valijson/schema_parser.hpp +++ b/include/valijson/schema_parser.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #ifdef __clang__ @@ -97,7 +98,7 @@ public: typename DocumentCache::Type docCache; SchemaCache schemaCache; try { - populateSchema(schema, node, node, schema, boost::none, "", + resolveThenPopulateSchema(schema, node, node, schema, boost::none, "", fetchDoc, NULL, NULL, docCache, schemaCache); } catch (...) { freeDocumentCache(docCache, freeDoc); @@ -118,7 +119,7 @@ private: typedef std::map Type; }; - typedef std::map > SchemaCache; + typedef std::map SchemaCache; /** * @brief Free memory used by fetched documents @@ -140,6 +141,52 @@ private: } } + /** + * @brief Find the absolute URI for a document, within a resolution scope + * + * This function captures five different cases that can occur when + * attempting to resolve a document URI within a particular resolution + * scope: + * + * - resolution scope not present, but absolute document URI is + * => document URI as-is + * - resolution scope not present, and document URI is relative or absent + * => no result + * - resolution scope is present, and document URI is a relative path + * => resolve document URI relative to resolution scope + * - resolution scope is present, and document URI is absolute + * => document URI as-is + * - resolution scope is present, but document URI is not + * => resolution scope as-is + * + * This function assumes that the resolution scope is absolute. + * + * When resolving a document URI relative to the resolution scope, the + * document URI should be used to replace the path, query and fragment + * portions of URI provided by the resolution scope. + */ + static boost::optional findAbsoluteDocumentUri( + const boost::optional resolutionScope, + const boost::optional documentUri) + { + if (resolutionScope) { + if (documentUri) { + if (internal::uri::isUriAbsolute(*documentUri)) { + return *documentUri; + } else { + return internal::uri::resolveRelativeUri( + *resolutionScope, *documentUri); + } + } else { + return *resolutionScope; + } + } else if (documentUri && internal::uri::isUriAbsolute(*documentUri)) { + return *documentUri; + } else { + return boost::none; + } + } + /** * @brief Extract a JSON Reference string from a node * @@ -189,6 +236,55 @@ private: return ""; } + /** + * @brief Search the schema cache for a schema matching a given key + * + * If the key is not present in the query cache, a NULL pointer will be + * returned, and the contents of the cache will remain unchanged. This is + * in contrast to the behaviour of the std::map [] operator, which would + * add the NULL pointer to the cache. + * + * @param schemaCache schema cache to query + * @param queryKey key to search for + * + * @return shared pointer to Schema if found, NULL pointer otherwise + */ + static const Subschema * querySchemaCache(SchemaCache &schemaCache, + const std::string &queryKey) + { + const SchemaCache::iterator itr = schemaCache.find(queryKey); + if (itr == schemaCache.end()) { + return NULL; + } + + return itr->second; + } + + /** + * @brief Add entries to the schema cache for a given list of keys + * + * @param schemaCache schema cache to update + * @param keysToCreate list of keys to create entries for + * @param schema shared pointer to schema that keys will map to + * + * @throws std::logic_error if any of the keys are already present in the + * schema cache. This behaviour is intended to help detect incorrect + * usage of the schema cache during development, and is not expected + * to occur otherwise, even for malformed schemas. + */ + void updateSchemaCache(SchemaCache &schemaCache, + const std::vector &keysToCreate, + const Subschema *schema) + { + BOOST_FOREACH( const std::string &keyToCreate, keysToCreate ) { + const SchemaCache::value_type value(keyToCreate, schema); + if (!schemaCache.insert(value).second) { + throw std::logic_error( + "Key '" + keyToCreate + "' already in schema cache."); + } + } + } + /** * @brief Recursive helper function for retrieving or creating schemas * @@ -234,12 +330,133 @@ private: SchemaCache &schemaCache, std::vector &newCacheKeys) { - const Subschema *subschema = rootSchema.createSubschema(); - populateSchema(rootSchema, rootNode, node, *subschema, - currentScope, nodePath, fetchDoc, parentSubschema, ownName, - docCache, schemaCache); + std::string jsonRef; - return subschema; + // Check for the first termination condition (found a non-$ref node) + if (!extractJsonReference(node, jsonRef)) { + + // Construct a key that we can use to search the schema cache for + // a schema corresponding to the current node + const std::string schemaCacheKey = + currentScope ? (*currentScope + nodePath) : nodePath; + + // Retrieve an existing schema from the cache if possible + const Subschema *cachedPtr = + querySchemaCache(schemaCache, schemaCacheKey); + + // Create a new schema otherwise + const Subschema *subschema = cachedPtr ? cachedPtr : + rootSchema.createSubschema(); + + // Add cache entries for keys belonging to any $ref nodes that were + // visited before arriving at the current node + updateSchemaCache(schemaCache, newCacheKeys, subschema); + + // Schema cache did not contain a pre-existing schema corresponding + // to the current node, so the schema that was returned will need + // to be populated + if (!cachedPtr) { + populateSchema(rootSchema, rootNode, node, *subschema, + currentScope, nodePath, fetchDoc, parentSubschema, + ownName, docCache, schemaCache); + } + + return subschema; + } + + // Returns a document URI if the reference points somewhere + // other than the current document + const boost::optional documentUri = + internal::json_reference::getJsonReferenceUri(jsonRef); + + // Extract JSON Pointer from JSON Reference, with any trailing + // slashes removed so that keys in the schema cache end + // consistently + const std::string actualJsonPointer = sanitiseJsonPointer( + internal::json_reference::getJsonReferencePointer(jsonRef)); + + // Determine the actual document URI based on the resolution + // scope. An absolute document URI will take precedence when + // present, otherwise we need to resolve the URI relative to + // the current resolution scope + const boost::optional actualDocumentUri = + findAbsoluteDocumentUri(currentScope, documentUri); + + // Construct a key to search the schema cache for an existing schema + const std::string queryKey = actualDocumentUri ? + (*actualDocumentUri + actualJsonPointer) : actualJsonPointer; + + // Check for the second termination condition (found a $ref node that + // already has an entry in the schema cache) + const Subschema *cachedPtr = querySchemaCache(schemaCache, queryKey); + if (cachedPtr) { + updateSchemaCache(schemaCache, newCacheKeys, cachedPtr); + return cachedPtr; + } + + if (actualDocumentUri) { + const AdapterType *newDoc = NULL; + + // Have we seen this document before? + typename DocumentCache::Type::iterator docCacheItr = + docCache.find(*actualDocumentUri); + if (docCacheItr == docCache.end()) { + // Resolve reference against remote document + if (!fetchDoc) { + throw std::runtime_error( + "Fetching of remote JSON References not enabled."); + } + + // Returns a pointer to the remote document that was + // retrieved, or null if retrieval failed. This class + // will take ownership of the pointer, and call freeDoc + // when it is no longer needed. + newDoc = fetchDoc(*actualDocumentUri); + + // Can't proceed without the remote document + if (!newDoc) { + throw std::runtime_error( + "Failed to fetch referenced schema document: " + + *actualDocumentUri); + } + + typedef typename DocumentCache::Type::value_type + DocCacheValueType; + + docCache.insert(DocCacheValueType(*actualDocumentUri, newDoc)); + + } else { + newDoc = docCacheItr->second; + } + + // Find where we need to be in the document + const AdapterType &referencedAdapter = + internal::json_pointer::resolveJsonPointer(*newDoc, + actualJsonPointer); + + newCacheKeys.push_back(queryKey); + + // Populate the schema, starting from the referenced node, with + // nested JSON References resolved relative to the new root node + return makeOrReuseSchema(rootSchema, *newDoc, referencedAdapter, + currentScope, actualJsonPointer, fetchDoc, parentSubschema, + ownName, docCache, schemaCache, newCacheKeys); + + } + + // JSON References in nested schema will be resolved relative to the + // current document + const AdapterType &referencedAdapter = + internal::json_pointer::resolveJsonPointer( + rootNode, actualJsonPointer); + + newCacheKeys.push_back(queryKey); + + // Populate the schema, starting from the referenced node, with + // nested JSON References resolved relative to the new root node + return makeOrReuseSchema(rootSchema, rootNode, referencedAdapter, + currentScope, actualJsonPointer, fetchDoc, parentSubschema, + ownName, docCache, schemaCache, newCacheKeys); } /** @@ -329,14 +546,6 @@ private: "SchemaParser::populateSchema must be invoked with an " "appropriate Adapter implementation"); - std::string jsonRef; - if (extractJsonReference(node, jsonRef)) { - populateSchemaUsingJsonReference(rootSchema, jsonRef, rootNode, - node, subschema, currentScope, nodePath, fetchDoc, - parentSubschema, ownName, docCache, schemaCache); - return; - } - const typename AdapterType::Object object = node.asObject(); typename AdapterType::Object::const_iterator itr(object.end()); @@ -614,98 +823,101 @@ private: } /** - * @brief Populate a schema using a JSON Reference + * @brief Resolves a chain of JSON References before populating a schema * - * Allows JSON references to be used with minimal changes to the parser - * helper functions. + * This helper function is used directly by the publicly visible + * populateSchema function. It ensures that the node being parsed is a + * concrete node, and not a JSON Reference. This function will call itself + * recursively to resolve references until a concrete node is found. * - * @param rootSchema The Schema instance, and root subschema, through - * which other subschemas can be created and - * modified - * @param jsonRef String containing JSON Reference value - * @param rootNode Reference to the node from which JSON References - * will be resolved when they refer to the current - * document; used for recursive parsing of schemas - * @param node Reference to node to parse - * @param schema Reference to Schema to populate - * @param currentScope URI for current resolution scope - * @param nodePath JSON Pointer representing path to current node - * @param fetchDoc Optional function to fetch remote JSON documents - * @param parentSubschema Optional pointer to the parent schema, used to - * support required keyword in Draft 3 - * @param ownName Optional pointer to a node name, used to support - * the 'required' keyword in Draft 3 - */ + * @param rootSchema The Schema instance, and root subschema, through + * which other subschemas can be created and modified + * @param rootNode Reference to the node from which JSON References + * will be resolved when they refer to the current + * document + * @param node Reference to node to parse + * @param subschema Reference to Schema to populate + * @param currentScope URI for current resolution scope + * @param nodePath JSON Pointer representing path to current node + * @param fetchDoc Function to fetch remote JSON documents (optional) + * @param parentSchema Optional pointer to the parent schema, used to + * support required keyword in Draft 3 + * @param ownName Optional pointer to a node name, used to support + * the 'required' keyword in Draft 3 + * @param docCache Cache of resolved and fetched remote documents + * @param schemaCache Cache of populated schemas + */ template - void populateSchemaUsingJsonReference( + void resolveThenPopulateSchema( Schema &rootSchema, - const std::string &jsonRef, const AdapterType &rootNode, - const AdapterType &, + const AdapterType &node, const Subschema &subschema, const boost::optional currentScope, const std::string &nodePath, const typename FunctionPtrs::FetchDoc fetchDoc, - const Subschema *parentSubschema, + const Subschema *parentSchema, const std::string *ownName, typename DocumentCache::Type &docCache, SchemaCache &schemaCache) { + std::string jsonRef; + if (!extractJsonReference(node, jsonRef)) { + populateSchema(rootSchema, rootNode, node, subschema, currentScope, + nodePath, fetchDoc, parentSchema, ownName, docCache, + schemaCache); + return; + } + // Returns a document URI if the reference points somewhere // other than the current document const boost::optional documentUri = internal::json_reference::getJsonReferenceUri(jsonRef); // Extract JSON Pointer from JSON Reference - const std::string jsonPointer = sanitiseJsonPointer( + const std::string actualJsonPointer = sanitiseJsonPointer( internal::json_reference::getJsonReferencePointer(jsonRef)); - if (documentUri) { + if (documentUri && internal::uri::isUriAbsolute(*documentUri)) { // Resolve reference against remote document if (!fetchDoc) { throw std::runtime_error( - "Support for JSON References not enabled."); + "Fetching of remote JSON References not enabled."); } - const AdapterType * docPtr = NULL; - const typename DocumentCache::Type::const_iterator - docCacheItr = docCache.find(*documentUri); - if (docCacheItr == docCache.end()) { - // Returns a shared pointer to the remote document that was - // retrieved, or null if retrieval failed. The resulting - // document must remain in scope until populateSchema returns. - docPtr = (*fetchDoc)(*documentUri); + const AdapterType *newDoc = fetchDoc(*documentUri); - // Can't proceed without the remote document - if (!docPtr) { - throw std::runtime_error( - "Failed to fetch referenced schema document."); - } - - // TODO: If this fails, how would the document be freed? - docCache.insert( - typename DocumentCache::Type::value_type( - *documentUri, docPtr)); - } else { - docPtr = docCacheItr->second; + // Can't proceed without the remote document + if (!newDoc) { + throw std::runtime_error( + "Failed to fetch referenced schema document: " + + *documentUri); } - const AdapterType &ref = internal::json_pointer::resolveJsonPointer( - *docPtr, jsonPointer); + // Add to document cache + typedef typename DocumentCache::Type::value_type + DocCacheValueType; - // Resolve reference against retrieved document - populateSchema(rootSchema, ref, ref, subschema, - currentScope, nodePath, fetchDoc, parentSubschema, ownName, - docCache, schemaCache); + docCache.insert(DocCacheValueType(*documentUri, newDoc)); + + const AdapterType &referencedAdapter = + internal::json_pointer::resolveJsonPointer( + *newDoc, actualJsonPointer); + + // TODO: Need to detect degenerate circular references + resolveThenPopulateSchema(rootSchema, *newDoc, referencedAdapter, + schema, boost::none, actualJsonPointer, fetchDoc, + parentSchema, ownName, docCache, schemaCache); } else { - const AdapterType &ref = internal::json_pointer::resolveJsonPointer( - rootNode, jsonPointer); + const AdapterType &referencedAdapter = + internal::json_pointer::resolveJsonPointer( + rootNode, actualJsonPointer); - // Resolve reference against current document - populateSchema(rootSchema, rootNode, ref, subschema, - currentScope, nodePath, fetchDoc, parentSubschema, ownName, - docCache, schemaCache); + // TODO: Need to detect degenerate circular references + resolveThenPopulateSchema(rootSchema, rootNode, referencedAdapter, + schema, boost::none, actualJsonPointer, fetchDoc, + parentSchema, ownName, docCache, schemaCache); } } diff --git a/tests/test_fetch_document_callback.cpp b/tests/test_fetch_document_callback.cpp index 6f636e2..3103cd9 100644 --- a/tests/test_fetch_document_callback.cpp +++ b/tests/test_fetch_document_callback.cpp @@ -28,7 +28,7 @@ class TestFetchDocumentCallback : public ::testing::Test const RapidJsonAdapter * fetchDocument(const std::string &uri) { - EXPECT_STREQ("test", uri.c_str()); + EXPECT_STREQ("http://localhost:1234/", uri.c_str()); rapidjson::Value valueOfTypeAttribute; valueOfTypeAttribute.SetString("string", allocator); @@ -60,7 +60,7 @@ TEST_F(TestFetchDocumentCallback, Basics) rapidjson::Document schemaDocument; RapidJsonAdapter schemaDocumentAdapter(schemaDocument); schemaDocument.SetObject(); - schemaDocument.AddMember("$ref", "test#/", allocator); + schemaDocument.AddMember("$ref", "http://localhost:1234/#/", allocator); // Parse schema document Schema schema;