Create and load knowledge graph backups

Never lose your graphs again with backups!

Create a backup of an ArcGIS Knowledge graph to store its data model and data. The backup takes the form of json files that can be read to recreate your graph, or shared so that others can create the same graph on a different server.

Load a new graph easily from your backup files! Use them with the 'Load backup files' portion of this notebook to create a graph with the same data model and data.

Setup

Import necessary libraries

Start by importing the libraries we need for connecting to the portal, accessing the knowledge graph, and manipulating data as needed.

# imports
import os, json
from datetime import datetime
from uuid import UUID

from arcgis.gis import GIS
from arcgis.graph import KnowledgeGraph

Define folder and file names

Define the folder and file names up front so the backup and load steps stay consistent.

# output folder name (a raw string so the backslashes are not treated as escape characters)
output_folder = r"C:\backups\myknowledgegraph_backup"
os.makedirs(output_folder, exist_ok=True)  # make sure the folder exists before writing backups

# output backup json file names
dm_ent = "datamodel_entities.json"
dm_rel = "datamodel_relationships.json"
all_ent = "all_entities.json"
all_rel = "all_relationships.json"
prov_file = "provenance_entities.json"  # this will only be used if you want to backup provenance records

Create backup files

Connect to portal and knowledge graph

Connect to the portal and the knowledge graph on that portal, and verify that the knowledge graph service exists (that is, the URL is correct).

gis_backup = GIS("home")  # connect to portal
# connect to knowledge graph service
knowledgegraph_backup = KnowledgeGraph(
    "https://myportal.com/server/rest/services/Hosted/myknowledgegraph/KnowledgeGraphServer",
    gis=gis_backup,
)
try:
    knowledgegraph_backup.datamodel
except Exception as e:
    raise Exception("Knowledge graph to backup does not exist") from e
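As a quick sanity check before backing anything up, you can summarize the data model. A minimal sketch, assuming only the datamodel dictionary used throughout this notebook:

# print a short summary of what will be backed up
dm = knowledgegraph_backup.datamodel
print(len(dm["entity_types"]), "entity types,", len(dm["relationship_types"]), "relationship types")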

Write data model entity types to backup json file

Iterate through the data model of the knowledge graph to write all entity type objects to a backup json file

# create list of formatted entity types
entity_types = []
for types in knowledgegraph_backup.datamodel["entity_types"]:
    curr_entity_type = {
        "name": knowledgegraph_backup.datamodel["entity_types"][types]["name"],
        "properties": knowledgegraph_backup.datamodel["entity_types"][types][
            "properties"
        ],
    }
    entity_types.append(curr_entity_type)
# write entity types to json file
with open(os.path.join(output_folder, dm_ent), "w") as f:
    json.dump(entity_types, f)

Write data model relationship types to backup json file

Iterate through the data model of the knowledge graph to write all relationship type objects to a backup json file

# create list of formatted relationship types
relationship_types = []
for types in knowledgegraph_backup.datamodel["relationship_types"]:
    curr_relationship_type = {
        "name": knowledgegraph_backup.datamodel["relationship_types"][types]["name"],
        "properties": knowledgegraph_backup.datamodel["relationship_types"][types][
            "properties"
        ],
    }
    relationship_types.append(curr_relationship_type)
# write relationship types to json file
with open(os.path.join(output_folder, dm_rel), "w") as f:
    json.dump(relationship_types, f)

Write entities to backup json file

Get all entities from the knowledge graph using a streaming query, clean them up so they can be written to json and reloaded later, and save the resulting entities to a json backup file.

# query for all entities in graph
original_entities = knowledgegraph_backup.query_streaming(
    "MATCH (n) RETURN distinct n"
)
# create list of formatted entities for the backup
all_entities_fromquery = []
for entity in list(original_entities):
    curr_entity = entity[0]
    # convert UUID values to a string since json can't store UUIDs
    curr_entity["_id"] = str(curr_entity["_id"])
    for prop in curr_entity["_properties"]:
        if type(curr_entity["_properties"][prop]) == UUID:
            curr_entity["_properties"][prop] = str(curr_entity["_properties"][prop])
    # delete objectid; the server will create new ones when we load the backup
    del curr_entity["_properties"]["objectid"]
    all_entities_fromquery.append(curr_entity)
# write entities list to json file
with open(os.path.join(output_folder, all_ent), "w") as f:
    json.dump(all_entities_fromquery, f)
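It can be worth confirming that the file round-trips before moving on. A minimal sketch using only the standard library:

# read the backup back and report how many entities were written
with open(os.path.join(output_folder, all_ent), "r") as f:
    print("Backed up", len(json.load(f)), "entities")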

Write relationships to backup json file

Get all relationships from the knowledge graph using a streaming query, clean them up so they can be written to json and reloaded later, and save the resulting relationships to a json backup file.

# query for all relationships in graph
original_relationships = knowledgegraph_backup.query_streaming(
    "MATCH ()-[rel]->() RETURN distinct rel"
)
# create list of formatted relationships for the backup
all_relationships_fromquery = []
for relationship in list(original_relationships):
    curr_relationship = relationship[0]
    # convert UUID values to a string since json can't store UUIDs
    curr_relationship["_id"] = str(curr_relationship["_id"])
    curr_relationship["_originEntityId"] = str(curr_relationship["_originEntityId"])
    curr_relationship["_destinationEntityId"] = str(
        curr_relationship["_destinationEntityId"]
    )
    for prop in curr_relationship["_properties"]:
        if type(curr_relationship["_properties"][prop]) == UUID:
            curr_relationship["_properties"][prop] = str(
                curr_relationship["_properties"][prop]
            )
    # delete objectid; the server will create new ones when we load the backup
    del curr_relationship["_properties"]["objectid"]
    all_relationships_fromquery.append(curr_relationship)
# write relationships list to json file
with open(os.path.join(output_folder, all_rel), "w") as f:
    json.dump(all_relationships_fromquery, f)

OPTIONAL: Write provenance records to backup json file

If you have provenance records that you want to maintain in the backups, this step gets all provenance records and saves them to a json backup file.

# query for all provenance records in the graph
provenance_entities = knowledgegraph_backup.query_streaming(
    "MATCH (n:Provenance) RETURN distinct n", include_provenance=True
)
# create list of formatted provenance records for the backup
all_provenance_fromquery = []
for entity in list(provenance_entities):
    curr_provenance = entity[0]
    # convert UUID values to a string since json can't store UUIDs
    curr_provenance["_id"] = str(curr_provenance["_id"])
    for prop in curr_provenance["_properties"]:
        if type(curr_provenance["_properties"][prop]) == UUID:
            curr_provenance["_properties"][prop] = str(
                curr_provenance["_properties"][prop]
            )
    # delete objectid; the server will create new ones when we load the backup
    del curr_provenance["_properties"]["objectid"]
    all_provenance_fromquery.append(curr_provenance)
# write provenance list to json file
with open(os.path.join(output_folder, prov_file), "w") as f:
    json.dump(all_provenance_fromquery, f)

Load backup files

Connect to portal and create knowledge graph

Connect to the portal and create a new knowledge graph service to load data model and data into

# connect to portal via GIS
gis_load = GIS(
    "https://myportal.com/portal", "username", "password"
)
# create a knowledge graph without provenance enabled
result = gis_load.content.create_service(
    name="myknowledgegraph",
    capabilities="Query,Editing,Create,Update,Delete",
    service_type="KnowledgeGraph",
)
# OPTIONAL: replace the create_service call above with the call below to create a knowledge graph WITH provenance enabled
# result = gis_load.content.create_service(name="", service_type="KnowledgeGraph",create_params={"name": "myknowledgegraph", "capabilities": "Query", "jsonProperties": {"supportsProvenance": True}})
knowledgegraph_load = KnowledgeGraph(result.url, gis=gis_load)

Populate data model from saved json files

Populate entity and relationship types from saved json files. This is using the variables defined in the 'Setup' section above.

# load data model json files into graph data model
with open(os.path.join(output_folder, dm_ent), "r") as file:
    dm_ents = json.load(file)
with open(os.path.join(output_folder, dm_rel), "r") as file:
    dm_rels = json.load(file)
knowledgegraph_load.named_object_type_adds(
    entity_types=dm_ents, relationship_types=dm_rels
)
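Optionally, confirm the types were created before loading any data. A minimal sketch (note the new service also includes its automatically created document types, so counts may differ slightly from the backup):

# confirm the new graph's data model now contains the backed-up types
check_dm = knowledgegraph_load.datamodel
print(len(check_dm["entity_types"]), "entity types,", len(check_dm["relationship_types"]), "relationship types")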

Get original document type names to correctly load data

This section gets the names of the document entity and relationship types from the original graph's data model. When a knowledge graph is created with the commands above, the document types get the default names 'Document' and 'HasDocument'. In case they were named differently in the original knowledge graph, this step lets us match them up in the later loading steps.

# get document entity type name
doc_type_name = "Document"
for entity_type in dm_ents:
    for prop in entity_type["properties"]:
        if entity_type["properties"][prop]["role"] == "esriGraphNamedObjectDocument":
            doc_type_name = entity_type["name"]
# get document relationship type name
doc_rel_type_name = "HasDocument"
for relationship_type in dm_rels:
    for prop in relationship_type["properties"]:
        if (
            relationship_type["properties"][prop]["role"]
            == "esriGraphNamedObjectDocument"
        ):
            doc_rel_type_name = relationship_type["name"]

Add additional document entity and relationship type properties

If additional properties exist on the document entity or relationship types in the original graph, we need to add them with graph_property_adds, since those types were created automatically when the knowledge graph service was created.

# load any additional document entity type properties
origin_document_properties = None
for entity_type in dm_ents:
    if entity_type["name"] == doc_type_name:
        origin_document_properties = entity_type["properties"]
prop_list = []
for prop in origin_document_properties:
    prop_list.append(origin_document_properties[prop])
knowledgegraph_load.graph_property_adds(
    type_name="Document", graph_properties=prop_list
)
# load any additional document relationship type properties
for relationship_type in dm_rels:
    if relationship_type["name"] == doc_rel_type_name:
        origin_document_rel_properties = relationship_type["properties"]
prop_list = []
for prop in origin_document_rel_properties:
    prop_list.append(origin_document_rel_properties[prop])
knowledgegraph_load.graph_property_adds(
    type_name="HasDocument", graph_properties=prop_list
)

Get list of date properties from the data model json files

From the data model json files, find and make a list of date properties so we can correctly create datetime objects when loading the data into the knowledge graph.

date_properties = []
# add date property names for entity types
for types in dm_ents:
    for prop in types["properties"]:
        if types["properties"][prop]["fieldType"] == "esriFieldTypeDate":
            date_properties.append(prop)
# add date property names for relationship types
for types in dm_rels:
    for prop in types["properties"]:
        if types["properties"][prop]["fieldType"] == "esriFieldTypeDate":
            date_properties.append(prop)

Add all entities to the knowledge graph

Add all entities from the json file to the knowledge graph, formatting UUIDs, dates, and document type names before loading the values. The data is loaded in batches of 20,000 entities.

# load entities json file
with open(os.path.join(output_folder, all_ent), "r") as file:
    original_entities = json.load(file)
batch = []
for curr_entity in original_entities:
    # once a batch reaches 20k records, apply that batch of edits to the knowledge graph
    if len(batch) >= 20000:
        result = knowledgegraph_load.apply_edits(adds=batch)
        batch = []
        # print error if one occurs during edit operation
        if "error" in result:
            print(result["error"])
        else:
            print("No error adding entities")
    # in case original document type name is different, change name to Document
    if curr_entity["_typeName"] == doc_type_name:
        curr_entity["_typeName"] = "Document"
    # format UUID and date properties
    for prop in curr_entity["_properties"]:
        if prop in date_properties:
            try:
                curr_entity["_properties"][prop] = datetime.fromtimestamp(
                    int(curr_entity["_properties"][prop] / 1000)
                )
            except:
                curr_entity["_properties"][prop] = None
        try:
            curr_entity["_properties"][prop] = UUID(curr_entity["_properties"][prop])
        except:
            continue
    # format id UUID
    curr_entity["_id"] = UUID(curr_entity["_id"])
    batch.append(curr_entity)
# apply final batch of edits to the knowledge graph
result = knowledgegraph_load.apply_edits(adds=batch)
# print error if one occurs during edit operation
if "error" in result:
    print(result["error"])
else:
    print("No error adding entities")

Add all relationships to the knowledge graph

Add all relationships from the json file to the knowledge graph, formatting UUIDs, dates, and document type names before loading the values. The data is loaded in batches of 20,000 relationships.

# load relationships json file
with open(os.path.join(output_folder, all_rel), "r") as file:
    original_rels = json.load(file)
batch = []
for curr_relationship in original_rels:
    # once a batch reaches 20k records, apply that batch of edits to the knowledge graph
    if len(batch) >= 20000:
        result = knowledgegraph_load.apply_edits(adds=batch)
        batch = []
        # print error if one occurs during edit operation
        if "error" in result:
            print(result["error"])
        else:
            print("No error adding relationships")
    # in case original document type name is different, change name to HasDocument
    if curr_relationship["_typeName"] == doc_rel_type_name:
        curr_relationship["_typeName"] = "HasDocument"
    # format UUID and date properties
    for prop in curr_relationship["_properties"]:
        if prop in date_properties:
            try:
                curr_relationship["_properties"][prop] = datetime.fromtimestamp(
                    int(curr_relationship["_properties"][prop] / 1000)
                )
            except:
                curr_relationship["_properties"][prop] = None
        try:
            curr_relationship["_properties"][prop] = UUID(
                curr_relationship["_properties"][prop]
            )
        except:
            continue
    # format other relationship specific UUIDs
    curr_relationship["_id"] = UUID(curr_relationship["_id"])
    curr_relationship["_originEntityId"] = UUID(curr_relationship["_originEntityId"])
    curr_relationship["_destinationEntityId"] = UUID(
        curr_relationship["_destinationEntityId"]
    )
    batch.append(curr_relationship)
# apply final batch of edits to the knowledge graph
result = knowledgegraph_load.apply_edits(adds=batch)
# print error if one occurs during edit operation
if "error" in result:
    print(result["error"])
else:
    print("No error adding relationships")

Add search indexes to all text properties

Adding search indexes to all text properties allows the values in those properties to be found when using search in any client.

load_dm = knowledgegraph_load.datamodel
# add search indexes for all entity text properties
for entity_type in load_dm["entity_types"]:
    prop_list = []
    for prop in load_dm["entity_types"][entity_type]["properties"]:
        if (
            load_dm["entity_types"][entity_type]["properties"][prop]["fieldType"]
            == "esriFieldTypeString"
        ):
            prop_list.append(prop)
    knowledgegraph_load.update_search_index(
        adds={entity_type: {"property_names": prop_list}}
    )
# add search indexes for all relationship text properties
for relationship_type in load_dm["relationship_types"]:
    prop_list = []
    for prop in load_dm["relationship_types"][relationship_type]["properties"]:
        if (
            load_dm["relationship_types"][relationship_type]["properties"][prop][
                "fieldType"
            ]
            == "esriFieldTypeString"
        ):
            prop_list.append(prop)
    knowledgegraph_load.update_search_index(
        adds={relationship_type: {"property_names": prop_list}}
    )

OPTIONAL: Add provenance records to the knowledge graph

This will only apply if you created a backup of provenance records and have enabled provenance on your knowledge graph service.

# load provenance records json file
with open(os.path.join(output_folder, prov_file), "r") as file:
    prov_entities = json.load(file)
# add all provenance records
for curr_prov in prov_entities:
    # format UUID properties
    for prop in curr_prov["_properties"]:
        try:
            curr_prov["_properties"][prop] = UUID(curr_prov["_properties"][prop])
        except:
            continue
    # format id as UUID
    curr_prov["_id"] = UUID(curr_prov["_id"])
    # add provenance record
    knowledgegraph_load.apply_edits(adds=[curr_prov])
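As a final check, a quick count can confirm the provenance records loaded. A minimal sketch reusing the streaming query pattern from the backup section:

# count provenance records in the new graph (requires include_provenance=True)
prov_check = knowledgegraph_load.query_streaming(
    "MATCH (n:Provenance) RETURN count(n)", include_provenance=True
)
print("Loaded provenance records:", list(prov_check)[0][0])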
