# Imports
from d3blocks import D3Blocks  
# https://d3blocks.github.io/d3blocks/pages/html
from datetime import datetime as dt
import json
import pandas as pd
from pprint import pp
from urllib import request

# ISO-style run date (YYYY-MM-DD, local time); used to stamp the names of
# the chart files written further down.
date = f"{dt.now():%Y-%m-%d}"
date

'2024-11-08'

# Download the eForms SDK field metadata from GitHub.
# Note that this is the raw-content URL of `fields.json`.
metadata_url = "https://raw.githubusercontent.com/OP-TED/eForms-SDK/refs/heads/develop/fields/fields.json"
# The timeout (seconds) keeps the notebook from hanging indefinitely when the
# host is unreachable; the connection is only held open while the JSON body
# is being read and parsed.
with request.urlopen(metadata_url, timeout=30) as url:
    data = json.load(url)

# Print info about the metadata file
ubl_version = data["ublVersion"]
sdk_version = data["sdkVersion"]
metadata_database = data["metadataDatabase"]
print(f"{ubl_version = }\n{sdk_version = }\n{metadata_database = }")

# Get the xmlStructure (array of objects)
xml_structure = data["xmlStructure"]

# Look at the first three elements of the XML-structure
print("\nNumber of elements in xml_structure: ", len(xml_structure))
pp(xml_structure[:3])

ubl_version = '2.3'
sdk_version = 'eforms-sdk-1.14.0-SNAPSHOT'
metadata_database = {'version': '1.13.23', 'createdOn': '2024-10-31T15:55:40'}

Number of elements in xml_structure:  307
[{'id': 'ND-Root',
  'xpathAbsolute': '/*',
  'xpathRelative': '/*',
  'repeatable': False},
 {'id': 'ND-GazetteReference',
  'parentId': 'ND-Root',
  'xpathAbsolute': '/*/cac:AdditionalDocumentReference',
  'xpathRelative': 'cac:AdditionalDocumentReference',
  'xsdSequenceOrder': [{'cac:AdditionalDocumentReference': 34}],
  'repeatable': False},
 {'id': 'ND-BusinessCapability',
  'parentId': 'ND-Root',
  'xpathAbsolute': '/*/cac:BusinessCapability',
  'xpathRelative': 'cac:BusinessCapability',
  'xsdSequenceOrder': [{'cac:BusinessCapability': 35}],
  'repeatable': True}]

# Turn the list of JSON objects into a DataFrame in which
# each row represents one XML element.
df_xml = pd.DataFrame(data=xml_structure)
# Persist `df_xml` as CSV for later use.
df_xml.to_csv(path_or_buf="df_xml.csv", encoding="UTF-8", index=False)
# Report the (rows, columns) shape and preview the first 5 rows.
print(df_xml.shape)
df_xml.head(5)

(307, 9)

# Keep only the child -> parent relation (`id`, `parentId`) of every element
# except the first row, which is the root element and has no parent.
# Selecting first and calling `.copy()` last yields an independent frame, so
# the column assignments below cannot trigger pandas' SettingWithCopyWarning
# (assigning to a plain row slice can), and only two columns are copied.
df_tree = df_xml[["id", "parentId"]].iloc[1:].copy()
print("Shape etter fjerning av første rad: ", df_tree.shape)

# Remove the prefix `"ND-"` to increase readability.
# `str.removeprefix` strips only a leading "ND-"; `str.replace` would also
# delete any "ND-" occurring later in an identifier, and its pattern handling
# (regex vs. literal default) differs between pandas versions.
df_tree["id"] = df_tree["id"].str.removeprefix("ND-")
df_tree["parentId"] = df_tree["parentId"].str.removeprefix("ND-")

df_tree[:5]

Shape etter fjerning av første rad:  (306, 2)

# Shape the frame as an edge list: the parent (`parentId`) becomes `source`,
# the child (`id`) becomes `target`, and every edge gets a constant
# `weight` of 1.
df_tree = df_tree.rename(columns={"parentId": "source", "id": "target"}).assign(weight=1)
df_tree.head(5)

# Persist the edge list as CSV.
df_tree.to_csv("df_tree.csv", encoding="UTF-8", index=False)

# Render the edge list in `df_tree` with the d3blocks `d3graph` chart and
# write it to a dated HTML file.
# The three calls below are order-dependent: `d3graph(...)` is called first,
# then its edges are styled through `d3.D3graph`, and only then is it shown.

# Initialize D3Blocks
d3 = D3Blocks()

# Create graph
d3.d3graph(
    df=df_tree,
    title="eForms XML struktur",
    collision=0.7, # Default=0.5. Higher -> prevent more collisions.
    charge=500, # Default=400. Higher -> less dense.
    color="cluster", # Based on community distance clusters.
    size="degree", # Based on centrality measure.
    opacity="degree", # Based on centrality measure.
    support=False, # Remove ad
)  
# Set edge properties: draw the edges as directed, with an arrow marker at
# the end of each edge.
d3.D3graph.set_edge_properties(directed=True, marker_end='arrow')

# Show and save the graph.
# The output path is relative and embeds the run date (`date`).
# NOTE(review): presumably the `../charts` directory must already exist - confirm.
d3.D3graph.show(
    save_button=True, # Show save button
    show_slider=False, # Do not show slider
    figsize=(1500, 800),
    filepath=f"../charts/eForm-fields-graph-{date}.html"
)

def make_tree(df, filepath=None):
    """Render `df` as a D3Blocks ``tree`` chart and show it.

    Args:
        df: Data passed unchanged to both ``set_node_properties`` and
            ``set_edge_properties``. The caller in this file passes an edge
            list with ``source``, ``target`` and ``weight`` columns.
        filepath: Forwarded to ``show`` as the output location. Defaults to
            ``None`` (NOTE(review): presumably d3blocks then picks its own
            default location - confirm).

    Returns:
        None. The chart is produced as a side effect of ``show``.
    """
    # Display settings, collected up front and handed to `show` in one go.
    show_options = {
        "filepath": filepath,
        "save_button": True,  # Show save button
        "figsize": [2200, 3400],
        # If the font is too big, the text will overlap horizontally.
        "font": {"size": 12},
    }

    # Initialize D3Blocks for the tree chart.
    tree_chart = D3Blocks(support=False, frame=False, chart='tree', verbose=True)

    # Node properties are set before edge properties, both from the same frame.
    tree_chart.set_node_properties(df)
    tree_chart.set_edge_properties(df)

    # Show and save the chart.
    tree_chart.show(**show_options)

# Draw the tree chart from the edge list and write it to a dated HTML file.
make_tree(df=df_tree, filepath=f"../charts/eForm-fields-tree-{date}.html")

	id	xpathAbsolute	xpathRelative	repeatable	parentId	xsdSequenceOrder	identifierFieldId	captionFieldId	businessEntityId
0	ND-Root	/*	/*	False	NaN	NaN	NaN	NaN	NaN
1	ND-GazetteReference	/*/cac:AdditionalDocumentReference	cac:AdditionalDocumentReference	False	ND-Root	[{'cac:AdditionalDocumentReference': 34}]	NaN	NaN	NaN
2	ND-BusinessCapability	/*/cac:BusinessCapability	cac:BusinessCapability	True	ND-Root	[{'cac:BusinessCapability': 35}]	NaN	NaN	NaN
3	ND-BusinessParty	/*/cac:BusinessParty	cac:BusinessParty	False	ND-Root	[{'cac:BusinessParty': 32}]	NaN	NaN	NaN
4	ND-BusinessContact	/*/cac:BusinessParty/cac:Contact	cac:Contact	False	ND-BusinessParty	[{'cac:Contact': 15}]	NaN	NaN	NaN

1. Load the JSON file and convert it to a DataFrame

2. Adapt `df_xml` to the input format required by d3blocks

3. Create the graph and the tree with d3blocks

	id	parentId
1	GazetteReference	Root
2	BusinessCapability	Root
3	BusinessParty	Root
4	BusinessContact	BusinessParty
5	EuEntity	BusinessParty

1. Load the JSON file and convert it to a DataFrame

2. Adapt `df_xml` to the input format required by d3blocks

3. Create the graph and the tree with d3blocks

2. Adapt `df_xml` to the input format required by d3blocks