# Source code for revelionn.concept_extraction

import networkx as nx
from semantic_loss_pytorch import SemanticLoss


class ConceptExtractor:
    """
    A class that provides concept extraction algorithms.

    Attributes
    ----------
    ontology : nxontology.NXOntology
        Ontology represented as a graph, where edge direction goes from superterm to subterm.
    trainer : MappingTrainer
        An instance of the MappingTrainer class that provides an interface for training mapping networks.

    Methods
    -------
    create_subgraph(graph, node)
        Returns a subgraph containing the given node and all nodes reachable from it.
    order_concepts(target_concept, ontology)
        Performs topological sorting of the edges of the subgraph formed by a given parent node
        (target concept).
    exhaustive_search(concept, layer_names, mapping_neurons)
        Trains and evaluates mapping networks based on the activations of each of the specified layers
        of the network.
    linear_search(concept, top_layer_num, patience_layers, mapping_neurons)
        Trains and evaluates mapping networks based on the activations of each of the layers starting
        from the specified one, until the value of the quality metric deteriorates over several layers
        (the value of patience).
    heuristic_search(target_concept, top_layer_num, patience_layers, mapping_neurons)
        Due to the heuristic reduction of the set of specified layers, mapping networks are not trained
        for every combination of layer-concept. Uses linear search.
    simultaneous_extraction(target_concept, decoder_channels, num_shared_neurons, num_output_neurons,
                            sdd_path=None, vtree_path=None, sem_loss_weight=None, unlabeled_samples=None)
        Trains a mapping network that can simultaneously extract a set of relevant concepts from the
        entire set of layers of specified types (the types are set when initializing the MappingTrainer
        instance).
    """

    def __init__(self, mapping_trainer, nxonto):
        """
        Sets all the necessary attributes for the ConceptExtractor object.

        Parameters
        ----------
        mapping_trainer : MappingTrainer
            An instance of the MappingTrainer class that provides an interface for training mapping
            networks.
        nxonto : nxontology.NXOntology
            Ontology represented as a graph, where edge direction goes from superterm to subterm.
        """
        self.ontology = nxonto
        self.trainer = mapping_trainer

    @staticmethod
    def create_subgraph(graph, node):
        """
        Returns a subgraph containing the given node and every node reachable from it.

        Parameters
        ----------
        graph : networkx.Graph
            The graph from which to extract the subgraph.
        node : str
            The node for which to create the subgraph.

        Returns
        -------
        networkx.Graph
            A subgraph view of `graph` induced by `node` and all nodes found by a depth-first
            traversal starting at `node`. For a node without successors the subgraph consists of
            `node` alone.
        """
        # Seed with the root itself: dfs_successors() returns an empty mapping for a node that has
        # no successors, which would otherwise produce an empty subgraph.
        reachable = {node}
        for children in nx.dfs_successors(graph, node).values():
            reachable.update(children)
        return graph.subgraph(reachable)

    def order_concepts(self, target_concept, ontology):
        """
        Performs topological sorting of the edges of the subgraph formed by a given parent node
        (target concept).

        Parameters
        ----------
        target_concept : str
            The target concept node that forms the subgraph.
        ontology : nxontology.NXOntology
            The ontology whose `graph` attribute is searched.

        Returns
        -------
        list
            The nodes of the line graph of the subgraph in topological order. Each item identifies
            an edge of the subgraph (for a plain directed graph, a ``(source, target)`` tuple), not
            a single concept. The list is empty when the subgraph has no edges.
        """
        subgraph = self.create_subgraph(ontology.graph, target_concept)
        # The line graph has one node per edge of `subgraph`, so the sort below orders edges.
        return list(nx.topological_sort(nx.line_graph(subgraph)))

    def simultaneous_extraction(self, target_concept, decoder_channels, num_shared_neurons, num_output_neurons,
                                sdd_path=None, vtree_path=None, sem_loss_weight=None, unlabeled_samples=None):
        """
        Trains and evaluates a mapping network that simultaneously extracts the concepts relevant to
        the target concept.

        Parameters
        ----------
        target_concept : str
            The target concept that should be obtained by ontological inference. Mapping networks are
            trained to extract concepts relevant to the target concept.
        decoder_channels : int
            The number of decoder channels. The output number of channels of the convolutional layer
            of the decoder or the output number of neurons of the decoder of the fully connected layer.
        num_shared_neurons : list[int]
            The number of neurons in consecutive fully connected layers of the common part of the
            network (internal representation of the simultaneous extraction network).
        num_output_neurons : list[int]
            The number of neurons in consecutive fully connected layers of each of the concept blocks.
        sdd_path : str, optional
            The path to the .sdd file. If None, the network is trained without semantic loss and the
            remaining optional parameters are not used.
        vtree_path : str, optional
            The path to the .vtree file. Used only when `sdd_path` is given.
        sem_loss_weight : float, optional
            The contribution of semantic loss to the overall loss function. Used only when `sdd_path`
            is given.
        unlabeled_samples : int or float, optional
            The number of unlabeled samples to include. If float, it represents the fraction of
            unlabeled samples. Used only when `sdd_path` is given.

        Returns
        -------
        concepts_auc : list[float]
            ROC AUC values for each of the concepts.
        all_auc : float
            ROC AUC value for all labels of a simultaneous mapping network.

        Raises
        ------
        ValueError
            If the ontology contains no concepts below `target_concept`.
        """
        subgraph = self.create_subgraph(self.ontology.graph, target_concept)
        # A subgraph view is read-only and has no list-style remove(); collect the concept names
        # into a list and leave the target concept out.
        concepts = [concept for concept in subgraph.nodes if concept != target_concept]
        if not concepts:
            raise ValueError(
                f"No concepts other than the target concept {target_concept!r} were found in the ontology subgraph."
            )

        if sdd_path is None:
            self.trainer.train_simultaneous_model(concepts, decoder_channels, num_shared_neurons,
                                                  num_output_neurons)
        else:
            semantic_loss = SemanticLoss(sdd_path, vtree_path)
            self.trainer.train_simultaneous_model_semisupervised(concepts, decoder_channels, num_shared_neurons,
                                                                 num_output_neurons, semantic_loss,
                                                                 sem_loss_weight, unlabeled_samples)

        concepts_auc, all_auc = self.trainer.evaluate_model()
        return concepts_auc, all_auc