Source code for procars.utils.util_adjacency_functions

# -*- coding: utf-8 -*-:
Copyright © Bonsai - LIFL (Université Lille 1, CNRS UMR 8022) and Inria-Lille Nord Europe


This software is a computer program whose purpose is to progressively reconstruct ancestral
gene orders.

This software is governed by the CeCILL-B license under French law and
abiding by the rules of distribution of free software. You can use,
modify and/or redistribute the software under the terms of the CeCILL-B
license as circulated by CEA, CNRS and Inria at the following URL, or in the LICENCE file at the root directory of this program.

The fact that you are presently reading this means that you have had
knowledge of the CeCILL-B license and that you accept its terms.


``util_adjacency_functions`` **module description**:

Module containing methods used by other modules like ``compute_resolved_conflicts`` and
``compute_conserved_adjacencies`` in order to find required adjacencies.

It contains for exemple methods in order to:

    - split a set of adjacencies in a set of conflicting adjacencies and a set of
      non-conflicting ones.
    - Find the block numbers involved in a given adjacency according to the adjacency ID
      in the matrix

.. moduleauthor:: Aïda Ouangraoua, Amandine PERRIN

February 2014

from procars.utils.util_classes import SparseMatrix

[docs]def split_adjacencymatrix(adjacencymatrix, nb_blocks): """ Function splitting an adjacency matrix into two matrices: the non-conflicting and the conflicting adjacency matrices Parameters ---------- adjacencymatrix : SparseMatrix 0-1 Matrix, the adjacency matrix (1 for each possible adjacency, 0 otherwise) nb_blocks : int Total number of blocks Returns ------- tuple of SparseMatrix *non_conflicting:* 0-1 Matrix containing non-conflicting adjs *conflicting:* 0-1 Matrix containing conflicting adjs. """ # Matrix containing only non conflicting adjacencies non_conflicting = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1) # Matrix containing only conflicting adjacencies conflicting = SparseMatrix(2 * nb_blocks + 1, 2 * nb_blocks + 1) # for each block adjacency in adjacencymatrix, check if it is conflicting or not for extremity_id1, y_ids in adjacencymatrix.sparse_matrix.iteritems(): for extremity_id2 in y_ids: if sum(adjacencymatrix[extremity_id1]) + sum(adjacencymatrix[extremity_id2]) == 2: non_conflicting[extremity_id1, extremity_id2] = 1 non_conflicting[extremity_id2, extremity_id1] = 1 else: conflicting[extremity_id1, extremity_id2] = 1 conflicting[extremity_id2, extremity_id1] = 1 return non_conflicting, conflicting
[docs]def compute_extremity_pair(adjacencymatrix): """ Function computing the extremity pairs stored into an adjacency matrix: it finds all couples of indexes for which an adjacency exists (=indexes for which adjacencymatrix[x, y] != 0) Parameters ---------- adjacencymatrix : SparseMatrix Sparse matrix, with 1 at indices of possible adjacencies, 0 otherwise Returns ------- list *extremity_pairs:* array containing pairs of integers (extremities) """ extremity_pairs = adjacencymatrix.pairs() extremity_pairs = [elem for elem in extremity_pairs if elem[0] < elem[1]] return extremity_pairs
[docs]def compute_adjacencies(extremity_pairs, nb_blocks): """ Function computing the CAR adjacencies (car1, car2) stored into an extremity pairs array Parameters ---------- extremity_pairs : list Array containing pairs of extremities (corresponding to indexes of SparseMatrix) nb_blocks : int Total number of blocks Returns ------- list *adjacencies:* array containing pairs of signed integers (adjacencies) """ adjacencies = [] # Array containing adjacencies (pairs of signed blocks) for pair in extremity_pairs: [extremity_id1, extremity_id2] = pair adj = [] if(extremity_id1 <= nb_blocks): adj.append(extremity_id1) else: adj.append(-(extremity_id1 - nb_blocks)) if(extremity_id2 <= nb_blocks): adj.append(-extremity_id2) else: adj.append(extremity_id2 - nb_blocks) adjacencies.append(adj) return adjacencies
[docs]def car_to_block(signed_car, side, cars): """ From a signed car, returns the signed block at the ``side`` extremity Parameters ---------- signed_car : int A given CAR ID side : int ``0`` for left end of the car, ``1`` for right end cars : list list of all cars Returns ------- int *signed block*, which is at the given end of the given car """ if(signed_car > 0): if(side == 1): signed_block = cars[signed_car][-1] elif(side == 0): signed_block = cars[signed_car][0] else: if(side == 1): signed_block = -cars[-signed_car][0] elif(side == 0): signed_block = -cars[-signed_car][-1] return signed_block
[docs]def find_adj_info(car_adjacency, car_left, car_right, species1_id, species2_id, species3_id): """ Find information on a car_adjacency: in which genomes it is present Parameters ---------- car_adjacency : tuple The given car adjacency (two car numbers) car_left : list For each car, left neighbor car_right : list For each car, right neighbor species1_id : list Ids of species in group1 in tree partition from ancestor species2_id : list Ids of species in group2 in tree partition from ancestor species3_id : list Ids of species in group3 in tree partition from ancestor Returns ------- list *labels:* a list of integers indicating if the adjacency is present (1) or absent (0) in each species """ supporting_species_labels = conservation_status(car_adjacency, car_left, car_right, species1_id, species2_id, species3_id)[1] supporting_species_labels = [str(lab) for lab in supporting_species_labels] labels = " " + " ".join(supporting_species_labels) return labels
[docs]def conservation_status(adjacency, left, right, species1_id, species2_id, species3_id): """ Function computing the conservation status of an adjacency *nbgroup* = number of group in which adjacency is present. If *nbgroup = 3*, the adjacency is 'fully'. If *nbgroup = 2*, the adjacency is partly. Otherwise, the adjacency is not conserved. Parameters ---------- adjacency : list Pair of signed integers left : list Array such that *left[block_id]* is an array containing all left neighbors of block *block_id* right : list Array such that *right[block_id]* is an array containing all right neighbors of *block_id* species1_id: list Ids of group1 species species2_id: list Ids of group2 species species3_id: list Ids of group3 species Returns ------- tuple *nbgroup:* integer, number of group(s) in which adjacency is present *species_supporting:* 0-1 array indicating species in which adjacency is present (1) or absent (0) """ nbgroup = 0 # number of groups in the species partition (input1, input2, output) # in wich the adjacency appears species_supporting = [] # array such that species_supporting[spe_id] = 1 if species contains # adjacency, and species_supporting[spe_id] = 0 otherwise signed_block1, signed_block2 = adjacency block_id = abs(signed_block1) side_block = 0 side_map = {} if(signed_block1 > 0): side_map = right side_block = signed_block2 else: side_map = left side_block = -signed_block2 output = 0 input1 = 0 input2 = 0 for spe in species1_id: if(side_block in side_map[spe][block_id]): output = 1 species_supporting.append(1) else: species_supporting.append(0) for spe in species2_id: if(side_block in side_map[spe][block_id]): input1 = 1 species_supporting.append(1) else: species_supporting.append(0) for spe in species3_id: if(side_block in side_map[spe][block_id]): input2 = 1 species_supporting.append(1) else: species_supporting.append(0) nbgroup = output + input1 + input2 return nbgroup, species_supporting