#!/usr/bin/python3
''' pdbcop.py is a script that is based on program PDBCOP.
The script should help to analyze post-refineme structures.

Usage: python3 pdbcop.py [arguments]

The report is written into the log file (default pdbcop.log).
'''

__author__ = "Petr Kolenko"
__license__ = "Creative Commons Attribution 4.0 International License"
__email__ = "kolenpe1@cvut.cz"
__version__ = "0.7"

import os
import argparse
import textwrap
import re
import sys
from datetime import datetime
from math import sqrt


parser=argparse.ArgumentParser(
    prog='pdbcop.py',
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=textwrap.dedent('''\
    Report all issues at kolenpe1 [at] cvut.cz.
    ''')
)
parser.add_argument("-c", "--cif", help="input mmCIF file", type=str)
parser.add_argument("-c2", "--cif2", 
                    help="second mmCIF file for comparison", type=str)
parser.add_argument("-p", "--pdb", help="input PDB file", type=str)
parser.add_argument("-p2", "--pdb2", help=" second PDB file for comparison",
                    type=str)
parser.add_argument("-s", "--sequence", help="prints out sequence",
                    action="store_true")
parser.add_argument("-dc", "--delta_coordinates",
                    help="minimal shift to be reported, default 0.2",
                    type=float)
parser.add_argument("-da", "--delta_adp",
                    help="minimal delta in ADP to be reported, default 3.5",
                    type=float)
parser.add_argument("-l", "--log",
                    help="logfile name (default pdbcop.log)", type=str)
parser.add_argument("-v", "--verbose",
                    help="logfile verbosity, 1 = short, 2 = normal, 3 = long, default 2",
                    type=int, default=2)
args = parser.parse_args()


#   ____                _  ____ _  __ 
#  |  _ \ ___  __ _  __| |/ ___(_)/ _|
#  | |_) / _ \/ _` |/ _` | |   | | |_ 
#  |  _ <  __/ (_| | (_| | |___| |  _|
#  |_| \_\___|\__,_|\__,_|\____|_|_|  

def read_cif(filename):
    global atomSiteId               # cislo atomu
    global atomSiteLabelAtomId      # znacka atomu, napr. CA pro Calfa
    global atomSiteLabelAltId       # alternativa
    global atomSiteLabelCompId      # tripismenovy kod pro reziduum
    global atomSiteAuthAsymId       # asi retezec?
    global atomSiteAuthSeqId        # poradi sekvenci
    global atomSiteCartnX           # souradnice X
    global atomSiteCartnY           # souradnice Y
    global atomSiteCartnZ           # souradnice Z
    global atomSiteOccupancy        # okupance
    global atomSiteBIsoOrEquiv      # ADP

    # simple reading lines
    with open(filename, "r") as file:
        lines = file.readlines()

    # initiation of lists
    counter = -1
    atomSiteId = []             # cislo atomu
    atomSiteLabelAtomId = []    # znacka atomu, napr. CA pro Calfa
    atomSiteLabelAltId = []     # alternativa
    atomSiteLabelCompId = []    # tripismenovy kod pro reziduum
    atomSiteAuthAsymId = []     # asi retezec?
    atomSiteAuthSeqId = []      # poradi v sekvenci
    atomSiteCartnX = []         # souradnice X
    atomSiteCartnY = []         # souradnice Y
    atomSiteCartnZ = []         # souradnice Z
    atomSiteOccupancy = []      # okupance
    atomSiteBIsoOrEquiv = []    # ADP
    
    # analysis of positions within the CIF file
    for i in range(len(lines)):
        line = lines[i].split()
        if lines[i][0:4] == "loop":
            counter = -1
        if "atom_site.id" in lines[i] and len(line) == 1:
            positionSiteId = counter
        if "atom_site.label_atom_id" in lines[i] and len(line) ==1:
            positionSiteLabelAtomId = counter
        if "atom_site.label_alt_id" in lines[i] and len(line) ==1:
            positionSiteLabelAltId = counter
        if "atom_site.label_comp_id" in lines[i] and len(line) ==1:
            positionSiteLabelCompId = counter
        if "atom_site.pdbx_PDB_ins_code" in lines[i] and len(line) ==1:
            positionPDB_ins_code = counter
        if "atom_site.auth_asym_id" in lines[i] and len(line) ==1:
            positionSiteAuthAsymId = counter
        if "atom_site.auth_seq_id" in lines[i] and len(line) ==1:
            positionSiteAuthSeqId = counter
        if "atom_site.Cartn_x" in lines[i] and len(line) ==1:
            positionSiteCartnX = counter
        if "atom_site.Cartn_y" in lines[i] and len(line) ==1:
            positionSiteCartnY = counter
        if "atom_site.Cartn_z" in lines[i] and len(line) ==1:
            positionSiteCartnZ = counter
        if "atom_site.occupancy" in lines[i] and len(line) ==1:
            positionSiteOccupancy = counter
        if "atom_site.B_iso_or_equiv" in lines[i] and len(line) ==1:
            positionSiteBIsoOrEquiv = counter
        counter = counter + 1

        
    # Filling of lists with values
    for i in range(len(lines)):
        # looking for loop
        if "loop" in lines[i] or lines[i] == []:
            scan = False
        # looking for loop and X coordinates
        if "atom_site.Cartn_x" in lines[i]:
            scan = True


        line = lines[i].split()
        # generation of arrays
        if len(line) > positionSiteBIsoOrEquiv and scan:
            atomSiteId.append(line[positionSiteId])
            atomSiteLabelAtomId.append(line[positionSiteLabelAtomId])
            atomSiteLabelAltId.append(line[positionSiteLabelAltId])
            atomSiteLabelCompId.append(line[positionSiteLabelCompId])
            atomSiteAuthAsymId.append(line[positionSiteAuthAsymId])
            # checking for insertion codes
            if positionPDB_ins_code:
                if line[positionPDB_ins_code] != "?" and line[positionPDB_ins_code] != ".":
                    atomSiteAuthSeqId.append(line[positionSiteAuthSeqId]+ "." + line[positionPDB_ins_code])
                else:
                    atomSiteAuthSeqId.append(line[positionSiteAuthSeqId])
            else:
                atomSiteAuthSeqId.append(line[positionSiteAuthSeqId])
            atomSiteCartnX.append(float(line[positionSiteCartnX]))
            atomSiteCartnY.append(float(line[positionSiteCartnY]))
            atomSiteCartnZ.append(float(line[positionSiteCartnZ]))
            atomSiteOccupancy.append(float(line[positionSiteOccupancy]))
            atomSiteBIsoOrEquiv.append(float(line[positionSiteBIsoOrEquiv]))


#   ____                _ ____  ____  ____  
#  |  _ \ ___  __ _  __| |  _ \|  _ \| __ ) 
#  | |_) / _ \/ _` |/ _` | |_) | | | |  _ \ 
#  |  _ <  __/ (_| | (_| |  __/| |_| | |_) |
#  |_| \_\___|\__,_|\__,_|_|   |____/|____/ 

def read_pdb(filename):
    global atomSiteId               # cislo atomu
    global atomSiteLabelAtomId      # znacka atomu, napr. CA pro Calfa
    global atomSiteLabelAltId       # alternativa
    global atomSiteLabelCompId      # tripismenovy kod pro reziduum
    global atomSiteAuthAsymId       # asi retezec?
    global atomSiteAuthSeqId        # poradi sekvenci
    global atomSiteCartnX           # souradnice X
    global atomSiteCartnY           # souradnice Y
    global atomSiteCartnZ           # souradnice Z
    global atomSiteOccupancy        # okupance
    global atomSiteBIsoOrEquiv      # ADP

    # initiation of lists
    atomSiteId = []             # cislo atomu
    atomSiteLabelAtomId = []    # znacka atomu, napr. CA pro Calfa
    atomSiteLabelAltId = []     # alternativa
    atomSiteLabelCompId = []    # tripismenovy kod pro reziduum
    atomSiteAuthAsymId = []     # asi retezec?
    atomSiteAuthSeqId = []      # poradi v sekvenci
    atomSiteCartnX = []         # souradnice X
    atomSiteCartnY = []         # souradnice Y
    atomSiteCartnZ = []         # souradnice Z
    atomSiteOccupancy = []      # okupance
    atomSiteBIsoOrEquiv = []    # ADP

    # simple reading lines
    with open(filename, "r") as file:
        lines = file.readlines()
    for i in range(len(lines)):
        if lines[i][0:4] == "ATOM" or lines[i][0:6] == "HETATM":
            atomSiteId.append(lines[i][6:11])
            atomSiteLabelAtomId.append(lines[i][13:16])
            atomSiteLabelAltId.append(lines[i][16:17])
            atomSiteLabelCompId.append(lines[i][17:20])
            atomSiteAuthAsymId.append(lines[i][21:22])
            if lines[i][26:27] != " ":
                atomSiteAuthSeqId.append(lines[i][22:26].strip() + "." + lines[i][26:27])
            else:
                atomSiteAuthSeqId.append(lines[i][22:26].strip())
            atomSiteCartnX.append(float(lines[i][30:38]))
            atomSiteCartnY.append(float(lines[i][38:46]))
            atomSiteCartnZ.append(float(lines[i][46:54]))
            atomSiteOccupancy.append(float(lines[i][54:60]))
            atomSiteBIsoOrEquiv.append(float(lines[i][60:66]))
            

#      _   _                     _                _           _     
#     / \ | |_ ___  _ __ ___    / \   _ __   __ _| |_   _ ___(_)___ 
#    / _ \| __/ _ \| '_ ` _ \  / _ \ | '_ \ / _` | | | | / __| / __|
#   / ___ \ || (_) | | | | | |/ ___ \| | | | (_| | | |_| \__ \ \__ \
#  /_/   \_\__\___/|_| |_| |_/_/   \_\_| |_|\__,_|_|\__, |___/_|___/
#                                                   |___/           

def atom_analysis():
    global number_of_waters, list_of_chains, number_of_residues
    global number_of_nonstandard_residues, total_residues
    
    list_of_chains = []
    number_of_atoms = []

    # for cycle for analysis of atoms
    for i in range(len(atomSiteAuthAsymId)):
        if not list_of_chains:
            list_of_chains.append(atomSiteAuthAsymId[i])
        else:
            if atomSiteAuthAsymId[i] not in list_of_chains:
                list_of_chains.append(atomSiteAuthAsymId[i])
    data_logfile.append("\n *** Atoms in chains ***\n\n")
    number_of_atoms = [0] * len(list_of_chains)

    # counting and printing atoms
    for i in range(len(atomSiteAuthAsymId)):
        number_of_atoms[list_of_chains.index(atomSiteAuthAsymId[i])] = \
        	number_of_atoms[list_of_chains.index(atomSiteAuthAsymId[i])] + 1
    for i in range(len(list_of_chains)):
    	data_logfile.append("  " + list_of_chains[i] + " : "
    		+ str(number_of_atoms[i]) + "\n")

    # counting and printing residues
    number_of_residues = [0] * len(list_of_chains)
    number_of_nas = [0] * len(list_of_chains)
    number_of_others = [0] * len(list_of_chains)
    number_of_waters = [0] * len(list_of_chains)

    current_residue_number = '-999'
    current_residue_name = 'NONE'
    current_chain = 'NONE'
    for i in range(len(atomSiteLabelCompId)):
        # condition
        if atomSiteLabelCompId[i] != current_residue_name \
            or atomSiteAuthSeqId[i] != current_residue_number \
            or atomSiteAuthAsymId[i] != current_chain:

        # now the function - residues
            if atomSiteLabelCompId[i] in three_letter_protein:
                number_of_residues[list_of_chains.index(atomSiteAuthAsymId[i])] = \
                    number_of_residues[list_of_chains.index(atomSiteAuthAsymId[i])] + 1
            
        # now nucleic acids
            if atomSiteLabelCompId[i] in three_letter_na:
                number_of_nas[list_of_chains.index(atomSiteAuthAsymId[i])] = \
                    number_of_nas[list_of_chains.index(atomSiteAuthAsymId[i])] + 1
        # now other molecules
            if atomSiteLabelCompId[i] != 'HOH' \
                and atomSiteLabelCompId[i] not in three_letter_protein \
                and atomSiteLabelCompId[i] not in three_letter_na:
                number_of_others[list_of_chains.index(atomSiteAuthAsymId[i])] = \
                    number_of_others[list_of_chains.index(atomSiteAuthAsymId[i])] + 1
            if atomSiteLabelCompId[i] == "HOH":
                number_of_waters[list_of_chains.index(atomSiteAuthAsymId[i])] = \
                    number_of_waters[list_of_chains.index(atomSiteAuthAsymId[i])] + 1
        # finally making difference to current values
        current_residue_number = atomSiteAuthSeqId[i]
        current_residue_name = atomSiteLabelCompId[i]
        current_chain = atomSiteAuthAsymId[i]
        
    # appending report
    data_logfile.append("\n\n\n")
    data_logfile.append(" *** Composition of AU ***\n\n")
    data_logfile.append(" Chain:   Res /  Nuc /  Oth /  Wat:" + "\n")
    data_logfile.append(35*"-" + "\n")
    # addition chain by chain
    for i in range(len(list_of_chains)):
        data_logfile.append("  " 
            + list_of_chains[i] 
            + (3-len(list_of_chains[i]))*" "
            + " : " + (5-len(str(number_of_residues[i])))*" " 
            + str(number_of_residues[i])
            + " /" + (5-len(str(number_of_nas[i])))*" " 
            + str(number_of_nas[i]) + " /" +
            + (5-len(str(number_of_others[i])))*" "
            + str(number_of_others[i])
            + " /"
            + (5-len(str(number_of_waters[i])))*" "
            + str(number_of_waters[i]) + "\n")

    # number of alternate atoms
    low_occup = 0
    alternate_occup = 0
    list_of_reduced_occup_residues = []
    list_of_alternative_residues = []
    list_of_no_alternative = [".", " "]

    # minimal verbosity
    for i in range(len(atomSiteOccupancy)):
        if float(atomSiteOccupancy[i]) != 1 and \
            atomSiteLabelAltId[i] in list_of_no_alternative:
            low_occup = low_occup + 1
        if atomSiteLabelAltId[i] not in list_of_no_alternative:
            alternate_occup = alternate_occup + 1
    # Making output
    data_logfile.append("\n\n *** Alternatives and low occupancies ***\n \n")
    if low_occup != 0:
        data_logfile.append(" Atoms in alternatives: "
            + (21-len(str(alternate_occup)))*" "
            + str(alternate_occup) + "\n")
        data_logfile.append(" Low-occupancy atoms: " 
            + (23-len(str(low_occup)))*" "
            + str(low_occup) + "\n")
        data_logfile.append(" All atoms with non-zero occupancy: "
            + (9-len(str(alternate_occup + low_occup)))*" "
            + str(alternate_occup + low_occup) + "\n")
    data_logfile.append(" Minimal occupancy: " 
        + (25-len(str(min(atomSiteOccupancy))))*" "
        + str(min(atomSiteOccupancy)) + "\n")

    # analysis of extremes
    extreme_occupancy_work = []
    extreme_occupancy = []
    number_of_extreme_occ = 0
    for i in range(len(atomSiteOccupancy)):
        if float(atomSiteOccupancy[i]) <= 0.1:
            number_of_extreme_occ = number_of_extreme_occ + 1
            extreme_occupancy_work.append(atomSiteAuthAsymId[i]
                + "/" + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i])
    data_logfile.append(" Atoms with occupancy lower than 0.1: "
        + (7-len(str(number_of_extreme_occ)))*" "
        + str(number_of_extreme_occ) + "\n\n")
    if number_of_extreme_occ != 0:
        data_logfile.append("  Occupancy lower than 0.1 found in residues:\n")
        for i in range(len(extreme_occupancy_work)):
            if extreme_occupancy_work[i] not in extreme_occupancy:
                extreme_occupancy.append(extreme_occupancy_work[i])
        #filling the logfile
        for i in range(len(extreme_occupancy)):
            data_logfile.append(" " + extreme_occupancy[i] + "\n")
        data_logfile.append("\n\n")

    
    # Standard verbosity
    if args.verbose == 2:
        residues_reduced = []   # list of residues with reduced occupancy

        # global search for reduced occupancies
        for i in range(len(atomSiteLabelAltId)):
            if float(atomSiteOccupancy[i]) != 1.0 and \
                atomSiteLabelAltId[i] in list_of_no_alternative:
                    value = atomSiteAuthAsymId[i] + "/" \
                        + atomSiteLabelCompId[i] + "/" \
                        + atomSiteAuthSeqId[i] \
                        + "--" + str(atomSiteOccupancy[i])
                    if value not in residues_reduced:
                        residues_reduced.append(value)
        
        residues_alternatives = []
        # global search for alternatives
        for i in range(len(atomSiteLabelAltId)):
            if atomSiteLabelAltId[i] not in list_of_no_alternative:
                    value = atomSiteAuthAsymId[i] + "/" \
                        + atomSiteLabelCompId[i] + "/" \
                        + atomSiteAuthSeqId[i] \
                        + "(" + atomSiteLabelAltId[i] + ")" \
                        + "--" + str(atomSiteOccupancy[i])
                    if value not in residues_alternatives:
                        residues_alternatives.append(value)
        
        # Making output
        if residues_reduced:
            data_logfile.append("  List of reduced occupancies:" + "\n")
            data_logfile.append("  ----------------------------\n")
            for i in range(len(residues_reduced)):
                data_logfile.append(" " + residues_reduced[i] + "\n")
        if residues_alternatives:
            data_logfile.append("\n  List of alternatives:" + "\n")
            data_logfile.append("  ---------------------\n")
            for i in range(len(residues_alternatives)):
                data_logfile.append(" " + residues_alternatives[i] + "\n")
        data_logfile.append("\n\n")

    # Full verbosity
    if args.verbose == 3:
        # Adding list of reduced occupancies
        data_logfile.append("  List of reduced occupancies:\n")
        data_logfile.append("  ----------------------------\n")
        for i in range(len(atomSiteLabelAltId)):
            if float(atomSiteOccupancy[i]) != 1.0 and \
                atomSiteLabelAltId[i] in list_of_no_alternative:
                    data_logfile.append(" " + atomSiteAuthAsymId[i] + "/"
                        + atomSiteLabelCompId[i] + "/"
                        + atomSiteAuthSeqId[i] + "/"
                        + atomSiteLabelAtomId[i] + "--"
                        + str(atomSiteOccupancy[i]) + "\n")
        # Adding list of alternatives
        data_logfile.append("\n  List of alternatives:\n")
        data_logfile.append("-----------------------\n")        
        for i in range(len(atomSiteLabelAltId)):
            if atomSiteLabelAltId[i] not in list_of_no_alternative:
                data_logfile.append(" " + atomSiteAuthAsymId[i] + "/"
                    + atomSiteLabelCompId[i] + "/"
                    + atomSiteAuthSeqId[i] + "/"
                    + atomSiteLabelAtomId[i] + "("
                    + atomSiteLabelAltId[i] + ")--"
                    + str(atomSiteOccupancy[i]) + "\n")
        data_logfile.append("\n\n")


    # analysis of ADPs
    data_logfile.append(" *** Analysis of ADPs *** \n\n")
    data_logfile.append("  Overall ADP (ISO or equivalent):\n")
    data_logfile.append("  --------------------------------\n")
    data_logfile.append(" Minimal: "
        + (8-len(str(round(min(atomSiteBIsoOrEquiv), 2))))*" "
        + str(round(min(atomSiteBIsoOrEquiv), 2)) + "\n")
    data_logfile.append(" Maximal: "
        + (8-len(str(round(max(atomSiteBIsoOrEquiv), 2))))*" "
        + str(round(max(atomSiteBIsoOrEquiv), 2))
        + "\n")
    avg = sum(atomSiteBIsoOrEquiv)/len(atomSiteBIsoOrEquiv)
    data_logfile.append(" Average: "
        + (8-len(str(round(avg,1))))*" "
        + str(round(avg,1)) + "\n\n")


    data_logfile.append(" Chain:  Min /    Max /  Av.ADP\n")
    data_logfile.append(" ------------------------------\n")
    for i in range(len(list_of_chains)):
        work_list_of_adp = []
        for j in range(len(atomSiteBIsoOrEquiv)):
            if atomSiteAuthAsymId[j] == list_of_chains[i]:
                work_list_of_adp.append(atomSiteBIsoOrEquiv[j])
        avg = sum(work_list_of_adp)/len(work_list_of_adp)
        data_logfile.append(" " 
            + (3-len(list_of_chains[i]))*" "
            + list_of_chains[i] + ": "
            + (6-len(str(round(min(work_list_of_adp), 1))))*" "
            + str(round(min(work_list_of_adp), 1)) + " / " 
            + (6-len(str(round(max(work_list_of_adp), 1))))*" "
            + str(round(max(work_list_of_adp), 1)) + " / "
            + (6-len(str(round(avg,1))))*" "
            + str(round(avg,1)) +"\n")
    data_logfile.append("\n\n")


    # Normal list of ADP extremes
    if args.verbose == 2:
        Bmin = min(atomSiteBIsoOrEquiv)
        Bmax = max(atomSiteBIsoOrEquiv)
        list_of_adp_low = []
        list_of_adp_high = []
        # low ADP first
        for i in range(len(atomSiteBIsoOrEquiv)):
            if atomSiteBIsoOrEquiv[i] <= Bmin + 5:
                value = " " + atomSiteAuthAsymId[i] + "/" \
                    + atomSiteLabelCompId[i] + "/"        \
                    + atomSiteAuthSeqId[i]
                if value not in list_of_adp_low:
                    list_of_adp_low.append(value)
        data_logfile.append("\n  Residues with ADP lower than "
            + str(Bmin + 5) + "\n")
        data_logfile.append("  -----------------------------")
        data_logfile.append(len(str(Bmin+5))*"-" + "\n")
        for i in range(len(list_of_adp_low)):
            data_logfile.append(list_of_adp_low[i] + "\n")
        # high ADP
        for i in range(len(atomSiteBIsoOrEquiv)):
            if atomSiteBIsoOrEquiv[i] >= Bmax - 10:
                value = " " + atomSiteAuthAsymId[i] + "/" \
                    + atomSiteLabelCompId[i] + "/"        \
                    + atomSiteAuthSeqId[i]
                if value not in list_of_adp_high:
                    list_of_adp_high.append(value)
        data_logfile.append("\n  Atoms with ADP higher than "
            + str(Bmax - 10) + "\n")
        data_logfile.append("  ---------------------------"
            + len(str(Bmax-10))*"-" + "\n")
        for i in range(len(list_of_adp_high)):
            data_logfile.append(list_of_adp_high[i] + "\n")
        
    
    # Full list of ADP extremes
    if args.verbose == 3:
        Bmin = min(atomSiteBIsoOrEquiv)
        Bmax = max(atomSiteBIsoOrEquiv)
        data_logfile.append("  Atoms with ADP lower than "
            + str(Bmin + 5) + "\n")
        data_logfile.append("  --------------------------"
            + len(str(Bmin + 5))*"-" + "\n")
        for i in range(len(atomSiteBIsoOrEquiv)):
            if atomSiteBIsoOrEquiv[i] <= Bmin + 5:
                data_logfile.append(" " + atomSiteAuthAsymId[i] + "/"
                    + atomSiteLabelCompId[i] + "/"
                    + atomSiteAuthSeqId[i] + "/"
                    + atomSiteLabelAtomId[i] + "--"
                    + str(atomSiteBIsoOrEquiv[i]) + "\n")
        data_logfile.append("\n  Atoms with ADP higher than "
            + str(Bmax - 10) + "\n")
        data_logfile.append("  ---------------------------"
            + len(str(Bmax - 10))*"-" + "\n")
        for i in range(len(atomSiteBIsoOrEquiv)):
            if atomSiteBIsoOrEquiv[i] >= Bmax - 10:
                data_logfile.append(" " + atomSiteAuthAsymId[i] + "/"
                    + atomSiteLabelCompId[i] + "/"
                    + atomSiteAuthSeqId[i] + "/"
                    + atomSiteLabelAtomId[i] + "--"
                    + str(atomSiteBIsoOrEquiv[i]) + "\n")


# Example to find average of list
#number_list = [45, 34, 10, 36, 12, 6, 80]
#avg = sum(number_list)/len(number_list)
#print("The average is ", round(avg,2))


def calculate_shift(a, b, c, d, e, f):
    return sqrt((a-b)**2 + (c-d)**2 + (e-f)**2)
    
def convert_3to1(residue):
    value = ''
    if residue in three_letter_protein:
        value = one_letter_protein[three_letter_protein.index(residue)]
    if residue in three_letter_na:
        value = '(' + residue + ')'
    return value
        

#                                            _____ _ _           
#    ___ ___  _ __ ___  _ __   __ _ _ __ ___|  ___(_) | ___  ___ 
#   / __/ _ \| '_ ` _ \| '_ \ / _` | '__/ _ \ |_  | | |/ _ \/ __|
#  | (_| (_) | | | | | | |_) | (_| | | |  __/  _| | | |  __/\__ \
#   \___\___/|_| |_| |_| .__/ \__,_|_|  \___|_|   |_|_|\___||___/
#                      |_|                                           

def compare_files(first_file, second_file):
    # compare lengths
    if len(atomSiteCartnX) != len(atomSiteCartnX2):
        print("Number of atoms in " + str(first_file) + "is diffrent from number"
            + " of atoms in " + str(second_file))
        print("Exiting.")
        sys.exit()

    # Setting deltas
    if args.delta_coordinates:
        delta_coord = args.delta_coordinates
    else:
        delta_coord = 0.2
    # delta ADPs
    if args.delta_adp:
        delta_adp = args.delta_adp
    else:
        delta_adp = 3.5


    # calculation of shifts
    shift_of_atoms = []
    for i in range(len(atomSiteCartnX)):
        shift_of_atoms.append(calculate_shift(atomSiteCartnX[i],
            atomSiteCartnX2[i], atomSiteCartnY[i], atomSiteCartnY2[i],
            atomSiteCartnZ[i], atomSiteCartnZ2[i]))

    # shifts in adps
    delta_adp_list = []   
    delta_adp_work = []
    delta_adp_values = []
    for i in range(len(atomSiteCartnX)):
        delta_adp_values.append(abs(atomSiteBIsoOrEquiv[i] \
        - atomSiteBIsoOrEquiv2[i]))

    # total number of shifts greater than defined
    number_of_shifts = 0 
    for i in range(len(shift_of_atoms)):
        if shift_of_atoms[i] >= delta_coord:
            number_of_shifts = number_of_shifts + 1

    # total number of adp shifts
    number_of_adp_changes = 0
    for i in range(len(delta_adp_values)):
        if delta_adp_values[i] >= delta_adp:
            number_of_adp_changes = number_of_adp_changes + 1
    
    # printing to log file
    data_logfile.append("\n\n *** Comparison between two files: ***\n\n")
    data_logfile.append(" First file: " + first_file + "\n")
    data_logfile.append(" Second file: " + second_file + "\n\n")
    data_logfile.append(" No. of atoms shifted by more than " + str(delta_coord)
        + " A: " + str(number_of_shifts) + "\n")
    data_logfile.append(" Maximal deviation: " 
        + str(round(max(shift_of_atoms), 2)) + "\n")
    data_logfile.append(" No. of ADP shifts larger than " + str(delta_adp)
        + " :" + str(number_of_adp_changes) + "\n")
    data_logfile.append(" Maximal deviation in ADP: " 
        + str(round(max(delta_adp_values), 2)) + "\n\n")

 
    # Report verbosity 2        
    if args.verbose == 2:
        data_logfile.append("  Atoms shifted by more than " + str(delta_coord)
            + " A:\n\n")

        # building of list of shifted atoms
        work_list_of_shift = []
        # generation of all first
        for i in range(len(shift_of_atoms)):
            if shift_of_atoms[i] >= delta_coord:
                work_list_of_shift.append(atomSiteAuthAsymId[i]
                + "/" + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i])
        # removal of duplicates
        list_of_shifts = []
        for i in range(len(work_list_of_shift)):
            if work_list_of_shift[i] not in list_of_shifts:
                list_of_shifts.append(work_list_of_shift[i])
        
        for i in range(len(list_of_shifts)):
            data_logfile.append(" " + list_of_shifts[i] + "\n")
        data_logfile.append("\n")


        # reports of delta adps
        data_logfile.append("  Atoms with change in ADP by more than "
            + str(delta_adp) + " A^2:\n\n")
        for i in range(len(atomSiteBIsoOrEquiv)):
            if abs(delta_adp_values[i]) >= delta_adp:
                delta_adp_work.append(atomSiteAuthAsymId[i]
                + "/" + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i])
        # removal of duplicates
        for i in range(len(delta_adp_work)):
            if delta_adp_work[i] not in delta_adp_list:
                delta_adp_list.append(delta_adp_work[i])
        
        for i in range(len(delta_adp_list)):
            data_logfile.append(" " + delta_adp_list[i] + "\n")
        data_logfile.append("\n")

    if args.verbose == 3:
        data_logfile.append("  Atoms shifted by more than " + str(delta_coord)
            + " A:\n\n")
        for i in range(len(shift_of_atoms)):
            if shift_of_atoms[i] >= delta_coord:
                data_logfile.append(atomSiteAuthAsymId[i] + "/" 
                    + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i]
                    + "/" + atomSiteLabelAtomId[i] + "\n")
        data_logfile.append("\n")
        data_logfile.append("  Atoms with change in ADP by more than "
            + str(delta_adp) + " A^2:\n\n")
        for i in range(len(delta_adp_values)):
            if delta_adp_values[i] >= delta_adp:
                data_logfile.append(atomSiteAuthAsymId[i] + "/" 
                    + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i]
                    + "/" + atomSiteLabelAtomId[i] + "\n")

    
#   __  __    _    ___ _   _   ____  ____   ___   ____ ____      _    __  __ 
#  |  \/  |  / \  |_ _| \ | | |  _ \|  _ \ / _ \ / ___|  _ \    / \  |  \/  |
#  | |\/| | / _ \  | ||  \| | | |_) | |_) | | | | |  _| |_) |  / _ \ | |\/| |
#  | |  | |/ ___ \ | || |\  | |  __/|  _ <| |_| | |_| |  _ <  / ___ \| |  | |
#  |_|  |_/_/   \_\___|_| \_| |_|   |_| \_\\___/ \____|_| \_\/_/   \_\_|  |_|


if __name__ == "__main__":

    three_letter_protein = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN',
        'GLY', 'HIS', "ILE", 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR',
        'TRP', 'TYR', 'VAL']
    three_letter_na = ['DA', 'DG', 'DC', 'DT', 'A', 'C', 'G', 'U']
    one_letter_protein = ['A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H',
        'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V']


#  ______        __  _             _                __ 
# /_  __/__ ___ / /_(_)__  ___ _  (_)__  ___  __ __/ /_
#  / / / -_|_-</ __/ / _ \/ _ `/ / / _ \/ _ \/ // / __/
# /_/  \__/___/\__/_/_//_/\_, / /_/_//_/ .__/\_,_/\__/ 
#                        /___/        /_/              
                       
                       
    # Testing input
    if args.cif and args.pdb:
        print("Conflict of input PDB and mmCIF file. Select one input.")
        sys.exit()
    if not args.cif and not args.pdb:
        print("No input file. Exiting ...")
        print("Use parameter -h or --help for more information")
        sys.exit()

    if args.cif:
        cif1 = os.path.abspath(args.cif)
    if args.cif2:
        cif2 = os.path.abspath(args.cif2)
    if args.pdb:
        pdb1 = os.path.abspath(args.pdb)
    if args.pdb2:
        pdb2 = os.path.abspath(args.pdb2)
    if args.log:
        logfile = os.path.abspath(args.log)
    else:
        working_directory = os.getcwd()
        logfile = "pdbcop.log"
    if args.cif2:
        file2 = os.path.abspath(args.cif2)

    data_logfile = ["\n",
                    "   *** pdbcop.py (v0.7) ***\n",
                    "\n",
                    "{:%A, %B %d %Y %I:%M:%S %p %Z}".format(datetime.now()),
                    "Verbosity: ",str(args.verbose), "\n",
                    "\n","\n"]
    if args.cif:
        data_logfile.append("Input file: ")
        data_logfile.append(cif1)
        data_logfile.append("\n")
        if args.cif2:
            data_logfile.append("Input file 2: " + cif2 + "\n")
        data_logfile.append("Log file: ")
        data_logfile.append(logfile)
        data_logfile.append("\n")
        read_cif(cif1)
        #atom_analysis()

#    test_input()
#    read_cif(cif1)
    if args.pdb:
        data_logfile.append("Input file: " + pdb1 + "\n")
        if args.pdb2:
            data_logfile.append("Input file 2: " + pdb2 + "\n")
        data_logfile.append("Log file: " + logfile + "\n")
        read_pdb(pdb1)
    atom_analysis()
    

#    ____       __            __                                      
#   / __ \__ __/ /____  __ __/ /_  ___ ___ ___ ___ _____ ___  _______ 
#  / /_/ / // / __/ _ \/ // / __/ (_-</ -_) _ `/ // / -_) _ \/ __/ -_)
#  \____/\_,_/\__/ .__/\_,_/\__/ /___/\__/\_, /\_,_/\__/_//_/\__/\__/ 
#               /_/                        /_/                        


    if args.sequence:
        data_logfile.append("\n  *** Export sequence ***\n\n")
        current_residue_number = '-999'
        current_residue_name = 'NONE'
        current_sequence = []
        # looping through the whole file - first searching for chains
        for i in range(len(list_of_chains)):
            for j in range(len(atomSiteLabelCompId)):            
                if (atomSiteLabelCompId[j] != current_residue_name \
                    or atomSiteAuthSeqId[j] != current_residue_number) \
                    and atomSiteAuthAsymId[j] == list_of_chains[i]:
                    # here starts the action
                    current_sequence.append(convert_3to1(atomSiteLabelCompId[j][0:3]))
                current_residue_number = atomSiteAuthSeqId[j]
                current_residue_name = atomSiteLabelCompId[j]
            
            # appending log file
            data_logfile.append(">chain " + list_of_chains[i] + ":\n")
            for i in range(len(current_sequence)):
                data_logfile.append(current_sequence[i])
            data_logfile.append("\n\n")
            current_sequence = []
                

#                      _  ____ ___ _____ ____  
#   _ __ ___  __ _  __| |/ ___|_ _|  ___|___ \ 
#  | '__/ _ \/ _` |/ _` | |    | || |_    __) |
#  | | |  __/ (_| | (_| | |___ | ||  _|  / __/ 
#  |_|  \___|\__,_|\__,_|\____|___|_|   |_____|

    if args.cif2:
        with open(cif2, "r") as file:
            lines2 = file.readlines()

        # initiation of lists
        counter2 = -1
        atomSiteId2 = []
        atomSiteLabelAtomId2 = []
        atomSiteLabelAltId2 = []
        atomSiteLabelCompId2 = []
        atomSiteAuthAsymId2 = []
        atomSiteAuthSeqId2 = []
        atomSiteCartnX2 = []
        atomSiteCartnY2 = []
        atomSiteCartnZ2 = []
        atomSiteOccupancy2 = []
        atomSiteBIsoOrEquiv2 = []
    
        # analysis of positions within the CIF file
        counter = -1
        for i in range(len(lines2)):
            line2 = lines2[i].split()
            if lines2[i][0:4] == "loop":
                counter = -1
            if "atom_site.id" in lines2[i] and len(line2) == 1:
                positionSiteId2 = counter
            if "atom_site.label_atom_id" in lines2[i] and len(line2) ==1:
                positionSiteLabelAtomId2 = counter
            if "atom_site.label_alt_id" in lines2[i] and len(line2) ==1:
                positionSiteLabelAltId2 = counter
            if "atom_site.label_comp_id" in lines2[i] and len(line2) ==1:
                positionSiteLabelCompId2 = counter
            if "atom_site.auth_asym_id" in lines2[i] and len(line2) ==1:
                positionSiteAuthAsymId2 = counter
            if "atom_site.auth_seq_id" in lines2[i] and len(line2) ==1:
                positionSiteAuthSeqId2 = counter
            if "atom_site.Cartn_x" in lines2[i] and len(line2) ==1:
                positionSiteCartnX2 = counter
            if "atom_site.Cartn_y" in lines2[i] and len(line2) ==1:
                positionSiteCartnY2 = counter
            if "atom_site.Cartn_z" in lines2[i] and len(line2) ==1:
                positionSiteCartnZ2 = counter
            if "atom_site.occupancy" in lines2[i] and len(line2) ==1:
                positionSiteOccupancy2 = counter
            if "atom_site.B_iso_or_equiv" in lines2[i] and len(line2) ==1:
                positionSiteBIsoOrEquiv2 = counter
            counter = counter + 1        

        # Filling of lists with values for file2
        for i in range(len(lines2)):
            # looking for loop
            if "loop" in lines2[i] or lines2[i] == []:
                scan = False
            # looking for loop and X coordinates
            if "atom_site.Cartn_x" in lines2[i]:
                scan = True

            line2 = lines2[i].split()
            if len(line2) > positionSiteBIsoOrEquiv2 and scan:
                atomSiteId2.append(line2[positionSiteId2])
                atomSiteLabelAtomId2.append(line2[positionSiteLabelAtomId2])
                atomSiteLabelAltId2.append(line2[positionSiteLabelAltId2])
                atomSiteLabelCompId2.append(line2[positionSiteLabelCompId2])
                atomSiteAuthAsymId2.append(line2[positionSiteAuthAsymId2])
                atomSiteAuthSeqId2.append(line2[positionSiteAuthSeqId2])
                atomSiteCartnX2.append(float(line2[positionSiteCartnX2]))
                atomSiteCartnY2.append(float(line2[positionSiteCartnY2]))
                atomSiteCartnZ2.append(float(line2[positionSiteCartnZ2]))
                atomSiteOccupancy2.append(float(line2[positionSiteOccupancy2]))
                atomSiteBIsoOrEquiv2.append(float(line2[positionSiteBIsoOrEquiv2]))

        # MAIN ANALYSIS
        compare_files(cif1, cif2)


#                      _   ____  ____  ____    ____  
#   _ __ ___  __ _  __| | |  _ \|  _ \| __ )  |___ \ 
#  | '__/ _ \/ _` |/ _` | | |_) | | | |  _ \    __) |
#  | | |  __/ (_| | (_| | |  __/| |_| | |_) |  / __/ 
#  |_|  \___|\__,_|\__,_| |_|   |____/|____/  |_____|

    if args.pdb2:
        with open(pdb2, "r") as file:
            lines2 = file.readlines()

        # initiation of lists
        atomSiteId2 = []
        atomSiteLabelAtomId2 = []
        atomSiteLabelAltId2 = []
        atomSiteLabelCompId2 = []
        atomSiteAuthAsymId2 = []
        atomSiteAuthSeqId2 = []
        atomSiteCartnX2 = []
        atomSiteCartnY2 = []
        atomSiteCartnZ2 = []
        atomSiteOccupancy2 = []
        atomSiteBIsoOrEquiv2 = []
    
        for i in range(len(lines2)):
            if lines2[i][0:4] == "ATOM" or lines2[i][0:6] == "HETATM":
                atomSiteId2.append(lines2[i][6:11])
                atomSiteLabelAtomId2.append(lines2[i][13:16])
                atomSiteLabelAltId2.append(lines2[i][16:17])
                atomSiteLabelCompId2.append(lines2[i][17:20])
                atomSiteAuthAsymId2.append(lines2[i][21:22])
                if lines2[i][26:27] != " ":
                    atomSiteAuthSeqId2.append(lines2[i][22:26].strip() + "." + lines2[i][26:27])
                else:
                    atomSiteAuthSeqId2.append(lines2[i][22:26].strip())
                atomSiteCartnX2.append(float(lines2[i][30:38]))
                atomSiteCartnY2.append(float(lines2[i][38:46]))
                atomSiteCartnZ2.append(float(lines2[i][46:54]))
                atomSiteOccupancy2.append(float(lines2[i][54:60]))
                atomSiteBIsoOrEquiv2.append(float(lines2[i][60:66]))

        # MAIN ANALYSIS
        compare_files(pdb1, pdb2)


#                _ __  _           __             _____ __   
#    _    ______(_) /_(_)__  ___ _/ /  ___  ___ _/ __(_) /__ 
#   | |/|/ / __/ / __/ / _ \/ _ `/ /__/ _ \/ _ `/ _// / / -_)
#   |__,__/_/ /_/\__/_/_//_/\_, /____/\___/\_, /_/ /_/_/\__/ 
#                          /___/          /___/              

    
    # writing the log file
    with open(logfile, "w") as file:
        file.writelines(data_logfile)

    # printing the log file
    f = open(logfile, 'r')
    content = f.read()
    print(content)
    f.close()