#!/usr/bin/python3 ''' pdbcop.py is a script that is based on program PDBCOP. The script should help to analyze post-refineme structures. Usage: python3 pdbcop.py [arguments] The report is written into the log file (default pdbcop.log). ''' __author__ = "Petr Kolenko" __license__ = "Creative Commons Attribution 4.0 International License" __email__ = "kolenpe1@cvut.cz" __version__ = "0.7" import os import argparse import textwrap import re import sys from datetime import datetime from math import sqrt parser=argparse.ArgumentParser( prog='pdbcop.py', formatter_class=argparse.RawDescriptionHelpFormatter, epilog=textwrap.dedent('''\ Report all issues at kolenpe1 [at] cvut.cz. ''') ) parser.add_argument("-c", "--cif", help="input mmCIF file", type=str) parser.add_argument("-c2", "--cif2", help="second mmCIF file for comparison", type=str) parser.add_argument("-p", "--pdb", help="input PDB file", type=str) parser.add_argument("-p2", "--pdb2", help=" second PDB file for comparison", type=str) parser.add_argument("-s", "--sequence", help="prints out sequence", action="store_true") parser.add_argument("-dc", "--delta_coordinates", help="minimal shift to be reported, default 0.2", type=float) parser.add_argument("-da", "--delta_adp", help="minimal delta in ADP to be reported, default 3.5", type=float) parser.add_argument("-l", "--log", help="logfile name (default pdbcop.log)", type=str) parser.add_argument("-v", "--verbose", help="logfile verbosity, 1 = short, 2 = normal, 3 = long, default 2", type=int, default=2) args = parser.parse_args() # ____ _ ____ _ __ # | _ \ ___ __ _ __| |/ ___(_)/ _| # | |_) / _ \/ _` |/ _` | | | | |_ # | _ < __/ (_| | (_| | |___| | _| # |_| \_\___|\__,_|\__,_|\____|_|_| def read_cif(filename): global atomSiteId # cislo atomu global atomSiteLabelAtomId # znacka atomu, napr. CA pro Calfa global atomSiteLabelAltId # alternativa global atomSiteLabelCompId # tripismenovy kod pro reziduum global atomSiteAuthAsymId # asi retezec? global atomSiteAuthSeqId # poradi sekvenci global atomSiteCartnX # souradnice X global atomSiteCartnY # souradnice Y global atomSiteCartnZ # souradnice Z global atomSiteOccupancy # okupance global atomSiteBIsoOrEquiv # ADP # simple reading lines with open(filename, "r") as file: lines = file.readlines() # initiation of lists counter = -1 atomSiteId = [] # cislo atomu atomSiteLabelAtomId = [] # znacka atomu, napr. CA pro Calfa atomSiteLabelAltId = [] # alternativa atomSiteLabelCompId = [] # tripismenovy kod pro reziduum atomSiteAuthAsymId = [] # asi retezec? atomSiteAuthSeqId = [] # poradi v sekvenci atomSiteCartnX = [] # souradnice X atomSiteCartnY = [] # souradnice Y atomSiteCartnZ = [] # souradnice Z atomSiteOccupancy = [] # okupance atomSiteBIsoOrEquiv = [] # ADP # analysis of positions within the CIF file for i in range(len(lines)): line = lines[i].split() if lines[i][0:4] == "loop": counter = -1 if "atom_site.id" in lines[i] and len(line) == 1: positionSiteId = counter if "atom_site.label_atom_id" in lines[i] and len(line) ==1: positionSiteLabelAtomId = counter if "atom_site.label_alt_id" in lines[i] and len(line) ==1: positionSiteLabelAltId = counter if "atom_site.label_comp_id" in lines[i] and len(line) ==1: positionSiteLabelCompId = counter if "atom_site.pdbx_PDB_ins_code" in lines[i] and len(line) ==1: positionPDB_ins_code = counter if "atom_site.auth_asym_id" in lines[i] and len(line) ==1: positionSiteAuthAsymId = counter if "atom_site.auth_seq_id" in lines[i] and len(line) ==1: positionSiteAuthSeqId = counter if "atom_site.Cartn_x" in lines[i] and len(line) ==1: positionSiteCartnX = counter if "atom_site.Cartn_y" in lines[i] and len(line) ==1: positionSiteCartnY = counter if "atom_site.Cartn_z" in lines[i] and len(line) ==1: positionSiteCartnZ = counter if "atom_site.occupancy" in lines[i] and len(line) ==1: positionSiteOccupancy = counter if "atom_site.B_iso_or_equiv" in lines[i] and len(line) ==1: positionSiteBIsoOrEquiv = counter counter = counter + 1 # Filling of lists with values for i in range(len(lines)): # looking for loop if "loop" in lines[i] or lines[i] == []: scan = False # looking for loop and X coordinates if "atom_site.Cartn_x" in lines[i]: scan = True line = lines[i].split() # generation of arrays if len(line) > positionSiteBIsoOrEquiv and scan: atomSiteId.append(line[positionSiteId]) atomSiteLabelAtomId.append(line[positionSiteLabelAtomId]) atomSiteLabelAltId.append(line[positionSiteLabelAltId]) atomSiteLabelCompId.append(line[positionSiteLabelCompId]) atomSiteAuthAsymId.append(line[positionSiteAuthAsymId]) # checking for insertion codes if positionPDB_ins_code: if line[positionPDB_ins_code] != "?" and line[positionPDB_ins_code] != ".": atomSiteAuthSeqId.append(line[positionSiteAuthSeqId]+ "." + line[positionPDB_ins_code]) else: atomSiteAuthSeqId.append(line[positionSiteAuthSeqId]) else: atomSiteAuthSeqId.append(line[positionSiteAuthSeqId]) atomSiteCartnX.append(float(line[positionSiteCartnX])) atomSiteCartnY.append(float(line[positionSiteCartnY])) atomSiteCartnZ.append(float(line[positionSiteCartnZ])) atomSiteOccupancy.append(float(line[positionSiteOccupancy])) atomSiteBIsoOrEquiv.append(float(line[positionSiteBIsoOrEquiv])) # ____ _ ____ ____ ____ # | _ \ ___ __ _ __| | _ \| _ \| __ ) # | |_) / _ \/ _` |/ _` | |_) | | | | _ \ # | _ < __/ (_| | (_| | __/| |_| | |_) | # |_| \_\___|\__,_|\__,_|_| |____/|____/ def read_pdb(filename): global atomSiteId # cislo atomu global atomSiteLabelAtomId # znacka atomu, napr. CA pro Calfa global atomSiteLabelAltId # alternativa global atomSiteLabelCompId # tripismenovy kod pro reziduum global atomSiteAuthAsymId # asi retezec? global atomSiteAuthSeqId # poradi sekvenci global atomSiteCartnX # souradnice X global atomSiteCartnY # souradnice Y global atomSiteCartnZ # souradnice Z global atomSiteOccupancy # okupance global atomSiteBIsoOrEquiv # ADP # initiation of lists atomSiteId = [] # cislo atomu atomSiteLabelAtomId = [] # znacka atomu, napr. CA pro Calfa atomSiteLabelAltId = [] # alternativa atomSiteLabelCompId = [] # tripismenovy kod pro reziduum atomSiteAuthAsymId = [] # asi retezec? atomSiteAuthSeqId = [] # poradi v sekvenci atomSiteCartnX = [] # souradnice X atomSiteCartnY = [] # souradnice Y atomSiteCartnZ = [] # souradnice Z atomSiteOccupancy = [] # okupance atomSiteBIsoOrEquiv = [] # ADP # simple reading lines with open(filename, "r") as file: lines = file.readlines() for i in range(len(lines)): if lines[i][0:4] == "ATOM" or lines[i][0:6] == "HETATM": atomSiteId.append(lines[i][6:11]) atomSiteLabelAtomId.append(lines[i][13:16]) atomSiteLabelAltId.append(lines[i][16:17]) atomSiteLabelCompId.append(lines[i][17:20]) atomSiteAuthAsymId.append(lines[i][21:22]) if lines[i][26:27] != " ": atomSiteAuthSeqId.append(lines[i][22:26].strip() + "." + lines[i][26:27]) else: atomSiteAuthSeqId.append(lines[i][22:26].strip()) atomSiteCartnX.append(float(lines[i][30:38])) atomSiteCartnY.append(float(lines[i][38:46])) atomSiteCartnZ.append(float(lines[i][46:54])) atomSiteOccupancy.append(float(lines[i][54:60])) atomSiteBIsoOrEquiv.append(float(lines[i][60:66])) # _ _ _ _ _ # / \ | |_ ___ _ __ ___ / \ _ __ __ _| |_ _ ___(_)___ # / _ \| __/ _ \| '_ ` _ \ / _ \ | '_ \ / _` | | | | / __| / __| # / ___ \ || (_) | | | | | |/ ___ \| | | | (_| | | |_| \__ \ \__ \ # /_/ \_\__\___/|_| |_| |_/_/ \_\_| |_|\__,_|_|\__, |___/_|___/ # |___/ def atom_analysis(): global number_of_waters, list_of_chains, number_of_residues global number_of_nonstandard_residues, total_residues list_of_chains = [] number_of_atoms = [] # for cycle for analysis of atoms for i in range(len(atomSiteAuthAsymId)): if not list_of_chains: list_of_chains.append(atomSiteAuthAsymId[i]) else: if atomSiteAuthAsymId[i] not in list_of_chains: list_of_chains.append(atomSiteAuthAsymId[i]) data_logfile.append("\n *** Atoms in chains ***\n\n") number_of_atoms = [0] * len(list_of_chains) # counting and printing atoms for i in range(len(atomSiteAuthAsymId)): number_of_atoms[list_of_chains.index(atomSiteAuthAsymId[i])] = \ number_of_atoms[list_of_chains.index(atomSiteAuthAsymId[i])] + 1 for i in range(len(list_of_chains)): data_logfile.append(" " + list_of_chains[i] + " : " + str(number_of_atoms[i]) + "\n") # counting and printing residues number_of_residues = [0] * len(list_of_chains) number_of_nas = [0] * len(list_of_chains) number_of_others = [0] * len(list_of_chains) number_of_waters = [0] * len(list_of_chains) current_residue_number = '-999' current_residue_name = 'NONE' current_chain = 'NONE' for i in range(len(atomSiteLabelCompId)): # condition if atomSiteLabelCompId[i] != current_residue_name \ or atomSiteAuthSeqId[i] != current_residue_number \ or atomSiteAuthAsymId[i] != current_chain: # now the function - residues if atomSiteLabelCompId[i] in three_letter_protein: number_of_residues[list_of_chains.index(atomSiteAuthAsymId[i])] = \ number_of_residues[list_of_chains.index(atomSiteAuthAsymId[i])] + 1 # now nucleic acids if atomSiteLabelCompId[i] in three_letter_na: number_of_nas[list_of_chains.index(atomSiteAuthAsymId[i])] = \ number_of_nas[list_of_chains.index(atomSiteAuthAsymId[i])] + 1 # now other molecules if atomSiteLabelCompId[i] != 'HOH' \ and atomSiteLabelCompId[i] not in three_letter_protein \ and atomSiteLabelCompId[i] not in three_letter_na: number_of_others[list_of_chains.index(atomSiteAuthAsymId[i])] = \ number_of_others[list_of_chains.index(atomSiteAuthAsymId[i])] + 1 if atomSiteLabelCompId[i] == "HOH": number_of_waters[list_of_chains.index(atomSiteAuthAsymId[i])] = \ number_of_waters[list_of_chains.index(atomSiteAuthAsymId[i])] + 1 # finally making difference to current values current_residue_number = atomSiteAuthSeqId[i] current_residue_name = atomSiteLabelCompId[i] current_chain = atomSiteAuthAsymId[i] # appending report data_logfile.append("\n\n\n") data_logfile.append(" *** Composition of AU ***\n\n") data_logfile.append(" Chain: Res / Nuc / Oth / Wat:" + "\n") data_logfile.append(35*"-" + "\n") # addition chain by chain for i in range(len(list_of_chains)): data_logfile.append(" " + list_of_chains[i] + (3-len(list_of_chains[i]))*" " + " : " + (5-len(str(number_of_residues[i])))*" " + str(number_of_residues[i]) + " /" + (5-len(str(number_of_nas[i])))*" " + str(number_of_nas[i]) + " /" + + (5-len(str(number_of_others[i])))*" " + str(number_of_others[i]) + " /" + (5-len(str(number_of_waters[i])))*" " + str(number_of_waters[i]) + "\n") # number of alternate atoms low_occup = 0 alternate_occup = 0 list_of_reduced_occup_residues = [] list_of_alternative_residues = [] list_of_no_alternative = [".", " "] # minimal verbosity for i in range(len(atomSiteOccupancy)): if float(atomSiteOccupancy[i]) != 1 and \ atomSiteLabelAltId[i] in list_of_no_alternative: low_occup = low_occup + 1 if atomSiteLabelAltId[i] not in list_of_no_alternative: alternate_occup = alternate_occup + 1 # Making output data_logfile.append("\n\n *** Alternatives and low occupancies ***\n \n") if low_occup != 0: data_logfile.append(" Atoms in alternatives: " + (21-len(str(alternate_occup)))*" " + str(alternate_occup) + "\n") data_logfile.append(" Low-occupancy atoms: " + (23-len(str(low_occup)))*" " + str(low_occup) + "\n") data_logfile.append(" All atoms with non-zero occupancy: " + (9-len(str(alternate_occup + low_occup)))*" " + str(alternate_occup + low_occup) + "\n") data_logfile.append(" Minimal occupancy: " + (25-len(str(min(atomSiteOccupancy))))*" " + str(min(atomSiteOccupancy)) + "\n") # analysis of extremes extreme_occupancy_work = [] extreme_occupancy = [] number_of_extreme_occ = 0 for i in range(len(atomSiteOccupancy)): if float(atomSiteOccupancy[i]) <= 0.1: number_of_extreme_occ = number_of_extreme_occ + 1 extreme_occupancy_work.append(atomSiteAuthAsymId[i] + "/" + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i]) data_logfile.append(" Atoms with occupancy lower than 0.1: " + (7-len(str(number_of_extreme_occ)))*" " + str(number_of_extreme_occ) + "\n\n") if number_of_extreme_occ != 0: data_logfile.append(" Occupancy lower than 0.1 found in residues:\n") for i in range(len(extreme_occupancy_work)): if extreme_occupancy_work[i] not in extreme_occupancy: extreme_occupancy.append(extreme_occupancy_work[i]) #filling the logfile for i in range(len(extreme_occupancy)): data_logfile.append(" " + extreme_occupancy[i] + "\n") data_logfile.append("\n\n") # Standard verbosity if args.verbose == 2: residues_reduced = [] # list of residues with reduced occupancy # global search for reduced occupancies for i in range(len(atomSiteLabelAltId)): if float(atomSiteOccupancy[i]) != 1.0 and \ atomSiteLabelAltId[i] in list_of_no_alternative: value = atomSiteAuthAsymId[i] + "/" \ + atomSiteLabelCompId[i] + "/" \ + atomSiteAuthSeqId[i] \ + "--" + str(atomSiteOccupancy[i]) if value not in residues_reduced: residues_reduced.append(value) residues_alternatives = [] # global search for alternatives for i in range(len(atomSiteLabelAltId)): if atomSiteLabelAltId[i] not in list_of_no_alternative: value = atomSiteAuthAsymId[i] + "/" \ + atomSiteLabelCompId[i] + "/" \ + atomSiteAuthSeqId[i] \ + "(" + atomSiteLabelAltId[i] + ")" \ + "--" + str(atomSiteOccupancy[i]) if value not in residues_alternatives: residues_alternatives.append(value) # Making output if residues_reduced: data_logfile.append(" List of reduced occupancies:" + "\n") data_logfile.append(" ----------------------------\n") for i in range(len(residues_reduced)): data_logfile.append(" " + residues_reduced[i] + "\n") if residues_alternatives: data_logfile.append("\n List of alternatives:" + "\n") data_logfile.append(" ---------------------\n") for i in range(len(residues_alternatives)): data_logfile.append(" " + residues_alternatives[i] + "\n") data_logfile.append("\n\n") # Full verbosity if args.verbose == 3: # Adding list of reduced occupancies data_logfile.append(" List of reduced occupancies:\n") data_logfile.append(" ----------------------------\n") for i in range(len(atomSiteLabelAltId)): if float(atomSiteOccupancy[i]) != 1.0 and \ atomSiteLabelAltId[i] in list_of_no_alternative: data_logfile.append(" " + atomSiteAuthAsymId[i] + "/" + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i] + "/" + atomSiteLabelAtomId[i] + "--" + str(atomSiteOccupancy[i]) + "\n") # Adding list of alternatives data_logfile.append("\n List of alternatives:\n") data_logfile.append("-----------------------\n") for i in range(len(atomSiteLabelAltId)): if atomSiteLabelAltId[i] not in list_of_no_alternative: data_logfile.append(" " + atomSiteAuthAsymId[i] + "/" + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i] + "/" + atomSiteLabelAtomId[i] + "(" + atomSiteLabelAltId[i] + ")--" + str(atomSiteOccupancy[i]) + "\n") data_logfile.append("\n\n") # analysis of ADPs data_logfile.append(" *** Analysis of ADPs *** \n\n") data_logfile.append(" Overall ADP (ISO or equivalent):\n") data_logfile.append(" --------------------------------\n") data_logfile.append(" Minimal: " + (8-len(str(round(min(atomSiteBIsoOrEquiv), 2))))*" " + str(round(min(atomSiteBIsoOrEquiv), 2)) + "\n") data_logfile.append(" Maximal: " + (8-len(str(round(max(atomSiteBIsoOrEquiv), 2))))*" " + str(round(max(atomSiteBIsoOrEquiv), 2)) + "\n") avg = sum(atomSiteBIsoOrEquiv)/len(atomSiteBIsoOrEquiv) data_logfile.append(" Average: " + (8-len(str(round(avg,1))))*" " + str(round(avg,1)) + "\n\n") data_logfile.append(" Chain: Min / Max / Av.ADP\n") data_logfile.append(" ------------------------------\n") for i in range(len(list_of_chains)): work_list_of_adp = [] for j in range(len(atomSiteBIsoOrEquiv)): if atomSiteAuthAsymId[j] == list_of_chains[i]: work_list_of_adp.append(atomSiteBIsoOrEquiv[j]) avg = sum(work_list_of_adp)/len(work_list_of_adp) data_logfile.append(" " + (3-len(list_of_chains[i]))*" " + list_of_chains[i] + ": " + (6-len(str(round(min(work_list_of_adp), 1))))*" " + str(round(min(work_list_of_adp), 1)) + " / " + (6-len(str(round(max(work_list_of_adp), 1))))*" " + str(round(max(work_list_of_adp), 1)) + " / " + (6-len(str(round(avg,1))))*" " + str(round(avg,1)) +"\n") data_logfile.append("\n\n") # Normal list of ADP extremes if args.verbose == 2: Bmin = min(atomSiteBIsoOrEquiv) Bmax = max(atomSiteBIsoOrEquiv) list_of_adp_low = [] list_of_adp_high = [] # low ADP first for i in range(len(atomSiteBIsoOrEquiv)): if atomSiteBIsoOrEquiv[i] <= Bmin + 5: value = " " + atomSiteAuthAsymId[i] + "/" \ + atomSiteLabelCompId[i] + "/" \ + atomSiteAuthSeqId[i] if value not in list_of_adp_low: list_of_adp_low.append(value) data_logfile.append("\n Residues with ADP lower than " + str(Bmin + 5) + "\n") data_logfile.append(" -----------------------------") data_logfile.append(len(str(Bmin+5))*"-" + "\n") for i in range(len(list_of_adp_low)): data_logfile.append(list_of_adp_low[i] + "\n") # high ADP for i in range(len(atomSiteBIsoOrEquiv)): if atomSiteBIsoOrEquiv[i] >= Bmax - 10: value = " " + atomSiteAuthAsymId[i] + "/" \ + atomSiteLabelCompId[i] + "/" \ + atomSiteAuthSeqId[i] if value not in list_of_adp_high: list_of_adp_high.append(value) data_logfile.append("\n Atoms with ADP higher than " + str(Bmax - 10) + "\n") data_logfile.append(" ---------------------------" + len(str(Bmax-10))*"-" + "\n") for i in range(len(list_of_adp_high)): data_logfile.append(list_of_adp_high[i] + "\n") # Full list of ADP extremes if args.verbose == 3: Bmin = min(atomSiteBIsoOrEquiv) Bmax = max(atomSiteBIsoOrEquiv) data_logfile.append(" Atoms with ADP lower than " + str(Bmin + 5) + "\n") data_logfile.append(" --------------------------" + len(str(Bmin + 5))*"-" + "\n") for i in range(len(atomSiteBIsoOrEquiv)): if atomSiteBIsoOrEquiv[i] <= Bmin + 5: data_logfile.append(" " + atomSiteAuthAsymId[i] + "/" + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i] + "/" + atomSiteLabelAtomId[i] + "--" + str(atomSiteBIsoOrEquiv[i]) + "\n") data_logfile.append("\n Atoms with ADP higher than " + str(Bmax - 10) + "\n") data_logfile.append(" ---------------------------" + len(str(Bmax - 10))*"-" + "\n") for i in range(len(atomSiteBIsoOrEquiv)): if atomSiteBIsoOrEquiv[i] >= Bmax - 10: data_logfile.append(" " + atomSiteAuthAsymId[i] + "/" + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i] + "/" + atomSiteLabelAtomId[i] + "--" + str(atomSiteBIsoOrEquiv[i]) + "\n") # Example to find average of list #number_list = [45, 34, 10, 36, 12, 6, 80] #avg = sum(number_list)/len(number_list) #print("The average is ", round(avg,2)) def calculate_shift(a, b, c, d, e, f): return sqrt((a-b)**2 + (c-d)**2 + (e-f)**2) def convert_3to1(residue): value = '' if residue in three_letter_protein: value = one_letter_protein[three_letter_protein.index(residue)] if residue in three_letter_na: value = '(' + residue + ')' return value # _____ _ _ # ___ ___ _ __ ___ _ __ __ _ _ __ ___| ___(_) | ___ ___ # / __/ _ \| '_ ` _ \| '_ \ / _` | '__/ _ \ |_ | | |/ _ \/ __| # | (_| (_) | | | | | | |_) | (_| | | | __/ _| | | | __/\__ \ # \___\___/|_| |_| |_| .__/ \__,_|_| \___|_| |_|_|\___||___/ # |_| def compare_files(first_file, second_file): # compare lengths if len(atomSiteCartnX) != len(atomSiteCartnX2): print("Number of atoms in " + str(first_file) + "is diffrent from number" + " of atoms in " + str(second_file)) print("Exiting.") sys.exit() # Setting deltas if args.delta_coordinates: delta_coord = args.delta_coordinates else: delta_coord = 0.2 # delta ADPs if args.delta_adp: delta_adp = args.delta_adp else: delta_adp = 3.5 # calculation of shifts shift_of_atoms = [] for i in range(len(atomSiteCartnX)): shift_of_atoms.append(calculate_shift(atomSiteCartnX[i], atomSiteCartnX2[i], atomSiteCartnY[i], atomSiteCartnY2[i], atomSiteCartnZ[i], atomSiteCartnZ2[i])) # shifts in adps delta_adp_list = [] delta_adp_work = [] delta_adp_values = [] for i in range(len(atomSiteCartnX)): delta_adp_values.append(abs(atomSiteBIsoOrEquiv[i] \ - atomSiteBIsoOrEquiv2[i])) # total number of shifts greater than defined number_of_shifts = 0 for i in range(len(shift_of_atoms)): if shift_of_atoms[i] >= delta_coord: number_of_shifts = number_of_shifts + 1 # total number of adp shifts number_of_adp_changes = 0 for i in range(len(delta_adp_values)): if delta_adp_values[i] >= delta_adp: number_of_adp_changes = number_of_adp_changes + 1 # printing to log file data_logfile.append("\n\n *** Comparison between two files: ***\n\n") data_logfile.append(" First file: " + first_file + "\n") data_logfile.append(" Second file: " + second_file + "\n\n") data_logfile.append(" No. of atoms shifted by more than " + str(delta_coord) + " A: " + str(number_of_shifts) + "\n") data_logfile.append(" Maximal deviation: " + str(round(max(shift_of_atoms), 2)) + "\n") data_logfile.append(" No. of ADP shifts larger than " + str(delta_adp) + " :" + str(number_of_adp_changes) + "\n") data_logfile.append(" Maximal deviation in ADP: " + str(round(max(delta_adp_values), 2)) + "\n\n") # Report verbosity 2 if args.verbose == 2: data_logfile.append(" Atoms shifted by more than " + str(delta_coord) + " A:\n\n") # building of list of shifted atoms work_list_of_shift = [] # generation of all first for i in range(len(shift_of_atoms)): if shift_of_atoms[i] >= delta_coord: work_list_of_shift.append(atomSiteAuthAsymId[i] + "/" + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i]) # removal of duplicates list_of_shifts = [] for i in range(len(work_list_of_shift)): if work_list_of_shift[i] not in list_of_shifts: list_of_shifts.append(work_list_of_shift[i]) for i in range(len(list_of_shifts)): data_logfile.append(" " + list_of_shifts[i] + "\n") data_logfile.append("\n") # reports of delta adps data_logfile.append(" Atoms with change in ADP by more than " + str(delta_adp) + " A^2:\n\n") for i in range(len(atomSiteBIsoOrEquiv)): if abs(delta_adp_values[i]) >= delta_adp: delta_adp_work.append(atomSiteAuthAsymId[i] + "/" + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i]) # removal of duplicates for i in range(len(delta_adp_work)): if delta_adp_work[i] not in delta_adp_list: delta_adp_list.append(delta_adp_work[i]) for i in range(len(delta_adp_list)): data_logfile.append(" " + delta_adp_list[i] + "\n") data_logfile.append("\n") if args.verbose == 3: data_logfile.append(" Atoms shifted by more than " + str(delta_coord) + " A:\n\n") for i in range(len(shift_of_atoms)): if shift_of_atoms[i] >= delta_coord: data_logfile.append(atomSiteAuthAsymId[i] + "/" + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i] + "/" + atomSiteLabelAtomId[i] + "\n") data_logfile.append("\n") data_logfile.append(" Atoms with change in ADP by more than " + str(delta_adp) + " A^2:\n\n") for i in range(len(delta_adp_values)): if delta_adp_values[i] >= delta_adp: data_logfile.append(atomSiteAuthAsymId[i] + "/" + atomSiteLabelCompId[i] + "/" + atomSiteAuthSeqId[i] + "/" + atomSiteLabelAtomId[i] + "\n") # __ __ _ ___ _ _ ____ ____ ___ ____ ____ _ __ __ # | \/ | / \ |_ _| \ | | | _ \| _ \ / _ \ / ___| _ \ / \ | \/ | # | |\/| | / _ \ | || \| | | |_) | |_) | | | | | _| |_) | / _ \ | |\/| | # | | | |/ ___ \ | || |\ | | __/| _ <| |_| | |_| | _ < / ___ \| | | | # |_| |_/_/ \_\___|_| \_| |_| |_| \_\\___/ \____|_| \_\/_/ \_\_| |_| if __name__ == "__main__": three_letter_protein = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLU', 'GLN', 'GLY', 'HIS', "ILE", 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL'] three_letter_na = ['DA', 'DG', 'DC', 'DT', 'A', 'C', 'G', 'U'] one_letter_protein = ['A', 'R', 'N', 'D', 'C', 'E', 'Q', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'] # ______ __ _ _ __ # /_ __/__ ___ / /_(_)__ ___ _ (_)__ ___ __ __/ /_ # / / / -_|_-chain " + list_of_chains[i] + ":\n") for i in range(len(current_sequence)): data_logfile.append(current_sequence[i]) data_logfile.append("\n\n") current_sequence = [] # _ ____ ___ _____ ____ # _ __ ___ __ _ __| |/ ___|_ _| ___|___ \ # | '__/ _ \/ _` |/ _` | | | || |_ __) | # | | | __/ (_| | (_| | |___ | || _| / __/ # |_| \___|\__,_|\__,_|\____|___|_| |_____| if args.cif2: with open(cif2, "r") as file: lines2 = file.readlines() # initiation of lists counter2 = -1 atomSiteId2 = [] atomSiteLabelAtomId2 = [] atomSiteLabelAltId2 = [] atomSiteLabelCompId2 = [] atomSiteAuthAsymId2 = [] atomSiteAuthSeqId2 = [] atomSiteCartnX2 = [] atomSiteCartnY2 = [] atomSiteCartnZ2 = [] atomSiteOccupancy2 = [] atomSiteBIsoOrEquiv2 = [] # analysis of positions within the CIF file counter = -1 for i in range(len(lines2)): line2 = lines2[i].split() if lines2[i][0:4] == "loop": counter = -1 if "atom_site.id" in lines2[i] and len(line2) == 1: positionSiteId2 = counter if "atom_site.label_atom_id" in lines2[i] and len(line2) ==1: positionSiteLabelAtomId2 = counter if "atom_site.label_alt_id" in lines2[i] and len(line2) ==1: positionSiteLabelAltId2 = counter if "atom_site.label_comp_id" in lines2[i] and len(line2) ==1: positionSiteLabelCompId2 = counter if "atom_site.auth_asym_id" in lines2[i] and len(line2) ==1: positionSiteAuthAsymId2 = counter if "atom_site.auth_seq_id" in lines2[i] and len(line2) ==1: positionSiteAuthSeqId2 = counter if "atom_site.Cartn_x" in lines2[i] and len(line2) ==1: positionSiteCartnX2 = counter if "atom_site.Cartn_y" in lines2[i] and len(line2) ==1: positionSiteCartnY2 = counter if "atom_site.Cartn_z" in lines2[i] and len(line2) ==1: positionSiteCartnZ2 = counter if "atom_site.occupancy" in lines2[i] and len(line2) ==1: positionSiteOccupancy2 = counter if "atom_site.B_iso_or_equiv" in lines2[i] and len(line2) ==1: positionSiteBIsoOrEquiv2 = counter counter = counter + 1 # Filling of lists with values for file2 for i in range(len(lines2)): # looking for loop if "loop" in lines2[i] or lines2[i] == []: scan = False # looking for loop and X coordinates if "atom_site.Cartn_x" in lines2[i]: scan = True line2 = lines2[i].split() if len(line2) > positionSiteBIsoOrEquiv2 and scan: atomSiteId2.append(line2[positionSiteId2]) atomSiteLabelAtomId2.append(line2[positionSiteLabelAtomId2]) atomSiteLabelAltId2.append(line2[positionSiteLabelAltId2]) atomSiteLabelCompId2.append(line2[positionSiteLabelCompId2]) atomSiteAuthAsymId2.append(line2[positionSiteAuthAsymId2]) atomSiteAuthSeqId2.append(line2[positionSiteAuthSeqId2]) atomSiteCartnX2.append(float(line2[positionSiteCartnX2])) atomSiteCartnY2.append(float(line2[positionSiteCartnY2])) atomSiteCartnZ2.append(float(line2[positionSiteCartnZ2])) atomSiteOccupancy2.append(float(line2[positionSiteOccupancy2])) atomSiteBIsoOrEquiv2.append(float(line2[positionSiteBIsoOrEquiv2])) # MAIN ANALYSIS compare_files(cif1, cif2) # _ ____ ____ ____ ____ # _ __ ___ __ _ __| | | _ \| _ \| __ ) |___ \ # | '__/ _ \/ _` |/ _` | | |_) | | | | _ \ __) | # | | | __/ (_| | (_| | | __/| |_| | |_) | / __/ # |_| \___|\__,_|\__,_| |_| |____/|____/ |_____| if args.pdb2: with open(pdb2, "r") as file: lines2 = file.readlines() # initiation of lists atomSiteId2 = [] atomSiteLabelAtomId2 = [] atomSiteLabelAltId2 = [] atomSiteLabelCompId2 = [] atomSiteAuthAsymId2 = [] atomSiteAuthSeqId2 = [] atomSiteCartnX2 = [] atomSiteCartnY2 = [] atomSiteCartnZ2 = [] atomSiteOccupancy2 = [] atomSiteBIsoOrEquiv2 = [] for i in range(len(lines2)): if lines2[i][0:4] == "ATOM" or lines2[i][0:6] == "HETATM": atomSiteId2.append(lines2[i][6:11]) atomSiteLabelAtomId2.append(lines2[i][13:16]) atomSiteLabelAltId2.append(lines2[i][16:17]) atomSiteLabelCompId2.append(lines2[i][17:20]) atomSiteAuthAsymId2.append(lines2[i][21:22]) if lines2[i][26:27] != " ": atomSiteAuthSeqId2.append(lines2[i][22:26].strip() + "." + lines2[i][26:27]) else: atomSiteAuthSeqId2.append(lines2[i][22:26].strip()) atomSiteCartnX2.append(float(lines2[i][30:38])) atomSiteCartnY2.append(float(lines2[i][38:46])) atomSiteCartnZ2.append(float(lines2[i][46:54])) atomSiteOccupancy2.append(float(lines2[i][54:60])) atomSiteBIsoOrEquiv2.append(float(lines2[i][60:66])) # MAIN ANALYSIS compare_files(pdb1, pdb2) # _ __ _ __ _____ __ # _ ______(_) /_(_)__ ___ _/ / ___ ___ _/ __(_) /__ # | |/|/ / __/ / __/ / _ \/ _ `/ /__/ _ \/ _ `/ _// / / -_) # |__,__/_/ /_/\__/_/_//_/\_, /____/\___/\_, /_/ /_/_/\__/ # /___/ /___/ # writing the log file with open(logfile, "w") as file: file.writelines(data_logfile) # printing the log file f = open(logfile, 'r') content = f.read() print(content) f.close()