#!/bin/bash # Automated ShelxC/D/E for experimental phasing with unknown space group # Author: Petr Kolenko # Mail: petr.kolenko@fjfi.cvut.cz # Web: http://kmlinux.fjfi.cvut.cz/~kolenpe1/baSHELiXir # baSHELiXir depends on: Shelx C/D/E, gnuplot, bash # Please cite: I. Uson & G.M. Sheldrick (2018), "An introduction to experimental phasing of macromolecules illustrated by SHELX; new autotracing features" Acta Cryst. D74, 106-116 (Open Access) if SHELXE proves useful. # Data for testing of the script were taken from tutorial and talks of Andrea Thorn: # http://shelx.uni-ac.gwdg.de/~athorn # http://shelx.uni-ac.gwdg.de/~athorn/data/shelxcde_tutorial.zip # Further data were taken from PHENIX tutorial for experimental phasing: # https://www.phenix-online.org/ screen=N wavelength=0.98 solmin=20 solmax=80 solnow=20 solstep=5 act=1 nproc=4 PARAMS=$# NUM=$# if [ $NUM = 0 ]; then echo " +-+-+-+-+-+-+-+-+-+-+-+-+----+ |b|a|S|H|E|L|i|X|i|r| |v|0.13| +-+-+-+-+-+-+-+-+-+-+-+-+----+ " echo "Usage: $0 [params...]" echo "Params:" #Definition of input files: echo -e "\t-prefix foo \t\t\t Set prefix as foo" echo -e "\t-sad foo.hkl \t\t\t Sets foo.hkl as a SAD dataset" echo -e "\t-nat foo.hkl \t\t\t Sets foo.hkl as a native dataset" echo -e "\t-peak foo.hkl \t\t\t Sets foo.hkl as a peak dataset" echo -e "\t-infl foo.hkl \t\t\t Sets foo.hkl as an inflection dataset" echo -e "\t-hrem foo.hkl \t\t\t Sets foo.hkl as a high remote dataset" echo -e "\t-lrem foo.hkl \t\t\t Sets foo.hkl as a low remote dataset" echo -e "\t-before foo.hkl \t\t Sets 'native' dataset for RIP phasing" echo -e "\t-after foo.hkl \t\t\t Sets dataset collected after the radiation damage" echo -e "\t-sira foo.hkl \t\t\t Sets SIRAS dataset" echo -e "\t-nproc N \t\t\t Sets N parallel processes in ShelxE routine" #Definition of ShelxC parameters echo -e "\t-sfac X \t\t\t Sets heavy element" echo -e "\t-find N \t\t\t Sets number of searched atoms" echo -e "\t-ntry N \t\t\t Sets number of trials to search for substructure" echo -e "\t--cell [a] [b] [c] ... \t Sets unit cell parameters" echo -e "\t ... [alpha] [beta] [gamma] \t\t ..." echo -e "\t-wavelength X \t\t\t Sets wavelength in Angstroms" echo -e "\t-res X Y \t\t\t Sets the low and the high resolution for phasing" echo -e "\t-dsul N \t\t\t Sets number disufides at lower resolution" echo -e '\t-mind "X Y" \t\t\t Sets the parameters for MIND' echo -e "\t-dsca X \t\t\t Sets the DSCA parameter for RIP or RIPAS" echo -e "\t-bravais xX \t\t\t Sets the Bravais-type (e.g. tP)" echo -e '\t-list "SG1 SG2 ..." \t\t Sets list of space groups to be examined' echo -e '\t-Epar "par1 par2 ..." \t\t Sets the ShelxE parameters (DO NOT FORGET ""!)' echo -e "\t-solv x X s \t\t\t Sets the minimal, maximal and with fraction steps" echo -e "\t\t\t\t\t\t(e.g. -solv 27 81 2)" echo -e "\t-solv_auto \t\t\t Screens the sol. cont. from 20% to 80% with 5% step width" echo -e "\t------------------------------------------------------------------------------------------" echo -e "\t\t Most frequent ShelxE parameters:" echo -e "\t\t-h \t\t heavy atoms are included in the native structure" echo -e "\t\t-hN \t\t when the nummber N of heavy atoms is known" echo -e "\t\t-aN \t\t runs N cycles of auto-tracing" echo -e "\t\t-q \t\t explicit search for helices" echo -e "\t\t-s[x] \t\t sets fraction of solvent content (e.g. -s0.45)" echo -e "\t\t-e \t\t runs free lunch algorithm (e.g. -e1.2)" echo -e "\t\t-z \t\t optimizes the heavy atom substructure before the density modif." echo -e "################################################################################################" echo "Bravais-types (according to the XDS notation):" echo "aP: P1" echo "mP: P2, P21" echo "mC: C2" echo "oP: P222, P2221, P21212, P212121" echo "oC: C222, C2221" echo "oF: F222" echo "oI: I222, I212121" echo "tP: P4, P41, P42, P43, P422, P4212, P4122, P41212, P4222, P42212, P4322, P43212" echo "tI: I4, I41, I422, I4122" echo "hP: P3, P31, P32, P312, P321, P3112, P3121, P3212, P3221," echo -e " P6, P61, P65, P62, P64, P63, P622, P6122, P6522, P6222, P6422, P6322" echo "hR: R3, R32" echo "cP: P23, P213, P432, P4232, P4332, P4132" echo "cF: F23, F432, F4132" echo "cI: I23, I213, I432, I4132" echo -e "################################################################################################" exit -1 fi echo "Command: baSHELiXir $@" full_command=$@ while (($NUM > 0)); do case $1 in -prefix) prefix=$2 shift 1 ;; -sad) sad=`realpath $2` shift 1 ;; -nat) nat=`realpath $2` shift 1 ;; -peak) peak=`realpath $2` shift 1 ;; -infl) infl=`realpath $2` shift 1 ;; -hrem) hrem=`realpath $2` shift 1 ;; -lrem) lrem=`realpath $2` shift 1 ;; -before) before=`realpath $2` shift 1 ;; -after) after=`realpath $2` shift 1 ;; -sira) sira=`realpath $2` shift 1 ;; -sfac) sfac=$2 shift 1 ;; -find) find=$2 shift 1 ;; -ntry) ntry=$2 shift 1 ;; --cell) cell_manual="$2 $3 $4 $5 $6 $7" shift 6 ;; -wavelength) wavelength_manual=$2 shift 1 ;; -res) resM=$2 resm=$3 shift 2 ;; -dsul) dsul=$2 shift 1 ;; -mind) mind=$2 shift 1 ;; -dsca) dsca=$2 shift 1 ;; -bravais) bravais_type=$2 shift 1 ;; -list) list=$2 shift 1 ;; -Epar) Epar=$2 shift 1 ;; -solv_auto) screen=Y ;; -solv) solmin=$2 solmax=$3 solstep=$4 screen=Y shift 3 ;; -nproc) nproc=$2 shift 1 ;; *) if [ -z "$1" ]; then : #echo "All parameters seem to be set." else echo "Parametr $(($PARAMS-$NUM+1)) unknown: $1" fi # exit ;; esac NUM=$(($NUM-1)); shift 1 done echo " +-+-+-+-+-+-+-+-+-+-+-+-+----+ |b|a|S|H|E|L|i|X|i|r| |v|0.13| +-+-+-+-+-+-+-+-+-+-+-+-+----+ " path_to_executable=$(which shelxc) if [ -x "$path_to_executable" ] ; then : else echo "########################################" echo "Binaries for Shelx are not in your path settings." exit fi if [ -d "results_$prefix" ]; then echo "Directory results_$prefix already exists. Change the prefix or delete the old files." exit else : fi #Reading out the unit cell parameters #for SAD if [ -z $sad ]; then : else if [[ $sad == *.sca ]]; then cell_all=`awk 'NR==3 {print $1 " " $2 " " $3 " " $4 " " $5 " " $6}' $sad` else cell_all=`grep UNIT_CELL_CONSTANTS $sad | awk '{print $2 " " $3 " " $4 " " $5 " " $6 " " $7}'` wavelength=`grep X-RAY_WAVELENGTH $sad | awk '{print $2}'` fi fi #for MAD if [ -z $peak ]; then : else if [[ $peak == *.sca ]]; then cell_all=`awk 'NR==3 {print $1 " " $2 " " $3 " " $4 " " $5 " " $6}' $peak` wavelength=$wavelength_manual else cell_all=`grep UNIT_CELL_CONSTANTS $peak | awk '{print $2 " " $3 " " $4 " " $5 " " $6 " " $7}'` wavelength=`grep X-RAY_WAVELENGTH $peak | awk '{print $2}'` fi fi #for RIP if [ -z $before ]; then : else if [[ $before == *.sca ]]; then cell_all=`awk 'NR==3 {print $1 " " $2 " " $3 " " $4 " " $5 " " $6}' $before` wavelength=$wavelength_manual else cell_all=`grep UNIT_CELL_CONSTANTS $before | awk '{print $2 " " $3 " " $4 " " $5 " " $6 " " $7}'` wavelength=`grep X-RAY_WAVELENGTH $before | awk '{print $2}'` fi fi #for SIRAS if [ -z $sira ]; then : else if [[ $sira == *.sca ]]; then cell_all=`awk 'NR==3 {print $1 " " $2 " " $3 " " $4 " " $5 " " $6}' $sira` wavelength=$wavelength_manual else cell_all=`grep UNIT_CELL_CONSTANTS $sira | awk '{print $2 " " $3 " " $4 " " $5 " " $6 " " $7}'` wavelength=`grep X-RAY_WAVELENGTH $sira | awk '{print $2}'` fi fi #for manual cell input if [[ -z $cell_manual ]]; then : else cell_all=$cell_manual wavelength=$wavelength_manual fi if [[ -z $wavelength_manual ]]; then : else wavelength=$wavelength_manual fi if [ -z $wavelength ]; then echo 'The wavelength is not set. The script needs your input, use parameter "-wavelength".' exit else : fi #Definition of space groups to be tested: if [ "$bravais_type" == "aP" ]; then list="P1" elif [ "$bravais_type" == "mP" ]; then list="P2 P21" elif [ "$bravais_type" == "mC" ]; then list="C2" elif [ "$bravais_type" == "oP" ]; then list="P222 P2221 P2212 P2122 P21212 P21221 P22121 P212121" elif [ "$bravais_type" == "oC" ]; then list="C222 C2221" elif [ "$bravais_type" == "oF" ]; then list="F222" elif [ "$bravais_type" == "oI" ]; then list="I222 I212121" elif [ "$bravais_type" == "tP" ]; then #list="P43212" list="P4 P41 P42 P43 P422 P4212 P4122 P41212 P4222 P42212 P4322 P43212" elif [ "$bravais_type" == "tI" ]; then list="I4 I41 I422 I4122" elif [ "$bravais_type" == "hP" ]; then list="P3 P31 P32 P312 P321 P3112 P3121 P3212 P3221 P6 P61 P65 P62 P64 P63 P622 P6122 P6522 P6222 P6422 P6322" elif [ "$bravais_type" == "hR" ]; then list="R3 R32" elif [ "$bravais_type" == "cP" ]; then list="P23 P213 P432 P4232 P4332 P4132" elif [ "$bravais_type" == "cF" ]; then list="F23 F432 F4132" elif [ "$bravais_type" == "cI" ]; then list="I23 I213 I432 I4132" #else # echo "Enter the list of space groups (not separated by commas):" # read list fi if [ $screen == "Y" ]; then if [[ $Epar == *"-s"* ]]; then echo "Please remove parameter -s from ShelxE parameters and run the script again." echo "Your current ShelxE parameters are: $Epar" exit fi else : fi echo "Running ..." mkdir results_`echo $prefix` cd results_`echo $prefix` echo '' >> results_`echo $prefix`.html echo '
' >> results_`echo $prefix`.html echo '" >> results_`echo $prefix`.html
echo "Input file: $sad $nat $peak $infl $hrem $lrem $before $after $sira
" >> results_`echo $prefix`.html
echo "Full list of parameters: baSHELiXir $full_command
" >> results_`echo $prefix`.html
echo "Bravais type: $bravais_type
" >> results_`echo $prefix`.html
echo "Unit cell parameters: $cell_all
" >> results_`echo $prefix`.html
echo "Wavelength: $wavelength
" >> results_`echo $prefix`.html
echo "Atoms to be found: $find $sfac
" >> results_`echo $prefix`.html
echo "Number of trials: $ntry
" >> results_`echo $prefix`.html
echo "ShelxE parameters: \" $Epar \"
" >> results_`echo $prefix`.html
echo "Space groups to be tested: $list
" >> results_`echo $prefix`.html
if [ $screen == "Y" ];
then
echo "Screening solvent: min $solmin, max $solmax, steps $solstep" >> results_`echo $prefix`.html
else
:
fi
echo "
" >> results_`echo $prefix`.html for space_group in $list do echo '- '$space_group' -' >> results_`echo $prefix`.html done echo "
" >> results_`echo $prefix`.html #-------------------------------------------------------------------------- #Definition of FOR cycle for space_group in $list do echo "Running in space group $space_group ..." mkdir $space_group echo "CELL $cell_all " > $space_group/shelxc.inp echo "SPAG $space_group" >> $space_group/shelxc.inp #Copy input files and definition in the space group directory if [ -z $sad ]; then : else #cp ../$sad $space_group echo "SAD $sad" >> $space_group/shelxc.inp fi if [ -z $nat ]; then : else #cp ../$nat $space_group echo "NAT $nat" >> $space_group/shelxc.inp fi if [ -z $peak ]; then : else #cp ../$peak $space_group echo "PEAK $peak" >> $space_group/shelxc.inp fi if [ -z $infl ]; then : else #cp ../$infl $space_group echo "INFL $infl" >> $space_group/shelxc.inp fi if [ -z $hrem ]; then : else #cp ../$hrem $space_group echo "HREM $hrem" >> $space_group/shelxc.inp fi if [ -z $lrem ]; then : else #cp ../$lrem $space_group echo "LREM $lrem" >> $space_group/shelxc.inp fi if [ -z $before ]; then : else #cp ../$before $space_group echo "BEFORE $before" >> $space_group/shelxc.inp fi if [ -z $after ]; then : else #cp ../$after $space_group echo "AFTER $after" >> $space_group/shelxc.inp fi echo "FIND $find" >> $space_group/shelxc.inp echo "NTRY $ntry" >> $space_group/shelxc.inp echo "SFAC $sfac" >> $space_group/shelxc.inp if [ -z $mind ]; then : else echo "MIND $mind" >> $space_group/shelxc.inp fi #Running command ShelxC cd $space_group echo "Running ShelxC in $space_group" shelxc $prefix < shelxc.inp > `echo $prefix`_shelxc.log sed -i "2s/.*/CELL $wavelength $cell_all/" `echo $prefix`_fa.ins if [ -z $dsul ]; then : else sed -n -i "p;13a DSUL $dsul" `echo $prefix`_fa.ins #puts DSUL to the shelxd input file fi #Export for ShelC graphs echo "set xrange [] reverse" > gnuplot-shelxc.script echo "set yrange [0:]" >> gnuplot-shelxc.script echo "set title '" >> results_`echo $prefix`.html echo "
" >> results_`echo $prefix`.html #grep -B 3 -A 8 Resl. $space_group/`echo $prefix`_shelxc.log >> results_`echo $prefix`.html #echo "" >> results_`echo $prefix`.html echo "ShelxC input file - " >> results_`echo $prefix`.html echo "ShelxC logfile
" >> results_`echo $prefix`.html
echo "ShelxD input file - " >> results_`echo $prefix`.html
echo "ShelxD logfile
" >> results_`echo $prefix`.html
echo "
" >> results_`echo $prefix`.html
echo "Graph PATFOM vs. CCall
" >> results_`echo $prefix`.html
echo "
" >> results_`echo $prefix`.html
echo "
" >> results_`echo $prefix`.html
echo "
" >> results_`echo $prefix`.html
echo "
" >> results_`echo $prefix`.html
echo "
" >> results_`echo $prefix`.html
echo "shelxe-orig logfile - " >> results_`echo $prefix`.html
echo "shelxe-inv logfile
" >> results_`echo $prefix`.html
echo "
" >> results_`echo $prefix`.html
echo "Graph connectivity vs. cycle
" >> results_`echo $prefix`.html
cd $space_group
if [ -f `echo $prefix`.pdb ];
then
cp `echo $prefix`.pdb original.pdb
else
touch original.pdb
fi
if [ -f `echo $prefix`_i.pdb ];
then
cp `echo $prefix`_i.pdb inverted.pdb
else
touch inverted.pdb
fi
echo "original `grep -c "AT" original.pdb`, inverted `grep -c "AT" inverted.pdb` .
" >> textik
cd ../
echo "Number of atoms built: " >> results_`echo $prefix`.html
cat $space_group/textik >> results_`echo $prefix`.html
rm $space_group/textik
echo "
Please cite: I. Uson & G.M. Sheldrick (2018), "An introduction to experimental phasing of macromolecules illustrated by SHELX; new autotracing features" Acta Cryst. D74, 106-116 (Open Access) if SHELXE proves useful.
' >> results_`echo $prefix`.html echo ' ' >> results_`echo $prefix`.html