Commit d53620ff authored by bliven_s's avatar bliven_s
Browse files

Alphafold/2.1.1 with multimer support!

Includes syntax changes. See https://intranet.psi.ch/en/bio/alphafold
parent fa0823ac
......@@ -23,7 +23,7 @@ conda install -y -c bioconda hmmer==3.3.2 hhsuite==3.3.0 kalign2==2.04
pip install absl-py==0.13.0 biopython==1.79 chex==0.0.7 dm-haiku==0.0.4 \
dm-tree==0.1.6 immutabledict==2.0.0 jax==0.2.14 ml-collections==0.1.0 \
numpy==1.19.5 scipy==1.7.0 tensorflow==2.5.0
numpy==1.19.5 scipy==1.7.0 tensorflow==2.5.0 pandas==1.3.4
pip install --upgrade jax jaxlib==0.1.69+cuda111 \
-f https://storage.googleapis.com/jax-releases/jax_releases.html
```
......@@ -54,7 +54,7 @@ GWSTELEKHREELKEFLKKEGITNVEIRIDNGRLEVRVEGGTERLKRFLEELRQKLEKKGYTVDIKIE
EOF
module use MX unstable
module load alphafold/2.0.1
sbatch $ALPHAFOLD_DIR/bin/submit_merlin.sh query.fasta
module load alphafold/2.1.1
sbatch alphafold_merlin.sh query.fasta
```
#!/bin/bash
#SBATCH -p gpu
#SBATCH -J alphafold
#SBATCH -M gmerlin6
#SBATCH --gpus=1
#SBATCH -n 1
#SBATCH -c 10
# Alphafold submission script for the merlin cluster
# Usage: sbatch [slurm_opts] alphafold_merlin.sh [options] fasta_file
#
# All arguments after the script name are forwarded unchanged to
# alphafold_runner.py (see the final line of this script).
#
# OPTIONS
# All alphafold options are set automatically, but can be overridden.
# Some common options:
#
# --max_template_date=YYYY-MM-DD (default: today)
# --output_dir (default: current directory)
# --helpfull List all options
#
# 2021-12-22 Spencer Bliven, D.Ozerov
#
# Database location; alphafold_runner.py falls back to this variable when
# --data_dir is not given on the command line.
export ALPHAFOLD_DATA=/data/project/bio/shared/alphafold/versions/latest
# Start from a clean module environment, then load the alphafold module.
# The lines below rely on the module to define ALPHAFOLD_ENV and ALPHAFOLD_DIR.
module purge
module use MX unstable
module load alphafold/2.1.1
# ${VAR:?msg} aborts the script with msg if the variable is unset or empty.
conda activate "${ALPHAFOLD_ENV:?"Error: ALPHAFOLD_ENV not set. Try 'module use MX unstable; module load alphafold'"}"
# Record the execution environment in the job log for debugging.
echo "hostname=$(hostname)"
echo "python=$(which python)"
echo "ALPHAFOLD_DATA=$(realpath "$ALPHAFOLD_DATA")"
# -v 0 is presumably the absl verbosity level (TODO confirm); "$@" passes
# every user-supplied option and the fasta file through to the wrapper.
python "${ALPHAFOLD_DIR:?Error loading module}/bin/alphafold_runner.py" -v 0 "$@"
#!/bin/bash
#SBATCH -p gpu-week
#SBATCH -t 2-00:00:00
#SBATCH -J alphafold
#SBATCH --gres=gpu:1
#SBATCH -n 1
#SBATCH -c 10
# Alphafold submission script for the merlin cluster
# NOTE(review): the partition, the /das/work data path and the pmodules
# bootstrap below match the "ra cluster" script rather than merlin; the
# header and usage line may be copy-paste leftovers -- confirm.
# Usage: sbatch [slurm_opts] alphafold_merlin.sh [options] fasta_file
#
# All arguments after the script name are forwarded unchanged to
# alphafold_runner.py (see the final line of this script).
#
# OPTIONS
# All alphafold options are set automatically, but can be overridden.
# Some common options:
#
# --max_template_date=YYYY-MM-DD (default: today)
# --output_dir (default: current directory)
# --helpfull List all options
#
# 2021-12-22 Spencer Bliven, D.Ozerov
#
# Database location; alphafold_runner.py falls back to this variable when
# --data_dir is not given on the command line.
export ALPHAFOLD_DATA=/das/work/common/opt/alphafold/data_2.1.1/versions/latest
# Need at least rc6 to see alphafold
# PMODULES_VERSION must be set before profile.bash is sourced so that the
# requested pmodules release is the one that gets initialised.
PMODULES_VERSION=1.0.0rc10;
source /opt/psi/config/profile.bash;
# Log the pmodules version actually in use.
module --version
# Start from a clean module environment, then load the alphafold module.
# The lines below rely on the module to define ALPHAFOLD_ENV and ALPHAFOLD_DIR.
module purge
module use MX unstable Programming
module load alphafold/2.1.1
# ${VAR:?msg} aborts the script with msg if the variable is unset or empty.
conda activate "${ALPHAFOLD_ENV:?"Error: ALPHAFOLD_ENV not set. Try 'module use MX unstable; module load alphafold'"}"
# Record the execution environment in the job log for debugging.
echo "hostname=$(hostname)"
echo "python=$(which python)"
echo "ALPHAFOLD_DATA=$(realpath "$ALPHAFOLD_DATA")"
# -v 0 is presumably the absl verbosity level (TODO confirm); "$@" passes
# every user-supplied option and the fasta file through to the wrapper.
python "${ALPHAFOLD_DIR:?Error loading module}/bin/alphafold_runner.py" -v 0 "$@"
#!/usr/bin/env python
"""
Wrapper script for Alphafold 2, with automatic setting of common options
usage: python alphafold_runner.py [alphafold options] input.fa
"""
import sys
import os
import importlib
import subprocess
import logging
import argparse
from datetime import date
from pathlib import Path
from typing import Union
from absl import app
from absl.flags import FLAGS
from absl import logging
def import_alphafold():
    """Import alphafold's ``run_alphafold`` module and return it.

    The directory containing ``run_alphafold.py`` is taken from the
    ``ALPHAFOLD_HOME`` environment variable. When that is unset it defaults
    to the ``alphafold`` directory next to this script's parent directory
    (``<script dir>/../alphafold``). The directory is appended to
    ``sys.path`` before importing.

    Returns:
        The imported ``run_alphafold`` module.

    Raises:
        SystemExit: with status 1 if the module cannot be imported; the
            import error and the search path are written to stderr first.
    """
    # Path.resolve() takes a `strict` flag, not a path to join, so the
    # relative component must be joined *before* resolving.
    default_home = (Path(__file__).parent / ".." / "alphafold").resolve()
    home = os.environ.get('ALPHAFOLD_HOME', str(default_home))
    sys.path.append(home)
    try:
        return importlib.import_module("run_alphafold")
    except ImportError as err:
        sys.stderr.write("Unable to find run_alphafold.py\n")
        # The failure may come from one of run_alphafold's own imports,
        # so report the underlying error as well.
        sys.stderr.write(f"error: {err}\n")
        sys.stderr.write(f"path:{', '.join(str(p) for p in sys.path)}\n")
        sys.exit(1)
# Load alphafold's run_alphafold module at import time; main() delegates to
# af.main(). Presumably this import also registers the absl flags that main()
# reads through FLAGS -- confirm against run_alphafold.py.
af = import_alphafold()
def multi_fasta(fasta_path):
    """Tell whether a fasta file holds more than one entry.

    An entry is counted for every line whose first character is ``>``.
    Reading stops as soon as the second such line has been seen.

    Args:
        fasta_path: path of the fasta file to inspect.

    Returns:
        True if at least two header lines are present, otherwise False.
    """
    with open(fasta_path, 'r') as handle:
        # Lazy stream yielding one item per header line.
        headers = (True for record in handle if record.startswith('>'))
        # Consume the first header (if any), then probe for a second one.
        next(headers, None)
        return next(headers, False)
def guess_model_preset(fasta_paths):
    """Choose a model preset from the number of sequences in the inputs.

    Args:
        fasta_paths: iterable of fasta file paths.

    Returns:
        "multimer" if any file contains more than one entry (as judged by
        multi_fasta), otherwise "monomer". The decision is logged at info
        level.
    """
    for path in fasta_paths:
        # The first multi-entry file decides; later files are not opened.
        if multi_fasta(path):
            logging.info("Input appears to be multimer")
            return "multimer"
    logging.info("Input appears to be monomer")
    return "monomer"
def main(argv):
    """Fill in default option values, then hand over to alphafold's main.

    Defaults applied when the user did not supply a value:
    - the fasta file may be given as a single positional argument instead of
      ``--fasta_paths`` (giving both is an error)
    - the data directory comes from ``--data_dir`` or, failing that, from the
      ALPHAFOLD_DATA environment variable
    - ``--model_preset`` is guessed (monomer/multimer) from the fasta files
    - database paths are derived from the data directory, assuming the
      versioned layout, which differs slightly from alphafold's default
    - ``--output_dir`` defaults to the current working directory
    - ``--max_template_date`` defaults to today's date

    Args:
        argv: program name followed by at most one positional fasta path.

    Raises:
        app.UsageError: on too many positional arguments, a fasta file given
            twice or not at all, or a missing data directory.
    """
    positional = argv[1:]
    if len(positional) > 1:
        raise app.UsageError('Too many command-line arguments.')

    # Accept positional fasta_paths
    if positional:
        if FLAGS["fasta_paths"].present:
            raise app.UsageError("Both the --fasta_paths option and a fasta file argument were given")
        FLAGS["fasta_paths"].parse(positional[0])
    elif not FLAGS.fasta_paths:
        raise app.UsageError("No fasta file specified")

    # Database flags
    if FLAGS["data_dir"].present:
        data_dir = FLAGS.data_dir
    elif "ALPHAFOLD_DATA" not in os.environ:
        raise app.UsageError("Specify --data_dir or set ALPHAFOLD_DATA")
    else:
        data_dir = os.environ["ALPHAFOLD_DATA"]
        logging.info(f"Using ALPHAFOLD_DATA={data_dir}")
        FLAGS['data_dir'].value = data_dir

    def fill_default(flag_name, *relative_parts):
        # Leave any value the user supplied untouched; otherwise point the
        # flag at the given location below the data directory.
        if not getattr(FLAGS, flag_name):
            setattr(FLAGS, flag_name, os.path.join(data_dir, *relative_parts))

    if not FLAGS["model_preset"].present:
        FLAGS.model_preset = guess_model_preset(FLAGS.fasta_paths)

    # Exactly one of the BFD flavours is defaulted, chosen by db_preset.
    if FLAGS.db_preset == 'reduced_dbs':
        fill_default("small_bfd_database_path",
                     "small_bfd", "bfd-first_non_concensus_sequences.fasta")
    else:
        fill_default("bfd_database_path",
                     "bfd", "bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt")
        fill_default("uniclust30_database_path",
                     "uniclust30", "uniclust30_2018_08")

    # Multimer presets get pdb_seqres + uniprot defaults; others get pdb70.
    if 'multimer' in FLAGS.model_preset:
        fill_default("pdb_seqres_database_path", "pdb", "pdb_seqres.txt")
        fill_default("uniprot_database_path", "uniprot", "uniprot.fasta")
    else:
        fill_default("pdb70_database_path", "pdb70", "pdb70")

    # Databases defaulted regardless of preset.
    fill_default("mgnify_database_path", "mgnify", "mgy_clusters_2018_12.fa")
    fill_default("obsolete_pdbs_path", "pdb", "obsolete.dat")
    fill_default("template_mmcif_dir", "pdb", "mmcif_files")
    fill_default("uniref90_database_path", "uniprot", "uniref90.fasta")

    if not FLAGS.output_dir:
        FLAGS.output_dir = os.getcwd()
    if not FLAGS.max_template_date:
        FLAGS["max_template_date"].parse(date.today().isoformat())

    # Only the program name is passed on; the positional fasta file has
    # already been folded into the fasta_paths flag above.
    af.main(argv[0:1])
# Script entry point: app.run is given main as the callable to execute, and
# main receives the argument list as argv.
if __name__ == "__main__":
    app.run(main)
#!/bin/bash
#SBATCH -p gpu
#SBATCH -J alphafold
#SBATCH -M gmerlin6
#SBATCH --gpus=1
#SBATCH -n 1
#SBATCH -c 10
# Alphafold submission script for the merlin cluster
# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_merlin.sh fasta_file [max_template_date]
#
# Output will be in the same directory as the fasta_file.
# Slurm logs will be in the current directory.
#
# 2021-08-09 Spencer Bliven, D.Ozerov
#
# Database location, exported so that the exec'ed submit.sh inherits it.
export ALPHAFOLD_DATA=/data/project/bio/shared/alphafold
# Start from a clean module environment, then load the alphafold module.
module purge
module use MX unstable
# ALPHAFOLD_VERSION is a literal placeholder; it appears to be replaced with
# the real version by the build recipe's sed over $PREFIX/bin/* at install
# time -- confirm before running this file unprocessed.
module load alphafold/ALPHAFOLD_VERSION
# Replace this shell with the generic submit.sh, forwarding all arguments.
# ${VAR:?msg} aborts with msg if the module did not set ALPHAFOLD_DIR.
exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@"
#!/bin/bash
#SBATCH -p day
#SBATCH -t 1-00:00:00
#SBATCH -J alphafold
#SBATCH -n 1
#SBATCH -c 10
# Alphafold submission script for the ra cluster
# Usage: sbatch [slurm_opts] $ALPHAFOLD_DIR/bin/submit_merlin.sh fasta_file [max_template_date]
# NOTE(review): the usage line names submit_merlin.sh although this is the
# ra cluster script -- confirm the intended script name.
#
# Output will be in the same directory as the fasta_file.
# Slurm logs will be in the current directory.
#
# 2021-08-09 Spencer Bliven, D.Ozerov
#
# Database location, exported so that the exec'ed submit.sh inherits it.
export ALPHAFOLD_DATA=/das/work/common/opt/alphafold/data
# Need at least rc6 to see alphafold
# PMODULES_VERSION must be set before profile.bash is sourced so that the
# requested pmodules release is the one that gets initialised.
PMODULES_VERSION=1.0.0rc10;
source /opt/psi/config/profile.bash;
# Log the pmodules version actually in use.
module --version
# Start from a clean module environment, then load the alphafold module.
module purge
module use MX unstable Programming
# ALPHAFOLD_VERSION is a literal placeholder; it appears to be replaced with
# the real version by the build recipe's sed over $PREFIX/bin/* at install
# time -- confirm before running this file unprocessed.
module load alphafold/ALPHAFOLD_VERSION
# Record the loaded modules in the job log for debugging.
module list
# Replace this shell with the generic submit.sh, forwarding all arguments.
# ${VAR:?msg} aborts with msg if the module did not set ALPHAFOLD_DIR.
exec "${ALPHAFOLD_DIR:?Error loading module}/bin/submit.sh" "$@"
......@@ -27,14 +27,12 @@ pbuild::install() {
fi
git clone --depth=1 -b "$BRANCH" https://github.com/deepmind/alphafold.git "$ALPHAFOLD_HOME" || return $?
if ! [ -f "$ALPHAFOLD_HOME/alphafold/common/stereo_chemical_props.txt" ]; then
wget -q -P "$ALPHAFOLD_HOME/alphafold/common/" \
--no-check-certificate `# wget root certs are old` \
curl -fLsS -o "$ALPHAFOLD_HOME/alphafold/common/stereo_chemical_props.txt" \
https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt
fi
wget -O "$ALPHAFOLD_HOME/run_alphafold.sh" \
https://raw.githubusercontent.com/kalininalab/alphafold_non_docker/main/run_alphafold.sh
chmod +x "$ALPHAFOLD_HOME/run_alphafold.sh"
cp -r "$BUILDBLOCK_DIR/bin" "$PREFIX/"
sed -i "s/ALPHAFOLD_VERSION/$V/g" "$PREFIX/bin/"*
......
alphafold/2.0.0-b88f8da unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3
alphafold/2.0.1 stable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3
alphafold/2.1.1 unstable anaconda/2019.07 b:gcc/10.3.0 cuda/11.0.3
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment