{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# KLJR_180920 - OCI_JQ1res - chip_seq\n",
    "This notebook creates the files, scripts and folders needed to build the track hub for the aforementioned project. The end goal is to gather the tracks of the ChIP-seq experiment and the files that are necessary for most downstream analyses."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "### Trackhub"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting /data/reddylab/Hazel/troubleshoot/processing/chip_seq/scripts/create_chipseq_tracks.CAL51.py\n"
     ]
    }
   ],
   "source": [
    "%%writefile /data/reddylab/Hazel/chipseq/KLJR_180920/processing/chip_seq/scripts/create_chipseq_tracks.OCI_JQ1res_H3K27me3.py\n",
    "#!/usr/bin/env python\n",
    "# Build the track hub definition for the OCI_JQ1res H3K27me3 ChIP-seq\n",
    "# experiment: one transparent-overlay signal track per factor (its replicates\n",
    "# are the subtracks) plus one peak track per replicate, all grouped under a\n",
    "# single supertrack. hub.txt and <genome>/trackDb.txt are placed under OUTDIR.\n",
    "from __future__ import print_function\n",
    "\n",
    "from trackhub import Hub, GenomesFile, Genome, TrackDb, Track, ViewTrack, \\\n",
    "    SuperTrack, AggregateTrack, CompositeTrack, SubGroupDefinition\n",
    "import os\n",
    "from operator import add\n",
    "from palettable.colorbrewer.sequential import Reds_6_r, Blues_6_r, Greens_6_r, Purples_6_r, Oranges_6_r, Greys_6_r\n",
    "\n",
    "# One palette per factor, indexed in the same order as `factors` below.\n",
    "colors_palettes=[Purples_6_r, Oranges_6_r, Greens_6_r]\n",
    "\n",
    "HUB_NAME = 'OCI_JQ1res'\n",
    "OUTDIR='/data/reddylab/Hazel/chipseq/KLJR_180920/processing/chip_seq/trackhub/OCI_JQ1res'\n",
    "URLBASE = 'http://trackhub.genome.duke.edu/reddylab/'\n",
    "REMOTE_DATA_DIR='/nfs/trackhub/reddylab/validations/OCI_JQ1res/'\n",
    "LOCAL_DATA_DIR='/data/reddylab/Hazel/chipseq/KLJR_180920/processing/chip_seq/OCI_JQ1res-se-narrow/'\n",
    "GENOME = 'hg38'\n",
    "\n",
    "# Public URL of the folder that serves the bigWig/bigBed files. It must live\n",
    "# under this hub's own folder (same name as in OUTDIR and REMOTE_DATA_DIR);\n",
    "# it previously pointed at the CAL51 hub this script was templated from.\n",
    "DATA_URL = os.path.join(URLBASE, 'validations', HUB_NAME, GENOME, 'data')\n",
    "\n",
    "# Signal file name, filled with (factor, replicate number).\n",
    "# NOTE(review): the 'CAL51.p300.' prefix is inherited from the CAL51 template.\n",
    "# The example file noted for this project is\n",
    "# OCI_AML2.H3K27me3.CST.rep1.masked.dedup.sorted.rpkm.bw, so confirm the real\n",
    "# naming in LOCAL_DATA_DIR and adjust this template before rendering.\n",
    "SAMPLE_TEMPLATE = 'CAL51.p300.%s.rep%d.masked.dedup.sorted.rpkm.bw'\n",
    "\n",
    "hub = Hub(\n",
    "    hub='ChIP_seq_OCI_JQ1res_H3K27me3',\n",
    "    short_label='ChIP_seq_OCI_JQ1res_H3K27me3',\n",
    "    long_label='ChIP_seq_OCI_JQ1res_H3K27me3',\n",
    "    email='xa2@duke.edu')\n",
    "\n",
    "hub.local_fn = '%s/hub.txt' % OUTDIR\n",
    "genomes_file = GenomesFile()\n",
    "genome = Genome(GENOME)\n",
    "trackdb = TrackDb()\n",
    "trackdb.local_fn = '%s/%s/trackDb.txt' % (OUTDIR, GENOME)\n",
    "\n",
    "factors = [\n",
    "    'Parental','Res'\n",
    "]\n",
    "replicates = 3\n",
    "supertrack = SuperTrack(\n",
    "    name='OCI_JQ1res_H3K27me3_tracks',\n",
    "    short_label='OCI_JQ1res_H3K27me3_tracks',\n",
    "    long_label='OCI_JQ1res_H3K27me3_tracks')\n",
    "\n",
    "for fi, factor in enumerate(factors):\n",
    "    # The zero-padded index keeps the tracks listed in `factors` order.\n",
    "    label = '%02d_%s' % (fi, factor)\n",
    "    # Every replicate of a factor shares one shade of that factor's palette.\n",
    "    color = ','.join(str(cc) for cc in colors_palettes[fi].colors[1])\n",
    "    aggregate_track = AggregateTrack(\n",
    "        name='aggregated%s' % (factor),\n",
    "        short_label=label,\n",
    "        long_label=label,\n",
    "        tracktype='bigWig',\n",
    "        showSubtrackColorOnUi='on',\n",
    "        visibility='full',\n",
    "        autoScale='on',\n",
    "        maxHeightPixels='100:32:8',\n",
    "        alwaysZero='on',\n",
    "        aggregate='transparentOverlay',\n",
    "        subgroups={'factor': factor}\n",
    "    )\n",
    "    # range() instead of xrange() so the script runs on Python 2 and 3.\n",
    "    for rep in range(1, replicates+1):\n",
    "        # Factors containing 'input' contribute only their first replicate.\n",
    "        if 'input' in factor and rep > 1:\n",
    "            break\n",
    "        sample = SAMPLE_TEMPLATE % (factor, rep)\n",
    "        sample_name = '%s_rep%d' % (label, rep)\n",
    "        tr = Track(\n",
    "            name=sample_name,\n",
    "            short_label=sample_name,\n",
    "            long_label=sample_name,\n",
    "            local_fn=os.path.join(LOCAL_DATA_DIR, sample),\n",
    "            remote_fn=os.path.join(REMOTE_DATA_DIR, sample),\n",
    "            url=os.path.join(DATA_URL, sample),\n",
    "            tracktype='bigWig',\n",
    "            color=color,\n",
    "            visibility='full',\n",
    "            maxHeightPixels='100:32:8',\n",
    "        )\n",
    "        aggregate_track.add_subtrack(tr)\n",
    "\n",
    "        # Add peaks in bigBed format; the peak file shares the signal file's stem.\n",
    "        peaks_file = sample.replace('sorted.rpkm.bw', 'sorted_peaks.trunked_scores.broadPeak.bb')\n",
    "        bigbed_peaks = Track(\n",
    "            name='%s_peaks' % sample_name,\n",
    "            short_label='%s_peaks' % sample_name,\n",
    "            long_label='%s_peaks' % sample_name,\n",
    "            url=os.path.join(DATA_URL, peaks_file),\n",
    "            tracktype='bigBed 6 .',\n",
    "            visibility='dense',\n",
    "            color='0,0,128'\n",
    "        )\n",
    "        supertrack.add_track(bigbed_peaks)\n",
    "    supertrack.add_track(aggregate_track)\n",
    "\n",
    "\n",
    "# Echo the generated definition so it ends up in the job log.\n",
    "print(supertrack)\n",
    "trackdb.add_tracks(supertrack)\n",
    "\n",
    "genome.add_trackdb(trackdb)\n",
    "genomes_file.add_genome(genome)\n",
    "hub.add_genomes_file(genomes_file)\n",
    "\n",
    "hub.render()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Submitted batch job 2630590\n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "# Submit the script written by the previous cell (not the CAL51 template it was copied from).\n",
    "source /data/reddylab/software/miniconda2/bin/activate alex\n",
    "PROCESSING_DIR=/data/reddylab/Hazel/chipseq/KLJR_180920/processing/chip_seq\n",
    "mkdir -p ${PROCESSING_DIR}/trackhub/OCI_JQ1res/hg38/data ${PROCESSING_DIR}/logs\n",
    "sbatch -o ${PROCESSING_DIR}/logs/trackhub.OCI_JQ1res_H3K27me3.out \\\n",
    "    -p new,all \\\n",
    "    --wrap=\"python ${PROCESSING_DIR}/scripts/create_chipseq_tracks.OCI_JQ1res_H3K27me3.py\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "%%bash\n",
    "# Stop at the first error so a failed cd cannot create links in the wrong folder.\n",
    "set -e\n",
    "PROCESSING_DIR=/data/reddylab/Hazel/chipseq/KLJR_180920/processing/chip_seq\n",
    "cd ${PROCESSING_DIR}/trackhub/OCI_JQ1res/hg38/data\n",
    "# -f replaces links left by a previous run, so the cell can be re-executed.\n",
    "ln -sf ${PROCESSING_DIR}/OCI_JQ1res-se-narrow/*rep*.masked.dedup.sorted.rpkm.bw ./\n",
    "ln -sf ${PROCESSING_DIR}/OCI_JQ1res-se-narrow/*rep*.trunked_scores.broadPeak.bb ./"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg38&hubUrl=http://trackhub.genome.duke.edu/reddylab/validations/OCI_JQ1res/hub.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sending incremental file list\n",
      "ChIP_seq_CAL51_p300.genomes.txt\n",
      "hub.txt\n",
      "hg38/trackDb.txt\n",
      "\n",
      "sent 2074 bytes received 107 bytes 1454.00 bytes/sec\n",
      "total size is 171562398 speedup is 78662.26\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
"Pseudo-terminal will not be allocated because stdin is not a terminal.\r\n"
     ]
    }
   ],
   "source": [
    "%%bash\n",
    "# Publish the rendered hub from the transfer node to the track hub server.\n",
    "# The remote commands are passed through a quoted heredoc: a bare `ssh host`\n",
    "# only worked because ssh consumed the rest of this cell from stdin.\n",
    "ssh hardac-xfer.genome.duke.edu 'bash -s' <<'EOF'\n",
    "# Abort on the first error so a failed cd never lets rsync upload `*` from another folder.\n",
    "set -euo pipefail\n",
    "cd /data/reddylab/Hazel/chipseq/KLJR_180920/processing/chip_seq/trackhub/OCI_JQ1res\n",
    "rsync -rvz --copy-links -e ssh --update \\\n",
    "    * \\\n",
    "    trackhub.genome.duke.edu:/nfs/trackhub/reddylab/validations/OCI_JQ1res\n",
    "EOF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}