#########1.SNP heritability estimation using GCTA#########
####The overview and manual of this software is available at https://cnsgenomics.com/software/gcta/#Tutorial
~/gcta64 --grm ~/grm_adj --reml --pheno ~/pheno.phen --mpheno 1 --covar ~/cov.covar   --qcovar ~/cov.qcovar   --reml-lrt  1 --out ~/h2_pheno_1


#########2.BLUP analysis using GCTA#########
####The overview and manual of this software is available at https://cnsgenomics.com/software/gcta/#Tutorial
~/gcta64 --threads 10 --grm ~/grm_adj --reml --pheno  ~/pheno.phen  --mpheno 1  --covar ~/cov.covar   --qcovar ~/cov.qcovar  --reml-lrt  1 --reml-pred-rand --out ~/blup_pheno_1

~/gcta64 --threads 10 --bfile ~/ukb_bfile  --blup-snp  ~/blup_pheno_1.indi.blp --out ~/pheno_1_gblup

#########3.GWAS analysis using fastGWA#########
####The overview and manual of this software is available at https://cnsgenomics.com/software/gcta/#fastGWA
~/gcta64 --bfile --bfile ~/ukb_bfile --grm-sparse ~/ukb_sp  --fastGWA-mlm --est-vg REML  --pheno ~/pheno.phen --mpheno 1 --covar ~/cov.covar   --qcovar ~/cov.qcovar  --out ~/pheno_1


#########4.Post0GWAS analysis using FUMA#########
####We use the FUMA online tool https://fuma.ctglab.nl/ to perform post-GWAS analysis ("SNP2GENE" function)
####Below are parameters we used in FUMA analysis
####Section I: Upload input files
Chromosome: CHR
Position: POS
rsID: SNP
P-value: P
Effect allele: A1
Beta: BETA
#Use default value for all other settings
####Section II: Parameters for lead SNPs and candidate SNPs identification
Sample size (N): Column name for N per SNP (text): N
Minimum P-value of lead SNPs (<):2.3e-10 
#Use default value for all other settings 
#Section III-1: Gene Mapping (positional mapping)
Perform positional mapping: YES 
Tissue/cell types for 15-core chromatin state: Brain (13)
Additional annotations: PsychENCODE (6) and Brain Open Chromatin Atlas (28)
#Use default value for all other settings
#Section III-2: Gene Mapping (eQTL mapping)
Perform eQTL mapping: YES 
Tissue types: PsychENCODE (1); ComminMind Consortium (4), BRAINEAC (11), GTEx v8 Brain (13). (Be sure to select all of the four tissue types)
Tissue/cell types for 15-core chromatin state: Brain (13)
Additional annotations: PsychENCODE (6) and Brain Open Chromatin Atlas (28)
#Use default value for other settings
#Section III-3: Gene Mapping (3D Chromatin Interaction mapping)
Perform chromatin interaction mapping: YES 
Builtin chromatin interaction data: 
GSE87112: Dorsolateral prefrontal cortex, 
GSE87112: Hippocampus, 
Giusti-Rodríguez et al.2019: Adult cortex 
Giusti-Rodríguez et al.2019: Fetal cortex. 
Annotate enhancer/promoter regions (Roadmap 111 epigenomes): Brain (12)
Tissue/cell types for 15-core chromatin state: Brain (13)
Additional annotations: PsychENCODE (6) and Brain Open Chromatin Atlas (28)
#Use default value for other settings
#Section IV: Gene types
#Use default value for all settings
#Section V: MHC region
#Use default value for all settings
#Section VI: MAGMA analysis
MAGMA gene expression analysis: (ALL)
GTEx v8: 30, GTEx v8: 54,
#Use default value for other settings

#########5.Genetic correlation analysis using LDSC#########
####The overview and manual of this software is available at https://github.com/bulik/ldsc
##obtain the needed columns from fastGWA result
awk 'NR>1{{print $2, $4, $5, $6, $8/$9, $10}}' < ~/pheno_1.fastGWA > ~/ldsc_1.txt
##merge with the list of SNPs provided by LDSC
awk 'NR==FNR{a[$1,$1];next} ($1,$1) in a' ~/w_hm3.noMHC.nohead.snplist ~/ldsc_1.txt  > ~/ldsc_1_hm3.txt
##add the header
awk 'BEGIN{print "snpid A1 A2 N Zscore P-value"}{print}' ~/ldsc_1_hm3.txt > ~/ldsc_1_hm3_hd.txt
##munge the summary statistics 
~/munge_sumstats.py \
--sumstats ~/ldsc_1_hm3_hd.txt \
--out ~/ldsc_1 \
--merge-alleles ~/w_hm3.snplist
##estimate the genetic correlation with other complex trait
~/ldsc.py \
--rg ~/ldsc_1.sumstats.gz,~/other_trait.sumstats.gz  \
--ref-ld-chr ~/eur_w_ld_chr/ \
--w-ld-chr ~/eur_w_ld_chr/ \
--out ~/ldsc_1


#########6.Partitioned heritability analysis using LDSC#########
####The overview and manual of this software is available at https://github.com/bulik/ldsc/wiki/Partitioned-Heritability
#Calculate LD Score
chr=1
~/ldsc.py --l2 --bfile ~/1000G_plinkfiles/1000G.mac5eur.$chr \
--ld-wind-cm 1 --annot ~/ldsc_annot/$chr.annot.gz --out ~/ldsc_annot/$chr \
--print-snps ~/hapmap3_snps/hm.$chr.snp 

#Calculate Partitioned heritability
~/ldsc.py \
--h2 ~/ldsc_1.sumstats.gz \
--out ~/ldsc_2 \
--frqfile-chr ~/1000G_frq/1000G.mac5eur. \
--overlap-annot --ref-ld-chr ~/ldsc_annot,~/baseline. \
--w-ld-chr ~/weights_hm3_no_hla/weights. --print-coefficients

#########7.Gene-level association test using MAGMA#########
####The overview and manual of this software is available at https://ctg.cncr.nl/software/magma
~/magma --bfile ~/g1000_eur \
--pval ~/pheno_1.fastGWA use=SNP,P ncol=N \
--gene-annot ~/MAGMAdefault.genes.annot  --out ~/pheno_1

#########8.Gene-set enrichment analysis using DEPICT#########
####The overview and manual of this software is available at https://github.com/perslab/depict
##obtain the columns needed for DEPICT analysis
awk 'NR>0{print $2,$1,$3,$10}' ~/pheno_1.fastGWA >  ~/pheno_1.dep
sed 's/ /\t/g' ~/pheno_1.dep > ~/pheno_1.dep
##write the setup file needed by DEPICT
cat >> run_depict_1 <<EOL

[PATHS]

analysis_path: ~/output/


[GWAS FILE SETTINGS]

gwas_summary_statistics_file: ~/pheno_1.dep

association_pvalue_cutoff:  5e-5

label_for_output_files: pheno_1

pvalue_col_name: P

marker_col_name: 
  
chr_col_name: CHR

pos_col_name: POS

separator: tab


[PLINK SETTINGS]

plink_executable: ~/plink

genotype_data_plink_prefix: ~/CEU_GBR_TSI_unrelated.phase1_release_v3.20101123.snps_indels_svs.genotypes_noduplicates


[DEPICT SETTINGS]

# The following three steps are need to construct DEPICT loci based on your GWAS summary statistics
step_construct_depict_loci: yes

# The following step is needed to perform DEPICT gene prioritization
step_depict_geneprio: yes

# The following step is needed to perform DEPICT gene set enrichment
step_depict_gsea: yes

# The following step is needed to perform DEPICT tissue enrichment analysis
step_depict_tissueenrichment: yes


[MISC SETTINGS]

# Number of threads used by the DEPICT Java binary
number_of_threads: 12

# Java heap size in mega bytes
heap_size_in_mb: 16000

# Precomputed loci for each 1000 Genomes project SNPs
collection_file: data/collections/ld0.5_collection_1000genomespilot_depict_150429.txt.gz

# The reconstituted gene set files used by DEPICT
reconstituted_genesets_file: data/reconstituted_genesets/reconstituted_genesets_150901.binary

# The tissue expression matrix
tissue_expression_file: data/tissue_expression/GPL570EnsemblGeneExpressionPerTissue_DEPICT20130820_z_withmeshheader.txt

# Mapping from tissue/cell type identifier to tissue name and information
tissue_mapping_file: data/tissue_expression/GPL570EnsemblGeneExpressionPerTissue_DEPICT20130820_z_withmeshheader_mapping.txt

# Gene annotation file
depict_gene_annotation_file: data/mapping_and_annotation_files/GPL570ProbeENSGInfo+HGNC_reformatted.txt

# Number of genes to report for each reconstituted gene set
max_top_genes_for_gene_set: 10

# Number of repititions used to compute false discovery rates
nr_repititions: 50

# Number of permutations used to adjust for biases such as gene length
nr_permutations: 500

# Boundaries of HLA region (Should not be changed)
mhc_start_bp: 25000000
mhc_end_bp: 35000000

# Directory with precomputed background files
background_data_path: data/backgrounds

# Mapping from gene set identifiers to gene set names
go_mapping_file: data/mapping_and_annotation_files/GO.terms_alt_ids_withoutheader.tab
mgi_mapping_file: data/mapping_and_annotation_files/VOC_MammalianPhenotype.rpt
inweb_mapping_file: data/mapping_and_annotation_files/inweb_mapping.tab

# Files used to output eQTL column in gene prioritization result file
eqtl_mapping_file: data/mapping_and_annotation_files/2012-08-08-IlluminaAll96PercentIdentity-ProbeAnnotation-ProbesWithWrongMappingLengthFilteredOut-EnsemblAnnotation.txt
eqtl_file: data/mapping_and_annotation_files/eQTLProbesFDR0.05.txt

# Prioritize genes across entire genome
prioritize_genes_outside_input_loci: no

# Chromosome to be left-out, leave empty if all chromosomes should be included (for bencharmking purposes)
leave_out_chr:
  
# Number of null GWAS used to run permutations (bias adjustment) and repitions (FDR calcuations)
number_random_runs: 500

# Threshold used when clumping null GWAS
background_plink_clumping_pvalue: 0.0005

#######run DEPICT with the setup file 

~/depict.py ~/run_depict_1 





