• Don't want to see ads? Install an adblocker like uBlock Origin or use a Europe-based privacy-friendly browser like Vivaldi or Mullvad.

Admixtools Full Rotating qpAdm Protocol à la Harney et al. (2021)

Jovialis

Advisor
Messages
9,888
Reaction score
6,794
Points
113
Ethnic group
Italian
Y-DNA haplogroup
R1b-PF7566>Y227216
mtDNA haplogroup
H6a1b7
I hope I am doing this right!

Code:
# Full Rotating qpAdm Protocol à la Harney et al. (2021)
# ADMIXTOOLS 2 (v2.0.8) with integrated outgroup pre-screening
# Uses per-quartet SNP selection (allsnps = TRUE)

library(admixtools)
library(tidyverse)

# I) OUTGROUP PRE-SCREENING ---------------------------------------------------
# A) Data preparation (AADR HO eigenstrat)
prefix <- "C:/Users/jovialis/Documents/Bioinformatics/Jovialis_HO_merge/merged_HO"
f2_dir <- "C:/Users/jovialis/Documents/Bioinformatics/Jovialis_HO_merge/f2_blocks"

# Define populations for f2 precomputation: target + sources
left_pops <- c(
    "Jovialis",                     # Target
    "Russia_Samara_EBA_Yamnaya.AG", #Source
    "Luxembourg_Mesolithic.AG",     #Source
    "Turkey_Marmara_Barcin_N.AG",   #Source
    "Iran_GanjDareh_N.AG"           #Source
)
overwrite_f2 <- TRUE
blgsize <- 0.05                      # jackknife block size
base_out <- "Mbuti.DG"            # fixed base outgroup

# Candidate outgroups to test
outgroup_cand <- c(
    "Ethiopia_4500BP.AG", "Serbia_IronGates_Mesolithic.SG",
    "Spain_Magdalenian_contam.AG", "Spain_Mesolithic_brother.I0585.AG",
    "Russia_AfontovaGora3_UP.AG","Russia_MA1_UP.SG",
    "Russia_UstIshim_IUP.DG","Russia_Kostenki14_UP.SG",
    "Russia_YuzhniyOleniyOstrov_Mesolithic.AG",
    "Belgium_GoyetQ116_1_UP.AG","Italy_Epigravettian.AG.BY.AA",
    "Czechia_Gravettian.AG.BY.AA","Georgia_Kotias_Mesolithic.SG",
    "Georgia_Satsurblia_LateUP.SG","Jordan_PPNB.AG",
    "Turkey_Central_Pinarbasi_Epipaleolithic.AG",
    "Israel_Natufian.AG","Morocco_Iberomaurusian.AG"
)
# Combine for f2 extraction
mypops <- c(left_pops, base_out, outgroup_cand)

# Extract and load f2-blocks
extract_f2(
    prefix, f2_dir,
    pops      = mypops,
    overwrite = overwrite_f2,
    auto_only = TRUE,
    blgsize   = blgsize
)
f2_blocks <- f2_from_precomp(
    f2_dir,
    pops   = mypops,
    afprod = TRUE
)

# B) Screen outgroup candidates via f4(Left_i, Left_j; cand, base)
# Compute max |Z| per candidate
z_table <- map_dfr(outgroup_cand, \(cand) {
    stats <- qpdstat(
        f2_blocks,
        pop1 = left_pops, pop2 = left_pops,
        pop3 = cand,      pop4 = base_out,
        unique_only = TRUE, comb = TRUE
    ) %>% filter(pop1 != pop2)
    tibble(cand = cand, max_z = max(abs(stats$z), na.rm = TRUE))
})

# Determine informative candidates (|Z| > 3)
informative_outs <- z_table %>% filter(max_z > 3) %>% pull(cand)

# Ensure at least 5 outgroups (including base); fill with top-Z if needed
if(length(informative_outs) < 4) {
    warning("Fewer than 5 outgroups: filling with top-Z candidates")
    fill <- z_table %>% arrange(desc(max_z)) %>% slice_head(n = 4) %>% pull(cand)
    informative_outs <- unique(fill)
}

# Final Right set for Phase 1 (pre-screened)
right0 <- c(base_out, informative_outs)
if(length(right0) < 5) stop("Could not assemble at least 5 outgroups after prescreening")
cat("\nPre-screened outgroups (right0):\n")
print(right0)

# II) CONFIGURATION -----------------------------------------------------------
# Separate target and sources for qpAdm
target <- left_pops[1]
left0  <- left_pops[-1]

# PHASE 1: Full-model qpAdm → baseline p-value + diagnostics
full1 <- qpadm(
    prefix,
    left      = left0,
    right     = right0,
    target    = target,
    blgsize   = blgsize,
    auto_only = TRUE,
    allsnps   = TRUE,
    verbose   = TRUE,
    return_f4 = TRUE
)
p_full1 <- full1$popdrop$p[1]
cat("Phase 1 p-value:", round(p_full1, 3), "\n")
print(full1$weights)
print(full1$popdrop)
print(full1$details)

# PHASE 2: Drop-one outgroup rotation → prune on p-value only
phase2 <- map_dfr(
    setdiff(right0, base_out),
    \(og) {
        r2 <- qpadm(
            prefix,
            left      = left0,
            right     = setdiff(right0, og),
            target    = target,
            blgsize   = blgsize,
            auto_only = TRUE,
            allsnps   = TRUE,
            verbose   = FALSE,
            return_f4 = FALSE
        )
        tibble(outgroup = og, p_removed = r2$popdrop$p[1])
    }
) %>% mutate(delta = p_removed - p_full1) %>% arrange(delta)
cat("\nPhase 2 summary:\n")
print(phase2)

# Phase 2 kept outgroups (delta < 0)
keep_ogs2 <- c(base_out, phase2 %>% filter(delta < 0) %>% pull(outgroup))
# Ensure at least 5 outgroups; fill with best deltas if needed
if(length(keep_ogs2) < 5) {
    warning("Fewer than 5 outgroups after Phase 2: filling with best-delta candidates")
    extras <- phase2 %>% arrange(delta) %>% slice_head(n = 4) %>% pull(outgroup)
    keep_ogs2 <- unique(c(base_out, extras))
}
cat("\nPhase 2 kept outgroups (keep_ogs2):\n")
print(keep_ogs2)

# PHASE 2b: Full-model qpAdm with Phase 2 outgroups
cat("\nPHASE 2b: Full-model qpAdm with Phase 2 outgroups\n")
test2 <- qpadm(
    prefix,
    left      = left0,
    right     = keep_ogs2,
    target    = target,
    blgsize   = blgsize,
    auto_only = TRUE,
    allsnps   = TRUE,
    verbose   = TRUE,
    return_f4 = TRUE
)
cat("Phase 2b p-value:", round(test2$popdrop$p[1], 3), "\n")
print(test2$weights)
print(test2$popdrop)
print(test2$details)

# PHASE 3: Source rotation (model competition)
results3 <- map_dfr(
    left0,
    \(held) {
        r3 <- qpadm(
            prefix,
            left      = setdiff(left0, held),
            right     = unique(c(keep_ogs2, held)),
            target    = target,
            blgsize   = blgsize,
            auto_only = TRUE,
            allsnps   = TRUE,
            verbose   = FALSE,
            return_f4 = FALSE
        )
        tibble(held = held, pval = r3$popdrop$p[1])
    }
) %>% mutate(delta = pval - p_full1) %>% arrange(delta)
cat("\nPhase 3 summary:\n")
print(results3)

final_left <- results3 %>% filter(delta < 0) %>% pull(held)
if(length(final_left) == 0) {
    message("Phase 3 dropped no sources → keeping original left0")
    final_left <- left0
}

# PHASE 4: Final outgroup drop-one rotation
phase4 <- map_dfr(
    setdiff(keep_ogs2, base_out),
    \(og) {
        r4 <- qpadm(
            prefix,
            left      = final_left,
            right     = setdiff(keep_ogs2, og),
            target    = target,
            blgsize   = blgsize,
            auto_only = TRUE,
            allsnps   = TRUE,
            verbose   = FALSE,
            return_f4 = FALSE
        )
        tibble(outgroup = og, p_removed = r4$popdrop$p[1])
    }
) %>% mutate(delta = p_removed - p_full1) %>% arrange(delta)
cat("\nPhase 4 summary:\n")
print(phase4)

keep_ogs3 <- c(base_out, phase4 %>% filter(delta < 0) %>% pull(outgroup))
if(length(keep_ogs3) < 5) {
    warning("Fewer than 5 outgroups after Phase 4: filling with best-delta candidates")
    extras4 <- phase4 %>% arrange(delta) %>% slice_head(n = 4) %>% pull(outgroup)
    keep_ogs3 <- unique(c(base_out, extras4))
}
cat("\nPhase 4 kept outgroups (keep_ogs3):\n")
print(keep_ogs3)

# FINAL RUN: Leanest qpAdm model → full diagnostics
final <- qpadm(
    prefix,
    left      = final_left,
    right     = keep_ogs3,
    target    = target,
    blgsize   = blgsize,
    auto_only = TRUE,
    allsnps   = TRUE,
    verbose   = TRUE,
    return_f4 = TRUE
)
cat("\nFinal p-value:", round(final$popdrop$p[1], 3), "\n")
print(final$weights)
print(final$popdrop)
print(final$details)
 
Last edited:
Typically after phase 2 the outgroups seem sufficient. By the end, the selection seems over pruned imho. I had to factor in a safeguard to make sure it would leave at least 5 outgroups, which is what is recommend.

Basically what it is doing is dropping what ever outgroup that brings down the p-val. By you have to apply your knowledge and common sense to to make the right determination. For example, if it removes a critical outgroup that you are certain should be there, or leave a redundant outgroup.
 
Added a very crucial pre-screening step before phase 1 that prunes out the outgroup list based on Z score of >3.
 
Back
Top