Jovialis
Advisor
- Messages
- 9,888
- Reaction score
- 6,794
- Points
- 113
- Ethnic group
- Italian
- Y-DNA haplogroup
- R1b-PF7566>Y227216
- mtDNA haplogroup
- H6a1b7
I hope I am doing this right!
Code:
# Full Rotating qpAdm Protocol à la Harney et al. (2021)
# ADMIXTOOLS 2 (v2.0.8) with integrated outgroup pre-screening
# Uses per-quartet SNP selection (allsnps = TRUE)
library(admixtools)
library(tidyverse)
# I) OUTGROUP PRE-SCREENING ---------------------------------------------------
# A) Data preparation (AADR HO eigenstrat)
prefix <- "C:/Users/jovialis/Documents/Bioinformatics/Jovialis_HO_merge/merged_HO"
f2_dir <- "C:/Users/jovialis/Documents/Bioinformatics/Jovialis_HO_merge/f2_blocks"
# Define populations for f2 precomputation: target + sources
left_pops <- c(
"Jovialis", # Target
"Russia_Samara_EBA_Yamnaya.AG", #Source
"Luxembourg_Mesolithic.AG", #Source
"Turkey_Marmara_Barcin_N.AG", #Source
"Iran_GanjDareh_N.AG" #Source
)
overwrite_f2 <- TRUE
blgsize <- 0.05 # jackknife block size
base_out <- "Mbuti.DG" # fixed base outgroup
# Candidate outgroups to test
outgroup_cand <- c(
"Ethiopia_4500BP.AG", "Serbia_IronGates_Mesolithic.SG",
"Spain_Magdalenian_contam.AG", "Spain_Mesolithic_brother.I0585.AG",
"Russia_AfontovaGora3_UP.AG","Russia_MA1_UP.SG",
"Russia_UstIshim_IUP.DG","Russia_Kostenki14_UP.SG",
"Russia_YuzhniyOleniyOstrov_Mesolithic.AG",
"Belgium_GoyetQ116_1_UP.AG","Italy_Epigravettian.AG.BY.AA",
"Czechia_Gravettian.AG.BY.AA","Georgia_Kotias_Mesolithic.SG",
"Georgia_Satsurblia_LateUP.SG","Jordan_PPNB.AG",
"Turkey_Central_Pinarbasi_Epipaleolithic.AG",
"Israel_Natufian.AG","Morocco_Iberomaurusian.AG"
)
# Combine for f2 extraction
mypops <- c(left_pops, base_out, outgroup_cand)
# Extract and load f2-blocks
extract_f2(
prefix, f2_dir,
pops = mypops,
overwrite = overwrite_f2,
auto_only = TRUE,
blgsize = blgsize
)
f2_blocks <- f2_from_precomp(
f2_dir,
pops = mypops,
afprod = TRUE
)
# B) Screen outgroup candidates via f4(Left_i, Left_j; cand, base)
# Compute max |Z| per candidate
z_table <- map_dfr(outgroup_cand, \(cand) {
stats <- qpdstat(
f2_blocks,
pop1 = left_pops, pop2 = left_pops,
pop3 = cand, pop4 = base_out,
unique_only = TRUE, comb = TRUE
) %>% filter(pop1 != pop2)
tibble(cand = cand, max_z = max(abs(stats$z), na.rm = TRUE))
})
# Determine informative candidates (|Z| > 3)
informative_outs <- z_table %>% filter(max_z > 3) %>% pull(cand)
# Ensure at least 5 outgroups (including base); fill with top-Z if needed
if(length(informative_outs) < 4) {
warning("Fewer than 5 outgroups: filling with top-Z candidates")
fill <- z_table %>% arrange(desc(max_z)) %>% slice_head(n = 4) %>% pull(cand)
informative_outs <- unique(fill)
}
# Final Right set for Phase 1 (pre-screened)
right0 <- c(base_out, informative_outs)
if(length(right0) < 5) stop("Could not assemble at least 5 outgroups after prescreening")
cat("\nPre-screened outgroups (right0):\n")
print(right0)
# II) CONFIGURATION -----------------------------------------------------------
# Separate target and sources for qpAdm
target <- left_pops[1]
left0 <- left_pops[-1]
# PHASE 1: Full-model qpAdm → baseline p-value + diagnostics
full1 <- qpadm(
prefix,
left = left0,
right = right0,
target = target,
blgsize = blgsize,
auto_only = TRUE,
allsnps = TRUE,
verbose = TRUE,
return_f4 = TRUE
)
p_full1 <- full1$popdrop$p[1]
cat("Phase 1 p-value:", round(p_full1, 3), "\n")
print(full1$weights)
print(full1$popdrop)
print(full1$details)
# PHASE 2: Drop-one outgroup rotation → prune on p-value only
phase2 <- map_dfr(
setdiff(right0, base_out),
\(og) {
r2 <- qpadm(
prefix,
left = left0,
right = setdiff(right0, og),
target = target,
blgsize = blgsize,
auto_only = TRUE,
allsnps = TRUE,
verbose = FALSE,
return_f4 = FALSE
)
tibble(outgroup = og, p_removed = r2$popdrop$p[1])
}
) %>% mutate(delta = p_removed - p_full1) %>% arrange(delta)
cat("\nPhase 2 summary:\n")
print(phase2)
# Phase 2 kept outgroups (delta < 0)
keep_ogs2 <- c(base_out, phase2 %>% filter(delta < 0) %>% pull(outgroup))
# Ensure at least 5 outgroups; fill with best deltas if needed
if(length(keep_ogs2) < 5) {
warning("Fewer than 5 outgroups after Phase 2: filling with best-delta candidates")
extras <- phase2 %>% arrange(delta) %>% slice_head(n = 4) %>% pull(outgroup)
keep_ogs2 <- unique(c(base_out, extras))
}
cat("\nPhase 2 kept outgroups (keep_ogs2):\n")
print(keep_ogs2)
# PHASE 2b: Full-model qpAdm with Phase 2 outgroups
cat("\nPHASE 2b: Full-model qpAdm with Phase 2 outgroups\n")
test2 <- qpadm(
prefix,
left = left0,
right = keep_ogs2,
target = target,
blgsize = blgsize,
auto_only = TRUE,
allsnps = TRUE,
verbose = TRUE,
return_f4 = TRUE
)
cat("Phase 2b p-value:", round(test2$popdrop$p[1], 3), "\n")
print(test2$weights)
print(test2$popdrop)
print(test2$details)
# PHASE 3: Source rotation (model competition)
results3 <- map_dfr(
left0,
\(held) {
r3 <- qpadm(
prefix,
left = setdiff(left0, held),
right = unique(c(keep_ogs2, held)),
target = target,
blgsize = blgsize,
auto_only = TRUE,
allsnps = TRUE,
verbose = FALSE,
return_f4 = FALSE
)
tibble(held = held, pval = r3$popdrop$p[1])
}
) %>% mutate(delta = pval - p_full1) %>% arrange(delta)
cat("\nPhase 3 summary:\n")
print(results3)
final_left <- results3 %>% filter(delta < 0) %>% pull(held)
if(length(final_left) == 0) {
message("Phase 3 dropped no sources → keeping original left0")
final_left <- left0
}
# PHASE 4: Final outgroup drop-one rotation
phase4 <- map_dfr(
setdiff(keep_ogs2, base_out),
\(og) {
r4 <- qpadm(
prefix,
left = final_left,
right = setdiff(keep_ogs2, og),
target = target,
blgsize = blgsize,
auto_only = TRUE,
allsnps = TRUE,
verbose = FALSE,
return_f4 = FALSE
)
tibble(outgroup = og, p_removed = r4$popdrop$p[1])
}
) %>% mutate(delta = p_removed - p_full1) %>% arrange(delta)
cat("\nPhase 4 summary:\n")
print(phase4)
keep_ogs3 <- c(base_out, phase4 %>% filter(delta < 0) %>% pull(outgroup))
if(length(keep_ogs3) < 5) {
warning("Fewer than 5 outgroups after Phase 4: filling with best-delta candidates")
extras4 <- phase4 %>% arrange(delta) %>% slice_head(n = 4) %>% pull(outgroup)
keep_ogs3 <- unique(c(base_out, extras4))
}
cat("\nPhase 4 kept outgroups (keep_ogs3):\n")
print(keep_ogs3)
# FINAL RUN: Leanest qpAdm model → full diagnostics
final <- qpadm(
prefix,
left = final_left,
right = keep_ogs3,
target = target,
blgsize = blgsize,
auto_only = TRUE,
allsnps = TRUE,
verbose = TRUE,
return_f4 = TRUE
)
cat("\nFinal p-value:", round(final$popdrop$p[1], 3), "\n")
print(final$weights)
print(final$popdrop)
print(final$details)
Last edited: