# Step 1: Define file paths
prefix <- "D:\\Bioinformatics\\Jovialis_1240k_merge\\merged_1240k"
my_f2_dir <- "D:\\Bioinformatics\\Jovialis_1240k_merge\\my_f2_dir_merged_1240k"
# Step 2: Load required libraries
library(admixtools)
library(tidyverse)
# Step 3: Define target and population lists
target <- c('Jovialis')
left <- c('Italy_Sicily_Himera_480BCE_NEurope.AG', 'Italy_Sicily_Himera_480BCE_Greek.AG')
right <- c("Ethiopia_4500BP.AG", "Russia_YuzhniyOleniyOstrov_Mesolithic.AG", "Luxembourg_Mesolithic.AG", "Georgia_Kotias_Mesolithic.SG", "Israel_Natufian.AG")
# Step 4: Combine all populations for f2 statistics
mypops <- c(target, left, right)
# Step 5: Generate f2 statistics for qpWave
cat("\nExtracting f2 statistics...\n")
extract_f2(prefix, my_f2_dir, pops = mypops, overwrite = TRUE, maxmiss = 1, blgsize = 0.05)
f2_blocks <- f2_from_precomp(my_f2_dir, pops = mypops, afprod = TRUE)
# Step 6: Run qpWave to validate admixture model complexity
cat("\nRunning qpWave to check rank...\n")
qpwave_results <- qpwave(f2_blocks, left = left, right = right, verbose = TRUE)
# Save qpWave results to file
write.csv(qpwave_results$rankdrop, file = "qpwave_rankdrop_results.csv", row.names = FALSE)
cat("\nqpWave Rank Drop Results:\n")
print(qpwave_results$rankdrop)
# Step 7: Determine whether to proceed with qpAdm
min_p <- min(qpwave_results$rankdrop$p, na.rm = TRUE)
if (!is.na(min_p) && min_p > 0.05) {
cat("\nqpWave indicates that the source populations form a clade (p > 0.05). This suggests that the sources do not exhibit distinct ancestries.\n")
cat("\nqpAdm may not provide meaningful results in this case, but we will proceed for comparison.\n")
# Run qpAdm with allsnps = TRUE using genotype files
results <- qpadm(prefix, left, right, target, allsnps = TRUE)
cat("\nqpAdm Admixture Weights (allsnps = TRUE, may not be meaningful due to clade formation):\n")
print(results$weights)
cat("\nqpAdm Population Drop Results (allsnps = TRUE):\n")
print(results$popdrop)
# Save results to file
write.csv(results$weights, file = "qpadm_weights_clade.csv", row.names = FALSE)
write.csv(results$popdrop, file = "qpadm_popdrop_clade.csv", row.names = FALSE)
} else {
cat("\nqpWave indicates that the source populations do not form a clade (p ≤ 0.05). Proceeding with qpAdm to estimate admixture proportions.\n")
# Run qpAdm with allsnps = TRUE using genotype files
results_true <- qpadm(prefix, left, right, target, allsnps = TRUE)
cat("\nqpAdm Admixture Weights (allsnps = TRUE):\n")
print(results_true$weights)
cat("\nqpAdm Population Drop Results (allsnps = TRUE):\n")
print(results_true$popdrop)
# Save results to file
write.csv(results_true$weights, file = "qpadm_weights_allsnps_TRUE.csv", row.names = FALSE)
write.csv(results_true$popdrop, file = "qpadm_popdrop_allsnps_TRUE.csv", row.names = FALSE)
# Run qpAdm with allsnps = FALSE for comparison
results_false <- qpadm(f2_blocks, left, right, target, allsnps = FALSE)
cat("\nqpAdm Admixture Weights (allsnps = FALSE):\n")
print(results_false$weights)
cat("\nqpAdm Population Drop Results (allsnps = FALSE):\n")
print(results_false$popdrop)
# Save results to file
write.csv(results_false$weights, file = "qpadm_weights_allsnps_FALSE.csv", row.names = FALSE)
write.csv(results_false$popdrop, file = "qpadm_popdrop_allsnps_FALSE.csv", row.names = FALSE)
}
cat("\nAnalysis complete. Check the output CSV files for detailed results.\n")