# Load necessary libraries
library(ggplot2)
library(dplyr)
# Set working directory
# NOTE(review): machine-specific absolute path; the two input files below are
# resolved relative to it.
setwd("D:/Bioinformatics/01_Admixtools_Dataset/V62.0_HO_Eigenstrat_Merged_Jovialis/W_Eurasia_Mod_aDNA")

# Read the eigenvalues as a plain numeric vector
evals <- scan("projected.eval.txt", quiet = TRUE)

# Read the eigenvectors table (no header row). read.table() ignores lines
# starting with "#" by default -- presumably that covers the "#eigvals" line
# smartpca writes at the top of the file; confirm against your file.
evecs <- read.table("projected.evec.txt", header = FALSE, stringsAsFactors = FALSE)

# The code below needs at least: sample ID, PC1, PC2 and a population label.
if (ncol(evecs) < 4L) {
  stop(
    "projected.evec.txt must have at least 4 columns ",
    "(ID, PC1, PC2, population label); found ", ncol(evecs),
    call. = FALSE
  )
}

# Extract individual IDs and population labels.
# The label is taken from the LAST column rather than a fixed V12: V12 is only
# the label column when exactly 10 PCs were written, whereas the last column
# holds the label regardless of how many PCs the file contains.
individuals <- as.character(evecs$V1)
populations <- as.character(evecs[[ncol(evecs)]])

# Extract the first two principal components and flip both axes
pc1 <- -as.numeric(evecs$V2) # Horizontal flip
pc2 <- -as.numeric(evecs$V3) # Vertical flip

# Create a data frame for plotting: one row per individual
pca_data <- data.frame(
  Individual = individuals,
  Population = populations,
  PC1 = pc1,
  PC2 = pc2,
  stringsAsFactors = FALSE
)

# Remove rows with NA values (if any)
pca_data <- na.omit(pca_data)
# Population labels to keep in the plot. Entries are compared verbatim against
# the Population column, so spelling and suffixes (".HO", ".DG") must match.
highlighted_pops <- c(
  "Jovialis", "Armenian.HO", "Iranian.HO", "Turkish.HO",
  "Albanian.HO", "Italian_North.HO", "Bulgarian.HO", "Cypriot.HO",
  "Greek.HO", "Italian_South.HO", "Maltese.HO", "Sicilian.HO",
  "Italian_Central.HO", "English.HO", "French.HO", "Icelandic.HO",
  "Norwegian.HO", "Orcadian.HO", "Scottish.HO", "BedouinA.HO",
  "BedouinB.HO", "Jordanian.HO", "Palestinian.HO", "Saudi.HO",
  "Syrian.HO", "Abkhasian.HO", "Adygei.HO", "Balkar.HO",
  "Chechen.HO", "Georgian.HO", "Kumyk.HO", "Lezgin.HO",
  "Russia_NorthOssetian.HO", "Jew_Ashkenazi.HO", "Jew_Georgian.HO",
  "Jew_Iranian.HO", "Jew_Iraqi.HO", "Jew_Libyan.HO", "Jew_Moroccan.HO",
  "Jew_Tunisian.HO", "Jew_Turkish.HO", "Jew_Yemenite.HO", "Basque.HO",
  "Spanish.HO", "Spanish_North.HO", "Druze.HO", "Lebanese.HO",
  "Belarusian.HO", "Croatian.HO", "Czech.HO", "Estonian.HO",
  "Hungarian.HO", "Lithuanian.HO", "Ukrainian.HO", "IBS_CanaryIslands.DG",
  "Sardinian.HO", "Finnish.HO", "Mordovian.HO", "Russian.HO"
)

# Drop every individual whose population is not in the list above
pca_data <- filter(pca_data, Population %in% highlighted_pops)
# Lookup table: raw population label -> short group name used for the colour,
# fill and shape aesthetics (and therefore the legend text).
group_labels <- c(
  "Jovialis"                = "Jovialis",
  "Armenian.HO"             = "Armenian",
  "Iranian.HO"              = "Iranian",
  "Turkish.HO"              = "Turkish",
  "Albanian.HO"             = "Albanian",
  "Italian_North.HO"        = "Italian_North",
  "Bulgarian.HO"            = "Bulgarian",
  "Cypriot.HO"              = "Cypriot",
  "Greek.HO"                = "Greek",
  "Italian_South.HO"        = "Italian_South",
  "Maltese.HO"              = "Maltese",
  "Sicilian.HO"             = "Sicilian",
  "Italian_Central.HO"      = "Italian_Central",
  "English.HO"              = "English",
  "French.HO"               = "French",
  "Icelandic.HO"            = "Icelandic",
  "Norwegian.HO"            = "Norwegian",
  "Orcadian.HO"             = "Orcadian",
  "Scottish.HO"             = "Scottish",
  "BedouinA.HO"             = "BedouinA",
  "BedouinB.HO"             = "BedouinB",
  "Jordanian.HO"            = "Jordanian",
  "Palestinian.HO"          = "Palestinian",
  "Saudi.HO"                = "Saudi",
  "Syrian.HO"               = "Syrian",
  "Abkhasian.HO"            = "Abkhasian",
  "Adygei.HO"               = "Adygei",
  "Balkar.HO"               = "Balkar",
  "Chechen.HO"              = "Chechen",
  "Georgian.HO"             = "Georgian",
  "Kumyk.HO"                = "Kumyk",
  "Lezgin.HO"               = "Lezgin",
  "Russia_NorthOssetian.HO" = "North_Ossetian",
  "Jew_Ashkenazi.HO"        = "Jew_Ashkenazi",
  "Jew_Georgian.HO"         = "Jew_Georgian",
  "Jew_Iranian.HO"          = "Jew_Iranian",
  "Jew_Iraqi.HO"            = "Jew_Iraqi",
  "Jew_Libyan.HO"           = "Jew_Libyan",
  "Jew_Moroccan.HO"         = "Jew_Moroccan",
  "Jew_Tunisian.HO"         = "Jew_Tunisian",
  "Jew_Turkish.HO"          = "Jew_Turkish",
  "Jew_Yemenite.HO"         = "Jew_Yemenite",
  "Basque.HO"               = "Basque",
  "Spanish.HO"              = "Spanish",
  "Spanish_North.HO"        = "Spanish_North",
  "Druze.HO"                = "Druze",
  "Lebanese.HO"             = "Lebanese",
  "Belarusian.HO"           = "Belarusian",
  "Croatian.HO"             = "Croatian",
  "Czech.HO"                = "Czech",
  "Estonian.HO"             = "Estonian",
  "Hungarian.HO"            = "Hungarian",
  "Lithuanian.HO"           = "Lithuanian",
  "Ukrainian.HO"            = "Ukrainian",
  "IBS_CanaryIslands.DG"    = "Canary_Islands",
  "Sardinian.HO"            = "Sardinian",
  "Finnish.HO"              = "Finnish",
  "Mordovian.HO"            = "Mordovian",
  "Russian.HO"              = "Russian"
)

# Translate every row's population label through the table. Indexing by a
# label that is not in the table (or by NA) yields NA, which coalesce() then
# replaces with the catch-all "Other".
pca_data <- pca_data %>%
  mutate(
    Group = coalesce(unname(group_labels[as.character(Population)]), "Other")
  )
# Colour per group, keyed by group name; all values are R colour names.
# Note that a few colours are reused by more than one group (e.g. "darkgreen",
# "darkred", "blue", "darkturquoise", "darkslateblue").
custom_colors <- c(
  "Jovialis"        = "darkgoldenrod",
  "Armenian"        = "darkblue",
  "Iranian"         = "darkgreen",
  "Turkish"         = "orange",
  "Albanian"        = "green",
  "Italian_North"   = "darkorange",
  "Bulgarian"       = "steelblue",
  "Cypriot"         = "darkmagenta",
  "Greek"           = "saddlebrown",
  "Italian_South"   = "darkorchid3",
  "Maltese"         = "blue",
  "Sicilian"        = "darkolivegreen",
  "Italian_Central" = "midnightblue",
  "English"         = "firebrick",
  "French"          = "chocolate4",
  "Icelandic"       = "darkslategray",
  "Norwegian"       = "mediumblue",
  "Orcadian"        = "darkslateblue",
  "Scottish"        = "darkseagreen",
  "BedouinA"        = "darkcyan",
  "BedouinB"        = "deepskyblue4",
  "Jordanian"       = "darkred",
  "Palestinian"     = "darkgreen",
  "Saudi"           = "darkgoldenrod4",
  "Syrian"          = "mediumvioletred",
  "Abkhasian"       = "brown4",
  "Adygei"          = "khaki4",
  "Balkar"          = "purple4",
  "Chechen"         = "royalblue4",
  "Georgian"        = "brown3",
  "Kumyk"           = "forestgreen",
  "Lezgin"          = "springgreen4",
  "North_Ossetian"  = "lightpink4",
  "Jew_Ashkenazi"   = "chocolate",
  "Jew_Georgian"    = "darkturquoise",
  "Jew_Iranian"     = "dodgerblue4",
  "Jew_Iraqi"       = "slateblue",
  "Jew_Libyan"      = "cornflowerblue",
  "Jew_Moroccan"    = "limegreen",
  "Jew_Tunisian"    = "darkred",
  "Jew_Turkish"     = "seagreen4",
  "Jew_Yemenite"    = "navyblue",
  "Basque"          = "darkorchid4",
  "Spanish"         = "darkorchid",
  "Spanish_North"   = "mediumseagreen",
  "Druze"           = "slateblue4",
  "Lebanese"        = "springgreen3",
  "Belarusian"      = "darkturquoise",
  "Croatian"        = "blue",
  "Czech"           = "darkslateblue",
  "Estonian"        = "darkslategray4",
  "Hungarian"       = "darkorange3",
  "Lithuanian"      = "tan4",
  "Ukrainian"       = "tan",
  "Canary_Islands"  = "navy",
  "Sardinian"       = "darkseagreen4",
  "Finnish"         = "olivedrab",
  "Mordovian"       = "darkorange1",
  "Russian"         = "red"
)
# Point shapes for the groups. Only five shape codes are used, so they are
# recycled in order until there is one entry per distinct group.
filled_shapes <- c(
  21, # circle
  22, # square
  23, # diamond
  24, # triangle pointing up
  25  # triangle pointing down
)
shape_values <- rep_len(filled_shapes, length(unique(pca_data$Group)))
# Plot the PCA: one point per individual, with colour, fill and shape all keyed
# on Group, a compact legend underneath and a black border around the panel.

# Percentage of the summed eigenvalues attributed to each component, computed
# once and reused for both axis titles.
pct_var <- round(evals / sum(evals) * 100, 2)

pca_plot <- ggplot(
  pca_data,
  aes(x = PC1, y = PC2, color = Group, fill = Group, shape = Group)
) +
  geom_point(size = 3) +
  scale_color_manual(values = custom_colors) +
  scale_fill_manual(values = custom_colors) +
  scale_shape_manual(values = shape_values) +
  labs(
    title = "PCA Projection of Modern West Eurasia (AADR_HO v62.0 merged with Jovialis WGS 30x)",
    x = paste0("PC1 (", pct_var[1], "% variance)"),
    y = paste0("PC2 (", pct_var[2], "% variance)")
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 8), # Decrease legend text size
    legend.key.size = unit(0.4, "cm"), # Decrease size of legend keys
    legend.spacing.x = unit(0.2, "cm"), # Decrease horizontal spacing in legend
    legend.box = "horizontal", # Arrange legend items horizontally
    legend.direction = "horizontal",
    plot.title = element_text(hjust = 0.5),
    # Black border around the PCA panel
    panel.border = element_rect(color = "black", fill = NA, linewidth = 1)
  ) +
  guides(
    # Same column count on all three guides so the legend stays compact
    color = guide_legend(ncol = 8),
    shape = guide_legend(ncol = 8),
    fill = guide_legend(ncol = 8)
  )

# Print explicitly: a bare ggplot expression is only drawn by top-level
# auto-printing, so without this nothing is rendered when the script is run
# through source() or from inside a function or loop.
print(pca_plot)