#Script to calculate, output and graph allele frequencies #Created by Mark Christie on 3/1/2012 setwd("C:/Example/Directory) # set working directory gdata<-read.table(file="BeechData_east.txt", header=TRUE, sep="\t", na.strings="NA", dec=".", strip.white=TRUE) #load data ; here genotypes of Beech data from dryad head(gdata) #look at the data population=gdata[,-c(1:3)] #This is called indexing. Here we are accessing only the genotypes for a single population (i.e., removing columns 1-3) L=ncol(population) #how many columns are there? locus_positions=(2*(unique(round((1:(L-2))/2)))+1) #find the starting column number for each locus lnames=colnames(population) #locus names, from the header OUT=NULL #create a null dataset to append allele freqs to for (x in locus_positions) { #begin for loop, to calculate frequencies for each locus alleles=c(population[,x],population[,x+1]) #For example, combine columns 1 and 2 for locus 1 (two columns because they are diploid) alleles2=as.data.frame(table(alleles)) #count each allele at locus x missing=alleles2[which(alleles2[,1]==0),2] #count missing data at locus x, entered as '0' in this dataset (not used further for simplicity) alleles3=alleles2[-which(alleles2[,1]==0),] #remove missing data (otherwise 0 would be counted in total number of alleles) alleles4=cbind(alleles3,alleles3[,2]/sum(alleles3[,2])) #calculate frequencies output=cbind(x,lnames[x],alleles4) #combine x, locusname, and frequencies OUT <<- rbind(OUT,output) } colnames(OUT) <- c("Number","Locus","allele","count","frequency") #add column headers Allelefreqs=OUT[,-1] write.table(Allelefreqs,file="Allelefrequencies.txt",row.names=FALSE,col.names=TRUE,sep="\t",append=FALSE) #2 different plots for allele freqs for a specified locus wanted_locus="FS1.15" #enter wanted locus name here Locus=Allelefreqs[which(Allelefreqs[,1]==wanted_locus),] plot(as.numeric(as.character(Locus[,2])),as.numeric(as.character(Locus[,4])),xlab="Allele",ylab="Frequency",main=paste("Locus_",Locus[1,1]),pch=21,bg="blue",cex=1.5) plot(1:length(Locus[,2]),sort(as.numeric(as.character(Locus[,4])),decreasing=TRUE),xlab="Allele (orderd by frequency)",ylab="Frequency",main=paste("Locus_",Locus[1,1]),pch=21,bg="blue",cex=1.5)