library(babynames)
library(reshape2)
library(ineq)

# Androgynous ("epicene") first names in the USA, from the SSA data shipped
# with the `babynames` package. The script draws two figures:
#   1. the boys/girls ratio over time for a single name (here "Leslie");
#   2. among androgynous names, the yearly share of boys and the yearly share
#      of mostly-male names, with a Gini coefficient on a secondary axis.

# Sex-ratio correction ----
# babynames is based on SSA applicant number and sex.
# We estimate that the sex ratio at birth is 107 boys for 100 girls and
# correct the female counts so that each year matches that ratio.
applicants <- dcast(
  applicants, year ~ sex,
  fun.aggregate = sum, value.var = "n_all"
)
applicants$CORR <- 107 / (100 * applicants$M / applicants$F)

# One row per (name, year) with the male and female counts side by side;
# a sex with no record for that name/year gets 0 from the `sum` aggregate.
bebe <- dcast(babynames, name + year ~ sex, value.var = "n", sum)
m <- match(bebe$year, applicants$year)
bebe$FCORR <- round(bebe$F / applicants$CORR[m])
# 1 if every baby is male, 0 if female
bebe$PROP <- bebe$M / (bebe$FCORR + bebe$M)
bebe$PROPM <- bebe$M / applicants$M[m]
bebe$PROPF <- bebe$FCORR / (applicants$F[m] / applicants$CORR[m])

# Figure 1: test on Leslie ----
pr <- "Leslie"
test <- subset(bebe, bebe$name == pr)
# Here PROP is redefined as the boys / (corrected) girls ratio.
test$PROP <- test$M / test$FCORR
log_ratio <- log10(test$PROP)
# The ratio is 0 or Inf in years where one sex has no record; smooth.spline()
# rejects non-finite values, so only finite points are used for the fit.
finite_ratio <- is.finite(log_ratio)

par(mar = c(4, 6, 2, .2) + 0.1)
plot(
  test$year, log_ratio,
  main = pr, pch = 20, cex = .5, ylim = c(-2, 2),
  axes = FALSE, ylab = "", xlab = ""
)
smoothingSpline <- smooth.spline(
  test$year[finite_ratio], log_ratio[finite_ratio],
  spar = 0.4
)
lines(smoothingSpline)
axis(1)
axis(
  2,
  at = c(-2, -1, 0, 1, 2), col.axis = "black", las = 2,
  labels = c(
    "100 times \n more girls", "10 times\n more girls", "equal number",
    "10 times\nmore boys", "100 times\n more boys"
  )
)
title(
  sub = paste0(
    "Data : USA, Social Security Administration, ",
    "package babynames | coulmont.com"
  ),
  cex.sub = .8, adj = 0, line = 3
)

# Figure 2: more epicene ----
# Androgynous name = share of boys strictly between `mini` and `maxi`.
mini <- .1
maxi <- .9
bbe <- subset(bebe, bebe$PROP > mini & bebe$PROP < maxi)

# NOTE(review): the original statements between the `subset()` call above and
# the `PRM` indicator below were lost (the text between a `<` and a `>` was
# stripped). The definitions of `N`, `bbm` and `gini` are reconstructed from
# the way these objects are used further down - confirm against the original.

# One unit per (name, year) row, so that sum(N) by year counts names.
bbe$N <- 1

# Yearly totals over androgynous names and the share of boys among the
# babies who received one (in percent, plotted on a 0-100 axis).
bbm <- aggregate(cbind(M, FCORR) ~ year, data = bbe, sum)
bbm$PROPM <- 100 * bbm$M / (bbm$M + bbm$FCORR)

# NOTE(review): presumably the Gini coefficient of the per-name baby counts
# among androgynous names, by year - the original input to Gini() is unknown.
gini_by_year <- tapply(bbe$M + bbe$FCORR, bbe$year, Gini)
gini <- data.frame(
  year = as.numeric(names(gini_by_year)),
  gini = as.vector(gini_by_year)
)

# 1 if this name is given to boys more than to girls
bbe$PRM <- as.numeric(bbe$M > bbe$FCORR)

# Yearly number of androgynous names and percentage of them that are
# mostly given to boys.
bbnb <- aggregate(N ~ year, data = bbe, sum)
bbnb$PRM <- aggregate(PRM ~ year, data = bbe, sum)$PRM
bbnb$PRPRM <- 100 * bbnb$PRM / bbnb$N

# NOTE(review): margins chosen here so that the right-hand axis and its label
# (drawn at line 3 on side 4) and the multi-line subtitle fit on the device;
# the margins set for figure 1 leave almost no room on the right.
par(mar = c(7, 4, 3, 5) + 0.1)
plot(
  bbm$year, bbm$PROPM,
  col = "orange", pch = 20, cex = .5, ylim = c(0, 100),
  yaxt = "n", ylab = "Proportion", xlab = "", axes = FALSE
)
axis(2, las = 2, at = c(0, 25, 50, 75, 100), labels = c(0, 25, 50, 75, 100))
axis(1)
smoothingSpline <- smooth.spline(bbm$year, bbm$PROPM, spar = 0.4)
lines(smoothingSpline)
abline(h = 50, lty = 3, col = "darkgray")
smoothingSpline <- smooth.spline(bbnb$year, bbnb$PRPRM, spar = 0.4)
lines(smoothingSpline, col = "red", lty = 2)
title(main = "Sex ratio")
# The line break after "1990, " reproduces the one embedded in the original
# string literal.
title(
  sub = paste0(
    "Data : USA, Social Security Administration, ",
    "package babynames | coulmont.com\n",
    "Between 1950 and 1990, \n",
    "boys received more girls' names than girls boys' names\n",
    "Note: Androgynous name = name for which female are more than 10% ",
    "and less than 90% of the total"
  ),
  cex.sub = .8, adj = 0
)
text(
  1890, 65, "1900: 60% of epicene names\nare male names",
  cex = .8, font = 2, col = "red", adj = 0
)
text(
  1970, 32, "1980: 35% of epicene babies\nare boys",
  cex = .8, font = 2, col = "black", adj = 0
)

# Overlay the Gini curve on its own y scale (right-hand axis).
par(new = TRUE)
plot(
  gini$year, gini$gini,
  type = "n", xaxt = "n", yaxt = "n", xlab = "", ylab = "", axes = FALSE
)
smoothingSpline <- smooth.spline(gini$year, gini$gini, spar = 0.4)
lines(smoothingSpline, col = "violet", lty = 2, lwd = 2)
axis(4, las = 2, col = "violet")
mtext("Gini coefficient", side = 4, line = 3, col = "violet", font = 2)