/*------------------------------------------------------------------+
 | MUTATION DISTANCES (FITCH AND MARGOLIASH)
 |
 | The source of this data is a paper by Fitch and Margoliash
 | in Science (1967).  For a more recent reference see Scientific
 | American (1972?).
 |
 | Every species has a protein molecule, Cytochrome c, which varies
 | from species to species but has a similar function for all.  It
 | consists of a long chain of amino acids.  There are only a few
 | acids, but different molecules are obtained by varying the
 | acids in each position in the chain.  The number of positions
 | with different acids measures distance between two species.
 | These distances are given in the data below.
 |
 | For example, the amino acids in Cytochrome c for two species look
 | like this:
 |     Moth            XXYVPLY .........SEXI
 |     Screwworm fly   XXYVPLY .........LSEI
 | where the whole chain is 110 in length, and the letters represent
 | particular amino acids.  Each difference contributes to mutation
 | distance according to the minimum number of nucleotides that would
 | need to be changed to convert one into the other.
 |
 | Fitch & Margoliash used these data to construct a phylogenetic
 | tree.
 |
 | Ref:  Science, v. 155, 279-284.
 |
 | What this program does:
 |   1. Reads the lower triangle of the 20 x 20 distance matrix into
 |      MUTATE (TYPE=DISTANCE).
 |   2. Prints the matrix.
 |   3. Runs PROC MDS (ordinal level, 2 dimensions) and prints/plots
 |      the output configuration.
 |   4. Runs complete-linkage PROC CLUSTER and draws the tree with
 |      PROC TREE and the %gtree macro.
 +------------------------------------------------------------------*/

Title 'Mutation distances among 20 species';

/*
 | Data layout (fixed columns -- do NOT re-indent or re-space the data
 | lines):
 |   cols  1-16  Species name
 |   cols 20-59  D1-D20, each read with a 2. informat; only the lower
 |               triangle (through the zero diagonal) is present on
 |               each line, the remaining fields are blank.
 |
 | Column ruler:
 |   ....+....1....+....2....+....3....+....4....+....5....+....6
 */
Data MUTATE(TYPE=DISTANCE);
   Input Species $1-16 @20 (D1-D20) (20*2.);
   N = _N_;
   /* Guard: keep at most the 20 species rows. */
   if n <= 20 then output;
Cards;
Man                 0
Monkey             01 0
Dog                1312 0
Horse              171610 0
Donkey             16150801 0
Pig                1312040504 0
Rabbit             121106111006 0
Kangaroo           12130711120707 0
Pekin Duck         1716121615131014 0
Pigeon             161512161513081403 0
Chicken            18171416151311150304 0
King Penguin       1817141716141113030402 0
Snapping Turtle    191813161513111407080808 0
Rattlesnake        20213032313025302424282830 0
Tuna               3132292726252627272726272738 0
Screwworm Fly      333224242526232626262628304034 0
Moth               36352833323129313030313033414116 0
Bakers Mould       6362646464646266595961626561725859 0
Bread Yeast        565761605959595862626261646166636057 0
Skin Fungus        66656668676767686666666567696965616141 0
;
run;

/* List the distance matrix as read. */
proc print data=mutate;
   id species;
run;

/*
 | Two-dimensional ordinal multidimensional scaling of the distances.
 | NOTE(review): MODEL=EUCLID looks like PROC ALSCAL syntax; confirm
 | that PROC MDS accepts it (the option is left exactly as it was).
 */
proc mds data=mutate
         level=ordinal
         shape=triangle
         model=euclid
         out=config
         dim=2;
   var d1-d20;
   id species;
run;

/* Print the MDS output data set (previously picked up via _LAST_). */
proc print data=config;
   Title2 'Output configuration';
   Id Species;
run;

/* Line-printer plot of the configuration, points labelled by species. */
proc plot data=Config;
   Plot DIM2 * DIM1 $ Species;
run;

/* Plot of the same configuration via the %plotit macro
   (macro is not defined in this file). */
%plotit(data=config, datatype=mds, labelvar=Species);

/* Hierarchical clustering of the same distance matrix. */
proc cluster data=MUTATE
             outtree=TREE
             method=complete      /* Diameter method */ ;
   Var D1-D20;
   Id Species;
   Title2 'Complete linkage clustering';
run;

/* Horizontal tree diagram; DISSIMILAR is specified because the
   tree heights here come from distances. */
proc tree data=TREE dissimilar horizontal;
run;

/* Graphics version of the tree; %gtree is defined outside this file. */
*include goptions ;
goptions vsize=7.5 in;
*include macros(gtree);
%gtree(tree=tree, orient=H,label=Mutation Distance,sym=dot, ctree='red', hlabel=1.3);