(<- class-heights
    ;; sex (0 - male; 1 - female)  height (inches) last-digit of id
    (flatten 
     (loop for i from 1 to 20
           collect 
           (let* 
             ((sex (random-discrete-uniform :from 0 :to 1))
              (height 
               (eref (random-gaussian :location (if (zerop sex)
                                                  70 
                                                  64)
                                      :scale 3) 0) 
               )
              (id (random-discrete-uniform :from 0 :to 9))
              )
             (list (if (zerop sex)
                     ;; using strings here rather than numbers 0 and 1
                     "Male"
                     "Female")
                   height id)
             )
           )
     )
    )

;;; Here is where we might collect data rather than generate it.
;;; It is presently commented out by the enclosing #|  ... |#

#| 
(<- class-heights
    ;; sex (0 - male; 1 - female)  height (inches) last-digit of id
    '("Male"  74 9
      "Male" 68 8
      "Male" 73 9
      "Female" 64 7
      "Female" 68 5
      "Female" 72 2
      "Male" 68 5
      "Female" 64 9
      "Female" 64 6
      "Male" 68 4
      "Male" 72 9
      "Female" 61 3
      "Male" 66 8
      "Male" 67 8
      "Male" 71 9
      "Female" 63 6
      "Male" 70 7
      "Male" 70 4
      "Male" 72 2
      "Male" 75 3
      "Male" 66 3
      "Male" 70 1
      
      
      )
    )
|#

(<- class-vars '(sex height-in-inches last-id-digit) )

(<- class-heights
    (array
     class-heights
     :dimensions (list (/ (length class-heights)
                          (length class-vars))
                       (length class-vars))))

(<- class-heights 
    (dataset class-heights 
             :variates class-vars 
             :name "STAT441/841 height data"))


;; some plots
;;

(histogram :data class-heights
           :var 'height-in-inches
           :title "Heights in inches"
           :color *green-color*
           :fill? T
           :link? T
           :nbins 8)


;; just look at the cases batched together by sex

(batch-display-list :data class-heights
                   :scrollable? NIL
                   :link? T
                   :by 'sex
                   :draw? T)

;; set up a density model for each sex
;; 
;; 

(<- men
    (array
     (loop for i from 0 to (- (nrows class-heights) 1)
           when 
           ;; it's a man
           (or (and (numberp (eref class-heights i 0))
                         (= (eref class-heights i 0) 0))
                    (string-equal "Male" (eref class-heights i 0)))
           ;; this when is more complicated than it needs to be just
           ;; so that both cases of using 0 and 1 or "Male" and "Female"
           ;; will work without any change to the rest of the code.

           collect (eref class-heights i 1))
     )
    )

(<- women
    (array  
     (loop for i from 0 to (- (nrows class-heights) 1)
           when 
           ;; it's a woman
           (or (and (numberp (eref class-heights i 0))
                         (= (eref class-heights i 0) 0))
                    (string-equal "Female" (eref class-heights i 0)))
           ;; as with men,
           ;; this when is more complicated than it needs to be just
           ;; so that both cases of using 0 and 1 or "Male" and "Female"
           ;; will work without any change to the rest of the code.

           collect (eref class-heights i 1))
     ))

(<- hm (histogram :data men 
                  :var 0 
                  :fill? T
                  :color *light-blue-color*
                  :title "Men's height" 
                  :bottom-label "height"
                  :histogram-scale :density))

(<- hw (histogram :data women 
                  :var 0 
                  :fill? T
                  :color *gray-color*
                  :title "Women's height" 
                  :bottom-label "height"
                  :histogram-scale :density))

(<- gw (function-plot :domain '(55 80) 
                      :nlines 100 
                      :function (function (lambda (x) 
                                            (density-gaussian 
                                             x
                                             :location (mean women)
                                             :scale (sd women))))
                      :draw? T
                      :width 4
                      :left-label "density"
                      :bottom-label "Height in inches"
                      :title "Two gaussians, modelling heights; women red, men blue"
                      :color *red-color*))

(set-tics (left-view-of gw) '(0 0.1 0.2))

(<- gm (function-view :domain '(55 80) 
                      :nlines 100 
                      :function 
                      (function (lambda (x) 
                                  (density-gaussian 
                                   x
                                   :location (mean men)
                                   :scale (sd men))))
                      :draw? NIL
                      :width 4
                      :color *blue-color*))


(layer-view (interior-view-of gw) gm )

;;; Now let's layer these on top of the corresponding histograms to see how
;;; well these fit.

(set-tics (left-view-of hw) '(0 0.1 0.2))
(set-tics (bottom-view-of hw) '(55 60 65 70 75 80))

(layer-view (interior-view-of hw) (interior-view-of gw) )

(set-tics (left-view-of hm) '(0 0.1 0.2))
(set-tics (bottom-view-of hm) '(55 60 65 70 75 80))

(layer-view (interior-view-of hm) gm )


;;; Now put together as a mixture

(<- prop-female (/ (number-of-elements women)
                   (nrows class-heights)))

(<- gwm (function-view :domain '(56 80) 
                       :nlines 100 
                       :function 
                       (function (lambda (x) 
                                   (+
                                    (* prop-female 
                                       (density-gaussian 
                                        x
                                        :location (mean women)
                                        :scale (sd women)))
                                    (* (- 1 prop-female)
                                       (density-gaussian 
                                        x
                                        :location (mean men)
                                        :scale (sd men))))
                                   )
                                 )
                       :draw? NIL
                       :width 4
                       :color *green-color*))


(layer-view (interior-view-of gw) gwm )
(layer-view gm gwm )


;;;
;;;  Some examples of mixtures of two gaussians
;;;

(defun draw-gaussian-mix (&key (location1 10) 
                               (scale1 1) 
                               (location2 15) 
                               (scale2 2)
                               (mix-prob 0.5))
       (let
         (g1 g2 gm 
             (xmin (min (- location1 (* 3 scale1)) (- location2 (* 3 scale2))))
             (xmax (max (+ location1 (* 3 scale1)) (+ location2 (* 3 scale2))))
             (max-height (max (density-gaussian location1 
                                                :location location1
                                                :scale scale1)
                              (density-gaussian location2 
                                                :location location2
                                                :scale scale2))
                         )
             )
         (<- g1 (function-plot :domain (list xmin xmax)
                               :nlines 100 
                               :function (function (lambda (x) 
                                                     (density-gaussian 
                                                      x
                                                      :location location1
                                                      :scale scale1)))
                               :draw? T
                               :width 4
                               :left-label "density"
                               :bottom-label "X"
                               :title 
                               (format NIL
                                       "Mixing two gaussians: ~3,2F G(~a, ~a) + ~3,2F G(~a, ~a)"
                                       mix-prob location1 scale1 
                                       (- 1 mix-prob) location2 scale2)
                               :color *red-color*))
         (set-tics (left-view-of g1) (list 0 0.1 (* .1 (round max-height .1))))
         
         (<- g2 (function-view :domain (list xmin xmax) 
                               :nlines 100 
                               :function 
                               (function (lambda (x) 
                                           (density-gaussian 
                                            x
                                            :location location2
                                            :scale scale2)))
                               :draw? NIL
                               :width 4
                               :color *blue-color*)
             )
         
         
         (layer-view (interior-view-of g1) g2 )
         
         
         (<- gm (function-view :domain (list xmin xmax) 
                               :nlines 100 
                               :function 
                               (function (lambda (x) 
                                           (+
                                            (* mix-prob 
                                               (density-gaussian 
                                                x
                                                :location location1
                                                :scale scale1))
                                            (* (- 1 mix-prob)
                                               (density-gaussian 
                                                x
                                                :location location2
                                                :scale scale2)))
                                           )
                                         )
                               :draw? NIL
                               :width 4
                               :color *green-color*))
         
         
         (layer-view (interior-view-of g1) gm )
         )
       )


;;; try some

(draw-gaussian-mix)
(draw-gaussian-mix :mix-prob 0.1)
(draw-gaussian-mix :mix-prob 0.7)
(draw-gaussian-mix :mix-prob 0.9)
(draw-gaussian-mix :location1 12)
(draw-gaussian-mix :location1 12 :location2 13)
(draw-gaussian-mix :location1 12 :location2 20 :mix-prob 1/3)

;;; Exploring some of the other features and dimensions of the data
;;;


(dot-plot :data class-heights
          :var 'last-id-digit
          :size 12
          :link? T
          :title "Random digit")

;; display the cases in a list

(case-display-list :data class-heights
                   :scrollable? T
                   :link? T
                   :draw? T)
(<- sp
    (scatterplot :data class-heights
                 :x 'last-id-digit
                 :y 'height-in-inches
                 :link? T))