> # Attributes
> 
> # Consider the dim() function that we saw last time, which finds the dimensions 
> # of a data frame or matrix
> M <- matrix(1:6, nrow=2, ncol=3)
> dim(M)
[1] 2 3
> # What does R do when calling dim(M)?
> M
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
> # R could just do a nested loop every time we call dim(M), but that would be a waste of time. We would potentially have to loop through a lot of numbers if it's a big matrix
> # If we wanted to use dim(M) many times in a script with a big matrix, that would increase the time by a large factor
> 
> # So how does R fix this issue?
> # Attributes
> # Any data in R can have one or more "attributes"
> # These attributes record extra information about the data, which
> # can be accessed and changed
> 
> # Example: Adding a string attribute to a number
> x <- 123
> x
[1] 123
> # We add and access attributes using the attr() function
> attr(x, "letters") <- "abc"
> # We added an attribute called "letters" to x, and gave it the value "abc"
> # We can now access it
> attr(x, "letters")
[1] "abc"
> # By default, attributes you added will print to the console when you call up a variable
> x
[1] 123
attr(,"letters")
[1] "abc"
> 
> # Attributes (usually) get passed on in operations
> y <- x + 1000
> attr(y, "letters")
[1] "abc"
> 
> # Attributes on vectors
> v <- c(1,2,3)
> attr(v, "letters") <- "def"
> attr(v, "letters")
[1] "def"
> # A vector does not maintain its attributes if we combine it with another
> b <- c(v, 4)
> attr(b, "letters")
NULL
> # NULL means the attribute does not exist
> # b doesn't have v's attribute
> 
> # To recap: "letters" is the name of the attribute, and "def" is the value of the attribute
> 
> # Now that we know a bit about attributes, what's another way we could implement dim(M)?
> # We could give M an attribute that has the dimensions of M stored right away, and then all we need to do is ask for the value of that attribute
> # And that's exactly what R does.  
> # Each matrix has a 'dim' attribute built-in, which is calculated when a matrix is created
> attr(M, "dim")
[1] 2 3
> # dim(M) simply returns that attribute (in this case, the attribute is a vector of two numbers)
> 
> # What attributes have we seen for lists?
> L <- list(a=3, b=4, c=5)
> # In a named list, we have names for elements
> # again, R stores these as attributes of the data so that they are easy to access
> attr(L, "names")
[1] "a" "b" "c"
> names(L)
[1] "a" "b" "c"
> # names(L) returns that attribute
> 
> # Let's look at a more 'real-life' example
> # Example: Let's say we have a collection of grades for a student as a vector
> student1 <- c(98, 78, 76, 86)
> # Let's say we want to store the name of the student as part of the vector
> attr(student1, "name") <- "Fred"
> attr(student1, "name")
[1] "Fred"
> student1
[1] 98 78 76 86
attr(,"name")
[1] "Fred"
> 
> # Special attribute: "class"
> # The "class" attribute tells R that some operations on an R object/data structure should be done in a special way
> # Let's say we wanted to print out our student grades in a nicer way than the default vector printing
> student1
[1] 98 78 76 86
attr(,"name")
[1] "Fred"
> # It doesn't look very nice
> # Let's see how to use the class attribute to add some behaviour to this data
> attr(student1, "class") <- "student"
> # We are creating a new "type" of object in R called "student"
> # "student" is not really a new R data type, because at the end of the day it's still just a numeric vector with some character attributes attached
> # We're defining a 'class' named "student" and giving student1 that class
> # We can now define a new way to print for *any* variable of class "student"
> 
> # We edit the printing of the variable by creating a function called
> # print.[classname]
> source('~/Desktop/classes.R')
> student1
Student name: Fred 
Student grade: 98 78 76 86
> print(student1)
Student name: Fred 
Student grade: 98 78 76 86
> # Notice how we basically created an 'override' of the print behaviour of student1
> b <- student1
> b
Student name: Fred 
Student grade: 98 78 76 86
> class(b)
[1] "student"
> class(b) <- "person"
> b
[1] "NA NA <NA> [NA] (NA)" "NA NA <NA> [NA] (NA)" "NA NA <NA> [NA] (NA)"
[4] "NA NA <NA> [NA] (NA)"
> class(b) <- "student"
> b
Student name: Fred 
Student grade: 98 78 76 86
> 
> # Classes vs types
> class(student1)
[1] "student"
> typeof(student1)
[1] "double"
> # class() gives the class attribute, while typeof() gives the underlying basic R type it's built on
> # Classes allow us to extend R's basic types (numeric, strings, vectors, lists) into more complex R 'objects'
> # Another great example is Data Frames
> d <- data.frame(a=c(1,2,3))
> class(d)
[1] "data.frame"
> typeof(d)
[1] "list"
> # A data frame is actually a list, with a class "data.frame"
> # We added a "data.frame" class attribute to a variable of type "list"
> # This allows us to do data frame-things, such as have rows and columns, and organize and print them out in a certain way
> list(a=c(1,2,3))
$a
[1] 1 2 3

> d
  a
1 1
2 2
3 3
> L <- list(a=c(1,2,3))
> typeof(L) == typeof(d)
[1] TRUE
> # We added behaviour to a list
> unclass(d)
$a
[1] 1 2 3

attr(,"row.names")
[1] 1 2 3
> # unclass(x) gets rid of the class of x
> 
> # Factors
> # A factor represents a vector of strings as a vector of integers, along with a vector of the distinct string values
> v <- c("red", "green", "yellow", "red", "green", "blue", "red")
> # What are the unique values?
> # red, green, yellow, blue
> # There are 4 unique/distinct values
> # let's turn the vector into a factor
> f <- as.factor(v)
> f
[1] red    green  yellow red    green  blue   red   
Levels: blue green red yellow
> # We get back a vector of strings, but also some "Levels: " which are...the unique values!
> # Let's check the classes and types
> class(f)
[1] "factor"
> typeof(f)
[1] "integer"
> # The type of f is "not" string! It's an integer vector!
> # We can find the underlying type by removing the class
> unclass(f)
[1] 3 2 4 3 2 1 3
attr(,"levels")
[1] "blue"   "green"  "red"    "yellow"
> f
[1] red    green  yellow red    green  blue   red   
Levels: blue green red yellow
> # Now we see that underlying structure
> # An integer vector with an attribute, "levels"
> # What are the "levels"? The unique strings
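> # The numbers in the integer vector are indexes into the vector stored in the "levels" attribute
> # e.g. the first number is 3, and the 3rd level is "red", so the first element of f is red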
> # How does this help?
> # Integers take up less space than strings, so having only the unique values saves a lot of space in the computer's memory
> 
> # Where are factors used?
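> # Recall: data frames
> # (Note: since R 4.0.0, read.table() only turns string columns into factors if you pass stringsAsFactors=TRUE; the output below relies on the older default)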
> students <- read.table("students.txt", header=TRUE)
> students$lname
[1] Campbell  Gries     Dolderman
Levels: Campbell Dolderman Gries
> # Why do we store columns as factors?
> # Data frames can be huge, and we want to save as much space in memory as we can
> # Factors only store unique values, and some integers - so we can potentially save a LOT of space