Book
Book
Jennifer Rexford!
1
Motivating Quotations!
Every program depends on algorithms and data
structures, but few programs depend on the
invention of brand new ones.!
-- Kernighan & Pike!
3
A Common Task!
Maintain a table of key/value pairs!
Each key is a string; each value is an int
Unknown number of key-value pairs!
Examples!
(student name, grade)!
(john smith, 84), (jane doe, 93), (bill clinton, 81)!
(baseball player, number)!
(Ruth, 3), (Gehrig, 4), (Mantle, 7)!
(variable name, value)!
(maxLength, 2000), (i, 7), (j, -10)!
Algorithms!
Create: Create the data structure!
Add: Add a key/value pair!
Search: Search for a key/value pair, by key!
Free: Free the data structure!
5
Data Structure #1: Linked List!
Data structure: Nodes; each contains key/value
pair and pointer to next node!
Algorithms:!
Create: Allocate Table structure to point to first node!
Add: Insert new node at front of list!
Search: Linear search through the list!
Free: Free nodes while traversing; free Table structure!
6
Linked List: Data Structure!
/* One list entry: a key/value pair plus the link to the next entry. */
struct Node {
/* Key string; the node holds a pointer to it. */
const char *key;
/* Value stored with the key. */
int value;
/* Following node in the list. */
struct Node *next;
};
/* The table: a handle on the first node of the list. */
struct Table {
/* First node of the list. */
struct Node *first;
};
struct! struct!
struct! Node! Node!
Table!
"Gehrig" "Ruth"
4 3
NULL
7
Linked List: Create (1)!
/*
 * Table_create: allocate a new, empty table.
 *
 * Returns the new table, whose list is empty (first == NULL), or NULL
 * if memory cannot be allocated.
 */
struct Table *Table_create(void) {
    struct Table *t = malloc(sizeof *t);
    if (t == NULL)
        return NULL;  /* out of memory: nothing to initialize */
    t->first = NULL;
    return t;
}
t!
8
Linked List: Create (2)!
/*
 * Table_create: allocate a new, empty table.
 *
 * Returns the new table, whose list is empty (first == NULL), or NULL
 * if memory cannot be allocated.
 */
struct Table *Table_create(void) {
    struct Table *t = malloc(sizeof *t);
    if (t == NULL)
        return NULL;  /* out of memory: nothing to initialize */
    t->first = NULL;
    return t;
}
t!
NULL
9
Linked List: Add (1)!
/*
 * Table_add: insert a key/value pair at the front of the list.
 *
 * t     - table to add to
 * key   - key string; only the pointer is stored, the characters are
 *         not copied
 * value - value to associate with key
 *
 * If the new node cannot be allocated the table is left unchanged (the
 * void return type gives no way to report the failure).
 */
void Table_add(struct Table *t,
               const char *key, int value) {
    struct Node *p = malloc(sizeof *p);
    if (p == NULL)
        return;
    p->key = key;
    p->value = value;
    p->next = t->first;  /* new node points at the old front ... */
    t->first = p;        /* ... and becomes the new front */
}
10
Linked List: Add (2)!
/*
 * Table_add: insert a key/value pair at the front of the list.
 *
 * t     - table to add to
 * key   - key string; only the pointer is stored, the characters are
 *         not copied
 * value - value to associate with key
 *
 * If the new node cannot be allocated the table is left unchanged (the
 * void return type gives no way to report the failure).
 */
void Table_add(struct Table *t,
               const char *key, int value) {
    struct Node *p = malloc(sizeof *p);
    if (p == NULL)
        return;
    p->key = key;
    p->value = value;
    p->next = t->first;  /* new node points at the old front ... */
    t->first = p;        /* ... and becomes the new front */
}
11
Linked List: Add (3)!
/*
 * Table_add: insert a key/value pair at the front of the list.
 *
 * t     - table to add to
 * key   - key string; only the pointer is stored, the characters are
 *         not copied
 * value - value to associate with key
 *
 * If the new node cannot be allocated the table is left unchanged (the
 * void return type gives no way to report the failure).
 */
void Table_add(struct Table *t,
               const char *key, int value) {
    struct Node *p = malloc(sizeof *p);
    if (p == NULL)
        return;
    p->key = key;
    p->value = value;
    p->next = t->first;  /* new node points at the old front ... */
    t->first = p;        /* ... and becomes the new front */
}
"Gehrig" "Ruth"
4 3
NULL
12
Linked List: Add (4)!
/*
 * Table_add: insert a key/value pair at the front of the list.
 *
 * t     - table to add to
 * key   - key string; only the pointer is stored, the characters are
 *         not copied
 * value - value to associate with key
 *
 * If the new node cannot be allocated the table is left unchanged (the
 * void return type gives no way to report the failure).
 */
void Table_add(struct Table *t,
               const char *key, int value) {
    struct Node *p = malloc(sizeof *p);
    if (p == NULL)
        return;
    p->key = key;
    p->value = value;
    p->next = t->first;  /* new node points at the old front ... */
    t->first = p;        /* ... and becomes the new front */
}
"Gehrig" "Ruth"
4 3
NULL
13
Linked List: Add (5)!
/*
 * Table_add: insert a key/value pair at the front of the list.
 *
 * t     - table to add to
 * key   - key string; only the pointer is stored, the characters are
 *         not copied
 * value - value to associate with key
 *
 * If the new node cannot be allocated the table is left unchanged (the
 * void return type gives no way to report the failure).
 */
void Table_add(struct Table *t,
               const char *key, int value) {
    struct Node *p = malloc(sizeof *p);
    if (p == NULL)
        return;
    p->key = key;
    p->value = value;
    p->next = t->first;  /* new node points at the old front ... */
    t->first = p;        /* ... and becomes the new front */
}
"Gehrig" "Ruth"
4 3
NULL
14
Linked List: Search (1)!
/*
 * Table_search: look up key by linear search through the list.
 *
 * If a node whose key compares equal to key (strcmp) is found, stores
 * that node's value in *value and returns 1.  Otherwise returns 0 and
 * leaves *value untouched.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     int value;
 *     int found;
 *     found =
 *         Table_search(t, "Gehrig", &value);
 */
int Table_search(struct Table *t,
                 const char *key, int *value) {
    struct Node *p;
    for (p = t->first; p != NULL; p = p->next)
        if (strcmp(p->key, key) == 0) {
            *value = p->value;
            return 1;
        }
    return 0;
}
t!
15
Linked List: Search (2)!
/*
 * Table_search: look up key by linear search through the list.
 *
 * If a node whose key compares equal to key (strcmp) is found, stores
 * that node's value in *value and returns 1.  Otherwise returns 0 and
 * leaves *value untouched.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     int value;
 *     int found;
 *     found =
 *         Table_search(t, "Gehrig", &value);
 */
int Table_search(struct Table *t,
                 const char *key, int *value) {
    struct Node *p;
    for (p = t->first; p != NULL; p = p->next)
        if (strcmp(p->key, key) == 0) {
            *value = p->value;
            return 1;
        }
    return 0;
}
p!
t!
16
Linked List: Search (3)!
/*
 * Table_search: look up key by linear search through the list.
 *
 * If a node whose key compares equal to key (strcmp) is found, stores
 * that node's value in *value and returns 1.  Otherwise returns 0 and
 * leaves *value untouched.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     int value;
 *     int found;
 *     found =
 *         Table_search(t, "Gehrig", &value);
 */
int Table_search(struct Table *t,
                 const char *key, int *value) {
    struct Node *p;
    for (p = t->first; p != NULL; p = p->next)
        if (strcmp(p->key, key) == 0) {
            *value = p->value;
            return 1;
        }
    return 0;
}
p!
t!
17
Linked List: Search (4)!
/*
 * Table_search: look up key by linear search through the list.
 *
 * If a node whose key compares equal to key (strcmp) is found, stores
 * that node's value in *value and returns 1.  Otherwise returns 0 and
 * leaves *value untouched.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     int value;
 *     int found;
 *     found =
 *         Table_search(t, "Gehrig", &value);
 */
int Table_search(struct Table *t,
                 const char *key, int *value) {
    struct Node *p;
    for (p = t->first; p != NULL; p = p->next)
        if (strcmp(p->key, key) == 0) {
            *value = p->value;
            return 1;
        }
    return 0;
}
p!
t!
18
Linked List: Search (5)!
/*
 * Table_search: look up key by linear search through the list.
 *
 * If a node whose key compares equal to key (strcmp) is found, stores
 * that node's value in *value and returns 1.  Otherwise returns 0 and
 * leaves *value untouched.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     int value;
 *     int found;
 *     found =
 *         Table_search(t, "Gehrig", &value);
 */
int Table_search(struct Table *t,
                 const char *key, int *value) {
    struct Node *p;
    for (p = t->first; p != NULL; p = p->next)
        if (strcmp(p->key, key) == 0) {
            *value = p->value;
            return 1;
        }
    return 0;
}
p!
t!
19
Linked List: Search (6)!
/*
 * Table_search: look up key by linear search through the list.
 *
 * If a node whose key compares equal to key (strcmp) is found, stores
 * that node's value in *value and returns 1.  Otherwise returns 0 and
 * leaves *value untouched.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     int value;
 *     int found;
 *     found =
 *         Table_search(t, "Gehrig", &value);
 */
int Table_search(struct Table *t,
                 const char *key, int *value) {
    struct Node *p;
    for (p = t->first; p != NULL; p = p->next)
        if (strcmp(p->key, key) == 0) {
            *value = p->value;
            return 1;
        }
    return 0;
}
1
p! 4
t!
20
Linked List: Free (1)!
/*
 * Table_free: release every node of the list, then the table itself.
 *
 * The key strings are not freed here; only the nodes and the table are.
 */
void Table_free(struct Table *t) {
    struct Node *p = t->first;
    struct Node *nextp;
    while (p != NULL) {
        nextp = p->next;  /* read the link before the node is freed */
        free(p);
        p = nextp;
    }
    free(t);
}
t!
"Mantle" "Gehrig" "Ruth"
7 4 3
NULL
21
Linked List: Free (2)!
/*
 * Table_free: release every node of the list, then the table itself.
 *
 * The key strings are not freed here; only the nodes and the table are.
 */
void Table_free(struct Table *t) {
    struct Node *p = t->first;
    struct Node *nextp;
    while (p != NULL) {
        nextp = p->next;  /* read the link before the node is freed */
        free(p);
        p = nextp;
    }
    free(t);
}
p!
t!
"Mantle" "Gehrig" "Ruth"
7 4 3
NULL
22
Linked List: Free (3)!
/*
 * Table_free: release every node of the list, then the table itself.
 *
 * The key strings are not freed here; only the nodes and the table are.
 */
void Table_free(struct Table *t) {
    struct Node *p = t->first;
    struct Node *nextp;
    while (p != NULL) {
        nextp = p->next;  /* read the link before the node is freed */
        free(p);
        p = nextp;
    }
    free(t);
}
p! nextp!
t!
"Mantle" "Gehrig" "Ruth"
7 4 3
NULL
23
Linked List: Free (4)!
/*
 * Table_free: release every node of the list, then the table itself.
 *
 * The key strings are not freed here; only the nodes and the table are.
 */
void Table_free(struct Table *t) {
    struct Node *p = t->first;
    struct Node *nextp;
    while (p != NULL) {
        nextp = p->next;  /* read the link before the node is freed */
        free(p);
        p = nextp;
    }
    free(t);
}
p! nextp!
t!
"Mantle" "Gehrig" "Ruth"
7 4 3
NULL
24
Linked List: Free (5)!
/*
 * Table_free: release every node of the list, then the table itself.
 *
 * The key strings are not freed here; only the nodes and the table are.
 */
void Table_free(struct Table *t) {
    struct Node *p = t->first;
    struct Node *nextp;
    while (p != NULL) {
        nextp = p->next;  /* read the link before the node is freed */
        free(p);
        p = nextp;
    }
    free(t);
}
p! nextp!
t!
"Mantle" "Gehrig" "Ruth"
7 4 3
NULL
25
Linked List: Free (6)!
/*
 * Table_free: release every node of the list, then the table itself.
 *
 * The key strings are not freed here; only the nodes and the table are.
 */
void Table_free(struct Table *t) {
    struct Node *p = t->first;
    struct Node *nextp;
    while (p != NULL) {
        nextp = p->next;  /* read the link before the node is freed */
        free(p);
        p = nextp;
    }
    free(t);
}
p! nextp!
t!
"Mantle" "Gehrig" "Ruth"
7 4 3
NULL
26
Linked List: Free (7)!
/*
 * Table_free: release every node of the list, then the table itself.
 *
 * The key strings are not freed here; only the nodes and the table are.
 */
void Table_free(struct Table *t) {
    struct Node *p = t->first;
    struct Node *nextp;
    while (p != NULL) {
        nextp = p->next;  /* read the link before the node is freed */
        free(p);
        p = nextp;
    }
    free(t);
}
p! nextp!
t!
"Mantle" "Gehrig" "Ruth"
7 4 3
NULL
27
Linked List: Free (8)!
/*
 * Table_free: release every node of the list, then the table itself.
 *
 * The key strings are not freed here; only the nodes and the table are.
 */
void Table_free(struct Table *t) {
    struct Node *p = t->first;
    struct Node *nextp;
    while (p != NULL) {
        nextp = p->next;  /* read the link before the node is freed */
        free(p);
        p = nextp;
    }
    free(t);
}
p! nextp!
t!
"Mantle" "Gehrig" "Ruth"
7 4 3
NULL
28
Linked List: Free (9)!
/*
 * Table_free: release every node of the list, then the table itself.
 *
 * The key strings are not freed here; only the nodes and the table are.
 */
void Table_free(struct Table *t) {
    struct Node *p;
    struct Node *nextp;
    for (p = t->first; p != NULL; p = nextp) {
        nextp = p->next;  /* read the link before the node is freed */
        free(p);
    }
    free(t);
}
p! nextp!
t!
"Mantle" "Gehrig" "Ruth"
7 4 3
NULL
29
Linked List Performance!
Create: fast
Add: fast
Search: slow
Free: slow
What are the asymptotic run times (big-oh notation)?
Would it be better to keep the nodes sorted by key?
30
Data Structure #2: Hash Table!
Fixed-size array where each element points to a linked list!
0
ARRAYSIZE-1
1776 1861
0
Revolution Civil
1
2
3
4
1939
WW2
32
How Large an Array?!
Large enough that average bucket size is 1!
Short buckets mean fast search!
Long buckets mean slow search!
This is OK:!
0
ARRAYSIZE-1
33
What Kind of Hash Function?!
Good at distributing elements across the array!
Distribute results over the range 0, 1, …, ARRAYSIZE-1
Distribute results evenly to avoid very long buckets!
ARRAYSIZE-1
What would be the
worst possible hash
function?!
34
Hashing String Keys to Integers!
Simple schemes don't distribute the keys evenly enough
Number of characters, mod ARRAYSIZE!
Sum the ASCII values of all characters, mod ARRAYSIZE!
!
35
Implementing Hash Function!
Potentially expensive to compute $a^i$ for each value of $i$
Computing $a^i$ for each value of $i$
Instead, do (((x[0] * 65599 + x[1]) * 65599 + x[2]) * 65599 + x[3]) * …
0
1
2
3
4
5
6
37
Hash Table Example (cont.)!
Example: ARRAYSIZE = 7!
Lookup (and enter, if not present) these strings: the, cat, in, the, hat!
Hash table initially empty.!
First word: the. hash(the) = 965156977. 965156977 % 7 = 1.!
Search the linked list table[1] for the string the; not found!
Now: table[1] = makelink(key, value, table[1])!
0
the
1
2
3
4
5
6
38
Hash Table Example (cont.)!
Second word: cat. hash(cat) = 824252214. 824252214 % 7 = 2.
Search the linked list table[2] for the string cat; not found!
Now: table[2] = makelink(key, value, table[2])!
0
the
1
2
3
4
5
6
39
Hash Table Example (cont.)!
Third word: in. hash(in) = 6888005. 6888005 % 7 = 5.
Search the linked list table[5] for the string in; not found!
Now: table[5] = makelink(key, value, table[5])!
0
the
1
2
3 cat
4
5
6
40
Hash Table Example (cont.)!
Fourth word: the. hash(the) = 965156977. 965156977 % 7 = 1.!
Search the linked list table[1] for the string the; found it!!
0
the
1
2
3 cat
4
in
5
6
41
Hash Table Example (cont.)!
Fifth word: hat. hash(hat) = 865559739. 865559739 % 7 = 2.
Search the linked list table[2] for the string hat; not found.!
Now, insert hat into the linked list table[2]. !
At beginning or end? Doesn't matter.
0
the
1
2
3 cat
4
in
5
6
42
Hash Table Example (cont.)!
Inserting at the front is easier, so add hat at the front !
0
the
1
2
3 hat cat
4
in
5
6
43
Hash Table: Data Structure!
struct Node {
const char *key;
int value; struct!
struct Node *next;
struct!
Table!
}; Node!
0 NULL struct!
struct Table { "Ruth"
1 NULL
struct Node *array[BUCKET_COUNT]; 3 Node!
}; 23 NULL
"Gehrig"
723 4
NULL
806 NULL
1023 NULL
44
Hash Table: Create!
/*
 * Table_create: allocate a table with calloc, so its storage starts out
 * zero-filled (the slide's picture shows every bucket as NULL).
 *
 * Returns the new table, or NULL if calloc fails.
 */
struct Table *Table_create(void) {
    return calloc(1, sizeof(struct Table));
}
t!
0 NULL
1 NULL
1023 NULL
45
Hash Table: Add (1)!
/*
 * Table_add: insert a key/value pair at the front of the bucket that
 * hash(key) selects.
 *
 * Only the key pointer is stored; the characters are not copied.
 * If the new node cannot be allocated the table is left unchanged (the
 * void return type gives no way to report the failure).
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_add(t, "Ruth", 3);
 *     Table_add(t, "Gehrig", 4);
 *     Table_add(t, "Mantle", 7);
 */
void Table_add(struct Table *t,
               const char *key, int value) {
    struct Node *p = malloc(sizeof *p);
    int h;
    if (p == NULL)
        return;
    h = hash(key);
    p->key = key;
    p->value = value;
    p->next = t->array[h];  /* new node points at the bucket's old front ... */
    t->array[h] = p;        /* ... and becomes the bucket's new front */
}
t!
These are
pointers to
0 NULL "Ruth" strings!
1 NULL
3
NULL
23 "Gehrig"
4
723 NULL Pretend that Ruth!
806 NULL
hashed to 23 and!
1023 NULL Gehrig to 723!
46
Hash Table: Add (2)!
/*
 * Table_add: insert a key/value pair at the front of the bucket that
 * hash(key) selects.
 *
 * Only the key pointer is stored; the characters are not copied.
 * If the new node cannot be allocated the table is left unchanged (the
 * void return type gives no way to report the failure).
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_add(t, "Ruth", 3);
 *     Table_add(t, "Gehrig", 4);
 *     Table_add(t, "Mantle", 7);
 */
void Table_add(struct Table *t,
               const char *key, int value) {
    struct Node *p = malloc(sizeof *p);
    int h;
    if (p == NULL)
        return;
    h = hash(key);
    p->key = key;
    p->value = value;
    p->next = t->array[h];  /* new node points at the bucket's old front ... */
    t->array[h] = p;        /* ... and becomes the bucket's new front */
}
t!
0 NULL "Ruth"
1 NULL
3
NULL
23 "Gehrig" p!
4
723 NULL
806 NULL
1023 NULL
47
Hash Table: Add (3)!
/*
 * Table_add: insert a key/value pair at the front of the bucket that
 * hash(key) selects.
 *
 * Only the key pointer is stored; the characters are not copied.
 * If the new node cannot be allocated the table is left unchanged (the
 * void return type gives no way to report the failure).
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_add(t, "Ruth", 3);
 *     Table_add(t, "Gehrig", 4);
 *     Table_add(t, "Mantle", 7);
 */
void Table_add(struct Table *t,
               const char *key, int value) {
    struct Node *p = malloc(sizeof *p);
    int h;
    if (p == NULL)
        return;
    h = hash(key);
    p->key = key;
    p->value = value;
    p->next = t->array[h];  /* new node points at the bucket's old front ... */
    t->array[h] = p;        /* ... and becomes the bucket's new front */
}
t!
Pretend that Mantle!
hashed to 806, and so!
0 NULL "Ruth"
1 NULL 3
h = 806!
NULL
23 "Gehrig" p!
4
723 NULL
806 NULL
"Mantle"
7
1023 NULL
48
Hash Table: Add (4)!
/*
 * Table_add: insert a key/value pair at the front of the bucket that
 * hash(key) selects.
 *
 * Only the key pointer is stored; the characters are not copied.
 * If the new node cannot be allocated the table is left unchanged (the
 * void return type gives no way to report the failure).
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_add(t, "Ruth", 3);
 *     Table_add(t, "Gehrig", 4);
 *     Table_add(t, "Mantle", 7);
 */
void Table_add(struct Table *t,
               const char *key, int value) {
    struct Node *p = malloc(sizeof *p);
    int h;
    if (p == NULL)
        return;
    h = hash(key);
    p->key = key;
    p->value = value;
    p->next = t->array[h];  /* new node points at the bucket's old front ... */
    t->array[h] = p;        /* ... and becomes the bucket's new front */
}
t!
0 NULL
1 NULL
"Ruth"
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 51
Hash Table: Search (2)!
/*
 * Table_search: look up key in the bucket that hash(key) selects.
 *
 * If a node in that bucket has a key that compares equal to key
 * (strcmp), stores that node's value in *value and returns 1.
 * Otherwise returns 0 and leaves *value untouched.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     int value;
 *     int found;
 *     found =
 *         Table_search(t, "Gehrig", &value);
 */
int Table_search(struct Table *t,
                 const char *key, int *value) {
    struct Node *p;
    int h = hash(key);
    for (p = t->array[h]; p != NULL; p = p->next)
        if (strcmp(p->key, key) == 0) {
            *value = p->value;
            return 1;
        }
    return 0;
}
t!
Pretend that Gehrig!
0 NULL hashed to 723, and so!
1 NULL
"Ruth"
3
h = 723!
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 52
Hash Table: Search (3)!
/*
 * Table_search: look up key in the bucket that hash(key) selects.
 *
 * If a node in that bucket has a key that compares equal to key
 * (strcmp), stores that node's value in *value and returns 1.
 * Otherwise returns 0 and leaves *value untouched.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     int value;
 *     int found;
 *     found =
 *         Table_search(t, "Gehrig", &value);
 */
int Table_search(struct Table *t,
                 const char *key, int *value) {
    struct Node *p;
    int h = hash(key);
    for (p = t->array[h]; p != NULL; p = p->next)
        if (strcmp(p->key, key) == 0) {
            *value = p->value;
            return 1;
        }
    return 0;
}
t!
0 NULL p! h = 723!
1 NULL
"Ruth"
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 53
Hash Table: Search (4)!
/*
 * Table_search: look up key in the bucket that hash(key) selects.
 *
 * If a node in that bucket has a key that compares equal to key
 * (strcmp), stores that node's value in *value and returns 1.
 * Otherwise returns 0 and leaves *value untouched.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     int value;
 *     int found;
 *     found =
 *         Table_search(t, "Gehrig", &value);
 */
int Table_search(struct Table *t,
                 const char *key, int *value) {
    struct Node *p;
    int h = hash(key);
    for (p = t->array[h]; p != NULL; p = p->next)
        if (strcmp(p->key, key) == 0) {
            *value = p->value;
            return 1;
        }
    return 0;
}
t!
0 NULL p! h = 723!
1 NULL
"Ruth"
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 54
Hash Table: Search (5)!
/*
 * Table_search: look up key in the bucket that hash(key) selects.
 *
 * If a node in that bucket has a key that compares equal to key
 * (strcmp), stores that node's value in *value and returns 1.
 * Otherwise returns 0 and leaves *value untouched.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     int value;
 *     int found;
 *     found =
 *         Table_search(t, "Gehrig", &value);
 */
int Table_search(struct Table *t,
                 const char *key, int *value) {
    struct Node *p;
    int h = hash(key);
    for (p = t->array[h]; p != NULL; p = p->next)
        if (strcmp(p->key, key) == 0) {
            *value = p->value;
            return 1;
        }
    return 0;
}
t!
1
0 NULL p! h = 723!
1 NULL 4
"Ruth"
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 55
Hash Table: Free (1)!
/*
 * Table_free: release every node in every bucket, then the table itself.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_free(t);
 */
void Table_free(struct Table *t) {
    struct Node *p;
    struct Node *nextp;
    int b;
    for (b = 0; b < BUCKET_COUNT; b++)
        for (p = t->array[b]; p != NULL; p = nextp) {
            nextp = p->next;  /* read the link before the node is freed */
            free(p);
        }
    free(t);
}
t!
0 NULL
1 NULL
"Ruth"
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 56
Hash Table: Free (2)!
/*
 * Table_free: release every node in every bucket, then the table itself.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_free(t);
 */
void Table_free(struct Table *t) {
    struct Node *p;
    struct Node *nextp;
    int b;
    for (b = 0; b < BUCKET_COUNT; b++)
        for (p = t->array[b]; p != NULL; p = nextp) {
            nextp = p->next;  /* read the link before the node is freed */
            free(p);
        }
    free(t);
}
t!
b = 0!
0 NULL
1 NULL
"Ruth"
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 57
Hash Table: Free (3)!
/*
 * Table_free: release every node in every bucket, then the table itself.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_free(t);
 */
void Table_free(struct Table *t) {
    struct Node *p;
    struct Node *nextp;
    int b;
    for (b = 0; b < BUCKET_COUNT; b++)
        for (p = t->array[b]; p != NULL; p = nextp) {
            nextp = p->next;  /* read the link before the node is freed */
            free(p);
        }
    free(t);
}
t!
p!
b = 0!
0 NULL
1 NULL
"Ruth"
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 58
Hash Table: Free (4)!
/*
 * Table_free: release every node in every bucket, then the table itself.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_free(t);
 */
void Table_free(struct Table *t) {
    struct Node *p;
    struct Node *nextp;
    int b;
    for (b = 0; b < BUCKET_COUNT; b++)
        for (p = t->array[b]; p != NULL; p = nextp) {
            nextp = p->next;  /* read the link before the node is freed */
            free(p);
        }
    free(t);
}
t!
p!
b = 1, …, 23
0 NULL
1 NULL
"Ruth"
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 59
Hash Table: Free (5)!
/*
 * Table_free: release every node in every bucket, then the table itself.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_free(t);
 */
void Table_free(struct Table *t) {
    struct Node *p;
    struct Node *nextp;
    int b;
    for (b = 0; b < BUCKET_COUNT; b++)
        for (p = t->array[b]; p != NULL; p = nextp) {
            nextp = p->next;  /* read the link before the node is freed */
            free(p);
        }
    free(t);
}
t! p!
b = 23!
0 NULL
1 NULL
"Ruth"
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 60
Hash Table: Free (6)!
/*
 * Table_free: release every node in every bucket, then the table itself.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_free(t);
 */
void Table_free(struct Table *t) {
    struct Node *p;
    struct Node *nextp;
    int b;
    for (b = 0; b < BUCKET_COUNT; b++)
        for (p = t->array[b]; p != NULL; p = nextp) {
            nextp = p->next;  /* read the link before the node is freed */
            free(p);
        }
    free(t);
}
t! p!
nextp!
b = 23!
0 NULL
1 NULL
"Ruth"
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 61
Hash Table: Free (7)!
/*
 * Table_free: release every node in every bucket, then the table itself.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_free(t);
 */
void Table_free(struct Table *t) {
    struct Node *p;
    struct Node *nextp;
    int b;
    for (b = 0; b < BUCKET_COUNT; b++)
        for (p = t->array[b]; p != NULL; p = nextp) {
            nextp = p->next;  /* read the link before the node is freed */
            free(p);
        }
    free(t);
}
t!
p! nextp!
b = 23!
0 NULL
1 NULL
"Ruth"
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 62
Hash Table: Free (8)!
/*
 * Table_free: release every node in every bucket, then the table itself.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_free(t);
 */
void Table_free(struct Table *t) {
    struct Node *p;
    struct Node *nextp;
    int b;
    for (b = 0; b < BUCKET_COUNT; b++)
        for (p = t->array[b]; p != NULL; p = nextp) {
            nextp = p->next;  /* read the link before the node is freed */
            free(p);
        }
    free(t);
}
t!
b = 24, …, 723
0 NULL
b = 724, …, 806
1 NULL
"Ruth"
b = 807, …, 1024
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 63
Hash Table: Free (9)!
/*
 * Table_free: release every node in every bucket, then the table itself.
 *
 * Client code shown alongside on the slide:
 *
 *     struct Table *t;
 *     Table_free(t);
 */
void Table_free(struct Table *t) {
    struct Node *p;
    struct Node *nextp;
    int b;
    for (b = 0; b < BUCKET_COUNT; b++)
        for (p = t->array[b]; p != NULL; p = nextp) {
            nextp = p->next;  /* read the link before the node is freed */
            free(p);
        }
    free(t);
}
t!
b = 1024!
0 NULL
1 NULL
"Ruth"
3
23 NULL "Gehrig"
723 4
NULL "Mantle"
806 7
1023 NULL NULL 64
Hash Table Performance!
Create: fast
Add: fast
Search: fast
Free: slow
What are the asymptotic run times (big-oh notation)?
Is hash table search always fast?
65
Key Ownership!
66
Key Ownership (cont.)!
Problem: Consider this calling code:!
struct Table t; Via Table_add(), table contains
char k[100] = "Ruth";
memory address k!
Table_add(t, k, 3); Client changes string at
strcpy(k, "Gehrig"); memory address k!
Thus client changes key within table!
67
Key Ownership (cont.)!
Solution: Table_add() saves copy of given key!
/*
 * Table_add (key-copying version): the node stores its own copy of the
 * key, so later changes to the caller's buffer cannot alter a key that
 * is already in the table.
 *
 * If either allocation fails, nothing is added and nothing is leaked
 * (the void return type gives no way to report the failure).
 *
 * NOTE(review): the slide shows only the key-copying step; storing
 * value and linking p into the table are not shown and must follow.
 * NOTE(review): the copy is heap-allocated here, so presumably
 * Table_free must also free each node's key -- confirm.
 */
void Table_add(struct Table *t, const char *key, int value) {
    struct Node *p = malloc(sizeof *p);
    char *copy;
    if (p == NULL)
        return;
    copy = malloc(strlen(key) + 1);  /* Why add 1? */
    if (copy == NULL) {
        free(p);
        return;
    }
    strcpy(copy, key);  /* write through a non-const pointer ... */
    p->key = copy;      /* ... then store it as the node's key */
}
Related issues!
Hashing algorithms!
Memory ownership!
69
Appendix!
70
Revisiting Hash Functions!
Potentially expensive to compute mod c!
Involves division by c and keeping the remainder!
Easier when c is a power of 2 (e.g., 16 = 2^4)
An alternative (by example)!
53 = 32 + 16 + 4 + 1
      128  64  32  16   8   4   2   1
53:     0   0   1   1   0   1   0   1
 5:     0   0   0   0   0   1   0   1
72
A Faster Hash Function!
/*
 * hash: map a string to a bucket index in the range 0..1023.
 *
 * Treats the characters of x as coefficients of a polynomial in 65599,
 * evaluated by Horner's rule in unsigned arithmetic (which wraps on
 * overflow), then reduces the result modulo 1024.
 *
 * x - NUL-terminated string to hash; must not be NULL.
 *
 * NOTE(review): the slide labels part of this code "Previous version";
 * presumably that is the "% 1024" reduction, which a faster variant
 * could replace with the equivalent mask "& 1023" because 1024 is a
 * power of two -- confirm against the original slide.
 */
unsigned int hash(const char *x) {
    unsigned int h = 0U;
    for (; *x != '\0'; x++)
        h = h * 65599 + (unsigned char)*x;  /* cast keeps high-bit chars non-negative */
    return h % 1024;
}
74