MIT 6.5840 (formerly 6.824) Walkthrough Notes: Lab 4 Fault-tolerant Key/Value Service
This lab asks us to build a kvserver on top of the Raft implementation from Lab 3.
Part A: Key/value service without snapshots
kvserver
The Clerk sends each request to the server it currently believes is the leader; if the request times out or fails, it looks for the leader again and retries.
When a kvserver receives a request, it calls Start() and waits for Raft to reach agreement. Once the committed command has been applied, it replies to the Clerk's RPC.
How does the kvserver learn that the result has been applied?
We can reuse the history table from Lab 2: whenever a command is committed and executed, the apply loop writes an entry into history, and the RPC handler keeps polling that entry, treating the command as finished once the entry is present.
Every new operation's RPC also carries the ID of the client's previous operation, indicating that the client has already received that operation's result, so the server can drop its record. The rest is left to Raft.
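For reference, here is roughly what the operation and RPC argument types look like under this scheme. The post never shows these definitions, so the layout below is only a sketch, with field names inferred from how they are used in the code further down.

// Sketch only: field names inferred from the handlers and apply loop below.
type Op struct {
	Operation string // "Get", "Put" or "Append"
	Key       string
	Value     string
	ID        int64 // unique ID of this operation (from nrand())
	LastOpID  int64 // ID of the client's previous, already-acknowledged operation
}

type GetArgs struct {
	Key      string
	ID       int64
	LastOpID int64
}

type GetReply struct {
	Err   string
	Value string
}

type PutAppendArgs struct {
	Key      string
	Value    string
	ID       int64
	LastOpID int64
}

type PutAppendReply struct {
	Err string
}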
After finishing this, my first test run had only one failure, a timeout in the second test case:
test_test.go:419: Operations completed too slowly 100.859572ms/op > 33.333333ms/op
The test requires at least three operations per 100 ms. With my Lab 3 approach of sending AppendEntries only on a fixed 100 ms heartbeat, that can't possibly pass, so it was back to modifying Raft: Start() now triggers an extra round of AppendEntries right after appending the new entry instead of waiting for the next heartbeat.
func (rf *Raft) Start(command interface{}) (int, int, bool) {
	index := -1
	term := -1
	isLeader := true

	// Your code here (3B).
	rf.mu.Lock()
	defer rf.mu.Unlock()
	term, isLeader = rf.CurrentTerm, rf.state == LEADER
	if !isLeader {
		return index, term, isLeader
	} else {
		index = len(rf.Log) + rf.SnapshotState.SnapshotIndex
		rf.Log = append(rf.Log, Log{Command: command, Term: rf.CurrentTerm})
		rf.persist()
		rf.matchIndex[rf.me] = len(rf.Log) - 1 + rf.SnapshotState.SnapshotIndex
		// Kick off replication almost immediately instead of waiting for
		// the next fixed 100ms heartbeat.
		go func() {
			time.Sleep(1 * time.Millisecond)
			rf.mu.Lock()
			if rf.state != LEADER {
				rf.mu.Unlock()
				return
			}
			rf.sendAppendEntriesToAll()
			rf.mu.Unlock()
		}()
	}
	return index, term, isLeader
}
Implementation
client.go
func MakeClerk(servers []*labrpc.ClientEnd) *Clerk {
	ck := new(Clerk)
	ck.servers = servers
	// You'll have to add code here.
	ck.leader = mathRand.Intn(len(servers)) // start with a random guess at the leader
	ck.lastOpID = 0
	return ck
}

func (ck *Clerk) Get(key string) string {
	// You will have to modify this function.
	opId := nrand()
	args := GetArgs{Key: key, ID: opId, LastOpID: ck.lastOpID}
	start := ck.leader
	for {
		reply := GetReply{}
		resultCh := make(chan bool)
		leader := ck.leader
		var ok bool
		// Race the RPC against a 100ms timeout.
		go func() {
			time.Sleep(100 * time.Millisecond)
			resultCh <- false
		}()
		go func() {
			ok = ck.servers[leader].Call("KVServer.Get", &args, &reply)
			resultCh <- true
		}()
		if <-resultCh {
			if ok && reply.Err == "" {
				ck.lastOpID = opId
				return reply.Value
			}
		}
		// Try the next server; after a full round, back off briefly.
		ck.leader = (ck.leader + 1) % len(ck.servers)
		if ck.leader == start {
			time.Sleep(200 * time.Millisecond)
		}
	}
}

func (ck *Clerk) PutAppend(key string, value string, op string) {
	// You will have to modify this function.
	opId := nrand()
	args := PutAppendArgs{Key: key, Value: value, ID: opId, LastOpID: ck.lastOpID}
	start := ck.leader
	for {
		resultCh := make(chan bool)
		var ok bool
		leader := ck.leader
		reply := PutAppendReply{}
		go func() {
			ok = ck.servers[leader].Call("KVServer."+op, &args, &reply)
			resultCh <- true
		}()
		go func() {
			time.Sleep(100 * time.Millisecond)
			resultCh <- false
		}()
		if <-resultCh {
			if ok && reply.Err == "" {
				ck.lastOpID = opId
				break
			}
		}
		ck.leader = (ck.leader + 1) % len(ck.servers)
		if ck.leader == start {
			time.Sleep(200 * time.Millisecond)
		}
	}
}
At initialization the Clerk picks a random server as its presumed leader; when a request fails it moves on to the next server, and once every server has been tried it waits a bit before starting another round of retries.
server.go
func (kv *KVServer) Get(args *GetArgs, reply *GetReply) {
	// Your code here.
	op := Op{Operation: "Get", Key: args.Key, ID: args.ID, LastOpID: args.LastOpID}
	_, _, success := kv.rf.Start(op)
	if !success {
		reply.Err = "not leader"
	} else {
		// Poll until the apply loop records this operation in history.
		for !kv.killed() {
			if _, isLeader := kv.rf.GetState(); !isLeader {
				reply.Err = "leader expired"
				return
			}
			kv.mu.Lock()
			if _, ok := kv.history[op.ID]; ok {
				reply.Value = kv.pairs[op.Key]
				kv.mu.Unlock()
				return
			}
			kv.mu.Unlock()
			time.Sleep(1 * time.Millisecond)
		}
	}
}

func (kv *KVServer) apply() {
	for !kv.killed() {
		applyMsg := <-kv.applyCh
		if applyMsg.CommandValid {
			op, _ := applyMsg.Command.(Op)
			kv.mu.Lock()
			// Skip operations that have already been executed.
			if _, ok := kv.history[op.ID]; ok {
				kv.mu.Unlock()
				continue
			}
			// The client has acknowledged its previous operation, so drop its record.
			delete(kv.history, op.LastOpID)
			switch op.Operation {
			case "Get":
				kv.history[op.ID] = kv.pairs[op.Key]
			case "Put":
				kv.pairs[op.Key] = op.Value
				kv.history[op.ID] = ""
			case "Append":
				kv.pairs[op.Key] += op.Value
				kv.history[op.ID] = ""
			}
			kv.mu.Unlock()
		}
	}
}
A separate goroutine keeps receiving applied commands. As in Lab 2, the server checks whether a command has already been executed before applying it, to avoid duplicate execution.
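The Put and Append RPC handlers aren't shown above; they follow the same Start-then-poll pattern as Get. A minimal sketch, assuming the same types and fields as the code above (Append differs only in the Operation field):

func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) {
	// Sketch: mirrors the Get handler, just without returning a value.
	op := Op{Operation: "Put", Key: args.Key, Value: args.Value, ID: args.ID, LastOpID: args.LastOpID}
	_, _, success := kv.rf.Start(op)
	if !success {
		reply.Err = "not leader"
		return
	}
	for !kv.killed() {
		if _, isLeader := kv.rf.GetState(); !isLeader {
			reply.Err = "leader expired"
			return
		}
		kv.mu.Lock()
		if _, ok := kv.history[op.ID]; ok {
			// The apply loop has executed this operation.
			kv.mu.Unlock()
			return
		}
		kv.mu.Unlock()
		time.Sleep(1 * time.Millisecond)
	}
}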
Part B: Key/value service with snapshots
Part B asks us to add snapshots to the kvserver. There isn't much to say; it hardly feels worth the "hard" label.
To keep a delayed RPC from causing a log entry to be applied twice, entries are not removed from history right away: a separate goroutine waits for a while before deleting them.
Duplicate application also has to be avoided after a restart, so the snapshot persists the key/value store, the application history, and the last applied index.
Implementation
func (kv *KVServer) MakeSnapshot(persister *raft.Persister, maxraftstate int) {
	for !kv.killed() {
		kv.mu.Lock()
		kv.cond.Wait()
		// Snapshot once the Raft state grows past 4/5 of the limit.
		if persister.RaftStateSize() > maxraftstate*4/5 {
			w := new(bytes.Buffer)
			e := labgob.NewEncoder(w)
			e.Encode(kv.pairs)
			e.Encode(kv.history)
			e.Encode(kv.lastIndex)
			snapshot := w.Bytes()
			kv.rf.Snapshot(kv.lastIndex, snapshot)
		}
		kv.mu.Unlock()
	}
}

func (kv *KVServer) apply() {
	for !kv.killed() {
		applyMsg := <-kv.applyCh
		if applyMsg.CommandValid {
			op, _ := applyMsg.Command.(Op)
			kv.mu.Lock()
			kv.lastIndex = applyMsg.CommandIndex
			if _, ok := kv.history[op.ID]; ok {
				kv.mu.Unlock()
				continue
			}
			switch op.Operation {
			case "Get":
				kv.history[op.ID] = "1"
				kv.history[op.LastOpID] = "2"
			case "Put":
				kv.pairs[op.Key] = op.Value
				kv.history[op.ID] = "1"
				kv.history[op.LastOpID] = "2"
			case "Append":
				kv.pairs[op.Key] += op.Value
				kv.history[op.ID] = "1"
				kv.history[op.LastOpID] = "2"
			}
			// Delete the acknowledged record only after a delay, so a delayed
			// duplicate RPC still hits the dedup check above.
			go func(id int64) {
				time.Sleep(100 * time.Millisecond)
				kv.mu.Lock()
				delete(kv.history, id)
				kv.mu.Unlock()
			}(op.LastOpID)
			kv.cond.Broadcast()
			kv.mu.Unlock()
		} else if applyMsg.SnapshotValid {
			// Replace local state with the snapshot's contents.
			kv.mu.Lock()
			r := bytes.NewBuffer(applyMsg.Snapshot)
			d := labgob.NewDecoder(r)
			d.Decode(&kv.pairs)
			d.Decode(&kv.history)
			d.Decode(&kv.lastIndex)
			for key, value := range kv.history {
				go func(k int64, v string) {
					time.Sleep(100 * time.Millisecond)
					if v == "2" {
						kv.mu.Lock()
						delete(kv.history, k)
						kv.mu.Unlock()
					}
				}(key, value)
			}
			kv.mu.Unlock()
		}
	}
}
In the initialization function:
if persister.ReadSnapshot() != nil && len(persister.ReadSnapshot()) >= 1 {
	r := bytes.NewBuffer(persister.ReadSnapshot())
	d := labgob.NewDecoder(r)
	d.Decode(&kv.pairs)
	d.Decode(&kv.history)
	d.Decode(&kv.lastIndex)
	kv.mu.Lock()
	for key, value := range kv.history {
		go func(k int64, v string) {
			time.Sleep(100 * time.Millisecond)
			if v == "2" {
				kv.mu.Lock()
				delete(kv.history, k)
				kv.mu.Unlock()
			}
		}(key, value)
	}
	kv.mu.Unlock()
}
if maxraftstate >= 0 {
	go kv.MakeSnapshot(persister, maxraftstate)
}
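For completeness, the rest of StartKVServer roughly wires things up as below. Only the snapshot-restore part is shown in the snippet above, so everything else here is a sketch based on the fields used earlier and the standard lab skeleton (kv.me, kv.maxraftstate, etc. are assumptions).

func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer {
	// labgob needs to know how to encode the command type.
	labgob.Register(Op{})

	kv := new(KVServer)
	kv.me = me
	kv.maxraftstate = maxraftstate
	kv.applyCh = make(chan raft.ApplyMsg)
	kv.rf = raft.Make(servers, me, persister, kv.applyCh)
	kv.pairs = make(map[string]string)
	kv.history = make(map[int64]string)
	kv.cond = sync.NewCond(&kv.mu) // apply() broadcasts, MakeSnapshot() waits

	// ... restore from persister.ReadSnapshot() and start the delayed
	// history cleanup, as in the snippet above ...

	go kv.apply()
	if maxraftstate >= 0 {
		go kv.MakeSnapshot(persister, maxraftstate)
	}
	return kv
}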
Results of a single test run:
=== RUN   TestBasic4A
Test: one client (4A) ...
  ... Passed -- 15.1  5 10461 1973
--- PASS: TestBasic4A (15.05s)
=== RUN   TestSpeed4A
Test: ops complete fast enough (4A) ...
  ... Passed --  5.9  3  3127    0
--- PASS: TestSpeed4A (5.92s)
=== RUN   TestConcurrent4A
Test: many clients (4A) ...
  ... Passed -- 15.2  5 14620 2792
--- PASS: TestConcurrent4A (15.24s)
=== RUN   TestUnreliable4A
Test: unreliable net, many clients (4A) ...
  ... Passed -- 17.7  5  4127  452
--- PASS: TestUnreliable4A (17.70s)
=== RUN   TestUnreliableOneKey4A
Test: concurrent append to same key, unreliable (4A) ...
  ... Passed --  2.4  3   263   52
--- PASS: TestUnreliableOneKey4A (2.39s)
=== RUN   TestOnePartition4A
Test: progress in majority (4A) ...
  ... Passed --  0.8  5    65    2
Test: no progress in minority (4A) ...
  ... Passed --  1.1  5   156    3
Test: completion after heal (4A) ...
  ... Passed --  1.0  5    67    3
--- PASS: TestOnePartition4A (3.50s)
=== RUN   TestManyPartitionsOneClient4A
Test: partitions, one client (4A) ...
  ... Passed -- 22.4  5  4920  720
--- PASS: TestManyPartitionsOneClient4A (22.40s)
=== RUN   TestManyPartitionsManyClients4A
Test: partitions, many clients (4A) ...
  ... Passed -- 22.5  5  9930 1466
--- PASS: TestManyPartitionsManyClients4A (22.54s)
=== RUN   TestPersistOneClient4A
Test: restarts, one client (4A) ...
  ... Passed -- 23.1  5 10499 1912
--- PASS: TestPersistOneClient4A (23.08s)
=== RUN   TestPersistConcurrent4A
Test: restarts, many clients (4A) ...
  ... Passed -- 25.1  5 16316 2862
--- PASS: TestPersistConcurrent4A (25.06s)
=== RUN   TestPersistConcurrentUnreliable4A
Test: unreliable net, restarts, many clients (4A) ...
  ... Passed -- 22.9  5  4720  468
--- PASS: TestPersistConcurrentUnreliable4A (22.86s)
=== RUN   TestPersistPartition4A
Test: restarts, partitions, many clients (4A) ...
  ... Passed -- 29.9  5  9971 1417
--- PASS: TestPersistPartition4A (29.88s)
=== RUN   TestPersistPartitionUnreliable4A
Test: unreliable net, restarts, partitions, many clients (4A) ...
  ... Passed -- 28.5  5  4407  297
--- PASS: TestPersistPartitionUnreliable4A (28.47s)
=== RUN   TestPersistPartitionUnreliableLinearizable4A
Test: unreliable net, restarts, partitions, random keys, many clients (4A) ...
  ... Passed -- 32.2  7 11361  444
--- PASS: TestPersistPartitionUnreliableLinearizable4A (32.22s)
=== RUN   TestSnapshotRPC4B
Test: InstallSnapshot RPC (4B) ...
labgob warning: Decoding into a non-default variable/field int may not work
  ... Passed --  5.0  3   325   63
--- PASS: TestSnapshotRPC4B (4.96s)
=== RUN   TestSnapshotSize4B
Test: snapshot size is reasonable (4B) ...
  ... Passed --  3.3  3  2467  800
--- PASS: TestSnapshotSize4B (3.30s)
=== RUN   TestSpeed4B
Test: ops complete fast enough (4B) ...
  ... Passed --  4.1  3  3088    0
--- PASS: TestSpeed4B (4.05s)
=== RUN   TestSnapshotRecover4B
Test: restarts, snapshots, one client (4B) ...
  ... Passed -- 19.8  5 20631 3982
--- PASS: TestSnapshotRecover4B (19.79s)
=== RUN   TestSnapshotRecoverManyClients4B
Test: restarts, snapshots, many clients (4B) ...
  ... Passed -- 20.3  5 96102 18766
--- PASS: TestSnapshotRecoverManyClients4B (20.29s)
=== RUN   TestSnapshotUnreliable4B
Test: unreliable net, snapshots, many clients (4B) ...
  ... Passed -- 16.5  5  4103  462
--- PASS: TestSnapshotUnreliable4B (16.48s)
=== RUN   TestSnapshotUnreliableRecover4B
Test: unreliable net, restarts, snapshots, many clients (4B) ...
  ... Passed -- 22.3  5  4487  438
--- PASS: TestSnapshotUnreliableRecover4B (22.27s)
=== RUN   TestSnapshotUnreliableRecoverConcurrentPartition4B
Test: unreliable net, restarts, partitions, snapshots, many clients (4B) ...
  ... Passed -- 28.9  5  4402  271
--- PASS: TestSnapshotUnreliableRecoverConcurrentPartition4B (28.91s)
=== RUN   TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable4B
Test: unreliable net, restarts, partitions, snapshots, random keys, many clients (4B) ...
  ... Passed -- 31.5  7 11331  426
--- PASS: TestSnapshotUnreliableRecoverConcurrentPartitionLinearizable4B (31.53s)
PASS