This commit is contained in:
2024-12-15 12:08:02 +07:00
parent 1378032bfe
commit 549f538bbf
4 changed files with 254 additions and 102 deletions

View File

@@ -113,6 +113,7 @@ fi
# wait for remaining workers and coordinator to exit. # wait for remaining workers and coordinator to exit.
wait wait
exit 0
######################################################### #########################################################
# now indexer # now indexer
rm -f mr-* rm -f mr-*

View File

@@ -1,23 +1,37 @@
package mr package mr
import ( import (
"crypto/rand"
"errors" "errors"
"fmt"
"log" "log"
"math/big"
"net" "net"
"net/http" "net/http"
"net/rpc" "net/rpc"
"os" "os"
"strconv"
"strings"
"sync"
"time" "time"
) )
const (
JOB_TIMEOUT_S = 10 * time.Second
DEBUG = true
)
type Coordinator struct { type Coordinator struct {
MapJobs []Job MapJobs []Job
ReduceJobs []Job ReduceJobs []Job
NReduce int NReduce int
mu sync.Mutex
mapWg sync.WaitGroup
} }
type Job struct { type Job struct {
Filename string WorkerId string
Filenames []string
Status bool Status bool
Active bool Active bool
LastPing time.Time LastPing time.Time
@@ -27,34 +41,65 @@ type Job struct {
// //
// the RPC argument and reply types are defined in rpc.go. // the RPC argument and reply types are defined in rpc.go.
func (c *Coordinator) GetTasks(args *RpcArgument, reply *RpcReply) error { func (c *Coordinator) GetTasks(args *RpcArgument, reply *RpcReply) error {
c.mu.Lock()
defer c.mu.Unlock()
if args.Method != "request_task" { if args.Method != "request_task" {
return errors.New("argument .Method not valid for this procedure") return errors.New("argument .Method not valid for this procedure")
} }
// check for any non-completed OR non-active map jobs // check for any non-completed OR non-active map jobs
for _, mj := range c.MapJobs { for i, mj := range c.MapJobs {
// assign job that not already done OR not actively being run by other workers // assign job that not already done OR not actively being run by other workers
// status = false AND active = false
if !mj.Status && !mj.Active { if !mj.Status && !mj.Active {
wId, err := genWorkerId()
if err != nil {
fmt.Println(err)
}
reply.NReduce = c.NReduce reply.NReduce = c.NReduce
reply.TaskType = "map" reply.TaskType = "map"
reply.Filename = mj.Filename reply.Filenames = mj.Filenames
// set job status, taken by a worker reply.WorkerId = wId
mj.Active = true // set job status, taken by a worke
mj.LastPing = time.Now() c.MapJobs[i].WorkerId = wId
c.MapJobs[i].Active = true
c.MapJobs[i].LastPing = time.Now()
if DEBUG {
fmt.Printf("[Map Job Assigned] %s | file: %s | status: %t | active: %t |\n",
c.MapJobs[i].WorkerId,
c.MapJobs[i].Filenames[0],
c.MapJobs[i].Status, c.MapJobs[i].Active)
}
return nil return nil
} }
} }
c.mapWg.Wait()
// check for reduce jobs // check for reduce jobs
for _, rj := range c.ReduceJobs { for i, rj := range c.ReduceJobs {
if !rj.Status && !rj.Active { if !rj.Status && !rj.Active {
wId, err := genWorkerId()
if err != nil {
fmt.Println(err)
}
reply.WorkerId = wId
reply.NReduce = c.NReduce reply.NReduce = c.NReduce
reply.TaskType = "reduce" reply.TaskType = "reduce"
reply.Filename = rj.Filename reply.Filenames = rj.Filenames
// set job status, taken by a worker // set job status, taken by a worker
rj.Active = true c.ReduceJobs[i].WorkerId = wId
rj.LastPing = time.Now() c.ReduceJobs[i].Active = true
c.ReduceJobs[i].LastPing = time.Now()
if DEBUG {
fmt.Printf("[Reduce Job Assigned] %s | status: %t | active: %t |\n",
c.ReduceJobs[i].WorkerId, c.ReduceJobs[i].Status, c.ReduceJobs[i].Active)
}
return nil return nil
} }
} }
reply.TaskType = "done"
reply.NReduce = c.NReduce
reply.Filenames = []string{}
return nil return nil
} }
@@ -64,26 +109,35 @@ func (c *Coordinator) SetTaskDone(args *RpcArgument, reply *RpcReply) error {
return errors.New("argument .Method not valid for this procedure") return errors.New("argument .Method not valid for this procedure")
} }
if args.TaskType == "map" { if args.TaskType == "map" {
f := args.Filename wId := args.WorkerId
for _, mj := range c.MapJobs { for i, _ := range c.MapJobs {
if mj.Filename == f { if c.MapJobs[i].WorkerId == wId {
mj.Status = true c.MapJobs[i].Status = true
c.MapJobs[i].Active = false
c.mapWg.Done()
if DEBUG {
fmt.Printf("[DONE] %s | file: %s | status: %t | active: %t\n",
c.MapJobs[i].WorkerId,
c.MapJobs[i].Filenames[0],
c.MapJobs[i].Status, c.MapJobs[i].Active)
} }
} }
// Create reduce job data struct for each intermediary file returned from worker's map job }
// Check again
for _, ifn := range args.IntermediateFiles { for _, ifn := range args.IntermediateFiles {
reduceJob := Job{} reduceJobIdx := strings.Split(ifn, "-")[2]
reduceJob.Active = false idx, err := strconv.Atoi(reduceJobIdx)
reduceJob.Filename = ifn if err != nil {
reduceJob.LastPing = time.Now() fmt.Printf("Error converting string to integer: %v\n", err)
reduceJob.Status = false
c.ReduceJobs = append(c.ReduceJobs, reduceJob)
} }
c.ReduceJobs[idx].Filenames = append(c.ReduceJobs[idx].Filenames, ifn)
}
} else if args.TaskType == "reduce" { } else if args.TaskType == "reduce" {
f := args.Filename wId := args.WorkerId
for _, rj := range c.ReduceJobs { for i, _ := range c.ReduceJobs {
if rj.Filename == f { if c.ReduceJobs[i].WorkerId == wId {
rj.Status = true c.ReduceJobs[i].Status = true
} }
} }
} else { } else {
@@ -106,6 +160,23 @@ func (c *Coordinator) server() {
go http.Serve(l, nil) go http.Serve(l, nil)
} }
// check scheduled tasks; if their age > JOB_TIMEOUT_S
// reset the job status, making it available for scheduling
func (c *Coordinator) CheckTaskValidity() {
c.mu.Lock()
defer c.mu.Unlock()
for i, rj := range c.ReduceJobs {
timeNow := time.Now()
if rj.Active && !rj.Status {
duration := timeNow.Sub(rj.LastPing)
if duration >= JOB_TIMEOUT_S {
c.ReduceJobs[i].Active = false
c.ReduceJobs[i].Status = false
}
}
}
}
// main/mrcoordinator.go calls Done() periodically to find out // main/mrcoordinator.go calls Done() periodically to find out
// if the entire job has finished. // if the entire job has finished.
func (c *Coordinator) Done() bool { func (c *Coordinator) Done() bool {
@@ -116,6 +187,11 @@ func (c *Coordinator) Done() bool {
return ret return ret
} }
} }
for _, mj := range c.ReduceJobs {
if !mj.Status {
return ret
}
}
// all mj completed // all mj completed
ret = true ret = true
return ret return ret
@@ -131,12 +207,34 @@ func MakeCoordinator(files []string, nReduce int) *Coordinator {
for i := range files { for i := range files {
mj := Job{} mj := Job{}
mj.Filename = files[i] mj.Filenames = []string{files[i]}
mj.Status = false mj.Status = false
mj.Active = false mj.Active = false
c.mapWg.Add(1)
c.MapJobs = append(c.MapJobs, mj) c.MapJobs = append(c.MapJobs, mj)
} }
for j := 0; j < nReduce; j++ {
reduceJob := Job{}
reduceJob.Active = false
reduceJob.Filenames = []string{}
reduceJob.Status = false
c.ReduceJobs = append(c.ReduceJobs, reduceJob)
}
c.server() c.server()
return &c return &c
} }
func genWorkerId() (string, error) {
const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
result := make([]byte, 8)
for i := range result {
index, err := rand.Int(rand.Reader, big.NewInt(int64(len(charset))))
if err != nil {
return "", fmt.Errorf("error generating random number: %v", err)
}
result[i] = charset[index.Int64()]
}
return string(result), nil
}

View File

@@ -17,15 +17,16 @@ import (
// //
type RpcArgument struct { type RpcArgument struct {
WorkerId string
Method string Method string
Filename string
TaskType string TaskType string
IntermediateFiles []string IntermediateFiles []string
} }
type RpcReply struct { type RpcReply struct {
WorkerId string
TaskType string TaskType string
Filename string Filenames []string
NReduce int NReduce int
} }

View File

@@ -10,6 +10,7 @@ import (
"net/rpc" "net/rpc"
"os" "os"
"path/filepath" "path/filepath"
"sort"
"strings" "strings"
) )
@@ -19,6 +20,15 @@ type KeyValue struct {
Value string Value string
} }
// Sorting
// for sorting by key.
type ByKey []KeyValue
// for sorting by key.
func (a ByKey) Len() int { return len(a) }
func (a ByKey) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByKey) Less(i, j int) bool { return a[i].Key < a[j].Key }
// use ihash(key) % NReduce to choose the reduce // use ihash(key) % NReduce to choose the reduce
// task number for each KeyValue emitted by Map. // task number for each KeyValue emitted by Map.
func ihash(key string) int { func ihash(key string) int {
@@ -31,45 +41,84 @@ func ihash(key string) int {
func Worker(mapf func(string, string) []KeyValue, func Worker(mapf func(string, string) []KeyValue,
reducef func(string, []string) string) { reducef func(string, []string) string) {
// Your worker implementation here. // Your worker implementation here.
taskType, inFilename, nReduce := RequestTask() taskType := "init"
fmt.Printf("Perform %s task on: %s", taskType, inFilename) for taskType != "done" {
file, err := os.Open(inFilename) workerId, taskType, inFilenames, nReduce := RequestTask()
if taskType == "done" {
return
}
if taskType == "map" {
ifn := inFilenames[0]
file, err := os.Open(ifn)
if err != nil { if err != nil {
log.Fatalf("cannot open %v", inFilename) log.Fatalf("cannot open %v", ifn)
} }
content, err := io.ReadAll(file) content, err := io.ReadAll(file)
if err != nil { if err != nil {
log.Fatalf("cannot read %v", inFilename) log.Fatalf("cannot read %v", ifn)
} }
file.Close() file.Close()
if taskType == "map" { kva := mapf(ifn, string(content))
intermediateFiles := []string{}
kva := mapf(inFilename, string(content))
//hash the map job id //hash the map job id
mapTaskIdx := ihash(inFilename) mapTaskIdx := ihash(ifn)
IntermediaryFileMaps := make(map[int][]KeyValue)
for i := 0; i < nReduce; i++ {
IntermediaryFileMaps[i] = []KeyValue{}
}
for i := range kva { for i := range kva {
log.Printf("%s: %s", kva[i].Key, kva[i].Value) //log.Printf("%s: %s", kva[i].Key, kva[i].Value)
idx := ihash(kva[i].Key) % nReduce idx := ihash(kva[i].Key) % nReduce
mapOutFn := fmt.Sprintf("mr-%d-%d", mapTaskIdx, idx) IntermediaryFileMaps[idx] = append(IntermediaryFileMaps[idx], kva[i])
// add to intermediate file list }
intermediateFiles = append(intermediateFiles, mapOutFn)
// write intermidiate files intermediateFiles := []string{}
data, err := json.MarshalIndent(kva[i], "", " ") for idx := range IntermediaryFileMaps {
outFile := fmt.Sprintf("mr-%d-%d", mapTaskIdx, idx)
intermediateFiles = append(intermediateFiles, outFile)
jsonData, err := json.Marshal(IntermediaryFileMaps[idx])
if err != nil { if err != nil {
log.Fatalf("failed to serialize map to JSON: %w", err) log.Fatal("failed to marshal json")
} }
if err = WriteTempAtomic(mapOutFn, data); err != nil { if err = WriteTempAtomic(outFile, jsonData); err != nil {
log.Fatalf("failed to write KV map to file: %w", err) log.Fatalf("failed to write KV map to file")
} }
} }
SetTaskDone("map", inFilename, intermediateFiles) SetTaskDone(workerId, "map", intermediateFiles)
return
} else if taskType == "reduce" { } else if taskType == "reduce" {
intermediate := []KeyValue{} var intermediate []KeyValue
for _, f := range inFilenames {
// Open the file
file, err := os.Open(f)
if err != nil {
log.Fatalf("Failed to open file: %v", err)
}
defer file.Close()
// Read the file contents
data, err := io.ReadAll(file)
if err != nil {
log.Fatalf("Failed to read file: %v", err)
}
// Parse the JSON data
var intermidateShard []KeyValue
if err := json.Unmarshal(data, &intermidateShard); err != nil {
log.Fatalf("Failed to parse JSON: %v", err)
}
intermediate = append(intermediate, intermidateShard...)
}
sort.Sort(ByKey(intermediate))
// Prepare output file // Prepare output file
oFilename := strings.Split(inFilename, ",")[1] reduceJobNum := strings.Split(inFilenames[0], "-")[2]
oFilename := fmt.Sprintf("mr-out-%s", reduceJobNum)
var buffer bytes.Buffer var buffer bytes.Buffer
i := 0 i := 0
for i < len(intermediate) { for i < len(intermediate) {
@@ -85,15 +134,16 @@ func Worker(mapf func(string, string) []KeyValue,
// this is the correct format for each line of Reduce output. // this is the correct format for each line of Reduce output.
fmt.Fprintf(&buffer, "%v %v\n", intermediate[i].Key, output) fmt.Fprintf(&buffer, "%v %v\n", intermediate[i].Key, output)
//fmt.Println(buffer.String())
i = j i = j
} }
if err = WriteTempAtomic(oFilename, buffer.Bytes()); err != nil { if err := WriteTempAtomic(oFilename, buffer.Bytes()); err != nil {
log.Fatalf("failed to write KV map to file: %w", err) log.Fatalf("failed to write KV map to file: %w", err)
} }
SetTaskDone("reduce", inFilename, []string{}) SetTaskDone(workerId, "reduce", []string{})
return
} }
}
} }
func WriteTempAtomic(fn string, data []byte) error { func WriteTempAtomic(fn string, data []byte) error {
@@ -135,36 +185,38 @@ func WriteTempAtomic(fn string, data []byte) error {
} }
// RPC call to set the task status to done // RPC call to set the task status to done
func SetTaskDone(taskType string, filename string, intermediateFiles []string) { func SetTaskDone(workerId string, taskType string, intermediateFiles []string) {
args := RpcArgument{} args := RpcArgument{}
args.WorkerId = workerId
args.Method = "set_task_done" args.Method = "set_task_done"
args.TaskType = taskType args.TaskType = taskType
args.Filename = filename
args.IntermediateFiles = intermediateFiles args.IntermediateFiles = intermediateFiles
reply := RpcReply{} reply := RpcReply{}
ok := call("Coordinator.SetTaskDone", &args, &reply) ok := call("Coordinator.SetTaskDone", &args, &reply)
if !ok {
fmt.Printf("call failed!\n")
}
/*
if ok { if ok {
fmt.Printf("ok") fmt.Printf("ok\n")
} else { } else {
fmt.Printf("call failed!\n") fmt.Printf("call failed!\n")
} }
*/
} }
// RPC call to request task from coordinator // RPC call to request task from coordinator
func RequestTask() (string, string, int) { func RequestTask() (string, string, []string, int) {
// declare an arg and reply // declare an arg and reply
args := RpcArgument{} args := RpcArgument{}
args.Method = "request_task" args.Method = "request_task"
reply := RpcReply{} reply := RpcReply{}
ok := call("Coordinator.GetTask", &args, &reply) ok := call("Coordinator.GetTasks", &args, &reply)
if ok { if !ok {
fmt.Printf("reply.TaskType %v\n", reply.TaskType)
fmt.Printf("reply.InFilename %v\n", reply.Filename)
} else {
fmt.Printf("call failed!\n") fmt.Printf("call failed!\n")
} }
return reply.TaskType, reply.Filename, reply.NReduce return reply.WorkerId, reply.TaskType, reply.Filenames, reply.NReduce
} }
// send an RPC request to the coordinator, wait for the response. // send an RPC request to the coordinator, wait for the response.