📌  相关文章
📜  golang 读取大文件 - Go 编程语言代码示例

📅  最后修改于: 2022-03-11 14:45:02.113000             🧑  作者: Mango

代码示例1
package main

import (
	"bufio"
	"fmt"
	"log"
	"os"
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"time"
)

 15func main() {
 16    start := time.Now()
 17    file, err := os.Open(os.Args[1])
 18    if err != nil {
 19        log.Fatal(err)
 20    }
 21    defer file.Close()
 22
 23    commonName := ""
 24    commonCount := 0
 25    scanner := bufio.NewScanner(file)
 26    nameMap := make(map[string]int)
 27    dateMap := make(map[int]int)
 28
 29    namesCounted := false
 30    namesCount := 0
 31    fileLineCount := int64(0)
 32
 33    type entry struct {
 34        firstName string
 35        name      string
 36        date      int
 37    }
 38
 39    linesChunkLen := 64 * 1024
 40    linesChunkPoolAllocated := int64(0)
 41    linesPool := sync.Pool{New: func() interface{} {
 42        lines := make([]string, 0, linesChunkLen)
 43        atomic.AddInt64(&linesChunkPoolAllocated, 1)
 44        return lines
 45    }}
 46    lines := linesPool.Get().([]string)[:0]
 47
 48    entriesPoolAllocated := int64(0)
 49    entriesPool := sync.Pool{New: func() interface{} {
 50        entries := make([]entry, 0, linesChunkLen)
 51        atomic.AddInt64(&entriesPoolAllocated, 1)
 52        return entries
 53    }}
 54    mutex := &sync.Mutex{}
 55    wg := sync.WaitGroup{}
 56
 57    scanner.Scan()
 58    for {
 59        lines = append(lines, scanner.Text())
 60        willScan := scanner.Scan()
 61        if len(lines) == linesChunkLen || !willScan {
 62            linesToProcess := lines
 63            wg.Add(len(linesToProcess))
 64            go func() {
 65                atomic.AddInt64(&fileLineCount, int64(len(linesToProcess)))
 66                entries := entriesPool.Get().([]entry)[:0]
 67                for _, text := range linesToProcess {
 68                    // get all the names
 69                    entry := entry{}
 70                    split := strings.SplitN(text, "|", 9)
 71                    entry.name = strings.TrimSpace(split[7])
 72
 73                    // extract first names
 74                    if entry.name != "" {
 75                        startOfName := strings.Index(entry.name, ", ") + 2
 76                        if endOfName := strings.Index(entry.name[startOfName:], " "); endOfName < 0 {
 77                            entry.firstName = entry.name[startOfName:]
 78                        } else {
 79                            entry.firstName = entry.name[startOfName : startOfName+endOfName]
 80                        }
 81                        if cs := strings.Index(entry.firstName, ","); cs > 0 {
 82                            entry.firstName = entry.firstName[:cs]
 83                        }
 84                    }
 85                    // extract dates
 86                    entry.date, _ = strconv.Atoi(split[4][:6])
 87                    entries = append(entries, entry)
 88                }
 89                linesPool.Put(linesToProcess)
 90                mutex.Lock()
 91                for _, entry := range entries {
 92                    if len(entry.firstName) != 0 {
 93                        nameCount := nameMap[entry.firstName] + 1
 94                        nameMap[entry.firstName] = nameCount
 95                        if nameCount > commonCount {
 96                            commonCount = nameCount
 97                            commonName = entry.firstName
 98                        }
 99                    }
100                    if namesCounted == false {
101                        if namesCount == 0 {
102                            fmt.Printf("Name: %s at index: %v\n", entry.name, 0)
103                        } else if namesCount == 432 {
104                            fmt.Printf("Name: %s at index: %v\n", entry.name, 432)
105                        } else if namesCount == 43243 {
106                            fmt.Printf("Name: %s at index: %v\n", entry.name, 43243)
107                            namesCounted = true
108                        }
109                        namesCount++
110                    }
111                    dateMap[entry.date]++
112                }
113                mutex.Unlock()
114                entriesPool.Put(entries)
115                wg.Add(-len(entries))
116            }()
117            lines = linesPool.Get().([]string)[:0]
118        }
119        if !willScan {
120            break
121        }
122    }
123    wg.Wait()
124
125    // report c2: names at index
126    fmt.Printf("Name time: %v\n", time.Since(start))
127
128    // report c1: total number of lines
129    fmt.Printf("Total file line count: %v\n", fileLineCount)
130    fmt.Printf("Line count time: %v\n", time.Since(start))
131
132    // report c3: donation frequency
133    for k, v := range dateMap {
134        fmt.Printf("Donations per month and year: %v and donation ncount: %v\n", k, v)
135    }
136    fmt.Printf("Donations time: %v\n", time.Since(start))
137
138    // report c4: most common firstName
139    fmt.Printf("The most common first name is: %s and it occurs: %v times.\n", commonName, commonCount)
140    fmt.Printf("Most common name time: %v\n", time.Since(start))
141}