1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
|
package main
import (
"strconv"
"strings"
"github.com/gocolly/colly"
)
func (app *App) ScrapeGithub(platform Platform) []Entry {
var err error
URL := platform.URL
Languages := app.GetLanguages()
UpdatePeriods := app.GetUpdatePeriods()
current_language := Language{}
current_update_period := UpdatePeriod{}
Entries := []Entry{}
c := app.customCollector([]string{"www.github.com", "github.com"})
c.OnHTML("ol.repo-list > li", func(e *colly.HTMLElement) {
entry := Entry{}
owner := Owner{}
e.ForEach("div > h3", func(i int, e *colly.HTMLElement) {
entry.URL = URL + e.ChildAttr("a", "href")
entry.Title = e.ChildText("a")
owner.Name = strings.TrimSuffix(e.ChildText("a > span"), " /")
})
e.ForEach("div.py-1", func(i int, e *colly.HTMLElement) {
entry.Synopsis = e.ChildText("p")
})
e.ForEach("div.text-gray", func(i int, e *colly.HTMLElement) {
if i == 0 {
entry.Stars, err = strconv.Atoi(e.ChildText("a.muted-text"))
if err != nil {
Warn(err, "Github: Extracting stars from "+entry.Title+" failed")
}
}
})
owner.Platform = &platform
owner.URL = URL + owner.Name
entry.Owner = &owner
entry.Platform = &platform
entry.Language = ¤t_language
entry.UpdatePeriod = ¤t_update_period
entry.Created_At = app.Now
Entries = append(Entries, entry)
})
for _, l := range Languages {
current_language = l
for _, t := range UpdatePeriods {
current_update_period = t
CURRENT_URL := URL + "/trending/" + l.Name + "?since=" + t.Name
err := c.Visit(CURRENT_URL)
if err != nil {
Warn(err, "Scraping Platform "+platform.Name+" failed with URL: "+CURRENT_URL)
}
}
}
return Entries
}
|