summaryrefslogtreecommitdiff
path: root/crawler/init.go
blob: 0c6c71f7249ebbb639ce5b1279e96882d120b02d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
package main

import (
	"errors"
	"strings"

	log "github.com/sirupsen/logrus"
	flag "github.com/spf13/pflag"
)

// _conf is the package-wide configuration. The init function in this file
// fills it by calling parseConfig with the --config path and then applies
// command-line overrides on top of it. The original note says it "gets
// overwritten by main" — NOTE(review): main is not visible from this file,
// confirm what it does with _conf.
var _conf Config

// init registers and parses the command-line flags, sets the logrus log level,
// loads the config file into _conf and finally applies command-line overrides
// on top of the loaded configuration.
//
// Log level selection, in order:
//   - --trace or --log-level=trace selects Trace,
//   - --debug, --verbose or --log-level=debug selects Debug,
//   - --log-level=info selects Info,
//   - anything else selects Warn,
//   - --silent forces Warn regardless of the above,
//   - a config with Debug set raises the level to Debug unless --silent was
//     given; it never lowers an already more verbose level such as Trace.
//
// Command-line values for --delay and --ignore-robots-txt override the config
// file whenever the flag was explicitly given, including when the given value
// equals the flag's default.
//
// The process is terminated via log.Fatal when --only-shop and --exclude-shop
// are both supplied.
func init() {
	// overwrites unhelpful error message
	flag.ErrHelp = errors.New("")

	// Flags are parsed here so that the requested log level is already in
	// effect while the config file is being parsed below.
	configFile := flag.StringP("config", "c", "", "path to config file")
	debug := flag.BoolP("debug", "d", false, "set log level to \"Debug\"")
	trace := flag.BoolP("trace", "t", false, "set log level to \"Trace\"")
	verbose := flag.BoolP("verbose", "v", false, "set log level to \"Debug\", same as --debug")
	silent := flag.BoolP("silent", "s", false, "suppress output except warnings")
	logLevelFlag := flag.String("log-level", "Warn", `set log level, can be "Warn", "Info", "Debug" or "Trace"`)
	// The value of --list-shops is not used in this function; it is only
	// registered here. NOTE(review): presumably it is looked up elsewhere.
	flag.BoolP("list-shops", "l", false, `list all crawlable shops`)
	onlyShops := flag.StringP("only-shop", "o", "", `comma separated list of shop ids, crawl only these`)
	excludeShops := flag.StringP("exclude-shop", "x", "", `comma separated list of shop ids, DO NOT crawl these`)
	userAgent := flag.StringP("user-agent", "u", "", "set user agent")
	delay := flag.Int("delay", 0, "enable and set delay in seconds between crawls (default 0)")
	ignoreRobots := flag.Bool("ignore-robots-txt", true, "ignore robots.txt")

	flag.Parse()

	logLevel := strings.ToLower(*logLevelFlag)
	switch {
	case *trace || logLevel == "trace":
		log.SetLevel(log.TraceLevel)
	case *debug || *verbose || logLevel == "debug":
		log.SetLevel(log.DebugLevel)
	case logLevel == "info":
		log.SetLevel(log.InfoLevel)
	default:
		log.SetLevel(log.WarnLevel)
	}

	// --silent wins over every other log level flag.
	if *silent {
		log.SetLevel(log.WarnLevel)
	}

	_conf.parseConfig(*configFile)

	if *userAgent != "" {
		_conf.UserAgent = *userAgent
	}
	// Use Changed instead of comparing against the default value, so that an
	// explicit "--delay 0" can still override a delay set in the config file.
	if flag.CommandLine.Changed("delay") {
		_conf.Delay = *delay
	}
	// Same reasoning: the flag defaults to true, so comparing against the
	// default would make it impossible to override a config value of false
	// with an explicit --ignore-robots-txt.
	if flag.CommandLine.Changed("ignore-robots-txt") {
		_conf.IgnoreRobotsTXT = *ignoreRobots
	}

	// The config may request debug output, but it must not reduce a more
	// verbose level (Trace) that was explicitly requested on the command line.
	// logrus levels grow with verbosity, so "<" means "less verbose than".
	if _conf.Debug && !*silent && log.GetLevel() < log.DebugLevel {
		log.SetLevel(log.DebugLevel)
	}

	// Reject contradictory shop filters before touching the configuration.
	if *onlyShops != "" && *excludeShops != "" {
		log.Fatal("init.go: Config error: Cannot use both flags --exclude-shop and --only-shop at the same time.")
	}

	if *onlyShops != "" {
		_conf.ShopIDs = strings.Split(*onlyShops, ",")
	}
	if *excludeShops != "" {
		_conf.ExcludeShopIDs = strings.Split(*excludeShops, ",")
	}
}