From 8d68ac7c900241eb8499a94c23ab1f60750e7aed Mon Sep 17 00:00:00 2001
From: horus
Date: Fri, 15 Jun 2018 23:28:18 +0200
Subject: Introduces config for user agent, robots.txt and crawler delay. (crawler)

---
 crawler/init.go | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/crawler/init.go b/crawler/init.go
index 60f7e47..668df2d 100644
--- a/crawler/init.go
+++ b/crawler/init.go
@@ -23,6 +23,9 @@ func init() {
 	loglevel_f := flag.StringP("loglevel", "l", "Warn", `sets log level, can be "Warn", "Info" or "Debug"`)
 	flag.Bool("list-shops", false, `lists all crawlable shops`)
 	shopids_f := flag.StringP("restrict-shops", "r", "", `comma separated list of shop ids, crawls only these`)
+	user_agent_f := flag.StringP("user-agent", "u", "", "sets user agent")
+	delay_f := flag.Int("delay", 0, "sets random delay between crawls")
+	ignore_robots_f := flag.Bool("ignore-robots-txt", true, "ignores robots.txt")
 
 	flag.Parse()
 	loglevel := strings.ToLower(*loglevel_f)
@@ -41,6 +44,16 @@ func init() {
 
 	_conf.parseConfig(*configFile)
 
+	if *user_agent_f != "" {
+		_conf.UserAgent = *user_agent_f
+	}
+	if *delay_f != 0 {
+		_conf.Delay = *delay_f
+	}
+	if !*ignore_robots_f {
+		_conf.IgnoreRobotsTXT = *ignore_robots_f
+	}
+
 	if _conf.Debug && !*silent {
 		log.SetLevel(log.DebugLevel)
 	}
-- 
cgit v1.2.3
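
For reference, the override block above assumes that _conf exposes matching exported fields. The struct below is a minimal sketch of that config side, not the repository's actual definition; only the field names UserAgent, Delay, IgnoreRobotsTXT and Debug appear in the diff, everything else (type name, comments, units) is assumed:

	// Sketch of the assumed config type; field names are taken from the
	// diff above, the rest is illustrative.
	type conf struct {
		UserAgent       string // User-Agent header to send with requests
		Delay           int    // random delay between crawls (units assumed)
		IgnoreRobotsTXT bool   // skip robots.txt handling when true
		Debug           bool   // enables debug-level logging
	}

Assuming the binary is named "crawler" (illustrative), the new flags could be invoked like this:

	./crawler --user-agent "shopbot/1.0" --delay 5 --ignore-robots-txt=false

Note the precedence pattern: each flag only overrides the value parsed from the config file when it differs from its default ("" for --user-agent, 0 for --delay, true for --ignore-robots-txt). A consequence is that an explicit --delay 0 or --ignore-robots-txt=true cannot override a config file value, since those are the defaults the code treats as "not set".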