From 8d68ac7c900241eb8499a94c23ab1f60750e7aed Mon Sep 17 00:00:00 2001 From: horus Date: Fri, 15 Jun 2018 23:28:18 +0200 Subject: Introduces config for user agent, robots.txt and crawler delay. (crawler) --- crawler/config.go | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'crawler/config.go') diff --git a/crawler/config.go b/crawler/config.go index f89fa45..a3939c4 100644 --- a/crawler/config.go +++ b/crawler/config.go @@ -17,6 +17,10 @@ type Config struct { DBOptions string DBPath string // for sqlite + UserAgent string + Delay int + IgnoreRobotsTXT bool + DisableURLShorter bool Polr_URL string Polr_API_Key string @@ -40,6 +44,12 @@ func (c *Config) parseConfig(configFile string) { viper.SetDefault("FixDatabase", false) viper.SetDefault("DisableURLShorter", false) viper.SetDefault("ShopIDs", []string{}) + viper.SetDefault("Delay", 0) + + // needs some refactoring to truly respect robots.txt + viper.SetDefault("IgnoreRobotsTXT", true) + + viper.SetDefault("UserAgent", "colly - a friendly crawler :)") // Name of the configuration file viper.SetConfigName("config") @@ -95,10 +105,16 @@ func (c *Config) setsConfig() { c.DBDBName = viper.GetString("DB_DBName") c.DBOptions = viper.GetString("DB_Options") c.DBPath = viper.GetString("DB_Path") - c.Debug = viper.GetBool("Debug") - c.FixDatabase = viper.GetBool("FixDatabase") + + c.UserAgent = viper.GetString("UserAgent") + c.Delay = viper.GetInt("Delay") + c.IgnoreRobotsTXT = viper.GetBool("IgnoreRobotsTXT") + c.DisableURLShorter = viper.GetBool("DisableURLShorter") - c.ShopIDs = viper.GetStringSlice("ShopIDs") c.Polr_URL = viper.GetString("Polr_URL") c.Polr_API_Key = viper.GetString("Polr_API_Key") + + c.Debug = viper.GetBool("Debug") + c.FixDatabase = viper.GetBool("FixDatabase") + c.ShopIDs = viper.GetStringSlice("ShopIDs") } -- cgit v1.2.3