summary | refs | log | tree | commit | diff
path: root/main.go
diff options
context:
space:
mode:
Diffstat (limited to 'main.go')
-rw-r--r-- main.go 230
1 files changed, 212 insertions, 18 deletions
diff --git a/main.go b/main.go
index 98f4ab2..7cc7449 100644
--- a/main.go
+++ b/main.go
@@ -44,6 +44,8 @@ func main() {
}
defer app.DB.Close()
+ app.FixURLs()
+ return
/*
app.deleteOrphanedArticles()
app.topStories()
@@ -229,9 +231,7 @@ func getStory(id int) (Story, bool) {
return Story, false
}
if is_video {
- var link Link
- link.Url = normalizeUrl(Story.Url)
- link.Field = 2
+ link := getURL(Story.Url, 2)
Story.Links = append(Story.Links, link)
log.Info("match youtube host")
@@ -249,9 +249,7 @@ func getStory(id int) (Story, bool) {
return Story, false
}
if is_movie {
- var link Link
- link.Url = normalizeUrl(Story.Url)
- link.Field = 1
+ link := getURL(Story.Url, 1)
Story.Links = append(Story.Links, link)
log.Info("match moview platform url")
@@ -271,9 +269,7 @@ func getStory(id int) (Story, bool) {
if is_video {
if !duplicates[Story.Url] {
- var link Link
- link.Url = normalizeUrl(Story.Url)
- link.Field = 2
+ link := getURL(Story.Url, 2)
Story.Links = append(Story.Links, link)
log.Info("match video title")
@@ -295,9 +291,7 @@ func getStory(id int) (Story, bool) {
if is_movie {
if !duplicates[Story.Url] {
- var link Link
- link.Url = normalizeUrl(Story.Url)
- link.Field = 1
+ link := getURL(Story.Url, 1)
Story.Links = append(Story.Links, link)
log.Info("match moview platform url")
@@ -362,9 +356,7 @@ func getStory(id int) (Story, bool) {
if is_video {
if !duplicates[l] {
- var link Link
- link.Url = normalizeUrl(l)
- link.Field = 2
+ link := getURL(l, 2)
Story.Links = append(Story.Links, link)
log.Info("match youtube text")
@@ -387,9 +379,7 @@ func getStory(id int) (Story, bool) {
if is_movie {
if !duplicates[l] {
- var link Link
- link.Url = normalizeUrl(l)
- link.Field = 1
+ link := getURL(l, 1)
Story.Links = append(Story.Links, link)
log.Info("match moview platform text")
@@ -410,6 +400,210 @@ func getStory(id int) (Story, bool) {
}
}
+// getURL passes _url through normalizeUrl, parses the result and classifies
+// it into a Link.
+//
+// On a parse failure a warning is logged and the zero Link is returned.
+// Otherwise Field, Url and Host are always populated. For the YouTube and
+// IMDB hosts listed in the switch below, Type and Param are additionally
+// derived from the URL path or query string; any other host is returned
+// with Type and Param left empty.
+func getURL(_url string, field int) Link {
+	var link Link
+
+	_url = normalizeUrl(_url)
+
+	u, err := url.Parse(_url)
+	if err != nil {
+		log.Warnf("getURL: Parsing URL failed: %s \n", err.Error())
+		return link
+	}
+
+	link.Field = field
+	link.Url = _url
+	link.Host = u.Host
+
+	// For a path like "/a/b", parts is ["", "a", "b"]: index 0 is the empty
+	// string in front of the leading slash.
+	parts := strings.Split(u.Path, "/")
+
+	// segment returns the i-th path piece, or "" when the path is too
+	// short, so no branch below can index out of range.
+	segment := func(i int) string {
+		if i < len(parts) {
+			return parts[i]
+		}
+		return ""
+	}
+
+	// hasPrefix reports whether the path starts with any of the prefixes.
+	hasPrefix := func(prefixes ...string) bool {
+		for _, p := range prefixes {
+			if strings.HasPrefix(u.Path, p) {
+				return true
+			}
+		}
+		return false
+	}
+
+	switch u.Host {
+	case "www.youtube.com",
+		"music.youtube.com",
+		"www.music.youtube.com":
+		// Case order matters: "/watch/" must be tested before "/watch".
+		switch {
+		case hasPrefix("/@"):
+			// Handle form: /@name
+			link.Type = "channel"
+			link.Param = strings.TrimPrefix(segment(1), "@")
+
+		case hasPrefix("/c/", "/user/", "/channel/"):
+			link.Type = "channel"
+			link.Param = segment(2)
+
+		case hasPrefix("/shorts", "/live", "/embed"):
+			if len(parts) > 2 {
+				link.Type = "video"
+				link.Param = parts[2]
+			} else {
+				// No id after the prefix, e.g. a bare "/live".
+				link.Type = "null"
+			}
+
+		case hasPrefix("/playlist"):
+			link.Type = "playlist"
+			m, err := url.ParseQuery(u.RawQuery)
+			if err != nil {
+				log.Warnf("getURL: Parsing RawQuery for Youtube failed: %s \n", err.Error())
+				return link
+			}
+
+			// ?list= includes the playlist id
+			if p, ok := m["list"]; ok {
+				link.Param = p[0]
+			} else {
+				log.Warnf("getURL: Playlist: Youtube has no param: %s \n", link.Url)
+			}
+
+		case hasPrefix("/watch/"):
+			link.Type = "video"
+			link.Param = segment(2)
+
+		case hasPrefix("/watch"):
+			link.Type = "video"
+			m, err := url.ParseQuery(u.RawQuery)
+			if err != nil {
+				log.Warnf("getURL: Parsing RawQuery for Youtube failed: %s \n", err.Error())
+				return link
+			}
+
+			// ?v= includes the video id
+			if p, ok := m["v"]; ok {
+				link.Param = p[0]
+			} else {
+				log.Warnf("getURL: v=VideoID: Youtube has no param: %s \n", link.Url)
+			}
+
+		case u.Path == "/" || u.Path == "":
+			link.Type = "null"
+
+		default:
+			// Any other first path segment is treated as a channel name.
+			link.Type = "channel"
+			link.Param = segment(1)
+		}
+
+	case "www.imdb.com":
+		// Case order matters: specific prefixes precede the catch-all lists.
+		switch {
+		case hasPrefix("/title/"):
+			link.Type = "film"
+			// The id follows "/title/", i.e. it is segment 2; segment 1 is
+			// always the literal "title" and can never start with "tt".
+			if id := segment(2); strings.HasPrefix(id, "tt") {
+				link.Param = id
+			} else {
+				log.Infof("getURL: IMDB: title path doesn't have a film id: %s %+v", u.Path, parts)
+			}
+
+		case hasPrefix("/tt"):
+			link.Type = "film"
+			link.Param = segment(1)
+
+		case hasPrefix("/name/"):
+			link.Type = "actor"
+			link.Param = segment(2)
+
+		case hasPrefix("/character/"):
+			link.Type = "character"
+			link.Param = segment(2)
+
+		case hasPrefix("/company/"):
+			link.Type = "company"
+			link.Param = segment(2)
+
+		case hasPrefix("/video/", "/videoplayer/"):
+			link.Type = "video"
+			link.Param = segment(2)
+
+		case hasPrefix("/user/"):
+			link.Type = "user"
+			link.Param = segment(2)
+
+		case hasPrefix("/news/", "/board/announcement"):
+			link.Type = "news"
+			link.Param = segment(2)
+
+		case hasPrefix("/review/"):
+			link.Type = "review"
+			link.Param = segment(2)
+
+		case u.Path == "" || u.Path == "/" ||
+			strings.Contains(u.Path, "/mediaviewer/") ||
+			hasPrefix(
+				"/404", "/interfaces", "/titanic", "/freedive/", "/conditions",
+				"/help/", "/showtimes/", "/1", "/media", "/licensing", "/lists",
+				"/stats", "/rg/", "/hackernews", "/robots.txt", "/: the prestige",
+				"/features/", "/keyword/",
+			):
+			// Paths that are explicitly classified as carrying no parameter.
+			link.Type = "null"
+
+		case hasPrefix("/list/", "/chart/", "/whats-on-tv/") ||
+			strings.Contains(u.Path, "/top-rated-"):
+			link.Type = "film_list"
+			// A path such as "/top-rated-movies" has no third piece;
+			// segment yields "" there instead of panicking.
+			link.Param = segment(2)
+
+		case hasPrefix("/find", "/search", "/filmosearch"):
+			link.Type = "imdb_search"
+
+		default:
+			// Warn rather than exit: an unrecognised path in one URL
+			// should not terminate the whole program.
+			log.Warnf("getURL: IMDB: path does not include title: %s \n", u.Path)
+		}
+
+	case "www.vimeo.com":
+		// todo
+
+	default:
+		return link
+	}
+
+	// Collapse the "www." variant of the music host onto the canonical one.
+	if link.Host == "www.music.youtube.com" {
+		u.Host = "music.youtube.com"
+		link.Url = u.String()
+		link.Host = "music.youtube.com"
+	}
+
+	return link
+}
+
func getResponse(url string) *http.Response {
var err error
var response *http.Response