aboutsummaryrefslogtreecommitdiff
path: root/ytparser.go
diff options
context:
space:
mode:
author Julian Hurst <julian.hurst92@gmail.com> 2020-10-19 11:02:47 +0200
committer Julian Hurst <julian.hurst92@gmail.com> 2020-10-19 11:02:47 +0200
commit 79fc981011ae28d770fb38a3f9902095151fc265 (patch)
tree bef67c8b1515769dc8b49b318aa6f18dcdd244ad /ytparser.go
download ytparser-79fc981011ae28d770fb38a3f9902095151fc265.tar.gz
Initial commit
Diffstat (limited to 'ytparser.go')
-rw-r--r-- ytparser.go 188
1 files changed, 188 insertions, 0 deletions
diff --git a/ytparser.go b/ytparser.go
new file mode 100644
index 0000000..1ebd783
--- /dev/null
+++ b/ytparser.go
@@ -0,0 +1,188 @@
+package ytparser
+
+import (
+ "text/template"
+ "encoding/json"
+ "fmt"
+ "net/http"
+ "bytes"
+ "os"
+ "io"
+ "io/ioutil"
+ "strings"
+ "net/url"
+ "regexp"
+)
+
+// Markers that introduce the embedded search-result JSON in the results page,
+// and the site root that relative links are joined onto.
+const (
+	// windowInitDataString is the assignment prefix that is looked for first.
+	windowInitDataString string = "window[\"ytInitialData\"] = "
+	// initDataString is the fallback prefix used when the first one is absent.
+	initDataString string = "var ytInitialData = "
+	// baseUrl is prepended to relative channel URLs taken from the JSON.
+	baseUrl string = "https://youtube.com"
+)
+
+// Item describes one video extracted from a search result page.
+type Item struct {
+	Id           string // value found under a "videoId" key
+	Title        string // text found under title > runs > text
+	Url          string // watch URL built from Id
+	Thumb        string // URL found under thumbnail > thumbnails > url
+	ChannelTitle string // text found under ownerText > runs > text
+	ChannelUrl   string // channel link, made absolute with the site root
+	Published    string // text found under publishedTimeText > simpleText
+}
+
+// Format renders the item through the template t and returns the result.
+// It panics when the template cannot be executed against the item.
+func (item Item) Format(t *template.Template) string {
+	out := new(strings.Builder)
+	if err := t.Execute(out, item); err != nil {
+		panic(err)
+	}
+	return out.String()
+}
+
+// String returns a one-line summary holding the id, title, URL and
+// thumbnail of the item.
+func (item Item) String() string {
+	return "id: " + item.Id +
+		", title: " + item.Title +
+		", url: " + item.Url +
+		", thumb: " + item.Thumb
+}
+
+// parsejson walks the ytInitialData JSON document token by token and returns
+// one Item for every "videoRenderer" key it meets, in document order.
+//
+// The document is not unmarshalled into a tree. Instead the key currently
+// being read is remembered for every open object (arrays do not add a level)
+// and fields are recognised by the tail of that key path, for example
+// title > runs > text. Values seen before the first "videoRenderer" key are
+// discarded.
+//
+// On malformed JSON it returns a nil slice and the wrapped decoder error.
+func parsejson(data string) ([]Item, error) {
+	// container records which kind of JSON container is open and, for
+	// objects, whether the next string token is a key or a value.
+	type container struct {
+		object  bool
+		wantKey bool
+	}
+
+	var (
+		items  []Item
+		item   Item
+		inItem bool        // set once the first videoRenderer key was seen
+		names  []string    // names[d-1] is the current key of the object at depth d
+		stack  []container // every open object and array, innermost last
+	)
+
+	// valueDone marks the end of a value; when that value sat directly in an
+	// object, the next string token is a key again. This must run for every
+	// kind of value, not only strings, or keys and values get out of step.
+	valueDone := func() {
+		if n := len(stack); n > 0 && stack[n-1].object {
+			stack[n-1].wantKey = true
+		}
+	}
+	// tail reports whether the current key path ends with keys.
+	tail := func(keys ...string) bool {
+		off := len(names) - len(keys)
+		if off < 0 {
+			return false
+		}
+		for i, k := range keys {
+			if names[off+i] != k {
+				return false
+			}
+		}
+		return true
+	}
+
+	dec := json.NewDecoder(strings.NewReader(data))
+	for {
+		tok, err := dec.Token()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return nil, fmt.Errorf("parsing ytInitialData: %w", err)
+		}
+
+		switch t := tok.(type) {
+		case json.Delim:
+			// Token only hands out properly nested delimiters, so the pops
+			// below never run on an empty stack.
+			switch t {
+			case '{':
+				stack = append(stack, container{object: true, wantKey: true})
+				names = append(names, "")
+			case '[':
+				stack = append(stack, container{})
+			case '}':
+				names = names[:len(names)-1]
+				stack = stack[:len(stack)-1]
+				valueDone()
+			case ']':
+				stack = stack[:len(stack)-1]
+				valueDone()
+			}
+		case string:
+			n := len(stack)
+			if n == 0 || !stack[n-1].object {
+				// Bare top-level string or array element: none of the
+				// wanted fields live there.
+				continue
+			}
+			if stack[n-1].wantKey {
+				if t == "videoRenderer" {
+					// A new video starts: store the one collected so far.
+					if inItem {
+						items = append(items, item)
+					}
+					item = Item{}
+					inItem = true
+				}
+				names[len(names)-1] = t
+				stack[n-1].wantKey = false
+				continue
+			}
+			switch {
+			case tail("videoId"):
+				item.Id = t
+				item.Url = fmt.Sprintf("https://youtube.com/watch?v=%s", t)
+			case tail("title", "runs", "text"):
+				item.Title = t
+			case tail("ownerText", "runs", "text"):
+				item.ChannelTitle = t
+			case tail("ownerText", "runs", "navigationEndpoint", "commandMetadata", "webCommandMetadata", "url"):
+				item.ChannelUrl = baseUrl + t
+			case tail("videoRenderer", "thumbnail", "thumbnails", "url"):
+				item.Thumb = t
+			case tail("videoRenderer", "publishedTimeText", "simpleText"):
+				item.Published = t
+			}
+			valueDone()
+		default:
+			// Numbers, booleans and null are values too.
+			valueDone()
+		}
+	}
+
+	// The last video has no following videoRenderer key to trigger its
+	// storage, so store it here.
+	if inItem {
+		items = append(items, item)
+	}
+	return items, nil
+}
+
+// PrintItems writes every item to standard output, one per line, rendered
+// with the text/template source given in format. It panics when format does
+// not parse or cannot be executed against an item.
+func PrintItems(items []Item, format string) {
+	tmpl, err := template.New("items").Parse(format)
+	if err != nil {
+		panic(err)
+	}
+	for idx := range items {
+		fmt.Println(items[idx].Format(tmpl))
+	}
+}
+
+// dataEndPattern matches the semicolon and newline that end the
+// ytInitialData assignment in the page source.
+var dataEndPattern = regexp.MustCompile(`; *\n`)
+
+// request downloads the search result page for query and returns the raw
+// ytInitialData JSON text embedded in it.
+//
+// page selects the result page and lang is sent as the hl parameter. An
+// error is returned when the HTTP request fails, the status is not 200 or
+// the body cannot be read. When the page holds no recognisable ytInitialData
+// assignment the result is "" with a nil error, so the caller can retry.
+func request(query string, page int, lang string) (string, error) {
+	searchURL := fmt.Sprintf("https://www.youtube.com/results?search_query=%s&page=%d&hl=%s",
+		url.QueryEscape(query), page, url.QueryEscape(lang))
+	res, err := http.DefaultClient.Get(searchURL)
+	if err != nil {
+		return "", err
+	}
+	// The body must be closed or the underlying connection leaks.
+	defer res.Body.Close()
+	if res.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("youtube search: unexpected status %s", res.Status)
+	}
+	body, err := ioutil.ReadAll(res.Body)
+	if err != nil {
+		return "", err
+	}
+	return extractInitData(body), nil
+}
+
+// extractInitData returns the text between the first known ytInitialData
+// marker in body and the terminating ";\n", or "" when either is missing.
+func extractInitData(body []byte) string {
+	for _, marker := range []string{windowInitDataString, initDataString} {
+		idx := bytes.Index(body, []byte(marker))
+		if idx == -1 {
+			continue
+		}
+		rest := body[idx+len(marker):]
+		loc := dataEndPattern.FindIndex(rest)
+		if loc == nil {
+			// Assignment is not terminated: treat the page as invalid.
+			return ""
+		}
+		return string(rest[:loc[0]])
+	}
+	return ""
+}
+
+// isValidData reports whether data holds any content at all.
+func isValidData(data string) bool {
+	return len(data) > 0
+}
+
+// Search looks up query on YouTube and returns the videos found on the given
+// result page. lang is passed on as the interface language and defaults to
+// "en" when empty.
+//
+// The page is fetched up to three times while it yields no data; each retry
+// is announced on standard error. A failed request aborts immediately with
+// its error. If no attempt yields data, an error is returned instead of an
+// empty result.
+func Search(query string, page int, lang string) ([]Item, error) {
+	if lang == "" {
+		lang = "en"
+	}
+	const maxAttempts = 3
+	for attempt := 1; attempt <= maxAttempts; attempt++ {
+		if attempt > 1 {
+			fmt.Fprintf(os.Stderr, "Yt data invalid, retrying (attempt %d)\n", attempt)
+		}
+		data, err := request(query, page, lang)
+		if err != nil {
+			return nil, err
+		}
+		if isValidData(data) {
+			return parsejson(data)
+		}
+	}
+	return nil, fmt.Errorf("youtube search %q: no valid data after %d attempts", query, maxAttempts)
+}