package ytparser import ( "text/template" "encoding/json" "fmt" "net/http" "bytes" "os" "io" "io/ioutil" "strings" "net/url" "regexp" ) const windowInitDataString string = "window[\"ytInitialData\"] = " const initDataString string = "var ytInitialData = " const baseUrl string = "https://youtube.com" type Item struct { Id string Title string Url string Thumb string ChannelTitle string ChannelUrl string Published string } func (item Item) Format(t *template.Template) string { var b strings.Builder err := t.Execute(&b, item) if err != nil { panic(err) } return b.String() } func (item Item) String() string { return fmt.Sprintf("id: %s, title: %s, url: %s, thumb: %s", item.Id, item.Title, item.Url, item.Thumb) } func parsejson(data string) ([]Item, error) { dec := json.NewDecoder(strings.NewReader(data)) depth := 0 isArray := false isValue := false var items []Item var item Item var names []string nbItems := 0 for { tok, err := dec.Token() if err == io.EOF { break } else if err != nil { return items, err } switch t := tok.(type) { case json.Delim: if t == '{' { depth++ } else if t == '}' { depth-- names = names[:depth] } isArray = t == '[' isValue = false case string: if !isArray { if !isValue { if t == "videoRenderer" { if nbItems > 0 { items = append(items, item) } item = Item{} nbItems++ } if depth > len(names) { names = append(names, t) } else { names[depth - 1] = t } //fmt.Println(t, depth, len(names), names[depth - 1]) isValue = true } else { //fmt.Println(names[len(names) - 1]) if names[depth-1] == "videoId" { item.Id = t item.Url = fmt.Sprintf("https://youtube.com/watch?v=%s", t) } if depth >= 3 && names[depth-3] == "title" && names[depth-2] == "runs" && names[depth-1] == "text" { item.Title = t } if depth >= 3 && names[depth-3] == "ownerText" && names[depth-2] == "runs" && names[depth-1] == "text" { item.ChannelTitle = t } if depth >= 6 && names[depth-6] == "ownerText" && names[depth-5] == "runs" && names[depth-4] == "navigationEndpoint" && names[depth-3] == "commandMetadata" && names[depth-2] == "webCommandMetadata" && names[depth-1] == "url" { item.ChannelUrl = baseUrl + t } if depth >= 4 && names[depth-4] == "videoRenderer" && names[depth-3] == "thumbnail" && names[depth-2] == "thumbnails" && names[depth-1] == "url" { item.Thumb = t } if depth >= 3 && names[depth-3] == "videoRenderer" && names[depth-2] == "publishedTimeText" && names[depth-1] == "simpleText" { item.Published = t } isValue = false } } default: } } return items, nil } func PrintItems(items []Item, format string) { t := template.Must(template.New("items").Parse(format)) for _, i := range items { fmt.Println(i.Format(t)) } } func request(query string, page int, lang string) (string, error) { q := url.QueryEscape(query) url := fmt.Sprintf("https://www.youtube.com/results?search_query=%s&page=%d&hl=%s", q, page, lang) res, err := http.DefaultClient.Get(url) if err != nil { return "", err } body, err := ioutil.ReadAll(res.Body) if err != nil { return "", err } initString := windowInitDataString idx := bytes.Index(body, []byte(windowInitDataString)) if idx == -1 { initString = initDataString idx = bytes.Index(body, []byte(initDataString)) } idx += len(initString) startData := body[idx:] pattern := regexp.MustCompile(`; *\n`) loc := pattern.FindIndex(startData) startData = startData[:loc[0]] return string(startData), nil } func isValidData(data string) bool { return data != "" } func Search(query string, page int, lang string) ([]Item, error) { if lang == "" { lang = "en" } var data string = "" var err error for i := 1; i < 4 && !isValidData(data); i++ { if i > 1 { fmt.Fprintf(os.Stderr, "Yt data invalid, retrying (attempt %d)\n", i) } data, err = request(query, page, lang) if err != nil { return nil, err } } return parsejson(data) }