diff options
| author | Julian Hurst <julian.hurst92@gmail.com> | 2020-10-19 11:02:47 +0200 |
|---|---|---|
| committer | Julian Hurst <julian.hurst92@gmail.com> | 2020-10-19 11:02:47 +0200 |
| commit | 79fc981011ae28d770fb38a3f9902095151fc265 (patch) | |
| tree | bef67c8b1515769dc8b49b318aa6f18dcdd244ad /ytparser.go | |
| download | ytparser-79fc981011ae28d770fb38a3f9902095151fc265.tar.gz | |
Initial commit
Diffstat (limited to 'ytparser.go')
| -rw-r--r-- | ytparser.go | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/ytparser.go b/ytparser.go new file mode 100644 index 0000000..1ebd783 --- /dev/null +++ b/ytparser.go @@ -0,0 +1,188 @@ +package ytparser + +import ( + "text/template" + "encoding/json" + "fmt" + "net/http" + "bytes" + "os" + "io" + "io/ioutil" + "strings" + "net/url" + "regexp" +) + +const windowInitDataString string = "window[\"ytInitialData\"] = " +const initDataString string = "var ytInitialData = " +const baseUrl string = "https://youtube.com" + +type Item struct { + Id string + Title string + Url string + Thumb string + ChannelTitle string + ChannelUrl string + Published string +} + +func (item Item) Format(t *template.Template) string { + var b strings.Builder + err := t.Execute(&b, item) + if err != nil { + panic(err) + } + return b.String() +} + +func (item Item) String() string { + return fmt.Sprintf("id: %s, title: %s, url: %s, thumb: %s", item.Id, item.Title, item.Url, item.Thumb) +} + +func parsejson(data string) ([]Item, error) { + dec := json.NewDecoder(strings.NewReader(data)) + + depth := 0 + isArray := false + isValue := false + var items []Item + var item Item + var names []string + nbItems := 0 + for { + tok, err := dec.Token() + if err == io.EOF { + break + } else if err != nil { + return items, err + } + + switch t := tok.(type) { + case json.Delim: + if t == '{' { + depth++ + } else if t == '}' { + depth-- + names = names[:depth] + } + isArray = t == '[' + isValue = false + case string: + if !isArray { + if !isValue { + if t == "videoRenderer" { + if nbItems > 0 { + items = append(items, item) + } + item = Item{} + nbItems++ + } + if depth > len(names) { + names = append(names, t) + } else { + names[depth - 1] = t + } + //fmt.Println(t, depth, len(names), names[depth - 1]) + isValue = true + } else { + //fmt.Println(names[len(names) - 1]) + if names[depth-1] == "videoId" { + item.Id = t + item.Url = fmt.Sprintf("https://youtube.com/watch?v=%s", t) + } + if depth >= 3 && names[depth-3] == "title" && + names[depth-2] == "runs" && + names[depth-1] == "text" { + item.Title = t + } + if depth >= 3 && names[depth-3] == "ownerText" && + names[depth-2] == "runs" && + names[depth-1] == "text" { + item.ChannelTitle = t + } + if depth >= 6 && + names[depth-6] == "ownerText" && + names[depth-5] == "runs" && + names[depth-4] == "navigationEndpoint" && + names[depth-3] == "commandMetadata" && + names[depth-2] == "webCommandMetadata" && + names[depth-1] == "url" { + item.ChannelUrl = baseUrl + t + } + if depth >= 4 && + names[depth-4] == "videoRenderer" && + names[depth-3] == "thumbnail" && + names[depth-2] == "thumbnails" && + names[depth-1] == "url" { + item.Thumb = t + } + if depth >= 3 && + names[depth-3] == "videoRenderer" && + names[depth-2] == "publishedTimeText" && + names[depth-1] == "simpleText" { + item.Published = t + } + isValue = false + } + } + default: + } + } + return items, nil +} + +func PrintItems(items []Item, format string) { + t := template.Must(template.New("items").Parse(format)) + for _, i := range items { + fmt.Println(i.Format(t)) + } +} + +func request(query string, page int, lang string) (string, error) { + q := url.QueryEscape(query) + url := fmt.Sprintf("https://www.youtube.com/results?search_query=%s&page=%d&hl=%s", q, page, lang) + res, err := http.DefaultClient.Get(url) + if err != nil { + return "", err + } + body, err := ioutil.ReadAll(res.Body) + if err != nil { + return "", err + } + initString := windowInitDataString + idx := bytes.Index(body, []byte(windowInitDataString)) + if idx == -1 { + initString = initDataString + idx = bytes.Index(body, []byte(initDataString)) + } + idx += len(initString) + startData := body[idx:] + pattern := regexp.MustCompile(`; *\n`) + loc := pattern.FindIndex(startData) + startData = startData[:loc[0]] + return string(startData), nil +} + +func isValidData(data string) bool { + return data != "" +} + +func Search(query string, page int, lang string) ([]Item, error) { + if lang == "" { + lang = "en" + } + var data string = "" + var err error + for i := 1; i < 4 && !isValidData(data); i++ { + if i > 1 { + fmt.Fprintf(os.Stderr, "Yt data invalid, retrying (attempt %d)\n", i) + } + data, err = request(query, page, lang) + if err != nil { + return nil, err + } + } + return parsejson(data) +} |
