diff options
Diffstat (limited to 'ytparser/ytparser.go')
| -rw-r--r-- | ytparser/ytparser.go | 163 |
1 files changed, 163 insertions, 0 deletions
// ytparser: scraping of YouTube search results. The declarations below fetch
// a results page, cut the embedded "ytInitialData" JSON out of it and collect
// one Item per "videoRenderer" entry found in that JSON.

const (
	// initDataString is the JavaScript assignment that immediately precedes
	// the embedded JSON document in the results page.
	initDataString string = "window[\"ytInitialData\"] = "
	// baseUrl is prepended to the site-relative channel URLs found in the JSON.
	baseUrl string = "https://youtube.com"
)

// Item is one video search result.
type Item struct {
	Id           string // value of the "videoId" key
	Title        string // text found under title.runs.text
	Url          string // watch URL built from Id
	Thumb        string // last thumbnail URL listed for the video
	ChannelTitle string // text found under ownerText.runs.text
	ChannelUrl   string // baseUrl plus the channel's relative URL
}

// Format renders item through t and returns the resulting text.
// It panics if the template fails to execute.
func (item Item) Format(t *template.Template) string {
	var b strings.Builder
	if err := t.Execute(&b, item); err != nil {
		panic(err)
	}
	return b.String()
}

// String returns a short one-line description of item (id, title, url and
// thumbnail; the channel fields are not included).
func (item Item) String() string {
	return fmt.Sprintf("id: %s, title: %s, url: %s, thumb: %s", item.Id, item.Title, item.Url, item.Thumb)
}

// pathEndsWith reports whether the key path ends with the given keys, in order.
func pathEndsWith(path []string, suffix ...string) bool {
	if len(path) < len(suffix) {
		return false
	}
	tail := path[len(path)-len(suffix):]
	for i, key := range suffix {
		if tail[i] != key {
			return false
		}
	}
	return true
}

// pathContains reports whether key appears anywhere in the key path.
func pathContains(path []string, key string) bool {
	for _, k := range path {
		if k == key {
			return true
		}
	}
	return false
}

// recordValue stores a string value found at the given key path into the
// matching field of item, if any. Paths list object keys only; arrays do not
// contribute an element.
func recordValue(item *Item, path []string, value string) {
	if pathEndsWith(path, "videoId") {
		item.Id = value
		item.Url = fmt.Sprintf("https://youtube.com/watch?v=%s", value)
	}
	if pathEndsWith(path, "title", "runs", "text") {
		item.Title = value
	}
	if pathEndsWith(path, "ownerText", "runs", "text") {
		item.ChannelTitle = value
	}
	if pathEndsWith(path, "ownerText", "runs", "navigationEndpoint",
		"commandMetadata", "webCommandMetadata", "url") {
		item.ChannelUrl = baseUrl + value
	}
	if pathEndsWith(path, "videoRenderer", "thumbnail", "thumbnails", "url") {
		// Every thumbnail overwrites the previous one, so the last listed wins.
		item.Thumb = value
	}
}

// parsejson walks the JSON document in data token by token and returns one
// Item for every "videoRenderer" key it contains, in document order.
//
// Only string values located inside a videoRenderer object are recorded, so
// data belonging to other parts of the document cannot leak into an item.
// It panics if data is not well-formed JSON.
func parsejson(data string) []Item {
	// frame describes one currently open JSON container.
	type frame struct {
		object  bool // true for {...}, false for [...]
		wantKey bool // objects only: the next token is a key, not a value
	}

	var (
		items   []Item
		item    Item
		started bool     // set once the first "videoRenderer" key has been seen
		names   []string // names[i] is the current key of the i-th open object
		stack   []frame  // all open containers, innermost last
	)

	dec := json.NewDecoder(strings.NewReader(data))
	for {
		tok, err := dec.Token()
		if err == io.EOF {
			break
		}
		if err != nil {
			panic(err)
		}

		top := len(stack) - 1
		inObject := top >= 0 && stack[top].object

		switch t := tok.(type) {
		case json.Delim:
			switch t {
			case '{', '[':
				// A nested container is the value of the pending key, so
				// the enclosing object expects a key once it is closed.
				if inObject {
					stack[top].wantKey = true
				}
				stack = append(stack, frame{object: t == '{', wantKey: true})
				if t == '{' {
					names = append(names, "")
				}
			case '}':
				// Token guarantees balanced delimiters, so both stacks
				// are non-empty here.
				stack = stack[:top]
				names = names[:len(names)-1]
			case ']':
				stack = stack[:top]
			}
		case string:
			switch {
			case !inObject:
				// Array element or bare top-level string: never a field.
			case stack[top].wantKey:
				names[len(names)-1] = t
				stack[top].wantKey = false
				if t == "videoRenderer" {
					if started {
						items = append(items, item)
					}
					item = Item{}
					started = true
				}
			default:
				if pathContains(names, "videoRenderer") {
					recordValue(&item, names, t)
				}
				stack[top].wantKey = true
			}
		default:
			// Numbers, booleans and null also consume the pending key.
			if inObject {
				stack[top].wantKey = true
			}
		}
	}

	// The item that was being filled when the input ended is a result too.
	if started {
		items = append(items, item)
	}
	return items
}

// PrintItems writes every item to standard output, one per line, rendered
// with the text/template source given in format. It panics if format does not
// parse or fails to execute.
func PrintItems(items []Item, format string) {
	t := template.Must(template.New("items").Parse(format))
	for _, item := range items {
		fmt.Println(item.Format(t))
	}
}

// extractInitialData returns the JSON text that follows initDataString in
// body, up to (not including) the first ";\n" after it. It returns an error
// when either marker is missing.
func extractInitialData(body []byte) (string, error) {
	start := bytes.Index(body, []byte(initDataString))
	if start < 0 {
		return "", fmt.Errorf("marker %q not found in response", initDataString)
	}
	data := body[start+len(initDataString):]
	end := bytes.Index(data, []byte(";\n"))
	if end < 0 {
		return "", fmt.Errorf("terminator %q not found after %q", ";\n", initDataString)
	}
	return string(data[:end]), nil
}

// request downloads the search results page for query and page and returns
// the initial-data JSON embedded in it. It returns an error if the request
// fails, the server does not answer 200 OK, or the JSON cannot be located.
func request(query string, page int) (string, error) {
	target := fmt.Sprintf("https://www.youtube.com/results?search_query=%s&page=%d",
		url.QueryEscape(query), page)
	res, err := http.DefaultClient.Get(target)
	if err != nil {
		return "", err
	}
	// Always release the connection, whatever happens below.
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return "", fmt.Errorf("fetching %s: unexpected status %s", target, res.Status)
	}
	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return "", err
	}
	return extractInitialData(body)
}

// Search runs a search for query, fetches the given results page and returns
// the videos found on it. It panics if the page cannot be fetched or parsed.
func Search(query string, page int) []Item {
	data, err := request(query, page)
	if err != nil {
		panic(err)
	}
	return parsejson(data)
}
