From 54eaffd9dd11a893e4ec72e2e57be6d1870f5f70 Mon Sep 17 00:00:00 2001 From: Julian Hurst Date: Fri, 16 Oct 2020 12:00:19 +0200 Subject: [ytparser]: Fix parsing issues --- ytparser/ytparser.go | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/ytparser/ytparser.go b/ytparser/ytparser.go index fe9ae66..1ebd783 100644 --- a/ytparser/ytparser.go +++ b/ytparser/ytparser.go @@ -6,13 +6,16 @@ import ( "fmt" "net/http" "bytes" + "os" "io" "io/ioutil" "strings" "net/url" + "regexp" ) -const initDataString string = "window[\"ytInitialData\"] = " +const windowInitDataString string = "window[\"ytInitialData\"] = " +const initDataString string = "var ytInitialData = " const baseUrl string = "https://youtube.com" type Item struct { @@ -53,7 +56,7 @@ func parsejson(data string) ([]Item, error) { if err == io.EOF { break } else if err != nil { - return nil, err + return items, err } switch t := tok.(type) { @@ -148,21 +151,38 @@ func request(query string, page int, lang string) (string, error) { if err != nil { return "", err } - idx := bytes.Index(body, []byte(initDataString)) - idx += len(initDataString) + initString := windowInitDataString + idx := bytes.Index(body, []byte(windowInitDataString)) + if idx == -1 { + initString = initDataString + idx = bytes.Index(body, []byte(initDataString)) + } + idx += len(initString) startData := body[idx:] - idx = bytes.Index(startData, []byte(";\n")) - startData = startData[:idx] + pattern := regexp.MustCompile(`; *\n`) + loc := pattern.FindIndex(startData) + startData = startData[:loc[0]] return string(startData), nil } +func isValidData(data string) bool { + return data != "" +} + func Search(query string, page int, lang string) ([]Item, error) { if lang == "" { lang = "en" } - data, err := request(query, page, lang) - if err != nil { - return nil, err + var data string = "" + var err error + for i := 1; i < 4 && !isValidData(data); i++ { + if i > 1 { + fmt.Fprintf(os.Stderr, "Yt data invalid, retrying (attempt %d)\n", i) + } + data, err = request(query, page, lang) + if err != nil { + return nil, err + } } return parsejson(data) } -- cgit v1.2.3