diff options
| author | Julian Hurst <julian.hurst92@gmail.com> | 2020-10-16 12:00:19 +0200 |
|---|---|---|
| committer | Julian Hurst <julian.hurst92@gmail.com> | 2020-10-16 12:00:19 +0200 |
| commit | 54eaffd9dd11a893e4ec72e2e57be6d1870f5f70 (patch) | |
| tree | ce99eb34d178322c095c4645dc3009525b659584 | |
| parent | e949e31dcb6959b6352f2f9123b945e7432da7cf (diff) | |
| download | grimtube-54eaffd9dd11a893e4ec72e2e57be6d1870f5f70.tar.gz | |
[ytparser]: Fix parsing issues
| -rw-r--r-- | ytparser/ytparser.go | 38 |
1 files changed, 29 insertions, 9 deletions
```diff
diff --git a/ytparser/ytparser.go b/ytparser/ytparser.go
index fe9ae66..1ebd783 100644
--- a/ytparser/ytparser.go
+++ b/ytparser/ytparser.go
@@ -6,13 +6,16 @@ import (
 	"fmt"
 	"net/http"
 	"bytes"
+	"os"
 	"io"
 	"io/ioutil"
 	"strings"
 	"net/url"
+	"regexp"
 )
 
-const initDataString string = "window[\"ytInitialData\"] = "
+const windowInitDataString string = "window[\"ytInitialData\"] = "
+const initDataString string = "var ytInitialData = "
 const baseUrl string = "https://youtube.com"
 
 type Item struct {
@@ -53,7 +56,7 @@ func parsejson(data string) ([]Item, error) {
 		if err == io.EOF {
 			break
 		} else if err != nil {
-			return nil, err
+			return items, err
 		}
 
 		switch t := tok.(type) {
@@ -148,21 +151,38 @@ func request(query string, page int, lang string) (string, error) {
 	if err != nil {
 		return "", err
 	}
-	idx := bytes.Index(body, []byte(initDataString))
-	idx += len(initDataString)
+	initString := windowInitDataString
+	idx := bytes.Index(body, []byte(windowInitDataString))
+	if idx == -1 {
+		initString = initDataString
+		idx = bytes.Index(body, []byte(initDataString))
+	}
+	idx += len(initString)
 	startData := body[idx:]
-	idx = bytes.Index(startData, []byte(";\n"))
-	startData = startData[:idx]
+	pattern := regexp.MustCompile(`; *\n`)
+	loc := pattern.FindIndex(startData)
+	startData = startData[:loc[0]]
 	return string(startData), nil
 }
 
+func isValidData(data string) bool {
+	return data != ""
+}
+
 func Search(query string, page int, lang string) ([]Item, error) {
 	if lang == "" {
 		lang = "en"
 	}
-	data, err := request(query, page, lang)
-	if err != nil {
-		return nil, err
+	var data string = ""
+	var err error
+	for i := 1; i < 4 && !isValidData(data); i++ {
+		if i > 1 {
+			fmt.Fprintf(os.Stderr, "Yt data invalid, retrying (attempt %d)\n", i)
+		}
+		data, err = request(query, page, lang)
+		if err != nil {
+			return nil, err
+		}
 	}
 	return parsejson(data)
 }
```
