Platinium
Platinium

Reputation: 98

YouTube API - retrieve more than 5k items

I want to fetch all of my liked videos (~25k items). As far as my research goes, this is not possible via the YouTube v3 API.

I have already found multiple issues (issue, issue) on the same problem. Some people claim to have fixed it, but it only works for them because they have fewer than 5000 items in their liked video list.

playlistItems list API endpoint with playlist id set to "liked videos" (LL) has a limit of 5000.

videos list API endpoint has a limit of 1000.

Unfortunately those endpoints don't provide me with parameters that I could use to paginate the requests myself (e.g. give me all the liked videos between date x and y), so I'm forced to take the provided order (which I can't get past 5k entries).

Is there any possibility I can fetch all my likes via the API?

Upvotes: 3

Views: 816

Answers (3)

DerekDrew
DerekDrew

Reputation: 11

Version that will grab Likes, Dislikes and also the secondary headline (the sub-title, sub-head or channel name). The latter is sometimes useful for music, because it can help identify the artist when the artist name is not in the title itself but in the sub-head or channel name instead. Fixed invalid video IDs (cards with a "www..." URL instead of a video title) breaking this. After you get your JSON, do this: Excel > Data > Get Data > From File > From JSON > Convert into Table > click on the 2nd column VALUE (double-arrow box) > EXTRACT VALUES ... > choose the TAB delimiter > select the Value column, then the SPLIT COLUMN tool up in the ribbon > By Delimiter > TAB > Close and Load.

    // https://www.youtube.com/watch?v=eZPXmCIQW5M
// https://myactivity.google.com/page?utm_source=my-activity&hl=en&page=youtube_likes 



// go over all "cards" in the activity webpage. (after scrolling down to the absolute bottom of it)
// create a dictionary - the key is the Video ID, the value is a list of the video's properties
/**
 * Collect liked and disliked videos from Google "My Activity" cards.
 *
 * A card that cannot be parsed is skipped instead of aborting the whole run:
 * that covers cards without a second text line, without a leading link,
 * without a `v` parameter in that link, or with a bare YouTube URL where the
 * title should be.
 *
 * @param {NodeList|Array} all_cards - activity card elements; only `forEach`
 *   is used on the collection.
 * @returns {Object} map of video ID to
 *   [title, video_length, date, time, action, secondary_headline]; a field
 *   the card does not provide is reported as "Unknown".
 */
function collector(all_cards) {
    var videos = {};
    all_cards.forEach(card => {
        const actionText = card.innerText || "";

        // A bare URL in place of the title ("Liked https://www.youtube.com/...")
        // leaves nothing useful to export. Compare case-insensitively: a
        // case-sensitive test for "liked ..." only matches the tail of
        // "Disliked ..." and lets "Liked https://..." cards through.
        if (actionText.toLowerCase().includes("liked https://www.youtube.com")) {
            return;
        }

        const lines = actionText.split("\n");
        // The first word of the second line is the action ("Liked", "Disliked", ...).
        const action = (lines[1] || "").split(" ")[0];
        if (action !== "Liked" && action !== "Disliked") {
            return;
        }

        const a_links = card.querySelectorAll("a");
        const details = a_links[0];
        if (!details) {
            return;
        }

        // Keep only the value of the `v` parameter so trailing parameters such
        // as "&t=30s" do not leak into the key, and skip links without one
        // rather than storing everything under the key "undefined".
        const id_match = /[?&]v=([^&#]+)/.exec(details.href || "");
        if (!id_match) {
            return;
        }
        const url = id_match[1];

        const video_length = a_links[3] ? a_links[3].innerText : "Unknown";
        const time = a_links[2] ? a_links[2].parentElement.innerText.split(" • ")[0] : "Unknown";
        const title = details.innerText;
        const dated_ancestor = card.closest("[data-date]");
        const date = dated_ancestor ? dated_ancestor.getAttribute("data-date") : "Unknown";
        // Third text line, with double quotes stripped.
        const secondary_headline = lines[2] !== undefined ? lines[2].replace(/"/g, "") : "Unknown";

        videos[url] = [title, video_length, date, time, action, secondary_headline];
    });

    return videos;
}


// https://stackoverflow.com/questions/57709550/how-to-download-text-from-javascript-variable-on-all-browsers
/**
 * Offer `text` to the user as a file download.
 *
 * The text is wrapped in a Blob, exposed through a temporary object URL and
 * "clicked" via a hidden anchor element. The anchor and the object URL are
 * released even when triggering the click throws.
 *
 * @param {string} filename - name suggested for the downloaded file.
 * @param {string} text - file contents.
 * @param {string} [type="text/plain"] - MIME type given to the Blob.
 */
function download(filename, text, type = "text/plain") {
    // Create the object URL before touching the DOM, so a failure here leaves
    // nothing behind that would need cleaning up.
    const blobUrl = window.URL.createObjectURL(
        new Blob([text], { type })
    );

    // Invisible A element carrying the URL and the desired file name
    const a = document.createElement("a");
    a.style.display = "none";
    a.href = blobUrl;
    a.setAttribute("download", filename);
    document.body.appendChild(a);

    try {
        // Trigger the download by simulating click
        a.click();
    } finally {
        // Cleanup runs on both the success and the failure path
        window.URL.revokeObjectURL(blobUrl);
        document.body.removeChild(a);
    }
}

/**
 * Entry point: find every YouTube activity card on the page, turn the cards
 * into a video lookup with collector(), and hand the JSON text to download().
 */
function main() {
    const card_selector = "div[aria-label='Card showing an activity from YouTube']";
    const payload = JSON.stringify(collector(document.querySelectorAll(card_selector)));

    // save the serialized lookup as a file
    download("liked_videos.json", payload);
}

// Run the export as soon as the script is evaluated.
main()

Upvotes: 1

Platinium
Platinium

Reputation: 98

more thoughts to the reply from @Yarin_007

  • If there are deleted videos in the timeline, they appear as "Liked https://...url". The script doesn't handle that format and fails, because the underlying elements don't have the same structure as those of existing videos. This can easily be fixed with a try/catch:
// Sketch of collector() with per-card error handling. The "...." line is a
// placeholder for the parsing body, which is not shown in this snippet.
function collector(all_cards) { 
    var liked_videos = {};
    all_cards.forEach(card => {
        // Anything thrown while handling one card is caught below, so the
        // loop carries on with the next card.
        try {
            // ignore Dislikes
            if (card.innerText.split("\n")[1].startsWith("Liked")) {
                ....
            }
        }
        catch {
            console.log("error, prolly deleted video")
        }
    })

    return liked_videos;
}
  • To scroll down to the bottom of the page I've used this simple script; no need to spin up something big:
// Delay between two scroll attempts, in milliseconds.
var millisecondsToWait = 1000;
// On every tick, jump to the current bottom of the page and log a message.
// The timer id returned by setInterval is not kept, so nothing in this
// snippet stops the loop.
setInterval(function() {
    window.scrollTo(0, document.body.scrollHeight);
    console.log("scrolling")
}, millisecondsToWait);
  • If more people want to retrieve this kind of data, one could think about building a proper script that is more convenient to use. If you check the network requests, you can find the desired data in the responses of the requests called batchexecute. One could copy the authentication from one of them and provide it to a script that queries those endpoints and prepares the data, like the other script I currently inject manually.

Upvotes: 4

Yarin_007
Yarin_007

Reputation: 1598

Hmm. perhaps Google Takeout?

takeout

I have verified the youtube data contains a csv called "liked videos.csv". The header is Video Id,Time Added, and the rows are

dQw4w9WgXcQ,2022-12-18 23:42:19 UTC

prvXCuEA1lw,2022-12-24 13:22:13 UTC

for example.

So you would need to retrieve video metadata per video ID. Not too bad though.

Note: the export could take a while, especially with 25k videos. (select only YouTube data)

I also had an idea that involves scraping the actual liked videos page (which would save you 25k HTTP requests), but I'm unsure if it breaks with more than 5000 songs. Also, emulating the POST requests on that page may prove quite difficult, albeit not impossible: they fetch /browse?key=..., and have some kind of obfuscated / encrypted base64 strings in the request body, among other parameters.


EDIT:

Look, there's probably a normal way to get a complete dump of all your Google data (I mean, other than Takeout. Email them? I don't know). Anyway, the following is the other idea...

  1. Follow this deep link to your liked videos history.

  2. Scroll to the bottom... maybe with selenium, maybe with autoit, maybe put something on the "end" key of your keyboard until you reach your first liked video.

  3. Hit f12 and run this in the developer console

// https://www.youtube.com/watch?v=eZPXmCIQW5M
// https://myactivity.google.com/page?utm_source=my-activity&hl=en&page=youtube_likes 



// go over all "cards" in the activity webpage. (after scrolling down to the absolute bottom of it)
// create a dictionary - the key is the Video ID, the value is a list of the video's properties
/**
 * Collect liked videos from Google "My Activity" cards.
 *
 * Only cards whose second text line starts with "Liked" are considered. A
 * card that lacks any piece this function reads (the second text line, the
 * links at positions 0, 2 and 3, a `v` parameter in the first link, or an
 * ancestor carrying `data-date`) is skipped, so a single odd card no longer
 * aborts the whole run with a TypeError.
 *
 * @param {NodeList|Array} all_cards - activity card elements; only `forEach`
 *   is used on the collection.
 * @returns {Object} map of video ID to [title, video_length, date, time].
 */
function collector(all_cards) {
    var liked_videos = {};
    all_cards.forEach(card => {
        const second_line = (card.innerText || "").split("\n")[1];
        // ignore Dislikes (and cards that have no second line at all)
        if (second_line === undefined || !second_line.startsWith("Liked")) {
            return;
        }

        // Positional parsing of the card's links; avoids relying on generated
        // class names.
        const a_links = card.querySelectorAll("a");
        const details = a_links[0];
        const time_link = a_links[2];
        const length_link = a_links[3];
        const dated_ancestor = card.closest("[data-date]");
        if (!details || !time_link || !length_link || !dated_ancestor) {
            return;
        }

        // Keep only the value of the `v` parameter so trailing parameters such
        // as "&t=30s" do not leak into the key, and skip links without one
        // rather than storing them under the key "undefined".
        const id_match = /[?&]v=([^&#]+)/.exec(details.href || "");
        if (!id_match) {
            return;
        }

        const title = details.innerText;
        const video_length = length_link.innerText;
        const date = dated_ancestor.getAttribute("data-date");
        const time = time_link.parentElement.innerText.split(" • ")[0];
        liked_videos[id_match[1]] = [title, video_length, date, time];
    })

    return liked_videos;
}


// https://stackoverflow.com/questions/57709550/how-to-download-text-from-javascript-variable-on-all-browsers
/**
 * Save `text` as a file by clicking a temporary, hidden download link.
 *
 * The anchor element and the Blob object URL are removed again in a
 * `finally` block, so they do not linger when the simulated click throws.
 *
 * @param {string} filename - name suggested for the downloaded file.
 * @param {string} text - file contents.
 * @param {string} [type="text/plain"] - MIME type given to the Blob.
 */
function download(filename, text, type = "text/plain") {
    // Build the Blob URL first; if this throws, no element has been added yet.
    const object_url = window.URL.createObjectURL(
        new Blob([text], { type })
    );

    // Create an invisible A element that points at the Blob
    const a = document.createElement("a");
    a.style.display = "none";
    a.href = object_url;

    // Use download attribute to set desired file name
    a.setAttribute("download", filename);
    document.body.appendChild(a);

    try {
        // Trigger the download by simulating click
        a.click();
    } finally {
        // Cleanup, whether or not the click succeeded
        window.URL.revokeObjectURL(object_url);
        document.body.removeChild(a);
    }
}

/**
 * Runs the export: selects the YouTube activity cards currently in the page,
 * passes them to collector(), and downloads the result as JSON text.
 */
function main() {
    // every activity card rendered so far
    const cards = document.querySelectorAll(
        "div[aria-label='Card showing an activity from YouTube']"
    );
    const by_video_id = collector(cards);
    const json_text = JSON.stringify(by_video_id);

    // offer the JSON as a file
    download("liked_videos.json", json_text);
}

// Run the export as soon as the script is evaluated.
main()



Basically it gathers all the liked videos' details and creates a key: video_ID - Value: [title,video_length, date, time] object for each liked video.

It then automatically downloads the json as a file.

Upvotes: 4

Related Questions