Allow filtering scrobble uploads by date

Necessary to avoid double scrobbling when uploading from two overlapping sources (e.g. my Spotify data [2016-2023] and my Last.fm data [2019-present]).
2025-02-21 11:54:02 -05:00 · 2025-02-21 11:54:02 -05:00 · 7957345057
commit 7957345057
parent ab01f2e213
6 changed files with 90 additions and 26 deletions
--- a/build.zig
+++ b/build.zig
@ -13,6 +13,9 @@ pub fn build(b: *std.Build) !void {
    });

    // Example dependency:
+    //
+    const zig_time_dep = b.dependency("zeit", .{});
+    exe.root_module.addImport("zeit", zig_time_dep.module("zeit"));

    // All dependencies **must** be added to imports above this line.

--- a/src/app/database/Schema.zig
+++ b/src/app/database/Schema.zig
@ -188,13 +188,31 @@ pub const Songartist = jetquery.Model(
    },
 );

-pub const Albumsong = jetquery.Model(@This(), "Albumsongs", struct {
+pub const Albumsong = jetquery.Model(
+    @This(),
+    "Albumsongs",
+    struct {
        id: i32,
        album_id: i32,
        song_id: i32,
        created_at: jetquery.DateTime,
        updated_at: jetquery.DateTime,
-}, .{ .relations = .{
+    },
+    .{
+        .relations = .{
            .album = jetquery.belongsTo(.Album, .{}),
            .song = jetquery.belongsTo(.Song, .{}),
+        },
+    },
+);
+
+pub const Scrobbleartist = jetquery.Model(@This(), "Scrobbleartists", struct {
+    id: i32,
+    scrobble_id: i32,
+    artist_id: i32,
+    created_at: jetquery.DateTime,
+    updated_at: jetquery.DateTime,
+}, .{ .relations = .{
+    .scrobble = jetquery.belongsTo(.Scrobble, .{}),
+    .artist = jetquery.belongsTo(.Artist, .{}),
 } });
--- a/src/app/database/migrations/2025-02-21_14-24-31_create_scrobbleartists.zig
+++ b/src/app/database/migrations/2025-02-21_14-24-31_create_scrobbleartists.zig
@ -0,0 +1,20 @@
+const std = @import("std");
+const jetquery = @import("jetquery");
+const t = jetquery.schema.table;
+
+pub fn up(repo: anytype) !void {
+    try repo.createTable(
+        "Scrobbleartists",
+        &.{
+            t.primaryKey("id", .{}),
+            t.column("scrobble_id", .integer, .{}),
+            t.column("artist_id", .integer, .{}),
+            t.timestamps(.{}),
+        },
+        .{},
+    );
+}
+
+pub fn down(repo: anytype) !void {
+    try repo.dropTable("Scrobbleartists", .{});
+}
--- a/src/app/jobs/process_scrobbles.zig
+++ b/src/app/jobs/process_scrobbles.zig
@ -20,7 +20,7 @@ pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig
    if (params.getT(.array, "scrobbles")) |scrobbles| {
        for (scrobbles.items()) |item| {
            //const fixed_date: u32 = @as(u32, item.getT(.integer, "date").?);
-            const scrobble: Scrobble = .{ .track = item.getT(.string, "track").?, .artist = item.getT(.string, "artist").?, .album = item.getT(.string, "album") orelse "empty", .date = @as(u64, @bitCast(@as(i64, @truncate(@divTrunc(item.getT(.integer, "date").?, 1000))))) };
+            const scrobble: Scrobble = .{ .track = item.getT(.string, "track").?, .artist = item.getT(.string, "artist").?, .album = item.getT(.string, "album") orelse "", .date = @as(u64, @bitCast(@as(i64, @truncate(item.getT(.integer, "date").? * 1000)))) };

            // Make hashes
            const album_hash = @as(i32, @bitCast(std.hash.Fnv1a_32.hash(scrobble.album)));
@ -33,7 +33,11 @@ pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig
            //          the IDs also depend on the hash of the album
            //          they're on, as well as the artist name. As far
            //          as I can tell, this is only an issue for Sufjan
-            //          Steven's `Songs for Christmas`.
+            //          Stevens' `Songs for Christmas` in practice.
+            //          (In reality, there are other albums with several
+            //          untitled songs -- Selected Ambient Works Vol. II by
+            //          Aphex Twin, ( ) by Sigur Rós, ... -- that have
+            //          working titles in their place.)

            // Album:   If the album is not self-titled, then
            //          album hash XOR artist hash. This way, if two
@ -65,6 +69,10 @@ pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig
            const artist_check = try jetzig.database.Query(.Artist).find(artist_id).execute(env.repo);
            const song_check = try jetzig.database.Query(.Song).find(song_id).execute(env.repo);

+            // I think there must be a better way to do this next part.
+            // There are very few situations where artist_check is null
+            // but song_check or album_check is not. Also, yes, the order
+            // of these checks is weird; I didn't put a lot of thought into it.
            var associative_table_flags: [3]bool = [3]bool{ true, true, true };

            if (album_check == null) {
@ -88,25 +96,30 @@ pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig
                if (associative_table_flags[2]) try jetzig.database.Query(.Songartist).insert(.{ .song_id = song_id, .artist_id = artist_id }).execute(env.repo);
            }

-            //try env.repo.execute(album_insert);
-            //try env.repo.execute(song_insert);
-            // Checks
-
-            // if (album_check == 0) try env.repo.execute(album_insert);
-            // if (artist_check == 0) try env.repo.execute(artist_insert);
-            // if (song_check == 0) try env.repo.execute(song_insert);
-
-            //const scrobble_offset = try jetzig.database.Query(.Scrobble).select(.{}).count().execute(env.repo) orelse unreachable;
-            //try jetzig.database.Query(.Scrobble).insert(.{ .id = scrobble_offset + 1, .song_id = song_id, .album_id = album_id, .artist_id = artist_id, .date = scrobble.date }).execute(env.repo);
+            try jetzig.database.Query(.Scrobble).insert(.{ .song_id = song_id, .album_id = album_id, .date = scrobble.date }).execute(env.repo);
        }
    }

-    const query = jetzig.database.Query(.Artist).include(.artistalbums, .{});
-    const results = try env.repo.all(query);
-    defer env.repo.free(results);
-    for (results) |result| {
-        for (result.artistalbums) |artistalbum| {
-            std.log.debug("{s}: {any}", .{ result.name, artistalbum.album_id });
-        }
-    }
+    // I would like to replicate this kind of functionality for several kinds of queries
+    // This one gives me all albums by Dream Theater (it also returns Dream Theater for
+    // each entry, but removing artists.name from the SELECT would remove that)
+    //
+    // SELECT
+    // artists.name, albums.name
+    // FROM
+    // "Albumartists"
+    // INNER JOIN artists
+    // ON "Albumartists".artist_id = artists.id
+    // INNER JOIN albums
+    // ON "Albumartists".album_id = albums.id
+    // WHERE artists.name = 'Dream Theater';
+
+    //const query = jetzig.database.Query(.Artist).include(.artistalbums, .{});
+    //const results = try env.repo.all(query);
+    //defer env.repo.free(results);
+    //for (results) |result| {
+    //    for (result.artistalbums) |artistalbum| {
+    //        std.log.debug("{s}: {any}", .{ result.name, artistalbum.album_id });
+    //    }
+    //}
 }
--- a/src/app/views/upload.zig
+++ b/src/app/views/upload.zig
@ -3,6 +3,7 @@ const jetzig = @import("jetzig");
 const jetquery = @import("jetzig").jetquery;
 const Scrobble = @import("../../types.zig").LastFMScrobble;
 const lastfm = @import("../../types.zig").LastFM;
+const zeit = @import("zeit");

 pub fn index(request: *jetzig.Request, data: *jetzig.Data) !jetzig.View {
    _ = data;
@ -27,7 +28,15 @@ pub fn post(request: *jetzig.Request) !jetzig.View {
        var job = try request.job("process_scrobbles");
        var scrobbles_data = try job.params.put("scrobbles", .array);

-        for (content.scrobbles) |scrobble| {
+        const params = try request.params();
+        // Optional lower bound taken from the "l" request parameter. A missing
+        // or empty value means "no limit", so nothing is handed to the
+        // ISO-8601 parser in that case.
+        const limiting_date_string: ?[]const u8 = if (params.get("l")) |param|
+            (if (param.string.value.len > 0) param.string.value else null)
+        else
+            null;
+        // Parse failures are propagated to the caller via `try`.
+        const limiting_date_instant: ?zeit.Instant = if (limiting_date_string) |str| try zeit.instant(.{ .source = .{ .iso8601 = str } }) else null;
+        // This is seconds from Unix epoch. Scrobbles dated *before* this bound
+        // are skipped, so the "no limit" fallback must be a lower sentinel, not
+        // an upper one. It is pre-divided by 1000 so that the later
+        // `limiting_date_epoch * 1000` (seconds -> milliseconds) cannot
+        // overflow an i64.
+        const limiting_date_epoch = if (limiting_date_instant) |time| time.unixTimestamp() else @divTrunc(std.math.minInt(i64), 1000);
+
+        appends: for (content.scrobbles) |scrobble| {
+            // Scrobble.date is in milliseconds from Unix epoch
+            if (scrobble.date < limiting_date_epoch * 1000) continue :appends;
            var value = try scrobbles_data.append(.object);
            // This is so unnecessary, probably useful once I start doing Spotify integration though
            inline for (std.meta.fields(Scrobble)) |f| {
--- a/src/app/views/upload/index.zmpl
+++ b/src/app/views/upload/index.zmpl
@ -16,6 +16,7 @@
  <fieldset>
  <input type="radio" name="t" label="Last.fm">Last.fm</input>
  <input type="radio" name="t" label="Spotify">Spotify</input>
+  Upload Scrobbles after: <input type="datetime-local" name="l" label="date"></input>
  </fieldset>
 </form>
 </body>