scrobbleIngest

The code sucks. The functionality isn't quite what we hoped for. Dates are formatted incorrectly. There are practically no comments. It's not modular whatsoever. I lost several years of my life trying to make this work. LGTM
2025-07-19 18:25:10 -04:00 · 2025-07-19 18:25:10 -04:00 · 5f451868af
commit 5f451868af
parent 2a42e07df0
8 changed files with 346 additions and 372 deletions
--- a/src/app/jobs/add_album.zig
+++ b/src/app/jobs/add_album.zig
@ -0,0 +1,31 @@
+const std = @import("std");
+const jetzig = @import("jetzig");
+
+// The `run` function for a job is invoked every time the job is processed by a queue worker
+// (or by the Jetzig server if the job is processed in-line).
+//
+// Arguments:
+// * allocator: Arena allocator for use during the job execution process.
+// * params:    Params assigned to a job (from a request, values added to response data).
+// * env:       Provides the following fields:
+//              - logger:      Logger attached to the same stream as the Jetzig server.
+//              - environment: Enum of `{ production, development }`.
+pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig.jobs.JobEnv) !void {
+    _ = allocator;
+    const artists = params.getT(.array, "artists").?.items();
+    const album_id = try (params.get("album_hash").?).coerce(u64);
+
+    for (artists) |artist| {
+        const artist_name = try artist.coerce([]const u8);
+        const artist_id = std.hash.Fnv1a_64.hash(artist_name);
+        const paired = @as(i64, @bitCast(@mod(@divFloor((artist_id +% album_id) *% (artist_id +% album_id +% 1), 2) +% album_id, std.math.maxInt(u64))));
+        const aa_query = try jetzig.database.Query(.Artistalbum)
+            .find(paired).execute(env.repo);
+
+        if (aa_query == null) {
+            try jetzig.database.Query(.Artistalbum)
+                .insert(.{ .id = paired, .artist_id = @as(i64, @bitCast(artist_id)), .album_id = @as(i64, @bitCast(album_id)) })
+                .execute(env.repo);
+        }
+    }
+}
--- a/src/app/jobs/add_artist.zig
+++ b/src/app/jobs/add_artist.zig
@ -0,0 +1,25 @@
+const std = @import("std");
+const jetzig = @import("jetzig");
+
+// The `run` function for a job is invoked every time the job is processed by a queue worker
+// (or by the Jetzig server if the job is processed in-line).
+//
+// Arguments:
+// * allocator: Arena allocator for use during the job execution process.
+// * params:    Params assigned to a job (from a request, values added to response data).
+// * env:       Provides the following fields:
+//              - logger:      Logger attached to the same stream as the Jetzig server.
+//              - environment: Enum of `{ production, development }`.
+pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig.jobs.JobEnv) !void {
+    _ = allocator;
+    const artist = params.getT(.string, "artist").?;
+    const id = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(artist)));
+    const artist_query = try jetzig.database.Query(.Artist)
+        .find(id).execute(env.repo);
+
+    if (artist_query == null) {
+        try jetzig.database.Query(.Artist)
+            .insert(.{ .id = id, .name = artist })
+            .execute(env.repo);
+    }
+}
--- a/src/app/jobs/add_song.zig
+++ b/src/app/jobs/add_song.zig
@ -0,0 +1,34 @@
+const std = @import("std");
+const jetzig = @import("jetzig");
+
+// The `run` function for a job is invoked every time the job is processed by a queue worker
+// (or by the Jetzig server if the job is processed in-line).
+//
+// Arguments:
+// * allocator: Arena allocator for use during the job execution process.
+// * params:    Params assigned to a job (from a request, values added to response data).
+// * env:       Provides the following fields:
+//              - logger:      Logger attached to the same stream as the Jetzig server.
+//              - environment: Enum of `{ production, development }`.
+pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig.jobs.JobEnv) !void {
+    _ = allocator;
+    //const album = params.getT(.string, "album").?;
+    const as_id = try (params.get("as_hash").?).coerce(u64);
+    const album_artists = params.getT(.array, "album_artists").?.items();
+    // Will use this eventually, but not now
+    // const track_artists = params.getT(.array,"track_artists");
+
+    for (album_artists) |artist| {
+        const artist_name = try artist.coerce([]const u8);
+        const artist_id = std.hash.Fnv1a_64.hash(artist_name);
+        const asa_id = @as(i64, @bitCast(@mod(@divFloor((as_id +% artist_id) *% (as_id +% artist_id +% 1), 2) +% artist_id, std.math.maxInt(u64))));
+        const asa_query = try jetzig.database.Query(.Albumsongsartist)
+            .find(asa_id).execute(env.repo);
+
+        if (asa_query == null) {
+            try jetzig.database.Query(.Albumsongsartist)
+                .insert(.{ .id = asa_id, .albumsong_id = @as(i64, @bitCast(as_id)), .artist_id = @as(i64, @bitCast(artist_id)) })
+                .execute(env.repo);
+        }
+    }
+}
--- a/src/app/jobs/process_scrobbles.zig
+++ b/src/app/jobs/process_scrobbles.zig
@ -1,134 +0,0 @@
-const std = @import("std");
-const jetzig = @import("jetzig");
-const jetquery = @import("jetzig").jetquery;
-const Data = @import("../../types.zig");
-const rules = @import("../../apply_rule.zig");
-
-// The `run` function for a job is invoked every time the job is processed by a queue worker
-// (or by the Jetzig server if the job is processed in-line).
-//
-// Arguments:
-// * allocator: Arena allocator for use during the job execution process.
-// * params:    Params assigned to a job (from a request, values added to response data).
-// * env:       Provides the following fields:
-//              - logger:      Logger attached to the same stream as the Jetzig server.
-//              - environment: Enum of `{ production, development }`.
-pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig.jobs.JobEnv) !void {
-    //_ = env;
-    if (params.getT(.array, "scrobbles")) |scrobbles| {
-        for (scrobbles.items()) |item| {
-
-            // Probably want to include artist name here, but not sure how to yet
-
-            const track_artists = item.getT(.array, "artists_track").?.items();
-            const album_artists = item.getT(.array, "artists_album").?.items();
-
-            var track_artist_name_buffer = try allocator.alloc([]const u8, track_artists.len);
-            var album_artist_name_buffer = try allocator.alloc([]const u8, album_artists.len);
-            var track_artist_id_buffer = try allocator.alloc(i64, track_artists.len);
-            var album_artist_id_buffer = try allocator.alloc(i64, album_artists.len);
-
-            const scrobble: Data.Scrobble = .{
-                .track = item.getT(.string, "track").?,
-                .artists_track = track_artist_name_buffer,
-                .album = item.getT(.string, "album") orelse "",
-                .artists_album = album_artist_name_buffer,
-                .date = @as(i64, @truncate(item.getT(.integer, "date").?)),
-            };
-
-            var album_hash_string = std.ArrayList(u8).init(allocator);
-            var track_hash_string = std.ArrayList(u8).init(allocator);
-
-            // I theoretically don't need this for loop
-            for (track_artists, 0..track_artists.len) |artist, i| {
-                const artist_name = try artist.coerce([]const u8);
-                track_artist_name_buffer[i] = artist_name;
-                track_artist_id_buffer[i] = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(artist_name)));
-            }
-
-            for (album_artists, 0..album_artists.len) |artist, i| {
-                const artist_name = try artist.coerce([]const u8);
-                album_artist_name_buffer[i] = artist_name;
-                album_artist_id_buffer[i] = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(artist_name)));
-                try album_hash_string.appendSlice(artist_name);
-            }
-
-            try album_hash_string.appendSlice(scrobble.album);
-            try track_hash_string.appendSlice(scrobble.album);
-            const album_hash = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(album_hash_string.items)));
-            try track_hash_string.appendSlice(scrobble.track);
-            const track_hash = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(track_hash_string.items)));
-
-            var albumsong_id = try jetzig.database.Query(.Albumsong)
-                .find(album_hash ^ track_hash)
-                .select(.{.id}).execute(env.repo);
-
-            var album_id = try jetzig.database.Query(.Album)
-                .find(album_hash)
-                .select(.{.id}).execute(env.repo);
-
-            for (track_artist_name_buffer, track_artist_id_buffer) |scrobble_track_artist, track_artist_hash| {
-                var artist_id = try jetzig.database.Query(.Artist)
-                    .find(track_artist_hash)
-                    .select(.{.id}).execute(env.repo);
-
-                if (artist_id == null)
-                    artist_id = try jetzig.database.Query(.Artist)
-                        .insert(.{ .id = track_artist_hash, .name = scrobble_track_artist, .disambiguation = null })
-                        .returning(.{.id}).execute(env.repo);
-
-                if (albumsong_id == null) {
-                    var track_id = try jetzig.database.Query(.Song)
-                        .find(track_hash)
-                        .select(.{.id}).execute(env.repo);
-
-                    if (track_id == null)
-                        track_id = try jetzig.database.Query(.Song)
-                            .insert(.{ .id = track_hash, .name = scrobble.track, .length = null, .hidden = false })
-                            .returning(.{.id}).execute(env.repo);
-
-                    if (album_id == null)
-                        album_id = try jetzig.database.Query(.Album)
-                            .insert(.{ .id = album_hash, .name = scrobble.album, .length = null })
-                            .returning(.{.id}).execute(env.repo);
-
-                    albumsong_id = try jetzig.database.Query(.Albumsong)
-                        .insert(.{ .song_id = track_id.?.id, .album_id = album_id.?.id })
-                        .returning(.{.id}).execute(env.repo);
-
-                    try jetzig.database.Query(.Albumsongsartist)
-                        .insert(.{ .albumsong_id = albumsong_id.?.id, .artist_id = artist_id.?.id }).execute(env.repo);
-                } else {
-                    const ins_albumsongartist = try jetzig.database.Query(.Albumsongsartist)
-                        .findBy(.{ .albumsong_id = albumsong_id.?.id, .artist_id = artist_id.?.id })
-                        .select(.{.id}).execute(env.repo);
-
-                    if (ins_albumsongartist == null)
-                        try jetzig.database.Query(.Albumsongsartist)
-                            .insert(.{ .albumsong_id = albumsong_id.?.id, .artist_id = artist_id.?.id }).execute(env.repo);
-                }
-            }
-
-            for (album_artist_name_buffer, album_artist_id_buffer) |scrobble_album_artist, album_artist_hash| {
-                const artistalbum_id = try jetzig.database.Query(.Artistalbum)
-                    .findBy(.{ .album_id = album_id.?.id, .artist_id = album_artist_hash })
-                    .select(.{.id}).execute(env.repo);
-
-                if (artistalbum_id == null) {
-                    var artist_id = try jetzig.database.Query(.Artist)
-                        .find(album_artist_hash)
-                        .select(.{.id}).execute(env.repo);
-                    if (artist_id == null)
-                        artist_id = try jetzig.database.Query(.Artist)
-                            .insert(.{ .id = album_artist_hash, .name = scrobble_album_artist, .disambiguation = null })
-                            .returning(.{.id}).execute(env.repo);
-                    try jetzig.database.Query(.Artistalbum)
-                        .insert(.{ .album_id = album_id.?.id, .artist_id = artist_id.?.id }).execute(env.repo);
-                }
-            }
-
-            try jetzig.database.Query(.Scrobble)
-                .insert(.{ .albumsong = albumsong_id.?.id, .datetime = scrobble.date }).execute(env.repo);
-        }
-    }
-}
--- a/src/app/views/upload.zig
+++ b/src/app/views/upload.zig
@ -13,6 +13,8 @@ pub fn index(request: *jetzig.Request, data: *jetzig.Data) !jetzig.View {
 }

 pub fn post(request: *jetzig.Request) !jetzig.View {
+    var root = try request.data(.object);
+    var view_params = try root.put("scrobbles", .array);
    const UploadParams = struct {
        source: enum { LFMW, LFMS, Spotify },
        earliest_date: ?[]const u8,
@ -25,21 +27,21 @@ pub fn post(request: *jetzig.Request) !jetzig.View {
    const latest_ts = (try zeit.instant(.{ .source = if (params.latest_date) |ld| .{ .iso8601 = ld } else .now })).timestamp;
    const earliest_ts = (try zeit.instant(.{ .source = if (params.earliest_date) |ed| .{ .iso8601 = ed } else .{ .unix_timestamp = 0 } })).timestamp;

-    var skipped_tracks: u64 = 0;
-    var limited_tracks: u64 = 0;
-
-    const imported_scrobbles: []Data.UnifiedScrobble = switch (params.source) {
-        .LFMS, .Spotify => try Utils.scrobbleIngest(request.allocator, if (try request.file("upload")) |file| file.content else unreachable),
-        .LFMW => blk: {
+    switch (params.source) {
+        .LFMS, .Spotify => {
+            const ctx = try Utils.scrobbleIngest(request, if (try request.file("upload")) |file| file.content else unreachable, .{}, null);
+            for (ctx.rows) |row| try view_params.append(row);
+        },
+        .LFMW => {
            const user_agent: []const u8 = "Zuletzt/0.0.1";
            var client = Client{ .allocator = request.allocator };
            var lastfm_response_buffer = std.ArrayList(u8).init(request.allocator);
-            var scrobble_buffer = std.ArrayList(Data.UnifiedScrobble).init(request.allocator);

            const username = if (params.username) |un| un else "VAOTM";

            var page: usize = 1;
-            //var max_pages: ?usize = null;
+
+            var ctx: ?Utils.IngestContext = null;

            while (true) : (page += 1) {
                if (page > 91) break;
@ -52,128 +54,11 @@ pub fn post(request: *jetzig.Request) !jetzig.View {
                    continue;
                }
                const response_string = try lastfm_response_buffer.toOwnedSlice();
-                const parsed_lastfm_response = try Utils.scrobbleIngest(request.allocator, response_string);
-                //const parsed_lastfm_response = try std.json.parseFromSliceLeaky(Data.LastFMWeb, request.allocator, response_string, .{ .ignore_unknown_fields = true });
-                //if (max_pages == null) max_pages = try std.fmt.parseInt(usize, parsed_lastfm_response.recenttracks.@"@attr".totalPages, 10);
-                try scrobble_buffer.appendSlice(parsed_lastfm_response);
+                ctx = try Utils.scrobbleIngest(request, response_string, .{}, ctx);
            }

-            break :blk try scrobble_buffer.toOwnedSlice();
+            for (ctx.?.rows) |row| try view_params.append(row);
        },
-    };
-
-    var root = try request.data(.object);
-    var view_params = try root.put("scrobbles", .array);
-
-    var job = try request.job("process_scrobbles2");
-    const rule_list = try Utils.loadRules(request.allocator);
-
-    var artists = try job.params.put("artists", .object);
-    var albums = try job.params.put("albums", .object);
-    var tracks = try job.params.put("tracks", .object);
-    var artistalbums = try job.params.put("artistalbums", .object);
-    var albumsongs = try job.params.put("albumsongs", .object);
-    var albumsongsartists = try job.params.put("albumsongsartists", .object);
-
-    var hash_buffer = [_]u8{undefined} ** 20; // A minimum i64 needs 19 digits + 1 negative sign
-
-    appends: for (imported_scrobbles) |scrobble| {
-        if (scrobble.date > latest_ts or scrobble.date < earliest_ts) {
-            limited_tracks += 1;
-            continue :appends;
-        }
-        if (scrobble.playtime != null and scrobble.playtime.? < 30_000 and (scrobble.reason_end == null or !std.mem.eql(u8, scrobble.reason_end.?, "trackdone"))) {
-            skipped_tracks += 1;
-            continue :appends;
-        }
-        if (scrobble.track_artist == null or scrobble.album_artist == null or scrobble.track == null) {
-            skipped_tracks += 1;
-            continue :appends;
-        }
-
-        const filtered_scrobble = Data.Scrobble{
-            .album = scrobble.album.?,
-            .artists_album = &.{scrobble.album_artist.?},
-            .artists_track = &.{scrobble.track_artist.?},
-            .date = scrobble.date,
-            .track = scrobble.track.?,
-        };
-
-        const complete_scrobble = if (rule_list) |rl| try rules.applyScrobbleRule(request.allocator, filtered_scrobble, rl) else filtered_scrobble;
-
-        const row = try Utils.scrobbleToRow(request.allocator, complete_scrobble);
-        try view_params.append(row);
-
-        var stored_artist_hashes = std.ArrayList(u64).init(request.allocator);
-
-        var album_hash_string = std.ArrayList(u8).init(request.allocator);
-        for (complete_scrobble.artists_album) |artist| {
-            try album_hash_string.appendSlice(artist);
-            const artist_hash = std.hash.Fnv1a_64.hash(artist);
-            try stored_artist_hashes.append(artist_hash);
-            const signed_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(artist_hash))});
-            if (artists.get(signed_hash_string) == null) try artists.put(signed_hash_string, artist);
-        }
-
-        try album_hash_string.appendSlice(complete_scrobble.album);
-        const album_hash = std.hash.Fnv1a_64.hash(album_hash_string.items);
-        const signed_album_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(album_hash))});
-        if (albums.get(signed_album_hash_string) == null) try albums.put(signed_album_hash_string, complete_scrobble.album);
-
-        for (stored_artist_hashes.items) |artist_hash| {
-            const artistalbum_hash = pair(artist_hash, album_hash);
-            const signed_artistalbums_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(artistalbum_hash))});
-            if (tracks.get(signed_artistalbums_hash_string) == null) {
-                var artistalbum = try artistalbums.put(signed_artistalbums_hash_string, .object);
-                try artistalbum.put("artist", @as(i64, @bitCast(artist_hash)));
-                try artistalbum.put("album", @as(i64, @bitCast(album_hash)));
-            }
-        }
-
-        // The track hash does not currently include the track artists. Probably not necessary for it to,
-        // but easily can if need be
-        var track_hash_string = std.ArrayList(u8).init(request.allocator);
-        try track_hash_string.appendSlice(complete_scrobble.album);
-        try track_hash_string.appendSlice(complete_scrobble.track);
-        const track_hash = std.hash.Fnv1a_64.hash(track_hash_string.items);
-        const signed_track_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(track_hash))});
-        if (tracks.get(signed_track_hash_string) == null) try tracks.put(signed_track_hash_string, complete_scrobble.track);
-
-        const albumsong_hash = pair(album_hash, track_hash);
-        const signed_albumsong_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(albumsong_hash))});
-        if (albumsongs.get(signed_albumsong_hash_string)) |albumsong| {
-            var albumsong_scrobbles = albumsong.get("scrobbles");
-            try albumsong_scrobbles.?.append(@divFloor(complete_scrobble.date, std.time.ns_per_us)); // MICROSECONDS
-        } else {
-            var albumsong = try albumsongs.put(signed_albumsong_hash_string, .object);
-            try albumsong.put("album", @as(i64, @bitCast(album_hash)));
-            try albumsong.put("song", @as(i64, @bitCast(track_hash)));
-            var albumsong_scrobbles = try albumsong.put("scrobbles", .array);
-            try albumsong_scrobbles.append(@divFloor(complete_scrobble.date, std.time.ns_per_us)); // MICROSECONDS
-        }
-
-        for (complete_scrobble.artists_track) |artist| {
-            const artist_hash = std.hash.Fnv1a_64.hash(artist);
-            const signed_artist_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(artist_hash))});
-            if (artists.get(signed_artist_hash_string) == null) try artists.put(signed_artist_hash_string, artist);
-            const albumsongsartist_hash = pair(albumsong_hash, artist_hash);
-            const signed_albumsongsartist_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(albumsongsartist_hash))});
-            if (albumsongsartists.get(signed_albumsongsartist_hash_string) == null) {
-                var albumsongartist = try albumsongsartists.put(signed_albumsongsartist_hash_string, .object);
-                try albumsongartist.put("albumsong", @as(i64, @bitCast(albumsong_hash)));
-                try albumsongartist.put("artist", @as(i64, @bitCast(artist_hash)));
-            }
-        }
    }
-
-    std.log.debug("Skipped {} tracks\nFiltered {} tracks by date", .{ skipped_tracks, limited_tracks });
-    try job.schedule();
-
    return request.render(.created);
 }
-
-// Cantor Pairing Function
-// https://en.wikipedia.org/wiki/Pairing_function
-fn pair(a: u64, b: u64) u64 {
-    return @mod(@divFloor((a +% b) *% (a +% b +% 1), 2) +% b, std.math.maxInt(u64));
-}
--- a/src/apply_rule.zig
+++ b/src/apply_rule.zig
@ -27,14 +27,25 @@ pub fn applyScrobbleRule(allocator: std.mem.Allocator, scrobble: Data.Scrobble,
            switch (rule.cond_req) {
                .any => switch (cond.match_on) {
                    inline .album, .track => |on| match_found = match_found or match_fn(@field(scrobble, @tagName(on)), cond.match_txt),
-                    inline .artists_album, .artists_track => |on| {
-                        for (@field(scrobble, @tagName(on))) |artist| match_found = match_found or match_fn(artist, cond.match_txt);
+                    .album_artists => {
+                        for (scrobble.album_artists) |artist| match_found = match_found or match_fn(artist, cond.match_txt);
+                    },
+                    .track_artists => {
+                        if (scrobble.track_artists) |ta| {
+                            for (ta) |a| match_found = match_found or match_fn(a, cond.match_txt);
+                        } else match_found = false;
                    },
                },
                .all => switch (cond.match_on) {
                    inline .album, .track => |on| match_found = match_found and match_fn(@field(scrobble, @tagName(on)), cond.match_txt),
-                    inline .artists_album, .artists_track => |on| {
-                        for (@field(scrobble, @tagName(on))) |artist| match_found = match_found and match_fn(artist, cond.match_txt);
+                    .album_artists => {
+                        for (scrobble.album_artists) |artist| match_found = match_found and match_fn(artist, cond.match_txt);
+                    },
+
+                    .track_artists => {
+                        if (scrobble.track_artists) |ta| {
+                            for (ta) |a| match_found = match_found and match_fn(a, cond.match_txt);
+                        } else match_found = false;
                    },
                },
            }
@ -46,17 +57,21 @@ pub fn applyScrobbleRule(allocator: std.mem.Allocator, scrobble: Data.Scrobble,
                        var al = std.ArrayList([]const u8).init(allocator);
                        switch (act.action_on) {
                            .album, .track => unreachable,
-                            inline else => |on| {
-                                try al.appendSlice(@field(output_scrobble, @tagName(on)));
+                            .album_artists => {
+                                try al.appendSlice(scrobble.album_artists);
                                try al.append(act.action_txt);
-                                const list = try al.toOwnedSlice();
-                                @field(output_scrobble, @tagName(on)) = list;
+                                output_scrobble.album_artists = try al.toOwnedSlice();
+                            },
+                            .track_artists => {
+                                if (scrobble.track_artists) |ta| try al.appendSlice(ta);
+                                try al.append(act.action_txt);
+                                output_scrobble.track_artists = try al.toOwnedSlice();
                            },
                        }
                    },
                    .replace => switch (act.action_on) {
                        inline .album, .track => |on| @field(output_scrobble, @tagName(on)) = act.action_txt,
-                        inline .artists_album, .artists_track => |on| {
+                        inline .album_artists, .track_artists => |on| {
                            const artist = try allocator.alloc([]const u8, 1);
                            artist[0] = act.action_txt;
                            @field(output_scrobble, @tagName(on)) = artist;
--- a/src/date_fmt.zig
+++ b/src/date_fmt.zig
@ -1,10 +1,12 @@
 const std = @import("std");
 const zeit = @import("zeit");
 const Data = @import("types.zig");
+const jetzig = @import("jetzig");
+const applyRule = @import("apply_rule.zig").applyScrobbleRule;

 pub fn dateFmt(allocator: std.mem.Allocator, epoch: i64) ![]const u8 {
    var date = std.ArrayList(u8).init(allocator);
-    try (try zeit.instant(.{ .source = .{ .unix_timestamp = @divFloor(epoch, std.time.ns_per_s) } })).time().strftime(date.writer(), "%d %b %Y, %H:%M");
+    try (try zeit.instant(.{ .source = .{ .unix_timestamp = @divFloor(epoch, std.time.ns_per_us) } })).time().strftime(date.writer(), "%d %b %Y, %H:%M");
    return date.items;
 }

@ -23,7 +25,7 @@ pub fn dateCompare(self: *[]const u8, earliest: []const u8, latest: []const u8)

 pub fn scrobbleToRow(allocator: std.mem.Allocator, scrobble: Data.Scrobble) !Data.TableRow {
    var artistlist = std.ArrayList(Data.HyperlinkData).init(allocator);
-    for (scrobble.artists_track) |a| {
+    for (scrobble.track_artists orelse scrobble.album_artists) |a| {
        try artistlist.append(Data.HyperlinkData{ .name = a, .id = 0 });
    }
    return Data.TableRow{
@ -79,17 +81,46 @@ pub fn loadRules(allocator: std.mem.Allocator) !?[]Data.Rule {
    return std.json.parseFromSliceLeaky([]Data.Rule, allocator, rule_file_content, .{}) catch null;
 }

-pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.UnifiedScrobble {
+/// Configuration for scrobble ingestion
+const IngestConfig = struct {
+    /// The earliest date (a unix timestamp in nanoseconds) to accept a scrobble
+    earliest: ?i128 = null,
+    /// The latest date (a unix timestamp in nanoseconds) to accept a scrobble
+    latest: ?i128 = null,
+    /// The minimum number of milliseconds needed to accept a scrobble
+    /// Only affects Spotify scrobbles
+    minimum_playtime: i128 = 30_000,
+    /// The amount of metadata required to accept a scrobble. A track name is always required
+    /// - need_artist: Only an artist name is required to accept a scrobble
+    /// - need_album: Only an album name is required to accept a scrobble
+    /// - need_both: Both an artist name and an album name are required to accept a scrobble
+    /// - need_neither: No extra metadata is required to accept a scrobble
+    null_tolerance: enum { need_artist, need_album, need_both, need_neither } = .need_both,
+};
+
+pub const IngestContext = struct {
+    rows: []Data.TableRow,
+    map: ?std.StringHashMap(std.StringHashMap(std.StringHashMap(?std.BufSet))),
+};
+
+///
+pub fn scrobbleIngest(request: *jetzig.Request, input: []const u8, config: IngestConfig, context: ?IngestContext) !IngestContext {
+    const allocator = request.allocator;
+    var out = std.ArrayList(Data.TableRow).init(allocator);
+    var artists = if (context) |ctx| blk: {
+        try out.appendSlice(ctx.rows);
+        break :blk ctx.map.?;
+    } else std.StringHashMap(std.StringHashMap(std.StringHashMap(?std.BufSet))).init(allocator);
    var scanner = std.json.Scanner.initCompleteInput(allocator, input);
    defer scanner.deinit();

-    var out = std.ArrayList(Data.UnifiedScrobble).init(allocator);
+    const rule_list = try loadRules(allocator);

    array: switch (try scanner.peekNextTokenType()) {
        .array_begin => {
            // Go into array
            _ = try scanner.next();
-            while (try scanner.peekNextTokenType() != .array_end) {
+            scrobble_array: while (try scanner.peekNextTokenType() != .array_end) {
                var r: Data.UnifiedScrobble = undefined;
                // Go into object
                _ = try scanner.next();
@ -97,18 +128,22 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
                    const key_token = try scanner.nextAlloc(allocator, .alloc_if_needed);
                    const field_name = std.meta.stringToEnum(ScrobbleFields, switch (key_token) {
                        inline .string, .allocated_string => |slice| slice,
-                        else => return error.UnexpectedToken,
+                        else => {
+                            std.log.debug("{any}", .{key_token});
+                            return error.UnexpectedToken;
+                        },
                    }) orelse .irrelevant;
                    switch (field_name) {
-                        .@"@attr" => {
-                            freeAllocated(allocator, key_token);
+                        .@"@attr" => |d| {
                            r = undefined;
-                            try scanner.skipUntilStackHeight(3);
+                            try skipScrobble(allocator, &scanner, key_token, d);
                        },
                        .ts, .date => |d| {
                            freeAllocated(allocator, key_token);
-                            const date = switch (d) {
+                            const date: i64 = switch (d) {
                                .date => blk: {
+                                    // We can filter by date via the API, so we will always have results in the
+                                    // specified timeframe through LFMW
                                    if (try scanner.peekNextTokenType() == .object_begin) {
                                        // For now, try to just skip over the object_begin and assume the next field is uts
                                        _ = try scanner.next();
@ -121,7 +156,7 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
                                        const lfw_date = try std.fmt.parseInt(i64, switch (lfw_date_token) {
                                            inline .number, .allocated_number, .string, .allocated_string => |slice| slice,
                                            else => return error.UnexpectedToken,
-                                        }, 10) * std.time.ns_per_s;
+                                        }, 10) * std.time.us_per_s;
                                        freeAllocated(allocator, lfw_date_token);
                                        break :blk lfw_date;
                                    } else {
@ -129,7 +164,11 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
                                        const lfs_date = try std.fmt.parseInt(i64, switch (lfs_date_token) {
                                            inline .number, .allocated_number, .string, .allocated_string => |slice| slice,
                                            else => return error.UnexpectedToken,
-                                        }, 10) * std.time.ns_per_ms;
+                                        }, 10) * std.time.us_per_ms;
+                                        if ((config.earliest != null and lfs_date < config.earliest.?) or (config.latest != null and lfs_date > config.latest.?)) {
+                                            r = undefined;
+                                            try skipScrobble(allocator, &scanner, lfs_date_token, d);
+                                        }
                                        freeAllocated(allocator, lfs_date_token);
                                        break :blk lfs_date;
                                    }
@ -137,12 +176,16 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
                                .ts => blk: {
                                    // This might need to be an alloc_always, but I'm gonna try if_needed first
                                    const spotify_date_token = try scanner.nextAlloc(allocator, .alloc_if_needed);
-                                    const spotify_date = try zeit.instant(.{ .source = .{ .iso8601 = switch (spotify_date_token) {
+                                    const spotify_date = (try zeit.instant(.{ .source = .{ .iso8601 = switch (spotify_date_token) {
                                        inline .string, .allocated_string => |slice| slice,
                                        else => return error.UnexpectedToken,
-                                    } } });
+                                    } } })).timestamp;
+                                    if ((config.earliest != null and spotify_date < config.earliest.?) or (config.latest != null and spotify_date > config.latest.?)) {
+                                        r = undefined;
+                                        try skipScrobble(allocator, &scanner, spotify_date_token, d);
+                                    }
                                    freeAllocated(allocator, spotify_date_token);
-                                    break :blk spotify_date.unixTimestamp() * std.time.ns_per_s;
+                                    break :blk @as(i64, @truncate(@divFloor(spotify_date, std.time.us_per_ms)));
                                },
                                else => unreachable,
                            };
@ -157,7 +200,7 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
                            }, 10);
                            freeAllocated(allocator, spotify_ms_played);
                        },
-                        .master_metadata_track_name, .track, .name => {
+                        .master_metadata_track_name, .track, .name => |d| {
                            freeAllocated(allocator, key_token);
                            const track = try scanner.nextAlloc(allocator, .alloc_always);
                            @field(r, "track") = switch (track) {
@ -165,6 +208,11 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
                                .null => null,
                                else => return error.UnexpectedToken,
                            };
+                            if (r.track == null) {
+                                r = undefined;
+                                try skipScrobble(allocator, &scanner, track, d);
+                                _ = try scanner.next();
+                            }
                        },
                        .master_metadata_album_artist_name, .artist => {
                            freeAllocated(allocator, key_token);
@ -189,7 +237,6 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
                                    else => return error.UnexpectedToken,
                                };
                            };
-                            @field(r, "track_artist") = artist;
                            @field(r, "album_artist") = artist;
                        },
                        .master_metadata_album_album_name, .album => {
@ -234,7 +281,118 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
                }
                // Exit object
                _ = try scanner.next();
-                try out.append(r);
+
+                // Final checks
+                if (r.playtime != null and r.playtime.? < config.minimum_playtime and (r.reason_end == null or !std.mem.eql(u8, r.reason_end.?, "trackdone"))) continue :scrobble_array;
+                switch (config.null_tolerance) {
+                    .need_neither => {},
+                    .need_both => if (r.album == null and r.album_artist == null) continue :scrobble_array,
+                    .need_album => if (r.album == null) continue :scrobble_array,
+                    .need_artist => if (r.album_artist == null) continue :scrobble_array,
+                }
+                var scr = Data.Scrobble{
+                    .track = r.track.?,
+                    .album = r.album orelse "Unknown Album",
+                    .track_artists = null,
+                    .album_artists = &.{r.album_artist orelse "Unknown Artist"},
+                    .date = r.date,
+                };
+
+                if (rule_list) |rules| scr = try applyRule(allocator, scr, rules);
+
+                // Try not to have an aneurysm (impossible challenge 2025)
+                const artist: []const u8, const single_artist_flag: bool = if (scr.album_artists.len == 1) .{ scr.album_artists[0], true } else blk: {
+                    var combined = try std.ArrayListUnmanaged(u8).initCapacity(allocator, 0);
+                    for (scr.album_artists) |aa| try combined.appendSlice(allocator, aa);
+                    break :blk .{ try combined.toOwnedSlice(allocator), false };
+                };
+
+                const premade_hashes = try scr.asHash(allocator);
+
+                // I'm doing all the hashing in the jobs, meaning we hash more than we need to
+                // If I get bored maybe I'll work on storing them instead
+                const hm_artist_info = try artists.getOrPut(artist);
+                if (!hm_artist_info.found_existing) {
+                    hm_artist_info.value_ptr.* = std.StringHashMap(std.StringHashMap(?std.BufSet)).init(allocator);
+                    if (single_artist_flag) {
+                        var add_artist = try request.job("add_artist");
+                        try add_artist.params.put("artist", artist);
+                        try add_artist.schedule();
+                    } else {
+                        for (scr.album_artists) |a| {
+                            const hm_ind_artist_info = try artists.getOrPut(a);
+                            if (!hm_ind_artist_info.found_existing) {
+                                hm_ind_artist_info.value_ptr.* = std.StringHashMap(std.StringHashMap(?std.BufSet)).init(allocator);
+                                var add_artist = try request.job("add_artist");
+                                try add_artist.params.put("artist", a);
+                                try add_artist.schedule();
+                            }
+                        }
+                    }
+                }
+                const hm_album_info = try hm_artist_info.value_ptr.*.getOrPut(scr.album);
+                if (!hm_album_info.found_existing) {
+                    const album_query = try jetzig.database.Query(.Album)
+                        .find(@as(i64, @bitCast(premade_hashes[0]))).execute(request.repo);
+
+                    if (album_query == null) {
+                        try jetzig.database.Query(.Album)
+                            .insert(.{ .id = @as(i64, @bitCast(premade_hashes[0])), .name = scr.album, .length = null })
+                            .execute(request.repo);
+                    }
+                    hm_album_info.value_ptr.* = std.StringHashMap(?std.BufSet).init(allocator);
+                    var add_album = try request.job("add_album");
+                    try add_album.params.put("album_hash", premade_hashes[0]);
+                    try add_album.params.put("artists", scr.album_artists);
+                    //if (!single_artist_flag) add_album.put("combined", artist);
+                    try add_album.schedule();
+                }
+
+                const hm_song_info = try hm_album_info.value_ptr.*.getOrPut(scr.track);
+                if (!hm_song_info.found_existing) {
+                    const track_query = try jetzig.database.Query(.Song)
+                        .find(@as(i64, @bitCast(premade_hashes[1]))).execute(request.repo);
+
+                    if (track_query == null) {
+                        try jetzig.database.Query(.Song)
+                            .insert(.{ .id = @as(i64, @bitCast(premade_hashes[1])), .name = scr.track, .length = null, .hidden = false })
+                            .execute(request.repo);
+                    }
+                    const as_query = try jetzig.database.Query(.Albumsong)
+                        .find(@as(i64, @bitCast(premade_hashes[2]))).execute(request.repo);
+
+                    if (as_query == null) {
+                        try jetzig.database.Query(.Albumsong)
+                            .insert(.{ .id = @as(i64, @bitCast(premade_hashes[2])), .song_id = @as(i64, @bitCast(premade_hashes[1])), .album_id = @as(i64, @bitCast(premade_hashes[0])) })
+                            .execute(request.repo);
+                    }
+                    hm_song_info.value_ptr.* = null;
+                    var add_song = try request.job("add_song");
+                    try add_song.params.put("as_hash", premade_hashes[2]);
+                    //add_song.put("album", scr.album);
+                    if (scr.track_artists) |track_artists| {
+                        try add_song.params.put("track_artists", track_artists);
+                        if (hm_song_info.value_ptr.* == null) hm_song_info.value_ptr.* = std.BufSet.init(allocator);
+                        for (track_artists) |ta| {
+                            try hm_song_info.value_ptr.*.?.insert(ta);
+                            const hm_ta_info = try artists.getOrPut(ta);
+                            if (!hm_ta_info.found_existing) {
+                                hm_ta_info.value_ptr.* = std.StringHashMap(std.StringHashMap(?std.BufSet)).init(allocator);
+                                var add_artist = try request.job("add_artist");
+                                try add_artist.params.put("artist", ta);
+                                try add_artist.schedule();
+                            }
+                        }
+                    }
+                    try add_song.params.put("album_artists", scr.album_artists);
+                    //if (!single_artist_flag) add_song.put("combined", artist);
+                    try add_song.schedule();
+                }
+                try jetzig.database.Query(.Scrobble).insert(.{ .albumsong = @as(i64, @bitCast(premade_hashes[2])), .datetime = scr.date })
+                    .execute(request.repo);
+
+                const b = try scrobbleToRow(allocator, scr);
+                try out.append(b);
            }
        },
        // LastFM(stats)
@ -282,7 +440,7 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
        else => return error.UnexpectedToken,
    }
    const scrobbles = try out.toOwnedSlice();
-    return scrobbles;
+    return IngestContext{ .map = artists, .rows = scrobbles };
 }

 fn freeAllocated(allocator: std.mem.Allocator, token: std.json.Token) void {
@ -293,3 +451,24 @@ fn freeAllocated(allocator: std.mem.Allocator, token: std.json.Token) void {
        else => {},
    }
 }
+
+// Cantor Pairing Function
+// https://en.wikipedia.org/wiki/Pairing_function
+
+fn skipScrobble(allocator: std.mem.Allocator, scanner: *std.json.Scanner, token: std.json.Token, field: ScrobbleFields) !void {
+    freeAllocated(allocator, token);
+    try scanner.skipUntilStackHeight(switch (field) {
+        // Spotify specific fields
+        .ts, .master_metadata_album_album_name, .master_metadata_album_artist_name, .master_metadata_track_name, .ms_played, .reason_end => 1,
+        // LastFM Stats specific field
+        .track => 2,
+        // LastFM Web specific fields
+        .name, .@"@attr" => 3,
+        // Fields shared by LastFM Stats and LastFM Web: album, artist, date (although date is never invalid for LastFM Web)
+        else => switch (scanner.stackHeight()) {
+            5 => 3, // Five levels deep => LastFM Web (all of those fields are fortunately objects / same stack height)
+            3 => 2, // Three levels deep => LastFM stats
+            else => unreachable,
+        },
+    });
+}
--- a/src/types.zig
+++ b/src/types.zig
@ -3,7 +3,7 @@ const std = @import("std");
 pub const UnifiedScrobble = struct {
    track: ?[]const u8,
    // These can be null per Spotify
-    track_artist: ?[]const u8,
+    //track_artist: ?[]const u8, // As far as I'm aware, there are no services that provide separate track/album artist lists
    album: ?[]const u8,
    album_artist: ?[]const u8,
    date: i64,
@ -12,114 +12,53 @@ pub const UnifiedScrobble = struct {
    reason_end: ?[]const u8 = null,
 };

-pub const ImportedScrobbles = union(ScrobbleSources) {
-    LastFMStats: []IgnorantScrobble,
-    LastFMWeb: []LastFMWebScrobble,
-    Spotify: []SpotifyScrobble,
-};
-
 const ScrobbleSources = enum {
    LastFMStats,
    LastFMWeb,
    Spotify,
 };

-pub const IgnorantScrobble = struct {
-    track: []const u8,
-    artist: []const u8,
-    album: []const u8 = "Not Provided",
-    //albumId: []const u8,
-    date: i64,
-};
+fn hashAndSign(a: []const u8) !i64 {
+    return @as(i64, @bitCast(std.hash.Fnv1a_64.hash(a)));
+}
+
+fn pair(a: u64, b: u64) u64 {
+    return @mod(@divFloor((a +% b) *% (a +% b +% 1), 2) +% b, std.math.maxInt(u64));
+}

 pub const Scrobble = struct {
    track: []const u8,
-    artists_track: []const []const u8,
-    album: []const u8 = "",
-    artists_album: []const []const u8,
+    track_artists: ?[]const []const u8,
+    album: []const u8 = "Unknown Album",
+    album_artists: []const []const u8,
    date: i64,
-};

-// From lastfmstats.com
-pub const LastFMStats = struct { username: []const u8, scrobbles: []IgnorantScrobble };
+    pub fn asHash(self: *Scrobble, allocator: std.mem.Allocator) ![3]u64 {
+        var string_buf = try std.ArrayListUnmanaged(u8).initCapacity(allocator, 0);

-// I derived whether or not these values were optional from searching
-// the respective fields for null in Vim, so there may be some fields
-// that can be optional that I haven't run into yet
-pub const SpotifyScrobble = struct {
-    ts: []const u8,
-    //username: []const u8,
-    //platform: []const u8,
-    ms_played: u64,
-    //conn_country: []const u8,
-    //ip_addr_decrypted: ?[]const u8,
-    //user_agent_decrypted: ?[]const u8,
-    master_metadata_track_name: ?[]const u8,
-    master_metadata_album_artist_name: ?[]const u8,
-    master_metadata_album_album_name: ?[]const u8,
-    //spotify_track_uri: ?[]const u8,
-    //episode_name: ?[]const u8,
-    //episode_show_name: ?[]const u8,
-    //spotify_episode_uri: ?[]const u8,
-    reason_start: []const u8,
-    reason_end: ?[]const u8,
-    //shuffle: bool,
-    skipped: ?bool,
-    //offline: bool,
-    offline_timestamp: u64,
-    //incognito_mode: ?bool,
-};
+        for (self.album_artists) |artist| try string_buf.appendSlice(allocator, artist);

-pub const LastFMWeb = struct {
-    recenttracks: struct {
-        track: []LastFMWebScrobble,
-        @"@attr": LastFMWebQueryInfo,
-    },
-};
+        try string_buf.appendSlice(allocator, self.album);
+        const a = std.hash.Fnv1a_64.hash(string_buf.items);

-pub const LastFMWebHyperlinkData = struct {
-    mbid: []const u8,
-    @"#text": []const u8,
-};
+        try string_buf.appendSlice(allocator, self.track);
+        const s = std.hash.Fnv1a_64.hash(try string_buf.toOwnedSlice(allocator));

-pub const LastFMWebScrobble = struct {
-    artist: LastFMWebHyperlinkData,
-    album: ?LastFMWebHyperlinkData = null,
-    name: []const u8,
-    mbid: ?[]const u8 = null,
-    image: []struct {
-        size: []const u8,
-        @"#text": []const u8,
-    },
-    date: ?struct {
-        uts: []const u8,
-        @"#text": []const u8,
-    } = null,
-    @"@attr": ?struct {
-        nowplaying: []const u8,
-    } = null,
-    url: []const u8,
-};
-
-pub const LastFMWebQueryInfo = struct {
-    perPage: []const u8,
-    totalPages: []const u8,
-    page: []const u8,
-    user: []const u8,
-    total: []const u8,
+        return .{ a, s, pair(a, s) };
+    }
 };

 pub const Rule = struct {
    name: []const u8,
    cond_req: enum { any, all },
    conditionals: []struct {
-        match_on: enum { artists_album, artists_track, album, track },
+        match_on: enum { album_artists, track_artists, album, track },
        match_cond: enum { is, contains },
        match_txt: []const u8,
    },
    actions: []struct {
        action: enum { replace, add },
-        action_on: enum { artists_album, album, artists_track, track },
+        action_on: enum { album_artists, album, track_artists, track },
        action_txt: []const u8,
    },
 };