diff --git a/src/app/jobs/add_album.zig b/src/app/jobs/add_album.zig new file mode 100644 index 0000000..a5d46a5 --- /dev/null +++ b/src/app/jobs/add_album.zig @@ -0,0 +1,31 @@ +const std = @import("std"); +const jetzig = @import("jetzig"); + +// The `run` function for a job is invoked every time the job is processed by a queue worker +// (or by the Jetzig server if the job is processed in-line). +// +// Arguments: +// * allocator: Arena allocator for use during the job execution process. +// * params: Params assigned to a job (from a request, values added to response data). +// * env: Provides the following fields: +// - logger: Logger attached to the same stream as the Jetzig server. +// - environment: Enum of `{ production, development }`. +pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig.jobs.JobEnv) !void { + _ = allocator; + const artists = params.getT(.array, "artists").?.items(); + const album_id = try (params.get("album_hash").?).coerce(u64); + + for (artists) |artist| { + const artist_name = try artist.coerce([]const u8); + const artist_id = std.hash.Fnv1a_64.hash(artist_name); + const paired = @as(i64, @bitCast(@mod(@divFloor((artist_id +% album_id) *% (artist_id +% album_id +% 1), 2) +% album_id, std.math.maxInt(u64)))); + const aa_query = try jetzig.database.Query(.Artistalbum) + .find(paired).execute(env.repo); + + if (aa_query == null) { + try jetzig.database.Query(.Artistalbum) + .insert(.{ .id = paired, .artist_id = @as(i64, @bitCast(artist_id)), .album_id = @as(i64, @bitCast(album_id)) }) + .execute(env.repo); + } + } +} diff --git a/src/app/jobs/add_artist.zig b/src/app/jobs/add_artist.zig new file mode 100644 index 0000000..06b5b25 --- /dev/null +++ b/src/app/jobs/add_artist.zig @@ -0,0 +1,25 @@ +const std = @import("std"); +const jetzig = @import("jetzig"); + +// The `run` function for a job is invoked every time the job is processed by a queue worker +// (or by the Jetzig server if the job is processed in-line). +// +// Arguments: +// * allocator: Arena allocator for use during the job execution process. +// * params: Params assigned to a job (from a request, values added to response data). +// * env: Provides the following fields: +// - logger: Logger attached to the same stream as the Jetzig server. +// - environment: Enum of `{ production, development }`. +pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig.jobs.JobEnv) !void { + _ = allocator; + const artist = params.getT(.string, "artist").?; + const id = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(artist))); + const artist_query = try jetzig.database.Query(.Artist) + .find(id).execute(env.repo); + + if (artist_query == null) { + try jetzig.database.Query(.Artist) + .insert(.{ .id = id, .name = artist }) + .execute(env.repo); + } +} diff --git a/src/app/jobs/add_song.zig b/src/app/jobs/add_song.zig new file mode 100644 index 0000000..6f7c504 --- /dev/null +++ b/src/app/jobs/add_song.zig @@ -0,0 +1,34 @@ +const std = @import("std"); +const jetzig = @import("jetzig"); + +// The `run` function for a job is invoked every time the job is processed by a queue worker +// (or by the Jetzig server if the job is processed in-line). +// +// Arguments: +// * allocator: Arena allocator for use during the job execution process. +// * params: Params assigned to a job (from a request, values added to response data). +// * env: Provides the following fields: +// - logger: Logger attached to the same stream as the Jetzig server. +// - environment: Enum of `{ production, development }`. +pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig.jobs.JobEnv) !void { + _ = allocator; + //const album = params.getT(.string, "album").?; + const as_id = try (params.get("as_hash").?).coerce(u64); + const album_artists = params.getT(.array, "album_artists").?.items(); + // Will use this eventually, but not now + // const track_artists = params.getT(.array,"track_artists"); + + for (album_artists) |artist| { + const artist_name = try artist.coerce([]const u8); + const artist_id = std.hash.Fnv1a_64.hash(artist_name); + const asa_id = @as(i64, @bitCast(@mod(@divFloor((as_id +% artist_id) *% (as_id +% artist_id +% 1), 2) +% artist_id, std.math.maxInt(u64)))); + const asa_query = try jetzig.database.Query(.Albumsongsartist) + .find(asa_id).execute(env.repo); + + if (asa_query == null) { + try jetzig.database.Query(.Albumsongsartist) + .insert(.{ .id = asa_id, .albumsong_id = @as(i64, @bitCast(as_id)), .artist_id = @as(i64, @bitCast(artist_id)) }) + .execute(env.repo); + } + } +} diff --git a/src/app/jobs/process_scrobbles.zig b/src/app/jobs/process_scrobbles.zig deleted file mode 100644 index 770bde7..0000000 --- a/src/app/jobs/process_scrobbles.zig +++ /dev/null @@ -1,134 +0,0 @@ -const std = @import("std"); -const jetzig = @import("jetzig"); -const jetquery = @import("jetzig").jetquery; -const Data = @import("../../types.zig"); -const rules = @import("../../apply_rule.zig"); - -// The `run` function for a job is invoked every time the job is processed by a queue worker -// (or by the Jetzig server if the job is processed in-line). -// -// Arguments: -// * allocator: Arena allocator for use during the job execution process. -// * params: Params assigned to a job (from a request, values added to response data). -// * env: Provides the following fields: -// - logger: Logger attached to the same stream as the Jetzig server. -// - environment: Enum of `{ production, development }`. -pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig.jobs.JobEnv) !void { - //_ = env; - if (params.getT(.array, "scrobbles")) |scrobbles| { - for (scrobbles.items()) |item| { - - // Probably want to include artist name here, but not sure how to yet - - const track_artists = item.getT(.array, "artists_track").?.items(); - const album_artists = item.getT(.array, "artists_album").?.items(); - - var track_artist_name_buffer = try allocator.alloc([]const u8, track_artists.len); - var album_artist_name_buffer = try allocator.alloc([]const u8, album_artists.len); - var track_artist_id_buffer = try allocator.alloc(i64, track_artists.len); - var album_artist_id_buffer = try allocator.alloc(i64, album_artists.len); - - const scrobble: Data.Scrobble = .{ - .track = item.getT(.string, "track").?, - .artists_track = track_artist_name_buffer, - .album = item.getT(.string, "album") orelse "", - .artists_album = album_artist_name_buffer, - .date = @as(i64, @truncate(item.getT(.integer, "date").?)), - }; - - var album_hash_string = std.ArrayList(u8).init(allocator); - var track_hash_string = std.ArrayList(u8).init(allocator); - - // I theoretically don't need this for loop - for (track_artists, 0..track_artists.len) |artist, i| { - const artist_name = try artist.coerce([]const u8); - track_artist_name_buffer[i] = artist_name; - track_artist_id_buffer[i] = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(artist_name))); - } - - for (album_artists, 0..album_artists.len) |artist, i| { - const artist_name = try artist.coerce([]const u8); - album_artist_name_buffer[i] = artist_name; - album_artist_id_buffer[i] = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(artist_name))); - try album_hash_string.appendSlice(artist_name); - } - - try album_hash_string.appendSlice(scrobble.album); - try track_hash_string.appendSlice(scrobble.album); - const album_hash = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(album_hash_string.items))); - try track_hash_string.appendSlice(scrobble.track); - const track_hash = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(track_hash_string.items))); - - var albumsong_id = try jetzig.database.Query(.Albumsong) - .find(album_hash ^ track_hash) - .select(.{.id}).execute(env.repo); - - var album_id = try jetzig.database.Query(.Album) - .find(album_hash) - .select(.{.id}).execute(env.repo); - - for (track_artist_name_buffer, track_artist_id_buffer) |scrobble_track_artist, track_artist_hash| { - var artist_id = try jetzig.database.Query(.Artist) - .find(track_artist_hash) - .select(.{.id}).execute(env.repo); - - if (artist_id == null) - artist_id = try jetzig.database.Query(.Artist) - .insert(.{ .id = track_artist_hash, .name = scrobble_track_artist, .disambiguation = null }) - .returning(.{.id}).execute(env.repo); - - if (albumsong_id == null) { - var track_id = try jetzig.database.Query(.Song) - .find(track_hash) - .select(.{.id}).execute(env.repo); - - if (track_id == null) - track_id = try jetzig.database.Query(.Song) - .insert(.{ .id = track_hash, .name = scrobble.track, .length = null, .hidden = false }) - .returning(.{.id}).execute(env.repo); - - if (album_id == null) - album_id = try jetzig.database.Query(.Album) - .insert(.{ .id = album_hash, .name = scrobble.album, .length = null }) - .returning(.{.id}).execute(env.repo); - - albumsong_id = try jetzig.database.Query(.Albumsong) - .insert(.{ .song_id = track_id.?.id, .album_id = album_id.?.id }) - .returning(.{.id}).execute(env.repo); - - try jetzig.database.Query(.Albumsongsartist) - .insert(.{ .albumsong_id = albumsong_id.?.id, .artist_id = artist_id.?.id }).execute(env.repo); - } else { - const ins_albumsongartist = try jetzig.database.Query(.Albumsongsartist) - .findBy(.{ .albumsong_id = albumsong_id.?.id, .artist_id = artist_id.?.id }) - .select(.{.id}).execute(env.repo); - - if (ins_albumsongartist == null) - try jetzig.database.Query(.Albumsongsartist) - .insert(.{ .albumsong_id = albumsong_id.?.id, .artist_id = artist_id.?.id }).execute(env.repo); - } - } - - for (album_artist_name_buffer, album_artist_id_buffer) |scrobble_album_artist, album_artist_hash| { - const artistalbum_id = try jetzig.database.Query(.Artistalbum) - .findBy(.{ .album_id = album_id.?.id, .artist_id = album_artist_hash }) - .select(.{.id}).execute(env.repo); - - if (artistalbum_id == null) { - var artist_id = try jetzig.database.Query(.Artist) - .find(album_artist_hash) - .select(.{.id}).execute(env.repo); - if (artist_id == null) - artist_id = try jetzig.database.Query(.Artist) - .insert(.{ .id = album_artist_hash, .name = scrobble_album_artist, .disambiguation = null }) - .returning(.{.id}).execute(env.repo); - try jetzig.database.Query(.Artistalbum) - .insert(.{ .album_id = album_id.?.id, .artist_id = artist_id.?.id }).execute(env.repo); - } - } - - try jetzig.database.Query(.Scrobble) - .insert(.{ .albumsong = albumsong_id.?.id, .datetime = scrobble.date }).execute(env.repo); - } - } -} diff --git a/src/app/views/upload.zig b/src/app/views/upload.zig index 740fa8b..27b2e78 100644 --- a/src/app/views/upload.zig +++ b/src/app/views/upload.zig @@ -13,6 +13,8 @@ pub fn index(request: *jetzig.Request, data: *jetzig.Data) !jetzig.View { } pub fn post(request: *jetzig.Request) !jetzig.View { + var root = try request.data(.object); + var view_params = try root.put("scrobbles", .array); const UploadParams = struct { source: enum { LFMW, LFMS, Spotify }, earliest_date: ?[]const u8, @@ -25,21 +27,21 @@ pub fn post(request: *jetzig.Request) !jetzig.View { const latest_ts = (try zeit.instant(.{ .source = if (params.latest_date) |ld| .{ .iso8601 = ld } else .now })).timestamp; const earliest_ts = (try zeit.instant(.{ .source = if (params.earliest_date) |ed| .{ .iso8601 = ed } else .{ .unix_timestamp = 0 } })).timestamp; - var skipped_tracks: u64 = 0; - var limited_tracks: u64 = 0; - - const imported_scrobbles: []Data.UnifiedScrobble = switch (params.source) { - .LFMS, .Spotify => try Utils.scrobbleIngest(request.allocator, if (try request.file("upload")) |file| file.content else unreachable), - .LFMW => blk: { + switch (params.source) { + .LFMS, .Spotify => { + const ctx = try Utils.scrobbleIngest(request, if (try request.file("upload")) |file| file.content else unreachable, .{}, null); + for (ctx.rows) |row| try view_params.append(row); + }, + .LFMW => { const user_agent: []const u8 = "Zuletzt/0.0.1"; var client = Client{ .allocator = request.allocator }; var lastfm_response_buffer = std.ArrayList(u8).init(request.allocator); - var scrobble_buffer = std.ArrayList(Data.UnifiedScrobble).init(request.allocator); const username = if (params.username) |un| un else "VAOTM"; var page: usize = 1; - //var max_pages: ?usize = null; + + var ctx: ?Utils.IngestContext = null; while (true) : (page += 1) { if (page > 91) break; @@ -52,128 +54,11 @@ pub fn post(request: *jetzig.Request) !jetzig.View { continue; } const response_string = try lastfm_response_buffer.toOwnedSlice(); - const parsed_lastfm_response = try Utils.scrobbleIngest(request.allocator, response_string); - //const parsed_lastfm_response = try std.json.parseFromSliceLeaky(Data.LastFMWeb, request.allocator, response_string, .{ .ignore_unknown_fields = true }); - //if (max_pages == null) max_pages = try std.fmt.parseInt(usize, parsed_lastfm_response.recenttracks.@"@attr".totalPages, 10); - try scrobble_buffer.appendSlice(parsed_lastfm_response); + ctx = try Utils.scrobbleIngest(request, response_string, .{}, ctx); } - break :blk try scrobble_buffer.toOwnedSlice(); + for (ctx.?.rows) |row| try view_params.append(row); }, - }; - - var root = try request.data(.object); - var view_params = try root.put("scrobbles", .array); - - var job = try request.job("process_scrobbles2"); - const rule_list = try Utils.loadRules(request.allocator); - - var artists = try job.params.put("artists", .object); - var albums = try job.params.put("albums", .object); - var tracks = try job.params.put("tracks", .object); - var artistalbums = try job.params.put("artistalbums", .object); - var albumsongs = try job.params.put("albumsongs", .object); - var albumsongsartists = try job.params.put("albumsongsartists", .object); - - var hash_buffer = [_]u8{undefined} ** 20; // A minimum i64 needs 19 digits + 1 negative sign - - appends: for (imported_scrobbles) |scrobble| { - if (scrobble.date > latest_ts or scrobble.date < earliest_ts) { - limited_tracks += 1; - continue :appends; - } - if (scrobble.playtime != null and scrobble.playtime.? < 30_000 and (scrobble.reason_end == null or !std.mem.eql(u8, scrobble.reason_end.?, "trackdone"))) { - skipped_tracks += 1; - continue :appends; - } - if (scrobble.track_artist == null or scrobble.album_artist == null or scrobble.track == null) { - skipped_tracks += 1; - continue :appends; - } - - const filtered_scrobble = Data.Scrobble{ - .album = scrobble.album.?, - .artists_album = &.{scrobble.album_artist.?}, - .artists_track = &.{scrobble.track_artist.?}, - .date = scrobble.date, - .track = scrobble.track.?, - }; - - const complete_scrobble = if (rule_list) |rl| try rules.applyScrobbleRule(request.allocator, filtered_scrobble, rl) else filtered_scrobble; - - const row = try Utils.scrobbleToRow(request.allocator, complete_scrobble); - try view_params.append(row); - - var stored_artist_hashes = std.ArrayList(u64).init(request.allocator); - - var album_hash_string = std.ArrayList(u8).init(request.allocator); - for (complete_scrobble.artists_album) |artist| { - try album_hash_string.appendSlice(artist); - const artist_hash = std.hash.Fnv1a_64.hash(artist); - try stored_artist_hashes.append(artist_hash); - const signed_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(artist_hash))}); - if (artists.get(signed_hash_string) == null) try artists.put(signed_hash_string, artist); - } - - try album_hash_string.appendSlice(complete_scrobble.album); - const album_hash = std.hash.Fnv1a_64.hash(album_hash_string.items); - const signed_album_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(album_hash))}); - if (albums.get(signed_album_hash_string) == null) try albums.put(signed_album_hash_string, complete_scrobble.album); - - for (stored_artist_hashes.items) |artist_hash| { - const artistalbum_hash = pair(artist_hash, album_hash); - const signed_artistalbums_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(artistalbum_hash))}); - if (tracks.get(signed_artistalbums_hash_string) == null) { - var artistalbum = try artistalbums.put(signed_artistalbums_hash_string, .object); - try artistalbum.put("artist", @as(i64, @bitCast(artist_hash))); - try artistalbum.put("album", @as(i64, @bitCast(album_hash))); - } - } - - // The track hash does not currently include the track artists. Probably not necessary for it to, - // but easily can if need be - var track_hash_string = std.ArrayList(u8).init(request.allocator); - try track_hash_string.appendSlice(complete_scrobble.album); - try track_hash_string.appendSlice(complete_scrobble.track); - const track_hash = std.hash.Fnv1a_64.hash(track_hash_string.items); - const signed_track_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(track_hash))}); - if (tracks.get(signed_track_hash_string) == null) try tracks.put(signed_track_hash_string, complete_scrobble.track); - - const albumsong_hash = pair(album_hash, track_hash); - const signed_albumsong_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(albumsong_hash))}); - if (albumsongs.get(signed_albumsong_hash_string)) |albumsong| { - var albumsong_scrobbles = albumsong.get("scrobbles"); - try albumsong_scrobbles.?.append(@divFloor(complete_scrobble.date, std.time.ns_per_us)); // MICROSECONDS - } else { - var albumsong = try albumsongs.put(signed_albumsong_hash_string, .object); - try albumsong.put("album", @as(i64, @bitCast(album_hash))); - try albumsong.put("song", @as(i64, @bitCast(track_hash))); - var albumsong_scrobbles = try albumsong.put("scrobbles", .array); - try albumsong_scrobbles.append(@divFloor(complete_scrobble.date, std.time.ns_per_us)); // MICROSECONDS - } - - for (complete_scrobble.artists_track) |artist| { - const artist_hash = std.hash.Fnv1a_64.hash(artist); - const signed_artist_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(artist_hash))}); - if (artists.get(signed_artist_hash_string) == null) try artists.put(signed_artist_hash_string, artist); - const albumsongsartist_hash = pair(albumsong_hash, artist_hash); - const signed_albumsongsartist_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(albumsongsartist_hash))}); - if (albumsongsartists.get(signed_albumsongsartist_hash_string) == null) { - var albumsongartist = try albumsongsartists.put(signed_albumsongsartist_hash_string, .object); - try albumsongartist.put("albumsong", @as(i64, @bitCast(albumsong_hash))); - try albumsongartist.put("artist", @as(i64, @bitCast(artist_hash))); - } - } } - - std.log.debug("Skipped {} tracks\nFiltered {} tracks by date", .{ skipped_tracks, limited_tracks }); - try job.schedule(); - return request.render(.created); } - -// Cantor Pairing Function -// https://en.wikipedia.org/wiki/Pairing_function -fn pair(a: u64, b: u64) u64 { - return @mod(@divFloor((a +% b) *% (a +% b +% 1), 2) +% b, std.math.maxInt(u64)); -} diff --git a/src/apply_rule.zig b/src/apply_rule.zig index fc4c0bc..0d357a6 100644 --- a/src/apply_rule.zig +++ b/src/apply_rule.zig @@ -27,14 +27,25 @@ pub fn applyScrobbleRule(allocator: std.mem.Allocator, scrobble: Data.Scrobble, switch (rule.cond_req) { .any => switch (cond.match_on) { inline .album, .track => |on| match_found = match_found or match_fn(@field(scrobble, @tagName(on)), cond.match_txt), - inline .artists_album, .artists_track => |on| { - for (@field(scrobble, @tagName(on))) |artist| match_found = match_found or match_fn(artist, cond.match_txt); + .album_artists => { + for (scrobble.album_artists) |artist| match_found = match_found or match_fn(artist, cond.match_txt); + }, + .track_artists => { + if (scrobble.track_artists) |ta| { + for (ta) |a| match_found = match_found or match_fn(a, cond.match_txt); + } else match_found = false; }, }, .all => switch (cond.match_on) { inline .album, .track => |on| match_found = match_found and match_fn(@field(scrobble, @tagName(on)), cond.match_txt), - inline .artists_album, .artists_track => |on| { - for (@field(scrobble, @tagName(on))) |artist| match_found = match_found and match_fn(artist, cond.match_txt); + .album_artists => { + for (scrobble.album_artists) |artist| match_found = match_found and match_fn(artist, cond.match_txt); + }, + + .track_artists => { + if (scrobble.track_artists) |ta| { + for (ta) |a| match_found = match_found and match_fn(a, cond.match_txt); + } else match_found = false; }, }, } @@ -46,17 +57,21 @@ pub fn applyScrobbleRule(allocator: std.mem.Allocator, scrobble: Data.Scrobble, var al = std.ArrayList([]const u8).init(allocator); switch (act.action_on) { .album, .track => unreachable, - inline else => |on| { - try al.appendSlice(@field(output_scrobble, @tagName(on))); + .album_artists => { + try al.appendSlice(scrobble.album_artists); try al.append(act.action_txt); - const list = try al.toOwnedSlice(); - @field(output_scrobble, @tagName(on)) = list; + output_scrobble.album_artists = try al.toOwnedSlice(); + }, + .track_artists => { + if (scrobble.track_artists) |ta| try al.appendSlice(ta); + try al.append(act.action_txt); + output_scrobble.track_artists = try al.toOwnedSlice(); }, } }, .replace => switch (act.action_on) { inline .album, .track => |on| @field(output_scrobble, @tagName(on)) = act.action_txt, - inline .artists_album, .artists_track => |on| { + inline .album_artists, .track_artists => |on| { const artist = try allocator.alloc([]const u8, 1); artist[0] = act.action_txt; @field(output_scrobble, @tagName(on)) = artist; diff --git a/src/date_fmt.zig b/src/date_fmt.zig index b8838ad..4c40a25 100644 --- a/src/date_fmt.zig +++ b/src/date_fmt.zig @@ -1,10 +1,12 @@ const std = @import("std"); const zeit = @import("zeit"); const Data = @import("types.zig"); +const jetzig = @import("jetzig"); +const applyRule = @import("apply_rule.zig").applyScrobbleRule; pub fn dateFmt(allocator: std.mem.Allocator, epoch: i64) ![]const u8 { var date = std.ArrayList(u8).init(allocator); - try (try zeit.instant(.{ .source = .{ .unix_timestamp = @divFloor(epoch, std.time.ns_per_s) } })).time().strftime(date.writer(), "%d %b %Y, %H:%M"); + try (try zeit.instant(.{ .source = .{ .unix_timestamp = @divFloor(epoch, std.time.ns_per_us) } })).time().strftime(date.writer(), "%d %b %Y, %H:%M"); return date.items; } @@ -23,7 +25,7 @@ pub fn dateCompare(self: *[]const u8, earliest: []const u8, latest: []const u8) pub fn scrobbleToRow(allocator: std.mem.Allocator, scrobble: Data.Scrobble) !Data.TableRow { var artistlist = std.ArrayList(Data.HyperlinkData).init(allocator); - for (scrobble.artists_track) |a| { + for (scrobble.track_artists orelse scrobble.album_artists) |a| { try artistlist.append(Data.HyperlinkData{ .name = a, .id = 0 }); } return Data.TableRow{ @@ -79,17 +81,46 @@ pub fn loadRules(allocator: std.mem.Allocator) !?[]Data.Rule { return std.json.parseFromSliceLeaky([]Data.Rule, allocator, rule_file_content, .{}) catch null; } -pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.UnifiedScrobble { +/// Configuration for scrobble ingestion +const IngestConfig = struct { + /// The earliest date (a unix timestamp in nanoseconds) to accept a scrobble + earliest: ?i128 = null, + /// The latest date (a unix timestamp in nanoseconds) to accept a scrobble + latest: ?i128 = null, + /// The minimum number of milliseconds needed to accept a scrobble + /// Only affects Spotify scrobbles + minimum_playtime: i128 = 30_000, + /// The amount of metadata required to accept a scrobble. A track name is always required + /// - need_artist: Only an artist name is required to accept a scrobble + /// - need_album: Only an album name is required to accept a scrobble + /// - need_both: Both an artist name and an album name are required to accept a scrobble + /// - need_neither: No extra metadata is required to accept a scrobble + null_tolerance: enum { need_artist, need_album, need_both, need_neither } = .need_both, +}; + +pub const IngestContext = struct { + rows: []Data.TableRow, + map: ?std.StringHashMap(std.StringHashMap(std.StringHashMap(?std.BufSet))), +}; + +/// +pub fn scrobbleIngest(request: *jetzig.Request, input: []const u8, config: IngestConfig, context: ?IngestContext) !IngestContext { + const allocator = request.allocator; + var out = std.ArrayList(Data.TableRow).init(allocator); + var artists = if (context) |ctx| blk: { + try out.appendSlice(ctx.rows); + break :blk ctx.map.?; + } else std.StringHashMap(std.StringHashMap(std.StringHashMap(?std.BufSet))).init(allocator); var scanner = std.json.Scanner.initCompleteInput(allocator, input); defer scanner.deinit(); - var out = std.ArrayList(Data.UnifiedScrobble).init(allocator); + const rule_list = try loadRules(allocator); array: switch (try scanner.peekNextTokenType()) { .array_begin => { // Go into array _ = try scanner.next(); - while (try scanner.peekNextTokenType() != .array_end) { + scrobble_array: while (try scanner.peekNextTokenType() != .array_end) { var r: Data.UnifiedScrobble = undefined; // Go into object _ = try scanner.next(); @@ -97,18 +128,22 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U const key_token = try scanner.nextAlloc(allocator, .alloc_if_needed); const field_name = std.meta.stringToEnum(ScrobbleFields, switch (key_token) { inline .string, .allocated_string => |slice| slice, - else => return error.UnexpectedToken, + else => { + std.log.debug("{any}", .{key_token}); + return error.UnexpectedToken; + }, }) orelse .irrelevant; switch (field_name) { - .@"@attr" => { - freeAllocated(allocator, key_token); + .@"@attr" => |d| { r = undefined; - try scanner.skipUntilStackHeight(3); + try skipScrobble(allocator, &scanner, key_token, d); }, .ts, .date => |d| { freeAllocated(allocator, key_token); - const date = switch (d) { + const date: i64 = switch (d) { .date => blk: { + // We can filter by date via the API, so we will always have results in the + // specified timeframe through LFMW if (try scanner.peekNextTokenType() == .object_begin) { // For now, try to just skip over the object_begin and assume the next field is uts _ = try scanner.next(); @@ -121,7 +156,7 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U const lfw_date = try std.fmt.parseInt(i64, switch (lfw_date_token) { inline .number, .allocated_number, .string, .allocated_string => |slice| slice, else => return error.UnexpectedToken, - }, 10) * std.time.ns_per_s; + }, 10) * std.time.us_per_s; freeAllocated(allocator, lfw_date_token); break :blk lfw_date; } else { @@ -129,7 +164,11 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U const lfs_date = try std.fmt.parseInt(i64, switch (lfs_date_token) { inline .number, .allocated_number, .string, .allocated_string => |slice| slice, else => return error.UnexpectedToken, - }, 10) * std.time.ns_per_ms; + }, 10) * std.time.us_per_ms; + if ((config.earliest != null and lfs_date < config.earliest.?) or (config.latest != null and lfs_date > config.latest.?)) { + r = undefined; + try skipScrobble(allocator, &scanner, lfs_date_token, d); + } freeAllocated(allocator, lfs_date_token); break :blk lfs_date; } @@ -137,12 +176,16 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U .ts => blk: { // This might need to be an alloc_always, but I'm gonna try if_needed first const spotify_date_token = try scanner.nextAlloc(allocator, .alloc_if_needed); - const spotify_date = try zeit.instant(.{ .source = .{ .iso8601 = switch (spotify_date_token) { + const spotify_date = (try zeit.instant(.{ .source = .{ .iso8601 = switch (spotify_date_token) { inline .string, .allocated_string => |slice| slice, else => return error.UnexpectedToken, - } } }); + } } })).timestamp; + if ((config.earliest != null and spotify_date < config.earliest.?) or (config.latest != null and spotify_date > config.latest.?)) { + r = undefined; + try skipScrobble(allocator, &scanner, spotify_date_token, d); + } freeAllocated(allocator, spotify_date_token); - break :blk spotify_date.unixTimestamp() * std.time.ns_per_s; + break :blk @as(i64, @truncate(@divFloor(spotify_date, std.time.us_per_ms))); }, else => unreachable, }; @@ -157,7 +200,7 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U }, 10); freeAllocated(allocator, spotify_ms_played); }, - .master_metadata_track_name, .track, .name => { + .master_metadata_track_name, .track, .name => |d| { freeAllocated(allocator, key_token); const track = try scanner.nextAlloc(allocator, .alloc_always); @field(r, "track") = switch (track) { @@ -165,6 +208,11 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U .null => null, else => return error.UnexpectedToken, }; + if (r.track == null) { + r = undefined; + try skipScrobble(allocator, &scanner, track, d); + _ = try scanner.next(); + } }, .master_metadata_album_artist_name, .artist => { freeAllocated(allocator, key_token); @@ -189,7 +237,6 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U else => return error.UnexpectedToken, }; }; - @field(r, "track_artist") = artist; @field(r, "album_artist") = artist; }, .master_metadata_album_album_name, .album => { @@ -234,7 +281,118 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U } // Exit object _ = try scanner.next(); - try out.append(r); + + // Final checks + if (r.playtime != null and r.playtime.? < config.minimum_playtime and (r.reason_end == null or !std.mem.eql(u8, r.reason_end.?, "trackdone"))) continue :scrobble_array; + switch (config.null_tolerance) { + .need_neither => {}, + .need_both => if (r.album == null and r.album_artist == null) continue :scrobble_array, + .need_album => if (r.album == null) continue :scrobble_array, + .need_artist => if (r.album_artist == null) continue :scrobble_array, + } + var scr = Data.Scrobble{ + .track = r.track.?, + .album = r.album orelse "Unknown Album", + .track_artists = null, + .album_artists = &.{r.album_artist orelse "Unknown Artist"}, + .date = r.date, + }; + + if (rule_list) |rules| scr = try applyRule(allocator, scr, rules); + + // Try not to have an aneurysm (impossible challenge 2025) + const artist: []const u8, const single_artist_flag: bool = if (scr.album_artists.len == 1) .{ scr.album_artists[0], true } else blk: { + var combined = try std.ArrayListUnmanaged(u8).initCapacity(allocator, 0); + for (scr.album_artists) |aa| try combined.appendSlice(allocator, aa); + break :blk .{ try combined.toOwnedSlice(allocator), false }; + }; + + const premade_hashes = try scr.asHash(allocator); + + // I'm doing all the hashing in the jobs, meaning we hash more than we need to + // If I get bored maybe I'll work on storing them instead + const hm_artist_info = try artists.getOrPut(artist); + if (!hm_artist_info.found_existing) { + hm_artist_info.value_ptr.* = std.StringHashMap(std.StringHashMap(?std.BufSet)).init(allocator); + if (single_artist_flag) { + var add_artist = try request.job("add_artist"); + try add_artist.params.put("artist", artist); + try add_artist.schedule(); + } else { + for (scr.album_artists) |a| { + const hm_ind_artist_info = try artists.getOrPut(a); + if (!hm_ind_artist_info.found_existing) { + hm_ind_artist_info.value_ptr.* = std.StringHashMap(std.StringHashMap(?std.BufSet)).init(allocator); + var add_artist = try request.job("add_artist"); + try add_artist.params.put("artist", a); + try add_artist.schedule(); + } + } + } + } + const hm_album_info = try hm_artist_info.value_ptr.*.getOrPut(scr.album); + if (!hm_album_info.found_existing) { + const album_query = try jetzig.database.Query(.Album) + .find(@as(i64, @bitCast(premade_hashes[0]))).execute(request.repo); + + if (album_query == null) { + try jetzig.database.Query(.Album) + .insert(.{ .id = @as(i64, @bitCast(premade_hashes[0])), .name = scr.album, .length = null }) + .execute(request.repo); + } + hm_album_info.value_ptr.* = std.StringHashMap(?std.BufSet).init(allocator); + var add_album = try request.job("add_album"); + try add_album.params.put("album_hash", premade_hashes[0]); + try add_album.params.put("artists", scr.album_artists); + //if (!single_artist_flag) add_album.put("combined", artist); + try add_album.schedule(); + } + + const hm_song_info = try hm_album_info.value_ptr.*.getOrPut(scr.track); + if (!hm_song_info.found_existing) { + const track_query = try jetzig.database.Query(.Song) + .find(@as(i64, @bitCast(premade_hashes[1]))).execute(request.repo); + + if (track_query == null) { + try jetzig.database.Query(.Song) + .insert(.{ .id = @as(i64, @bitCast(premade_hashes[1])), .name = scr.track, .length = null, .hidden = false }) + .execute(request.repo); + } + const as_query = try jetzig.database.Query(.Albumsong) + .find(@as(i64, @bitCast(premade_hashes[2]))).execute(request.repo); + + if (as_query == null) { + try jetzig.database.Query(.Albumsong) + .insert(.{ .id = @as(i64, @bitCast(premade_hashes[2])), .song_id = @as(i64, @bitCast(premade_hashes[1])), .album_id = @as(i64, @bitCast(premade_hashes[0])) }) + .execute(request.repo); + } + hm_song_info.value_ptr.* = null; + var add_song = try request.job("add_song"); + try add_song.params.put("as_hash", premade_hashes[2]); + //add_song.put("album", scr.album); + if (scr.track_artists) |track_artists| { + try add_song.params.put("track_artists", track_artists); + if (hm_song_info.value_ptr.* == null) hm_song_info.value_ptr.* = std.BufSet.init(allocator); + for (track_artists) |ta| { + try hm_song_info.value_ptr.*.?.insert(ta); + const hm_ta_info = try artists.getOrPut(ta); + if (!hm_ta_info.found_existing) { + hm_ta_info.value_ptr.* = std.StringHashMap(std.StringHashMap(?std.BufSet)).init(allocator); + var add_artist = try request.job("add_artist"); + try add_artist.params.put("artist", ta); + try add_artist.schedule(); + } + } + } + try add_song.params.put("album_artists", scr.album_artists); + //if (!single_artist_flag) add_song.put("combined", artist); + try add_song.schedule(); + } + try jetzig.database.Query(.Scrobble).insert(.{ .albumsong = @as(i64, @bitCast(premade_hashes[2])), .datetime = scr.date }) + .execute(request.repo); + + const b = try scrobbleToRow(allocator, scr); + try out.append(b); } }, // LastFM(stats) @@ -282,7 +440,7 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U else => return error.UnexpectedToken, } const scrobbles = try out.toOwnedSlice(); - return scrobbles; + return IngestContext{ .map = artists, .rows = scrobbles }; } fn freeAllocated(allocator: std.mem.Allocator, token: std.json.Token) void { @@ -293,3 +451,24 @@ fn freeAllocated(allocator: std.mem.Allocator, token: std.json.Token) void { else => {}, } } + +// Cantor Pairing Function +// https://en.wikipedia.org/wiki/Pairing_function + +fn skipScrobble(allocator: std.mem.Allocator, scanner: *std.json.Scanner, token: std.json.Token, field: ScrobbleFields) !void { + freeAllocated(allocator, token); + try scanner.skipUntilStackHeight(switch (field) { + // Spotify specific fields + .ts, .master_metadata_album_album_name, .master_metadata_album_artist_name, .master_metadata_track_name, .ms_played, .reason_end => 1, + // LastFM Stats specific field + .track => 2, + // LastFM Web specific fields + .name, .@"@attr" => 3, + // Fields shared by LastFM Stats and LastFM Web: album, artist, date (although date is never invalid for LastFM Web) + else => switch (scanner.stackHeight()) { + 5 => 3, // Five levels deep => LastFM Web (all of those fields are fortunately objects / same stack height) + 3 => 2, // Three levels deep => LastFM stats + else => unreachable, + }, + }); +} diff --git a/src/types.zig b/src/types.zig index df067e8..dd5fd20 100644 --- a/src/types.zig +++ b/src/types.zig @@ -3,7 +3,7 @@ const std = @import("std"); pub const UnifiedScrobble = struct { track: ?[]const u8, // These can be null per Spotify - track_artist: ?[]const u8, + //track_artist: ?[]const u8, // As far as I'm aware, there are no services that provide separate track/album artist lists album: ?[]const u8, album_artist: ?[]const u8, date: i64, @@ -12,114 +12,53 @@ pub const UnifiedScrobble = struct { reason_end: ?[]const u8 = null, }; -pub const ImportedScrobbles = union(ScrobbleSources) { - LastFMStats: []IgnorantScrobble, - LastFMWeb: []LastFMWebScrobble, - Spotify: []SpotifyScrobble, -}; - const ScrobbleSources = enum { LastFMStats, LastFMWeb, Spotify, }; -pub const IgnorantScrobble = struct { - track: []const u8, - artist: []const u8, - album: []const u8 = "Not Provided", - //albumId: []const u8, - date: i64, -}; +fn hashAndSign(a: []const u8) !i64 { + return @as(i64, @bitCast(std.hash.Fnv1a_64.hash(a))); +} + +fn pair(a: u64, b: u64) u64 { + return @mod(@divFloor((a +% b) *% (a +% b +% 1), 2) +% b, std.math.maxInt(u64)); +} pub const Scrobble = struct { track: []const u8, - artists_track: []const []const u8, - album: []const u8 = "", - artists_album: []const []const u8, + track_artists: ?[]const []const u8, + album: []const u8 = "Unknown Album", + album_artists: []const []const u8, date: i64, -}; -// From lastfmstats.com -pub const LastFMStats = struct { username: []const u8, scrobbles: []IgnorantScrobble }; + pub fn asHash(self: *Scrobble, allocator: std.mem.Allocator) ![3]u64 { + var string_buf = try std.ArrayListUnmanaged(u8).initCapacity(allocator, 0); -// I derived whether or not these values were optional from searching -// the respective fields for null in Vim, so there may be some fields -// that can be optional that I haven't run into yet -pub const SpotifyScrobble = struct { - ts: []const u8, - //username: []const u8, - //platform: []const u8, - ms_played: u64, - //conn_country: []const u8, - //ip_addr_decrypted: ?[]const u8, - //user_agent_decrypted: ?[]const u8, - master_metadata_track_name: ?[]const u8, - master_metadata_album_artist_name: ?[]const u8, - master_metadata_album_album_name: ?[]const u8, - //spotify_track_uri: ?[]const u8, - //episode_name: ?[]const u8, - //episode_show_name: ?[]const u8, - //spotify_episode_uri: ?[]const u8, - reason_start: []const u8, - reason_end: ?[]const u8, - //shuffle: bool, - skipped: ?bool, - //offline: bool, - offline_timestamp: u64, - //incognito_mode: ?bool, -}; + for (self.album_artists) |artist| try string_buf.appendSlice(allocator, artist); -pub const LastFMWeb = struct { - recenttracks: struct { - track: []LastFMWebScrobble, - @"@attr": LastFMWebQueryInfo, - }, -}; + try string_buf.appendSlice(allocator, self.album); + const a = std.hash.Fnv1a_64.hash(string_buf.items); -pub const LastFMWebHyperlinkData = struct { - mbid: []const u8, - @"#text": []const u8, -}; + try string_buf.appendSlice(allocator, self.track); + const s = std.hash.Fnv1a_64.hash(try string_buf.toOwnedSlice(allocator)); -pub const LastFMWebScrobble = struct { - artist: LastFMWebHyperlinkData, - album: ?LastFMWebHyperlinkData = null, - name: []const u8, - mbid: ?[]const u8 = null, - image: []struct { - size: []const u8, - @"#text": []const u8, - }, - date: ?struct { - uts: []const u8, - @"#text": []const u8, - } = null, - @"@attr": ?struct { - nowplaying: []const u8, - } = null, - url: []const u8, -}; - -pub const LastFMWebQueryInfo = struct { - perPage: []const u8, - totalPages: []const u8, - page: []const u8, - user: []const u8, - total: []const u8, + return .{ a, s, pair(a, s) }; + } }; pub const Rule = struct { name: []const u8, cond_req: enum { any, all }, conditionals: []struct { - match_on: enum { artists_album, artists_track, album, track }, + match_on: enum { album_artists, track_artists, album, track }, match_cond: enum { is, contains }, match_txt: []const u8, }, actions: []struct { action: enum { replace, add }, - action_on: enum { artists_album, album, artists_track, track }, + action_on: enum { album_artists, album, track_artists, track }, action_txt: []const u8, }, };