scrobbleIngest

The code sucks. The functionality isn't quite what we hoped for. Dates are formatted incorrectly. There are practically no comments. It's not modular whatsoever.  I lost several years of my life trying to make this work.

LGTM
This commit is contained in:
mitteneer 2025-07-19 18:25:10 -04:00
parent 2a42e07df0
commit 5f451868af
8 changed files with 346 additions and 372 deletions

View file

@ -0,0 +1,31 @@
const std = @import("std");
const jetzig = @import("jetzig");
// The `run` function for a job is invoked every time the job is processed by a queue worker
// (or by the Jetzig server if the job is processed in-line).
//
// Arguments:
// * allocator: Arena allocator for use during the job execution process.
// * params: Params assigned to a job (from a request, values added to response data).
// * env: Provides the following fields:
// - logger: Logger attached to the same stream as the Jetzig server.
// - environment: Enum of `{ production, development }`.
pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig.jobs.JobEnv) !void {
_ = allocator;
const artists = params.getT(.array, "artists").?.items();
const album_id = try (params.get("album_hash").?).coerce(u64);
for (artists) |artist| {
const artist_name = try artist.coerce([]const u8);
const artist_id = std.hash.Fnv1a_64.hash(artist_name);
const paired = @as(i64, @bitCast(@mod(@divFloor((artist_id +% album_id) *% (artist_id +% album_id +% 1), 2) +% album_id, std.math.maxInt(u64))));
const aa_query = try jetzig.database.Query(.Artistalbum)
.find(paired).execute(env.repo);
if (aa_query == null) {
try jetzig.database.Query(.Artistalbum)
.insert(.{ .id = paired, .artist_id = @as(i64, @bitCast(artist_id)), .album_id = @as(i64, @bitCast(album_id)) })
.execute(env.repo);
}
}
}

View file

@ -0,0 +1,25 @@
const std = @import("std");
const jetzig = @import("jetzig");
// The `run` function for a job is invoked every time the job is processed by a queue worker
// (or by the Jetzig server if the job is processed in-line).
//
// Arguments:
// * allocator: Arena allocator for use during the job execution process.
// * params: Params assigned to a job (from a request, values added to response data).
// * env: Provides the following fields:
// - logger: Logger attached to the same stream as the Jetzig server.
// - environment: Enum of `{ production, development }`.
pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig.jobs.JobEnv) !void {
_ = allocator;
const artist = params.getT(.string, "artist").?;
const id = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(artist)));
const artist_query = try jetzig.database.Query(.Artist)
.find(id).execute(env.repo);
if (artist_query == null) {
try jetzig.database.Query(.Artist)
.insert(.{ .id = id, .name = artist })
.execute(env.repo);
}
}

34
src/app/jobs/add_song.zig Normal file
View file

@ -0,0 +1,34 @@
const std = @import("std");
const jetzig = @import("jetzig");
// The `run` function for a job is invoked every time the job is processed by a queue worker
// (or by the Jetzig server if the job is processed in-line).
//
// Arguments:
// * allocator: Arena allocator for use during the job execution process.
// * params: Params assigned to a job (from a request, values added to response data).
// * env: Provides the following fields:
// - logger: Logger attached to the same stream as the Jetzig server.
// - environment: Enum of `{ production, development }`.
pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig.jobs.JobEnv) !void {
_ = allocator;
//const album = params.getT(.string, "album").?;
const as_id = try (params.get("as_hash").?).coerce(u64);
const album_artists = params.getT(.array, "album_artists").?.items();
// Will use this eventually, but not now
// const track_artists = params.getT(.array,"track_artists");
for (album_artists) |artist| {
const artist_name = try artist.coerce([]const u8);
const artist_id = std.hash.Fnv1a_64.hash(artist_name);
const asa_id = @as(i64, @bitCast(@mod(@divFloor((as_id +% artist_id) *% (as_id +% artist_id +% 1), 2) +% artist_id, std.math.maxInt(u64))));
const asa_query = try jetzig.database.Query(.Albumsongsartist)
.find(asa_id).execute(env.repo);
if (asa_query == null) {
try jetzig.database.Query(.Albumsongsartist)
.insert(.{ .id = asa_id, .albumsong_id = @as(i64, @bitCast(as_id)), .artist_id = @as(i64, @bitCast(artist_id)) })
.execute(env.repo);
}
}
}

View file

@ -1,134 +0,0 @@
const std = @import("std");
const jetzig = @import("jetzig");
const jetquery = @import("jetzig").jetquery;
const Data = @import("../../types.zig");
const rules = @import("../../apply_rule.zig");
// The `run` function for a job is invoked every time the job is processed by a queue worker
// (or by the Jetzig server if the job is processed in-line).
//
// Arguments:
// * allocator: Arena allocator for use during the job execution process.
// * params: Params assigned to a job (from a request, values added to response data).
// * env: Provides the following fields:
// - logger: Logger attached to the same stream as the Jetzig server.
// - environment: Enum of `{ production, development }`.
pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig.jobs.JobEnv) !void {
//_ = env;
if (params.getT(.array, "scrobbles")) |scrobbles| {
for (scrobbles.items()) |item| {
// Probably want to include artist name here, but not sure how to yet
const track_artists = item.getT(.array, "artists_track").?.items();
const album_artists = item.getT(.array, "artists_album").?.items();
var track_artist_name_buffer = try allocator.alloc([]const u8, track_artists.len);
var album_artist_name_buffer = try allocator.alloc([]const u8, album_artists.len);
var track_artist_id_buffer = try allocator.alloc(i64, track_artists.len);
var album_artist_id_buffer = try allocator.alloc(i64, album_artists.len);
const scrobble: Data.Scrobble = .{
.track = item.getT(.string, "track").?,
.artists_track = track_artist_name_buffer,
.album = item.getT(.string, "album") orelse "",
.artists_album = album_artist_name_buffer,
.date = @as(i64, @truncate(item.getT(.integer, "date").?)),
};
var album_hash_string = std.ArrayList(u8).init(allocator);
var track_hash_string = std.ArrayList(u8).init(allocator);
// I theoretically don't need this for loop
for (track_artists, 0..track_artists.len) |artist, i| {
const artist_name = try artist.coerce([]const u8);
track_artist_name_buffer[i] = artist_name;
track_artist_id_buffer[i] = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(artist_name)));
}
for (album_artists, 0..album_artists.len) |artist, i| {
const artist_name = try artist.coerce([]const u8);
album_artist_name_buffer[i] = artist_name;
album_artist_id_buffer[i] = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(artist_name)));
try album_hash_string.appendSlice(artist_name);
}
try album_hash_string.appendSlice(scrobble.album);
try track_hash_string.appendSlice(scrobble.album);
const album_hash = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(album_hash_string.items)));
try track_hash_string.appendSlice(scrobble.track);
const track_hash = @as(i64, @bitCast(std.hash.Fnv1a_64.hash(track_hash_string.items)));
var albumsong_id = try jetzig.database.Query(.Albumsong)
.find(album_hash ^ track_hash)
.select(.{.id}).execute(env.repo);
var album_id = try jetzig.database.Query(.Album)
.find(album_hash)
.select(.{.id}).execute(env.repo);
for (track_artist_name_buffer, track_artist_id_buffer) |scrobble_track_artist, track_artist_hash| {
var artist_id = try jetzig.database.Query(.Artist)
.find(track_artist_hash)
.select(.{.id}).execute(env.repo);
if (artist_id == null)
artist_id = try jetzig.database.Query(.Artist)
.insert(.{ .id = track_artist_hash, .name = scrobble_track_artist, .disambiguation = null })
.returning(.{.id}).execute(env.repo);
if (albumsong_id == null) {
var track_id = try jetzig.database.Query(.Song)
.find(track_hash)
.select(.{.id}).execute(env.repo);
if (track_id == null)
track_id = try jetzig.database.Query(.Song)
.insert(.{ .id = track_hash, .name = scrobble.track, .length = null, .hidden = false })
.returning(.{.id}).execute(env.repo);
if (album_id == null)
album_id = try jetzig.database.Query(.Album)
.insert(.{ .id = album_hash, .name = scrobble.album, .length = null })
.returning(.{.id}).execute(env.repo);
albumsong_id = try jetzig.database.Query(.Albumsong)
.insert(.{ .song_id = track_id.?.id, .album_id = album_id.?.id })
.returning(.{.id}).execute(env.repo);
try jetzig.database.Query(.Albumsongsartist)
.insert(.{ .albumsong_id = albumsong_id.?.id, .artist_id = artist_id.?.id }).execute(env.repo);
} else {
const ins_albumsongartist = try jetzig.database.Query(.Albumsongsartist)
.findBy(.{ .albumsong_id = albumsong_id.?.id, .artist_id = artist_id.?.id })
.select(.{.id}).execute(env.repo);
if (ins_albumsongartist == null)
try jetzig.database.Query(.Albumsongsartist)
.insert(.{ .albumsong_id = albumsong_id.?.id, .artist_id = artist_id.?.id }).execute(env.repo);
}
}
for (album_artist_name_buffer, album_artist_id_buffer) |scrobble_album_artist, album_artist_hash| {
const artistalbum_id = try jetzig.database.Query(.Artistalbum)
.findBy(.{ .album_id = album_id.?.id, .artist_id = album_artist_hash })
.select(.{.id}).execute(env.repo);
if (artistalbum_id == null) {
var artist_id = try jetzig.database.Query(.Artist)
.find(album_artist_hash)
.select(.{.id}).execute(env.repo);
if (artist_id == null)
artist_id = try jetzig.database.Query(.Artist)
.insert(.{ .id = album_artist_hash, .name = scrobble_album_artist, .disambiguation = null })
.returning(.{.id}).execute(env.repo);
try jetzig.database.Query(.Artistalbum)
.insert(.{ .album_id = album_id.?.id, .artist_id = artist_id.?.id }).execute(env.repo);
}
}
try jetzig.database.Query(.Scrobble)
.insert(.{ .albumsong = albumsong_id.?.id, .datetime = scrobble.date }).execute(env.repo);
}
}
}

View file

@ -13,6 +13,8 @@ pub fn index(request: *jetzig.Request, data: *jetzig.Data) !jetzig.View {
}
pub fn post(request: *jetzig.Request) !jetzig.View {
var root = try request.data(.object);
var view_params = try root.put("scrobbles", .array);
const UploadParams = struct {
source: enum { LFMW, LFMS, Spotify },
earliest_date: ?[]const u8,
@ -25,21 +27,21 @@ pub fn post(request: *jetzig.Request) !jetzig.View {
const latest_ts = (try zeit.instant(.{ .source = if (params.latest_date) |ld| .{ .iso8601 = ld } else .now })).timestamp;
const earliest_ts = (try zeit.instant(.{ .source = if (params.earliest_date) |ed| .{ .iso8601 = ed } else .{ .unix_timestamp = 0 } })).timestamp;
var skipped_tracks: u64 = 0;
var limited_tracks: u64 = 0;
const imported_scrobbles: []Data.UnifiedScrobble = switch (params.source) {
.LFMS, .Spotify => try Utils.scrobbleIngest(request.allocator, if (try request.file("upload")) |file| file.content else unreachable),
.LFMW => blk: {
switch (params.source) {
.LFMS, .Spotify => {
const ctx = try Utils.scrobbleIngest(request, if (try request.file("upload")) |file| file.content else unreachable, .{}, null);
for (ctx.rows) |row| try view_params.append(row);
},
.LFMW => {
const user_agent: []const u8 = "Zuletzt/0.0.1";
var client = Client{ .allocator = request.allocator };
var lastfm_response_buffer = std.ArrayList(u8).init(request.allocator);
var scrobble_buffer = std.ArrayList(Data.UnifiedScrobble).init(request.allocator);
const username = if (params.username) |un| un else "VAOTM";
var page: usize = 1;
//var max_pages: ?usize = null;
var ctx: ?Utils.IngestContext = null;
while (true) : (page += 1) {
if (page > 91) break;
@ -52,128 +54,11 @@ pub fn post(request: *jetzig.Request) !jetzig.View {
continue;
}
const response_string = try lastfm_response_buffer.toOwnedSlice();
const parsed_lastfm_response = try Utils.scrobbleIngest(request.allocator, response_string);
//const parsed_lastfm_response = try std.json.parseFromSliceLeaky(Data.LastFMWeb, request.allocator, response_string, .{ .ignore_unknown_fields = true });
//if (max_pages == null) max_pages = try std.fmt.parseInt(usize, parsed_lastfm_response.recenttracks.@"@attr".totalPages, 10);
try scrobble_buffer.appendSlice(parsed_lastfm_response);
ctx = try Utils.scrobbleIngest(request, response_string, .{}, ctx);
}
break :blk try scrobble_buffer.toOwnedSlice();
for (ctx.?.rows) |row| try view_params.append(row);
},
};
var root = try request.data(.object);
var view_params = try root.put("scrobbles", .array);
var job = try request.job("process_scrobbles2");
const rule_list = try Utils.loadRules(request.allocator);
var artists = try job.params.put("artists", .object);
var albums = try job.params.put("albums", .object);
var tracks = try job.params.put("tracks", .object);
var artistalbums = try job.params.put("artistalbums", .object);
var albumsongs = try job.params.put("albumsongs", .object);
var albumsongsartists = try job.params.put("albumsongsartists", .object);
var hash_buffer = [_]u8{undefined} ** 20; // A minimum i64 needs 19 digits + 1 negative sign
appends: for (imported_scrobbles) |scrobble| {
if (scrobble.date > latest_ts or scrobble.date < earliest_ts) {
limited_tracks += 1;
continue :appends;
}
if (scrobble.playtime != null and scrobble.playtime.? < 30_000 and (scrobble.reason_end == null or !std.mem.eql(u8, scrobble.reason_end.?, "trackdone"))) {
skipped_tracks += 1;
continue :appends;
}
if (scrobble.track_artist == null or scrobble.album_artist == null or scrobble.track == null) {
skipped_tracks += 1;
continue :appends;
}
const filtered_scrobble = Data.Scrobble{
.album = scrobble.album.?,
.artists_album = &.{scrobble.album_artist.?},
.artists_track = &.{scrobble.track_artist.?},
.date = scrobble.date,
.track = scrobble.track.?,
};
const complete_scrobble = if (rule_list) |rl| try rules.applyScrobbleRule(request.allocator, filtered_scrobble, rl) else filtered_scrobble;
const row = try Utils.scrobbleToRow(request.allocator, complete_scrobble);
try view_params.append(row);
var stored_artist_hashes = std.ArrayList(u64).init(request.allocator);
var album_hash_string = std.ArrayList(u8).init(request.allocator);
for (complete_scrobble.artists_album) |artist| {
try album_hash_string.appendSlice(artist);
const artist_hash = std.hash.Fnv1a_64.hash(artist);
try stored_artist_hashes.append(artist_hash);
const signed_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(artist_hash))});
if (artists.get(signed_hash_string) == null) try artists.put(signed_hash_string, artist);
}
try album_hash_string.appendSlice(complete_scrobble.album);
const album_hash = std.hash.Fnv1a_64.hash(album_hash_string.items);
const signed_album_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(album_hash))});
if (albums.get(signed_album_hash_string) == null) try albums.put(signed_album_hash_string, complete_scrobble.album);
for (stored_artist_hashes.items) |artist_hash| {
const artistalbum_hash = pair(artist_hash, album_hash);
const signed_artistalbums_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(artistalbum_hash))});
if (tracks.get(signed_artistalbums_hash_string) == null) {
var artistalbum = try artistalbums.put(signed_artistalbums_hash_string, .object);
try artistalbum.put("artist", @as(i64, @bitCast(artist_hash)));
try artistalbum.put("album", @as(i64, @bitCast(album_hash)));
}
}
// The track hash does not currently include the track artists. Probably not necessary for it to,
// but easily can if need be
var track_hash_string = std.ArrayList(u8).init(request.allocator);
try track_hash_string.appendSlice(complete_scrobble.album);
try track_hash_string.appendSlice(complete_scrobble.track);
const track_hash = std.hash.Fnv1a_64.hash(track_hash_string.items);
const signed_track_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(track_hash))});
if (tracks.get(signed_track_hash_string) == null) try tracks.put(signed_track_hash_string, complete_scrobble.track);
const albumsong_hash = pair(album_hash, track_hash);
const signed_albumsong_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(albumsong_hash))});
if (albumsongs.get(signed_albumsong_hash_string)) |albumsong| {
var albumsong_scrobbles = albumsong.get("scrobbles");
try albumsong_scrobbles.?.append(@divFloor(complete_scrobble.date, std.time.ns_per_us)); // MICROSECONDS
} else {
var albumsong = try albumsongs.put(signed_albumsong_hash_string, .object);
try albumsong.put("album", @as(i64, @bitCast(album_hash)));
try albumsong.put("song", @as(i64, @bitCast(track_hash)));
var albumsong_scrobbles = try albumsong.put("scrobbles", .array);
try albumsong_scrobbles.append(@divFloor(complete_scrobble.date, std.time.ns_per_us)); // MICROSECONDS
}
for (complete_scrobble.artists_track) |artist| {
const artist_hash = std.hash.Fnv1a_64.hash(artist);
const signed_artist_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(artist_hash))});
if (artists.get(signed_artist_hash_string) == null) try artists.put(signed_artist_hash_string, artist);
const albumsongsartist_hash = pair(albumsong_hash, artist_hash);
const signed_albumsongsartist_hash_string = try std.fmt.bufPrint(&hash_buffer, "{}", .{@as(i64, @bitCast(albumsongsartist_hash))});
if (albumsongsartists.get(signed_albumsongsartist_hash_string) == null) {
var albumsongartist = try albumsongsartists.put(signed_albumsongsartist_hash_string, .object);
try albumsongartist.put("albumsong", @as(i64, @bitCast(albumsong_hash)));
try albumsongartist.put("artist", @as(i64, @bitCast(artist_hash)));
}
}
}
std.log.debug("Skipped {} tracks\nFiltered {} tracks by date", .{ skipped_tracks, limited_tracks });
try job.schedule();
return request.render(.created);
}
// Cantor Pairing Function
// https://en.wikipedia.org/wiki/Pairing_function
fn pair(a: u64, b: u64) u64 {
return @mod(@divFloor((a +% b) *% (a +% b +% 1), 2) +% b, std.math.maxInt(u64));
}

View file

@ -27,14 +27,25 @@ pub fn applyScrobbleRule(allocator: std.mem.Allocator, scrobble: Data.Scrobble,
switch (rule.cond_req) {
.any => switch (cond.match_on) {
inline .album, .track => |on| match_found = match_found or match_fn(@field(scrobble, @tagName(on)), cond.match_txt),
inline .artists_album, .artists_track => |on| {
for (@field(scrobble, @tagName(on))) |artist| match_found = match_found or match_fn(artist, cond.match_txt);
.album_artists => {
for (scrobble.album_artists) |artist| match_found = match_found or match_fn(artist, cond.match_txt);
},
.track_artists => {
if (scrobble.track_artists) |ta| {
for (ta) |a| match_found = match_found or match_fn(a, cond.match_txt);
} else match_found = false;
},
},
.all => switch (cond.match_on) {
inline .album, .track => |on| match_found = match_found and match_fn(@field(scrobble, @tagName(on)), cond.match_txt),
inline .artists_album, .artists_track => |on| {
for (@field(scrobble, @tagName(on))) |artist| match_found = match_found and match_fn(artist, cond.match_txt);
.album_artists => {
for (scrobble.album_artists) |artist| match_found = match_found and match_fn(artist, cond.match_txt);
},
.track_artists => {
if (scrobble.track_artists) |ta| {
for (ta) |a| match_found = match_found and match_fn(a, cond.match_txt);
} else match_found = false;
},
},
}
@ -46,17 +57,21 @@ pub fn applyScrobbleRule(allocator: std.mem.Allocator, scrobble: Data.Scrobble,
var al = std.ArrayList([]const u8).init(allocator);
switch (act.action_on) {
.album, .track => unreachable,
inline else => |on| {
try al.appendSlice(@field(output_scrobble, @tagName(on)));
.album_artists => {
try al.appendSlice(scrobble.album_artists);
try al.append(act.action_txt);
const list = try al.toOwnedSlice();
@field(output_scrobble, @tagName(on)) = list;
output_scrobble.album_artists = try al.toOwnedSlice();
},
.track_artists => {
if (scrobble.track_artists) |ta| try al.appendSlice(ta);
try al.append(act.action_txt);
output_scrobble.track_artists = try al.toOwnedSlice();
},
}
},
.replace => switch (act.action_on) {
inline .album, .track => |on| @field(output_scrobble, @tagName(on)) = act.action_txt,
inline .artists_album, .artists_track => |on| {
inline .album_artists, .track_artists => |on| {
const artist = try allocator.alloc([]const u8, 1);
artist[0] = act.action_txt;
@field(output_scrobble, @tagName(on)) = artist;

View file

@ -1,10 +1,12 @@
const std = @import("std");
const zeit = @import("zeit");
const Data = @import("types.zig");
const jetzig = @import("jetzig");
const applyRule = @import("apply_rule.zig").applyScrobbleRule;
pub fn dateFmt(allocator: std.mem.Allocator, epoch: i64) ![]const u8 {
var date = std.ArrayList(u8).init(allocator);
try (try zeit.instant(.{ .source = .{ .unix_timestamp = @divFloor(epoch, std.time.ns_per_s) } })).time().strftime(date.writer(), "%d %b %Y, %H:%M");
try (try zeit.instant(.{ .source = .{ .unix_timestamp = @divFloor(epoch, std.time.ns_per_us) } })).time().strftime(date.writer(), "%d %b %Y, %H:%M");
return date.items;
}
@ -23,7 +25,7 @@ pub fn dateCompare(self: *[]const u8, earliest: []const u8, latest: []const u8)
pub fn scrobbleToRow(allocator: std.mem.Allocator, scrobble: Data.Scrobble) !Data.TableRow {
var artistlist = std.ArrayList(Data.HyperlinkData).init(allocator);
for (scrobble.artists_track) |a| {
for (scrobble.track_artists orelse scrobble.album_artists) |a| {
try artistlist.append(Data.HyperlinkData{ .name = a, .id = 0 });
}
return Data.TableRow{
@ -79,17 +81,46 @@ pub fn loadRules(allocator: std.mem.Allocator) !?[]Data.Rule {
return std.json.parseFromSliceLeaky([]Data.Rule, allocator, rule_file_content, .{}) catch null;
}
pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.UnifiedScrobble {
/// Configuration for scrobble ingestion
const IngestConfig = struct {
/// The earliest date (a unix timestamp in nanoseconds) to accept a scrobble
earliest: ?i128 = null,
/// The latest date (a unix timestamp in nanoseconds) to accept a scrobble
latest: ?i128 = null,
/// The minimum number of milliseconds needed to accept a scrobble
/// Only affects Spotify scrobbles
minimum_playtime: i128 = 30_000,
/// The amount of metadata required to accept a scrobble. A track name is always required
/// - need_artist: Only an artist name is required to accept a scrobble
/// - need_album: Only an album name is required to accept a scrobble
/// - need_both: Both an artist name and an album name are required to accept a scrobble
/// - need_neither: No extra metadata is required to accept a scrobble
null_tolerance: enum { need_artist, need_album, need_both, need_neither } = .need_both,
};
pub const IngestContext = struct {
rows: []Data.TableRow,
map: ?std.StringHashMap(std.StringHashMap(std.StringHashMap(?std.BufSet))),
};
///
pub fn scrobbleIngest(request: *jetzig.Request, input: []const u8, config: IngestConfig, context: ?IngestContext) !IngestContext {
const allocator = request.allocator;
var out = std.ArrayList(Data.TableRow).init(allocator);
var artists = if (context) |ctx| blk: {
try out.appendSlice(ctx.rows);
break :blk ctx.map.?;
} else std.StringHashMap(std.StringHashMap(std.StringHashMap(?std.BufSet))).init(allocator);
var scanner = std.json.Scanner.initCompleteInput(allocator, input);
defer scanner.deinit();
var out = std.ArrayList(Data.UnifiedScrobble).init(allocator);
const rule_list = try loadRules(allocator);
array: switch (try scanner.peekNextTokenType()) {
.array_begin => {
// Go into array
_ = try scanner.next();
while (try scanner.peekNextTokenType() != .array_end) {
scrobble_array: while (try scanner.peekNextTokenType() != .array_end) {
var r: Data.UnifiedScrobble = undefined;
// Go into object
_ = try scanner.next();
@ -97,18 +128,22 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
const key_token = try scanner.nextAlloc(allocator, .alloc_if_needed);
const field_name = std.meta.stringToEnum(ScrobbleFields, switch (key_token) {
inline .string, .allocated_string => |slice| slice,
else => return error.UnexpectedToken,
else => {
std.log.debug("{any}", .{key_token});
return error.UnexpectedToken;
},
}) orelse .irrelevant;
switch (field_name) {
.@"@attr" => {
freeAllocated(allocator, key_token);
.@"@attr" => |d| {
r = undefined;
try scanner.skipUntilStackHeight(3);
try skipScrobble(allocator, &scanner, key_token, d);
},
.ts, .date => |d| {
freeAllocated(allocator, key_token);
const date = switch (d) {
const date: i64 = switch (d) {
.date => blk: {
// We can filter by date via the API, so we will always have results in the
// specified timeframe through LFMW
if (try scanner.peekNextTokenType() == .object_begin) {
// For now, try to just skip over the object_begin and assume the next field is uts
_ = try scanner.next();
@ -121,7 +156,7 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
const lfw_date = try std.fmt.parseInt(i64, switch (lfw_date_token) {
inline .number, .allocated_number, .string, .allocated_string => |slice| slice,
else => return error.UnexpectedToken,
}, 10) * std.time.ns_per_s;
}, 10) * std.time.us_per_s;
freeAllocated(allocator, lfw_date_token);
break :blk lfw_date;
} else {
@ -129,7 +164,11 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
const lfs_date = try std.fmt.parseInt(i64, switch (lfs_date_token) {
inline .number, .allocated_number, .string, .allocated_string => |slice| slice,
else => return error.UnexpectedToken,
}, 10) * std.time.ns_per_ms;
}, 10) * std.time.us_per_ms;
if ((config.earliest != null and lfs_date < config.earliest.?) or (config.latest != null and lfs_date > config.latest.?)) {
r = undefined;
try skipScrobble(allocator, &scanner, lfs_date_token, d);
}
freeAllocated(allocator, lfs_date_token);
break :blk lfs_date;
}
@ -137,12 +176,16 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
.ts => blk: {
// This might need to be an alloc_always, but I'm gonna try if_needed first
const spotify_date_token = try scanner.nextAlloc(allocator, .alloc_if_needed);
const spotify_date = try zeit.instant(.{ .source = .{ .iso8601 = switch (spotify_date_token) {
const spotify_date = (try zeit.instant(.{ .source = .{ .iso8601 = switch (spotify_date_token) {
inline .string, .allocated_string => |slice| slice,
else => return error.UnexpectedToken,
} } });
} } })).timestamp;
if ((config.earliest != null and spotify_date < config.earliest.?) or (config.latest != null and spotify_date > config.latest.?)) {
r = undefined;
try skipScrobble(allocator, &scanner, spotify_date_token, d);
}
freeAllocated(allocator, spotify_date_token);
break :blk spotify_date.unixTimestamp() * std.time.ns_per_s;
break :blk @as(i64, @truncate(@divFloor(spotify_date, std.time.us_per_ms)));
},
else => unreachable,
};
@ -157,7 +200,7 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
}, 10);
freeAllocated(allocator, spotify_ms_played);
},
.master_metadata_track_name, .track, .name => {
.master_metadata_track_name, .track, .name => |d| {
freeAllocated(allocator, key_token);
const track = try scanner.nextAlloc(allocator, .alloc_always);
@field(r, "track") = switch (track) {
@ -165,6 +208,11 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
.null => null,
else => return error.UnexpectedToken,
};
if (r.track == null) {
r = undefined;
try skipScrobble(allocator, &scanner, track, d);
_ = try scanner.next();
}
},
.master_metadata_album_artist_name, .artist => {
freeAllocated(allocator, key_token);
@ -189,7 +237,6 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
else => return error.UnexpectedToken,
};
};
@field(r, "track_artist") = artist;
@field(r, "album_artist") = artist;
},
.master_metadata_album_album_name, .album => {
@ -234,7 +281,118 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
}
// Exit object
_ = try scanner.next();
try out.append(r);
// Final checks
if (r.playtime != null and r.playtime.? < config.minimum_playtime and (r.reason_end == null or !std.mem.eql(u8, r.reason_end.?, "trackdone"))) continue :scrobble_array;
switch (config.null_tolerance) {
.need_neither => {},
.need_both => if (r.album == null and r.album_artist == null) continue :scrobble_array,
.need_album => if (r.album == null) continue :scrobble_array,
.need_artist => if (r.album_artist == null) continue :scrobble_array,
}
var scr = Data.Scrobble{
.track = r.track.?,
.album = r.album orelse "Unknown Album",
.track_artists = null,
.album_artists = &.{r.album_artist orelse "Unknown Artist"},
.date = r.date,
};
if (rule_list) |rules| scr = try applyRule(allocator, scr, rules);
// Try not to have an aneurysm (impossible challenge 2025)
const artist: []const u8, const single_artist_flag: bool = if (scr.album_artists.len == 1) .{ scr.album_artists[0], true } else blk: {
var combined = try std.ArrayListUnmanaged(u8).initCapacity(allocator, 0);
for (scr.album_artists) |aa| try combined.appendSlice(allocator, aa);
break :blk .{ try combined.toOwnedSlice(allocator), false };
};
const premade_hashes = try scr.asHash(allocator);
// I'm doing all the hashing in the jobs, meaning we hash more than we need to
// If I get bored maybe I'll work on storing them instead
const hm_artist_info = try artists.getOrPut(artist);
if (!hm_artist_info.found_existing) {
hm_artist_info.value_ptr.* = std.StringHashMap(std.StringHashMap(?std.BufSet)).init(allocator);
if (single_artist_flag) {
var add_artist = try request.job("add_artist");
try add_artist.params.put("artist", artist);
try add_artist.schedule();
} else {
for (scr.album_artists) |a| {
const hm_ind_artist_info = try artists.getOrPut(a);
if (!hm_ind_artist_info.found_existing) {
hm_ind_artist_info.value_ptr.* = std.StringHashMap(std.StringHashMap(?std.BufSet)).init(allocator);
var add_artist = try request.job("add_artist");
try add_artist.params.put("artist", a);
try add_artist.schedule();
}
}
}
}
const hm_album_info = try hm_artist_info.value_ptr.*.getOrPut(scr.album);
if (!hm_album_info.found_existing) {
const album_query = try jetzig.database.Query(.Album)
.find(@as(i64, @bitCast(premade_hashes[0]))).execute(request.repo);
if (album_query == null) {
try jetzig.database.Query(.Album)
.insert(.{ .id = @as(i64, @bitCast(premade_hashes[0])), .name = scr.album, .length = null })
.execute(request.repo);
}
hm_album_info.value_ptr.* = std.StringHashMap(?std.BufSet).init(allocator);
var add_album = try request.job("add_album");
try add_album.params.put("album_hash", premade_hashes[0]);
try add_album.params.put("artists", scr.album_artists);
//if (!single_artist_flag) add_album.put("combined", artist);
try add_album.schedule();
}
const hm_song_info = try hm_album_info.value_ptr.*.getOrPut(scr.track);
if (!hm_song_info.found_existing) {
const track_query = try jetzig.database.Query(.Song)
.find(@as(i64, @bitCast(premade_hashes[1]))).execute(request.repo);
if (track_query == null) {
try jetzig.database.Query(.Song)
.insert(.{ .id = @as(i64, @bitCast(premade_hashes[1])), .name = scr.track, .length = null, .hidden = false })
.execute(request.repo);
}
const as_query = try jetzig.database.Query(.Albumsong)
.find(@as(i64, @bitCast(premade_hashes[2]))).execute(request.repo);
if (as_query == null) {
try jetzig.database.Query(.Albumsong)
.insert(.{ .id = @as(i64, @bitCast(premade_hashes[2])), .song_id = @as(i64, @bitCast(premade_hashes[1])), .album_id = @as(i64, @bitCast(premade_hashes[0])) })
.execute(request.repo);
}
hm_song_info.value_ptr.* = null;
var add_song = try request.job("add_song");
try add_song.params.put("as_hash", premade_hashes[2]);
//add_song.put("album", scr.album);
if (scr.track_artists) |track_artists| {
try add_song.params.put("track_artists", track_artists);
if (hm_song_info.value_ptr.* == null) hm_song_info.value_ptr.* = std.BufSet.init(allocator);
for (track_artists) |ta| {
try hm_song_info.value_ptr.*.?.insert(ta);
const hm_ta_info = try artists.getOrPut(ta);
if (!hm_ta_info.found_existing) {
hm_ta_info.value_ptr.* = std.StringHashMap(std.StringHashMap(?std.BufSet)).init(allocator);
var add_artist = try request.job("add_artist");
try add_artist.params.put("artist", ta);
try add_artist.schedule();
}
}
}
try add_song.params.put("album_artists", scr.album_artists);
//if (!single_artist_flag) add_song.put("combined", artist);
try add_song.schedule();
}
try jetzig.database.Query(.Scrobble).insert(.{ .albumsong = @as(i64, @bitCast(premade_hashes[2])), .datetime = scr.date })
.execute(request.repo);
const b = try scrobbleToRow(allocator, scr);
try out.append(b);
}
},
// LastFM(stats)
@ -282,7 +440,7 @@ pub fn scrobbleIngest(allocator: std.mem.Allocator, input: []const u8) ![]Data.U
else => return error.UnexpectedToken,
}
const scrobbles = try out.toOwnedSlice();
return scrobbles;
return IngestContext{ .map = artists, .rows = scrobbles };
}
fn freeAllocated(allocator: std.mem.Allocator, token: std.json.Token) void {
@ -293,3 +451,24 @@ fn freeAllocated(allocator: std.mem.Allocator, token: std.json.Token) void {
else => {},
}
}
// Cantor Pairing Function
// https://en.wikipedia.org/wiki/Pairing_function
fn skipScrobble(allocator: std.mem.Allocator, scanner: *std.json.Scanner, token: std.json.Token, field: ScrobbleFields) !void {
freeAllocated(allocator, token);
try scanner.skipUntilStackHeight(switch (field) {
// Spotify specific fields
.ts, .master_metadata_album_album_name, .master_metadata_album_artist_name, .master_metadata_track_name, .ms_played, .reason_end => 1,
// LastFM Stats specific field
.track => 2,
// LastFM Web specific fields
.name, .@"@attr" => 3,
// Fields shared by LastFM Stats and LastFM Web: album, artist, date (although date is never invalid for LastFM Web)
else => switch (scanner.stackHeight()) {
5 => 3, // Five levels deep => LastFM Web (all of those fields are fortunately objects / same stack height)
3 => 2, // Three levels deep => LastFM stats
else => unreachable,
},
});
}

View file

@ -3,7 +3,7 @@ const std = @import("std");
pub const UnifiedScrobble = struct {
track: ?[]const u8,
// These can be null per Spotify
track_artist: ?[]const u8,
//track_artist: ?[]const u8, // As far as I'm aware, there are no services that provide separate track/album artist lists
album: ?[]const u8,
album_artist: ?[]const u8,
date: i64,
@ -12,114 +12,53 @@ pub const UnifiedScrobble = struct {
reason_end: ?[]const u8 = null,
};
pub const ImportedScrobbles = union(ScrobbleSources) {
LastFMStats: []IgnorantScrobble,
LastFMWeb: []LastFMWebScrobble,
Spotify: []SpotifyScrobble,
};
const ScrobbleSources = enum {
LastFMStats,
LastFMWeb,
Spotify,
};
pub const IgnorantScrobble = struct {
track: []const u8,
artist: []const u8,
album: []const u8 = "Not Provided",
//albumId: []const u8,
date: i64,
};
fn hashAndSign(a: []const u8) !i64 {
return @as(i64, @bitCast(std.hash.Fnv1a_64.hash(a)));
}
fn pair(a: u64, b: u64) u64 {
return @mod(@divFloor((a +% b) *% (a +% b +% 1), 2) +% b, std.math.maxInt(u64));
}
pub const Scrobble = struct {
track: []const u8,
artists_track: []const []const u8,
album: []const u8 = "",
artists_album: []const []const u8,
track_artists: ?[]const []const u8,
album: []const u8 = "Unknown Album",
album_artists: []const []const u8,
date: i64,
};
// From lastfmstats.com
pub const LastFMStats = struct { username: []const u8, scrobbles: []IgnorantScrobble };
pub fn asHash(self: *Scrobble, allocator: std.mem.Allocator) ![3]u64 {
var string_buf = try std.ArrayListUnmanaged(u8).initCapacity(allocator, 0);
// I derived whether or not these values were optional from searching
// the respective fields for null in Vim, so there may be some fields
// that can be optional that I haven't run into yet
pub const SpotifyScrobble = struct {
ts: []const u8,
//username: []const u8,
//platform: []const u8,
ms_played: u64,
//conn_country: []const u8,
//ip_addr_decrypted: ?[]const u8,
//user_agent_decrypted: ?[]const u8,
master_metadata_track_name: ?[]const u8,
master_metadata_album_artist_name: ?[]const u8,
master_metadata_album_album_name: ?[]const u8,
//spotify_track_uri: ?[]const u8,
//episode_name: ?[]const u8,
//episode_show_name: ?[]const u8,
//spotify_episode_uri: ?[]const u8,
reason_start: []const u8,
reason_end: ?[]const u8,
//shuffle: bool,
skipped: ?bool,
//offline: bool,
offline_timestamp: u64,
//incognito_mode: ?bool,
};
for (self.album_artists) |artist| try string_buf.appendSlice(allocator, artist);
pub const LastFMWeb = struct {
recenttracks: struct {
track: []LastFMWebScrobble,
@"@attr": LastFMWebQueryInfo,
},
};
try string_buf.appendSlice(allocator, self.album);
const a = std.hash.Fnv1a_64.hash(string_buf.items);
pub const LastFMWebHyperlinkData = struct {
mbid: []const u8,
@"#text": []const u8,
};
try string_buf.appendSlice(allocator, self.track);
const s = std.hash.Fnv1a_64.hash(try string_buf.toOwnedSlice(allocator));
pub const LastFMWebScrobble = struct {
artist: LastFMWebHyperlinkData,
album: ?LastFMWebHyperlinkData = null,
name: []const u8,
mbid: ?[]const u8 = null,
image: []struct {
size: []const u8,
@"#text": []const u8,
},
date: ?struct {
uts: []const u8,
@"#text": []const u8,
} = null,
@"@attr": ?struct {
nowplaying: []const u8,
} = null,
url: []const u8,
};
pub const LastFMWebQueryInfo = struct {
perPage: []const u8,
totalPages: []const u8,
page: []const u8,
user: []const u8,
total: []const u8,
return .{ a, s, pair(a, s) };
}
};
pub const Rule = struct {
name: []const u8,
cond_req: enum { any, all },
conditionals: []struct {
match_on: enum { artists_album, artists_track, album, track },
match_on: enum { album_artists, track_artists, album, track },
match_cond: enum { is, contains },
match_txt: []const u8,
},
actions: []struct {
action: enum { replace, add },
action_on: enum { artists_album, album, artists_track, track },
action_on: enum { album_artists, album, track_artists, track },
action_txt: []const u8,
},
};