Allow filtering scrobble uploads by date

Necessary to avoid double scrobbling when uploading from two overlapping sources (e.g. my Spotify data [2016-2023] and my last.fm data [2019-present]).
This commit is contained in:
mitteneer 2025-02-21 11:54:02 -05:00
parent ab01f2e213
commit 7957345057
6 changed files with 90 additions and 26 deletions

View file

@ -13,6 +13,9 @@ pub fn build(b: *std.Build) !void {
});
// Example dependency:
//
const zig_time_dep = b.dependency("zeit", .{});
exe.root_module.addImport("zeit", zig_time_dep.module("zeit"));
// All dependencies **must** be added to imports above this line.

View file

@ -188,13 +188,31 @@ pub const Songartist = jetquery.Model(
},
);
pub const Albumsong = jetquery.Model(@This(), "Albumsongs", struct {
pub const Albumsong = jetquery.Model(
@This(),
"Albumsongs",
struct {
id: i32,
album_id: i32,
song_id: i32,
created_at: jetquery.DateTime,
updated_at: jetquery.DateTime,
}, .{ .relations = .{
},
.{
.relations = .{
.album = jetquery.belongsTo(.Album, .{}),
.song = jetquery.belongsTo(.Song, .{}),
},
},
);
/// Join model backed by the "Scrobbleartists" table: each row pairs one
/// scrobble (`scrobble_id`) with one artist (`artist_id`).
pub const Scrobbleartist = jetquery.Model(
    @This(),
    "Scrobbleartists",
    struct {
        id: i32,
        scrobble_id: i32,
        artist_id: i32,
        created_at: jetquery.DateTime,
        updated_at: jetquery.DateTime,
    },
    .{
        // Each row belongs to exactly one Scrobble and one Artist.
        .relations = .{
            .scrobble = jetquery.belongsTo(.Scrobble, .{}),
            .artist = jetquery.belongsTo(.Artist, .{}),
        },
    },
);

View file

@ -0,0 +1,20 @@
const std = @import("std");
const jetquery = @import("jetquery");
const t = jetquery.schema.table;
/// Applies the migration: creates the "Scrobbleartists" table with an `id`
/// primary key, integer `scrobble_id` and `artist_id` columns, and the
/// columns added by `t.timestamps` (presumably created_at/updated_at —
/// confirm against jetquery).
/// Any error reported by `repo.createTable` is returned to the caller.
pub fn up(repo: anytype) !void {
    const table_name = "Scrobbleartists";
    return repo.createTable(
        table_name,
        &.{
            t.primaryKey("id", .{}),
            t.column("scrobble_id", .integer, .{}),
            t.column("artist_id", .integer, .{}),
            t.timestamps(.{}),
        },
        .{},
    );
}
/// Reverts the migration by dropping the "Scrobbleartists" table.
/// Any error reported by `repo.dropTable` is returned to the caller.
pub fn down(repo: anytype) !void {
    const table_name = "Scrobbleartists";
    return repo.dropTable(table_name, .{});
}

View file

@ -20,7 +20,7 @@ pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig
if (params.getT(.array, "scrobbles")) |scrobbles| {
for (scrobbles.items()) |item| {
//const fixed_date: u32 = @as(u32, item.getT(.integer, "date").?);
const scrobble: Scrobble = .{ .track = item.getT(.string, "track").?, .artist = item.getT(.string, "artist").?, .album = item.getT(.string, "album") orelse "empty", .date = @as(u64, @bitCast(@as(i64, @truncate(@divTrunc(item.getT(.integer, "date").?, 1000))))) };
const scrobble: Scrobble = .{ .track = item.getT(.string, "track").?, .artist = item.getT(.string, "artist").?, .album = item.getT(.string, "album") orelse "", .date = @as(u64, @bitCast(@as(i64, @truncate(item.getT(.integer, "date").? * 1000)))) };
// Make hashes
const album_hash = @as(i32, @bitCast(std.hash.Fnv1a_32.hash(scrobble.album)));
@ -33,7 +33,11 @@ pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig
// the IDs also depend on the hash of the album
// they're on, as well as the artist name. As far
// as I can tell, this is only an issue for Sufjan
// Steven's `Songs for Christmas`.
// Stevens' `Songs for Christmas`. (In practice.
// In reality, there are albums with several untitled
// songs (Selected Ambient Works Vol. II by Aphex Twin,
// ( ) by Sigur Ros, ...) that have working titles
// in their place.)
// Album: If the album is not self-titled, then
// album hash XOR artist hash. This way, if two
@ -65,6 +69,10 @@ pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig
const artist_check = try jetzig.database.Query(.Artist).find(artist_id).execute(env.repo);
const song_check = try jetzig.database.Query(.Song).find(song_id).execute(env.repo);
// I think there must be a better way to do this next part
// There are very few situations where artist_check is null
// but song_check/album is not. Also yes, the order of these
// checks is weird, I didn't put a lot of thought into it
var associative_table_flags: [3]bool = [3]bool{ true, true, true };
if (album_check == null) {
@ -88,25 +96,30 @@ pub fn run(allocator: std.mem.Allocator, params: *jetzig.data.Value, env: jetzig
if (associative_table_flags[2]) try jetzig.database.Query(.Songartist).insert(.{ .song_id = song_id, .artist_id = artist_id }).execute(env.repo);
}
//try env.repo.execute(album_insert);
//try env.repo.execute(song_insert);
// Checks
// if (album_check == 0) try env.repo.execute(album_insert);
// if (artist_check == 0) try env.repo.execute(artist_insert);
// if (song_check == 0) try env.repo.execute(song_insert);
//const scrobble_offset = try jetzig.database.Query(.Scrobble).select(.{}).count().execute(env.repo) orelse unreachable;
//try jetzig.database.Query(.Scrobble).insert(.{ .id = scrobble_offset + 1, .song_id = song_id, .album_id = album_id, .artist_id = artist_id, .date = scrobble.date }).execute(env.repo);
try jetzig.database.Query(.Scrobble).insert(.{ .song_id = song_id, .album_id = album_id, .date = scrobble.date }).execute(env.repo);
}
}
const query = jetzig.database.Query(.Artist).include(.artistalbums, .{});
const results = try env.repo.all(query);
defer env.repo.free(results);
for (results) |result| {
for (result.artistalbums) |artistalbum| {
std.log.debug("{s}: {any}", .{ result.name, artistalbum.album_id });
}
}
// I would like to replicate this kind of functionality for several kinds of queries
// This one gives me all albums by Dream Theater (it also returns Dream Theater for
// each entry, but removing artists.name from the SELECT would remove that)
//
// SELECT
// artists.name, albums.name
// FROM
// "Albumartists"
// INNER JOIN artists
// ON "Albumartists".artist_id = artists.id
// INNER JOIN albums
// ON "Albumartists".album_id = albums.id
// WHERE artists.name = 'Dream Theater';
//const query = jetzig.database.Query(.Artist).include(.artistalbums, .{});
//const results = try env.repo.all(query);
//defer env.repo.free(results);
//for (results) |result| {
// for (result.artistalbums) |artistalbum| {
// std.log.debug("{s}: {any}", .{ result.name, artistalbum.album_id });
// }
//}
}

View file

@ -3,6 +3,7 @@ const jetzig = @import("jetzig");
const jetquery = @import("jetzig").jetquery;
const Scrobble = @import("../../types.zig").LastFMScrobble;
const lastfm = @import("../../types.zig").LastFM;
const zeit = @import("zeit");
pub fn index(request: *jetzig.Request, data: *jetzig.Data) !jetzig.View {
_ = data;
@ -27,7 +28,15 @@ pub fn post(request: *jetzig.Request) !jetzig.View {
var job = try request.job("process_scrobbles");
var scrobbles_data = try job.params.put("scrobbles", .array);
for (content.scrobbles) |scrobble| {
const params = try request.params();
// Optional "l" request parameter: only scrobbles dated at or after this
// ISO-8601 date are queued. A missing or empty value means "no limit".
const limiting_date_string: ?[]const u8 = if (params.get("l")) |param| param.string.value else null;
const limiting_date_instant: ?zeit.Instant = if (limiting_date_string) |str|
    (if (str.len == 0) null else try zeit.instant(.{ .source = .{ .iso8601 = str } }))
else
    null;
// This is seconds from Unix epoch.
// The "no limit" default is 0, not the maximum i64: the filter below
// multiplies this value by 1000, which would overflow for the maximum, and
// a maximal cutoff would also skip every scrobble instead of none.
const limiting_date_epoch = if (limiting_date_instant) |time| time.unixTimestamp() else 0;
appends: for (content.scrobbles) |scrobble| {
// Scrobble.date is in milliseconds from Unix epoch
if (scrobble.date < limiting_date_epoch * 1000) continue :appends;
var value = try scrobbles_data.append(.object);
// This is so unnecessary, probably useful once I start doing Spotify integration though
inline for (std.meta.fields(Scrobble)) |f| {

View file

@ -16,6 +16,7 @@
<fieldset>
<input type="radio" name="t" label="Last.fm">Last.fm</input>
<input type="radio" name="t" label="Spotify">Spotify</input>
Upload Scrobbles after: <input type="datetime-local" name="l" label="date"></input>
</fieldset>
</form>
</body>