[ { "@graph" : [ { "@id" : "https://w3id.org/np/RAxlDC7tosZR2dA4SCZIftkxycfEn9SMfUsWM5GCkqC9E", "@type" : [ "http://www.nanopub.org/nschema#Nanopublication" ], "http://www.nanopub.org/nschema#hasAssertion" : [ { "@id" : "https://w3id.org/np/RAxlDC7tosZR2dA4SCZIftkxycfEn9SMfUsWM5GCkqC9E#assertion" } ], "http://www.nanopub.org/nschema#hasProvenance" : [ { "@id" : "https://w3id.org/np/RAxlDC7tosZR2dA4SCZIftkxycfEn9SMfUsWM5GCkqC9E#provenance" } ], "http://www.nanopub.org/nschema#hasPublicationInfo" : [ { "@id" : "https://w3id.org/np/RAxlDC7tosZR2dA4SCZIftkxycfEn9SMfUsWM5GCkqC9E#pubinfo" } ] } ], "@id" : "https://w3id.org/np/RAxlDC7tosZR2dA4SCZIftkxycfEn9SMfUsWM5GCkqC9E#Head" }, { "@graph" : [ { "@id" : "http://id.crossref.org/issn/2451-8492", "http://purl.org/dc/terms/title" : [ { "@value" : "Data Science" } ] }, { "@id" : "https://doi.org/10.3233/DS-240059", "@type" : [ "http://purl.org/spar/fabio/ResourcePaper" ], "http://purl.org/dc/terms/abstract" : [ { "@value" : "Measuring data drift is essential in machine learning applications where model scoring (evaluation) is done on data samples that differ from those used in training. The Kullback-Leibler divergence is a common measure of shifted probability distributions, for which discretized versions are invented to deal with binned or categorical data. We present the Unstable Population Indicator, a robust, flexible and numerically stable, discretized implementation of Jeffrey's divergence, along with an implementation in a Python package that can deal with continuous, discrete, ordinal and nominal data in a variety of popular data types. We show the numerical and statistical properties in controlled experiments. It is not advised to employ a common cut-off to distinguish stable from unstable populations, but rather to let that cut-off depend on the use case." } ], "http://purl.org/dc/terms/date" : [ { "@value" : "2024" } ], "http://purl.org/dc/terms/hasPart" : [ { "@id" : "https://w3id.org/kpxl/ios/ds/np/RA4SqymT32eltSYbr41lDKMBV3Zr8nEBEXRFhfOrN6f3k" } ], "http://purl.org/dc/terms/isPartOf" : [ { "@id" : "http://id.crossref.org/issn/2451-8492" } ], "http://purl.org/dc/terms/title" : [ { "@value" : "Measuring Data Drift with the Unstable Population Indicator" } ], "http://purl.org/pav/authoredBy" : [ { "@id" : "https://orcid.org/0000-0003-2581-8370" }, { "@id" : "https://orcid.org/0009-0003-5030-0108" } ] }, { "@id" : "https://orcid.org/0000-0003-2581-8370", "@type" : [ "http://xmlns.com/foaf/0.1/Person" ], "http://schema.org/affiliation" : [ { "@id" : "https://ror.org/04dkp9463" }, { "@id" : "https://ror.org/05xvt9f17" } ], "http://schema.org/email" : [ { "@value" : "datascience@marcelhaas.com" } ], "http://xmlns.com/foaf/0.1/name" : [ { "@value" : "Marcel R. Haas" } ] }, { "@id" : "https://orcid.org/0009-0003-5030-0108", "@type" : [ "http://xmlns.com/foaf/0.1/Person" ], "http://schema.org/affiliation" : [ { "@id" : "https://ror.org/04b8v1s79" }, { "@id" : "https://ror.org/04dkp9463" } ], "http://schema.org/email" : [ { "@value" : "L.Sibbald@tilburguniversity.edu" } ], "http://xmlns.com/foaf/0.1/name" : [ { "@value" : "Lisette Sibbald" } ] }, { "@id" : "https://ror.org/04b8v1s79", "@type" : [ "http://xmlns.com/foaf/0.1/Organization" ], "http://xmlns.com/foaf/0.1/name" : [ { "@value" : "Department of Methodology and Statistics and Department of Cognitive Neuropsychology, Tilburg University, Prof. Cobbenhagenlaan 125, 5037 DB Tilburg, The Netherlands" } ] }, { "@id" : "https://ror.org/04dkp9463", "@type" : [ "http://xmlns.com/foaf/0.1/Organization" ], "http://xmlns.com/foaf/0.1/name" : [ { "@value" : "Business Intelligence, University of Amsterdam, Spui 21, 1012WX Amsterdam, The Netherlands" } ] }, { "@id" : "https://ror.org/05xvt9f17", "@type" : [ "http://xmlns.com/foaf/0.1/Organization" ], "http://xmlns.com/foaf/0.1/name" : [ { "@value" : "Public Health and Primary Care, Leiden University Medical Center, Albinusdreef 2, The Netherlands" } ] } ], "@id" : "https://w3id.org/np/RAxlDC7tosZR2dA4SCZIftkxycfEn9SMfUsWM5GCkqC9E#assertion" }, { "@graph" : [ { "@id" : "https://w3id.org/np/RAxlDC7tosZR2dA4SCZIftkxycfEn9SMfUsWM5GCkqC9E#assertion", "http://www.w3.org/ns/prov#wasAttributedTo" : [ { "@id" : "https://orcid.org/0000-0003-2581-8370" }, { "@id" : "https://orcid.org/0009-0003-5030-0108" } ] } ], "@id" : "https://w3id.org/np/RAxlDC7tosZR2dA4SCZIftkxycfEn9SMfUsWM5GCkqC9E#provenance" }, { "@graph" : [ { "@id" : "https://orcid.org/0000-0002-1267-0234", "http://xmlns.com/foaf/0.1/name" : [ { "@value" : "Tobias Kuhn" } ] }, { "@id" : "https://w3id.org/np/RAxlDC7tosZR2dA4SCZIftkxycfEn9SMfUsWM5GCkqC9E", "http://purl.org/dc/terms/created" : [ { "@type" : "http://www.w3.org/2001/XMLSchema#dateTime", "@value" : "2024-02-29T09:56:20.312Z" } ], "http://purl.org/dc/terms/creator" : [ { "@id" : "https://orcid.org/0000-0002-1267-0234" } ], "http://purl.org/dc/terms/license" : [ { "@id" : "https://creativecommons.org/licenses/by/4.0/" } ], "http://purl.org/nanopub/x/hasNanopubType" : [ { "@id" : "http://purl.org/spar/fabio/ScholarlyWork" }, { "@id" : "https://w3id.org/kpxl/ios/ds/terms/DataScienceNanopub" } ], "http://purl.org/nanopub/x/introduces" : [ { "@id" : "https://doi.org/10.3233/DS-240059" } ], "http://purl.org/nanopub/x/supersedes" : [ { "@id" : "https://w3id.org/np/RAf7jLWJEYn21DT5QrBySMfO6wIHrlDohMW8hNQ51a9Y8" } ], "http://purl.org/nanopub/x/wasCreatedAt" : [ { "@id" : "https://nanodash.petapico.org/" } ], "http://www.w3.org/2000/01/rdf-schema#label" : [ { "@value" : "Article: Measuring Data Drift with the Unstable Population Indicator" } ], "https://w3id.org/np/o/ntemplate/wasCreatedFromProvenanceTemplate" : [ { "@id" : "http://purl.org/np/RAi6zZAwhaJ23Hzg4lIjlPir6Take3ZQp-lS9skfBEwfQ" } ], "https://w3id.org/np/o/ntemplate/wasCreatedFromPubinfoTemplate" : [ { "@id" : "http://purl.org/np/RAA2MfqdBCzmz9yVWjKLXNbyfBNcwsMmOqcNUxkk1maIM" }, { "@id" : "http://purl.org/np/RAh1gm83JiG5M6kDxXhaYT1l49nCzyrckMvTzcPn-iv90" }, { "@id" : "http://purl.org/np/RAjpBMlw3owYhJUBo3DtsuDlXsNAJ8cnGeWAutDVjuAuI" }, { "@id" : "https://w3id.org/np/RA5R_qv3VsZIrDKd8Mr37x3HoKCsKkwN5tJVqgQsKhjTE" } ], "https://w3id.org/np/o/ntemplate/wasCreatedFromTemplate" : [ { "@id" : "https://w3id.org/np/RAhPFxesdOZq-w6Z8VBfc1aV9hfN6c5FnJ7XjR0dAMn_I" } ] }, { "@id" : "https://w3id.org/np/RAxlDC7tosZR2dA4SCZIftkxycfEn9SMfUsWM5GCkqC9E#sig", "http://purl.org/nanopub/x/hasAlgorithm" : [ { "@value" : "RSA" } ], "http://purl.org/nanopub/x/hasPublicKey" : [ { "@value" : "MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQCjDGQCS1S+SRnERDuYDXOugdYUP0efEquHJEEHAbU/uLzBVlga89zqrNPCS7fBE6lArBUWEmT8eLKdMapyqvAzI1J3jUWTMhDJF+XFBkUiuiFfNSc4vJJcmi0yujtnuzXsRIG202jyaP4f5ULoskFwaZOSBZJfiE0dsB3D7DTIAQIDAQAB" } ], "http://purl.org/nanopub/x/hasSignature" : [ { "@value" : "ZbXXfjYdfOy3LJfriJd7gRtxVs2e611Y5bOV2cjaBn1U/k6qczNBah4gj7GGToAbaFOnPr2QdIvPbSeP1IX8cwupKGCacD0dpCoWmd5ovI4XE1A83W4sSC/zsBf2S1rQ0f/hCjb2ltQ6HrCgIAZEFeC389PDrP33UbihUkE6rIc=" } ], "http://purl.org/nanopub/x/hasSignatureTarget" : [ { "@id" : "https://w3id.org/np/RAxlDC7tosZR2dA4SCZIftkxycfEn9SMfUsWM5GCkqC9E" } ], "http://purl.org/nanopub/x/signedBy" : [ { "@id" : "https://orcid.org/0000-0002-1267-0234" } ] } ], "@id" : "https://w3id.org/np/RAxlDC7tosZR2dA4SCZIftkxycfEn9SMfUsWM5GCkqC9E#pubinfo" } ]