From 1a16f5fb2a98698354e9716c29ee24cd1a53fe2f Mon Sep 17 00:00:00 2001 From: Kerem Sahin Date: Mon, 2 Dec 2019 12:15:17 -0800 Subject: [PATCH] Remove unused dataset models --- .../DatasetSnapshotRequestBuilderTest.java | 15 -- .../metadata/dao/utils/QueryUtilTest.java | 13 -- .../dataplatform/DataPlatformInfo.pdsc | 8 - .../linkedin/dataset/ComplianceDataType.pdsc | 112 -------------- .../dataset/ComplianceDataTypeInfo.pdsc | 52 ------- .../dataset/ComplianceDataTypeKey.pdsc | 19 --- .../com/linkedin/dataset/ComplianceInfo.pdsc | 49 ------ .../dataset/ComplianceInfoSuggestion.pdsc | 45 ------ .../linkedin/dataset/CompliancePurgeType.pdsc | 24 --- .../linkedin/dataset/DataPrivacyReview.pdsc | 43 ------ .../dataset/DatasetClassification.pdsc | 140 ------------------ .../com/linkedin/dataset/ExportPolicy.pdsc | 58 -------- .../com/linkedin/dataset/FieldCompliance.pdsc | 74 --------- .../com/linkedin/dataset/FieldFormat.pdsc | 26 ---- .../linkedin/dataset/ReplicationConfig.pdsc | 14 -- .../com/linkedin/dataset/RetentionPolicy.pdsc | 25 ---- .../dataset/SecurityClassification.pdsc | 20 --- 17 files changed, 737 deletions(-) delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceDataType.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceDataTypeInfo.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceDataTypeKey.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceInfo.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceInfoSuggestion.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/CompliancePurgeType.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/DataPrivacyReview.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetClassification.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/ExportPolicy.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/FieldCompliance.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/FieldFormat.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/ReplicationConfig.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/RetentionPolicy.pdsc delete mode 100644 metadata-models/src/main/pegasus/com/linkedin/dataset/SecurityClassification.pdsc diff --git a/metadata-dao-impl/restli-dao/src/test/java/com/linkedin/metadata/dao/DatasetSnapshotRequestBuilderTest.java b/metadata-dao-impl/restli-dao/src/test/java/com/linkedin/metadata/dao/DatasetSnapshotRequestBuilderTest.java index 69df2733eb4768..1898b241fe6351 100644 --- a/metadata-dao-impl/restli-dao/src/test/java/com/linkedin/metadata/dao/DatasetSnapshotRequestBuilderTest.java +++ b/metadata-dao-impl/restli-dao/src/test/java/com/linkedin/metadata/dao/DatasetSnapshotRequestBuilderTest.java @@ -3,7 +3,6 @@ import com.linkedin.common.FabricType; import com.linkedin.common.urn.DataPlatformUrn; import com.linkedin.common.urn.DatasetUrn; -import com.linkedin.dataset.ComplianceInfo; import com.linkedin.dataset.DatasetKey; import com.linkedin.metadata.dao.utils.ModelUtils; import com.linkedin.metadata.snapshot.DatasetSnapshot; @@ -25,18 +24,4 @@ public void testUrnClass() { assertEquals(builder.urnClass(), DatasetUrn.class); } - - @Test - public void testGetRequest() { - DatasetSnapshotRequestBuilder builder = new DatasetSnapshotRequestBuilder(); - String aspectName = ModelUtils.getAspectName(ComplianceInfo.class); - DatasetUrn urn = new DatasetUrn(new DataPlatformUrn("mysql"), "QUEUING.bar", FabricType.EI); - - GetRequest request = builder.getRequest(aspectName, urn, 0); - - Map keyPaths = Collections.singletonMap("key", new ComplexResourceKey<>( - new DatasetKey().setPlatform(new DataPlatformUrn("mysql")).setName("QUEUING.bar").setOrigin(FabricType.EI), - new EmptyRecord())); - validateRequest(request, "datasets/{key}/snapshot", keyPaths, aspectName, 0); - } } diff --git a/metadata-dao/src/test/java/com/linkedin/metadata/dao/utils/QueryUtilTest.java b/metadata-dao/src/test/java/com/linkedin/metadata/dao/utils/QueryUtilTest.java index e6d2e895ea8ed3..838ebe855cbd18 100644 --- a/metadata-dao/src/test/java/com/linkedin/metadata/dao/utils/QueryUtilTest.java +++ b/metadata-dao/src/test/java/com/linkedin/metadata/dao/utils/QueryUtilTest.java @@ -3,7 +3,6 @@ import com.google.common.collect.ImmutableSet; import com.linkedin.common.Ownership; import com.linkedin.data.template.RecordTemplate; -import com.linkedin.dataset.ComplianceInfo; import com.linkedin.metadata.aspect.AspectVersion; import com.linkedin.metadata.query.Condition; import com.linkedin.metadata.query.Criterion; @@ -41,18 +40,6 @@ public void testNewFilter() { assertEquals(filter.getCriteria().size(), 0); } - @Test - public void testLatestAspectVersions() { - Set> aspects = ImmutableSet.of(Ownership.class, ComplianceInfo.class); - - Set aspectVersions = QueryUtils.latestAspectVersions(aspects); - - assertEquals(aspectVersions.size(), 2); - - assertTrue(hasAspectVersion(aspectVersions, Ownership.class.getCanonicalName(), 0)); - assertTrue(hasAspectVersion(aspectVersions, ComplianceInfo.class.getCanonicalName(), 0)); - } - private boolean hasAspectVersion(Set aspectVersions, String aspectName, long version) { return aspectVersions.stream() .filter(av -> av.getAspect().equals(aspectName) && av.getVersion().equals(version)) diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataplatform/DataPlatformInfo.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataplatform/DataPlatformInfo.pdsc index a674afeb666dbd..3e0a9c4a442666 100644 --- a/metadata-models/src/main/pegasus/com/linkedin/dataplatform/DataPlatformInfo.pdsc +++ b/metadata-models/src/main/pegasus/com/linkedin/dataplatform/DataPlatformInfo.pdsc @@ -45,14 +45,6 @@ } } }, - { - "name": "supportedPurgePolicies", - "type": { - "type": "array", - "items": "com.linkedin.dataset.CompliancePurgeType" - }, - "doc": "The purge policies supported by this platform" - }, { "name": "datasetNameDelimiter", "type": "string", diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceDataType.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceDataType.pdsc deleted file mode 100644 index 89323e061a4caf..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceDataType.pdsc +++ /dev/null @@ -1,112 +0,0 @@ -{ - "type": "enum", - "name": "ComplianceDataType", - "namespace": "com.linkedin.dataset", - "doc": "The business / semantic meaning or data type of data fields. http://go/gdpr-taxonomy", - "symbols" : [ - "ADDRESS", - "ADVERTISER_ID", - "ARTICLE_ID", - "AUTHENTICATION_TOKEN", - "CITY_STATE_REGION", - "COMPANY_ID", - "CONTENT_TOPIC_ID", - "CONTRACT_ID", - "COOKIE_BEACON_BROWSER_ID", - "CUSTOM_ID", - "DATE_OF_BIRTH", - "DEVICE_ID_ADVERTISING_ID", - "ELEVATE_CONTRACT_ID", - "ELEVATE_SEAT_ID", - "EMAIL", - "ENTERPRISE_ACCOUNT_ID", - "ENTERPRISE_PROFILE_ID", - "EVENT_TIME", - "FINANCIAL_NUMBER", - "FREEFORMED_UGC", - "GROUP_ID", - "HANDLES", - "HIRE_STREAM_ID", - "INGESTED_CONTENT_ID", - "INTEREST_ID", - "IP_ADDRESS", - "JOB_ID", - "LATITUDE_LONGITUDE", - "LOGS_PII", - "LYNDA_MASTER_ADMIN_ID", - "LYNDA_USER_ID", - "MEMBER_ID", - "MEMBER_PHOTO", - "MESSAGE", - "MIXED_ID", - "NAME", - "NATIONAL_ID", - "NONE", - "OTHER_PII", - "PASSWORD_CREDENTIAL", - "PAYMENT_INFO", - "PHONE", - "PROFILE_URL", - "SALARY", - "SEAT_ID", - "SLIDESHARE_USER_ID", - "SOCIAL_NETWORK_ID", - "TRANSACTION_TIME", - "UGC_ID", - "UNSTRUCTURED_PII", - "ZUORA_ACCOUNT_ID" - ], - "symbolDocs" : { - "ADDRESS": "Personal physical address", - "ADVERTISER_ID": "ID for an LMS advertiser", - "ARTICLE_ID": "ID for a shared URL (deprecated version of INGESTED_CONTENT_ID)", - "AUTHENTICATION_TOKEN": "Authentication token, including third party tokens", - "CITY_STATE_REGION": "City, State, Region, etc", - "COMPANY_ID": "ID for companies or organizations that created content at LinkedIn", - "CONTENT_TOPIC_ID": "ID for a content topic. See go/contenttopic for more details", - "CONTRACT_ID": "ID for a contract, a grouping of enterprise users for an LTS product", - "COOKIE_BEACON_BROWSER_ID": "Cookies, beacons, browser ID", - "CUSTOM_ID": "[Deprecated]: Use CUSTOM FieldFormat instead", - "DATE_OF_BIRTH": "Date of birth of a person", - "DEVICE_ID_ADVERTISING_ID": "Device ID, Advertising ID", - "ELEVATE_CONTRACT_ID": "ID for a contract within Elevate (LEAP). Corresponds to a LeapContractUrn.", - "ELEVATE_SEAT_ID": "ID for a user on a contract within Elevate (LEAP). Corresponds to a LeapSeatV2Urn", - "EMAIL": "Personal email address", - "ENTERPRISE_ACCOUNT_ID": "ID for enterprise account", - "ENTERPRISE_PROFILE_ID": "ID for enterprise profile", - "EVENT_TIME": "Time of an event, e.g. header.time", - "FINANCIAL_NUMBER": "Financial number: order amount, payment amount, etc", - "FREEFORMED_UGC": "To capture free-formed user generated content. See go/Metadata/UGC", - "GROUP_ID": "ID for LinkedIn groups", - "HANDLES": "ID that is standard protocol-based, verifiable, globally unique, and allow communication with members. See https://iwww.corp.linkedin.com/wiki/cf/pages/viewpage.action?pageId=102778068 for more details", - "HIRE_STREAM_ID": "ID for a HireStream in AutomatedSourcing. Note: this is a legacy ID which is now derived from a SourcingChannelUrn. It is used to ensure that legacy data is compliant.", - "INGESTED_CONTENT_ID": "ID for a shared URL. See go/contentingestion for more details", - "INTEREST_ID": "ID for an interest. See go/interesttagging for more details", - "IP_ADDRESS": "IPv4 or IPv6 address", - "JOB_ID": "ID for a job posting", - "LATITUDE_LONGITUDE": "Latitude and Longitude", - "LOGS_PII": "Logs that can potentially contain PII", - "LYNDA_MASTER_ADMIN_ID": "Lynda User ID of the Lynda Enterprise Account Master Admin", - "LYNDA_USER_ID": "User ID of Lynda.com user", - "MEMBER_ID": "ID for LinkedIn members", - "MEMBER_PHOTO": "Member's photo", - "MESSAGE": "Private message content", - "MIXED_ID": "[Deprecated] Specify all IDs explicitly", - "NAME": "Name: first name, last name, full name", - "NATIONAL_ID": "National ID number, SSN, driver license", - "NONE": "None of the other types apply", - "OTHER_PII": "[Deprecated] Use LOGS_PII for logs containing PII and UNSTRUCTURED_PII for other unstructured data", - "PASSWORD_CREDENTIAL": "Password and credentials", - "PAYMENT_INFO": "Payment info: credit card, bank account", - "PHONE": "Phone numbers, phone number URN", - "PROFILE_URL": "Member Profile url", - "SALARY": "Salary data", - "SEAT_ID": "ID for a user of an LTS enterprise product", - "SLIDESHARE_USER_ID": "ID for a slideshare user", - "SOCIAL_NETWORK_ID": "Social network ID: facebook ID, WeChat ID", - "TRANSACTION_TIME": "Transaction time, e.g. CREATED_DATE, RECONCILED_DATE, ORDER_DATE", - "UGC_ID": "ID for an ugc post. UGC stands for User Generated Content aka sharing on LinkedIn, publishing articles, image/video shares. See go/ugcbackend for more details", - "UNSTRUCTURED_PII": "Unstructured data, e.g. serialized blob, that can contain PII", - "ZUORA_ACCOUNT_ID": "Account ID in Zuora for Lynda.com users" - } -} diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceDataTypeInfo.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceDataTypeInfo.pdsc deleted file mode 100644 index 1c95bb9ded9c46..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceDataTypeInfo.pdsc +++ /dev/null @@ -1,52 +0,0 @@ -{ - "type": "record", - "name": "ComplianceDataTypeInfo", - "namespace": "com.linkedin.dataset", - "doc": "Additional information about a specific compliance data type, such as whether it's Personally Identifiable Information or not.", - "fields": [ - { - "name": "id", - "type": "string", - "doc": "ID of the compliance data type." - }, - { - "name": "title", - "type": "string", - "doc": "A UI-friendly short description for the data type." - }, - { - "name": "description", - "type": "string", - "doc": "Full description for the data type." - }, - { - "name": "idType", - "type": "boolean", - "doc": "Whether this data type is a first-party ID, such as Member ID." - }, - { - "name": "pii", - "type": "boolean", - "doc": "Whether this data type is considered Personally Identifiable Information." - }, - { - "name": "supportedFieldFormats", - "doc": "A list of supported FieldFormat for this data type", - "type": { - "type": "array", - "items": "FieldFormat" - } - }, - { - "name": "defaultSecurityClassification", - "type": "SecurityClassification", - "doc": "The default security classification for this data type." - }, - { - "name": "defaultUrnClass", - "type": "string", - "doc": "The Fully Qualified Class Name (FQCN) of the default URN for this data type, e.g. com.linkedin.common.urn.MemberUrn", - "optional": true - } - ] -} diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceDataTypeKey.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceDataTypeKey.pdsc deleted file mode 100644 index 9eb53574dc00f9..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceDataTypeKey.pdsc +++ /dev/null @@ -1,19 +0,0 @@ -{ - "name": "ComplianceDataTypeKey", - "type": "record", - "namespace": "com.linkedin.dataset", - "doc": "Key for compliance data type resource", - "fields": [ - { - "name": "dataType", - "type": "string", - "doc": "Compliance data type such as MEMBER_ID", - "validate": { - "strlen": { - "min": 1, - "max": 30 - } - } - } - ] -} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceInfo.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceInfo.pdsc deleted file mode 100644 index a104c01ef2008c..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceInfo.pdsc +++ /dev/null @@ -1,49 +0,0 @@ -{ - "type": "record", - "name": "ComplianceInfo", - "namespace": "com.linkedin.dataset", - "doc": "Compliance information that describes associated versioned dataset. Metadata will contain flags to indicate if it contains any (PII) personally identifiable information.", - "fields": [ - { - "name": "originUrns", - "type": { - "type": "array", - "items": "com.linkedin.common.DatasetUrn" - }, - "doc": "To present a list of origin (upstream) datasets for resolved compliance relationship like VIEW lineage type. i.e. multiple source datasets go to 1 or multiple dataset(s)", - "optional": true - }, - { - "name": "complianceFields", - "doc": "A complete list of dataset schema fields and their corresponding compliance information", - "type": { - "type": "array", - "items": "FieldCompliance" - } - }, - { - "name": "datasetClassification", - "type": "DatasetClassification", - "doc": "Dataset level description of whether the dataset contains certain types of information", - "optional": true - }, - { - "name": "datasetConfidentiality", - "type": "SecurityClassification", - "doc": "Overall dataset confidentiality or security classification. Classification is derived from the list of sensitive fields spec.", - "optional": true - }, - { - "name": "containingPersonalData", - "type": "boolean", - "doc": "Whether this dataset contains personal data. This can be explicitly set for schemaless system or derived from complianceFields.", - "optional": true - }, - { - "name": "lastModified", - "type": "com.linkedin.common.AuditStamp", - "doc": "Audit stamp containing who last modified the record and when.", - "optional": true - } - ] -} diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceInfoSuggestion.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceInfoSuggestion.pdsc deleted file mode 100644 index dc5287432a60fb..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/ComplianceInfoSuggestion.pdsc +++ /dev/null @@ -1,45 +0,0 @@ -{ - "type": "record", - "name": "ComplianceInfoSuggestion", - "namespace": "com.linkedin.dataset", - "doc": "Suggested Compliance information generated by auto classification process that describes associated versioned dataset. Metadata will contain flags to indicate if it contains any (PII) personally identifiable information.", - "fields": [ - { - "name": "suggestedContainingPersonalData", - "doc": "Suggestion for whether this dataset contains personal data. This is derived from suggestedFieldClassifications.", - "type": "boolean", - "optional": true, - "default": false - }, - { - "name": "suggestedFieldClassifications", - "type": { - "type": "array", - "items": { - "type": "record", - "name": "SuggestedFieldClassification", - "namespace": "com.linkedin.dataset", - "fields": [ - { - "name": "suggestion", - "type": "FieldCompliance", - "doc": "Suggestion for the field level compliance metadata." - }, - { - "name": "confidenceLevel", - "type": "float", - "doc": "The confidence level for this suggestion. The range is [0.0, 1.0]" - }, - { - "name": "uid", - "type": "string", - "doc": "Unique identifier of this suggestion", - "optional": true - } - ] - } - }, - "doc": "A list of suggested field-level compliance metadata." - } - ] -} diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/CompliancePurgeType.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/CompliancePurgeType.pdsc deleted file mode 100644 index af16d8e072474d..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/CompliancePurgeType.pdsc +++ /dev/null @@ -1,24 +0,0 @@ -{ - "type": "enum", - "name": "CompliancePurgeType", - "namespace": "com.linkedin.dataset", - "symbols": [ - "AUTO_PURGE", - "MANUAL_PURGE", - "LIMITED_RETENTION", - "LIMITED_RETENTION_WITH_LOCKING", - "MANUAL_LIMITED_RETENTION", - "PURGE_NOT_APPLICABLE", - "PURGE_EXEMPTED" - ], - "symbolDocs": { - "AUTO_PURGE": "Choose this option only if it’s acceptable to have the data platform's default mechanism to purge this dataset based on the provided metadata (e.g. member ID, seat ID etc).", - "MANUAL_PURGE": "Choose this option only if you have implemented a custom mechanism to purge this dataset.", - "LIMITED_RETENTION": "Choose this option only if you want to rely on the data platform’s default mechanism to delete the dataset immediately when it is older than the predefined limit.", - "LIMITED_RETENTION_WITH_LOCKING": "Choose this option only if you want to rely on the data platform’s default mechanism to lockdown, prior to removal, the dataset when it is older than the predefined limit.", - "MANUAL_LIMITED_RETENTION": "Choose this option only if you have a well established process to ensure limited data retention.", - "PURGE_NOT_APPLICABLE": "Choose this option only if the dataset does not contain any PII and therefore doesn't need purging.", - "PURGE_EXEMPTED": "Choose this option only if the dataset has obtained explicit purge exemption from HSEC & Legal." - }, - "doc": "Purge mechanism types" -} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/DataPrivacyReview.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/DataPrivacyReview.pdsc deleted file mode 100644 index 6e2b531bdc6bb8..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/DataPrivacyReview.pdsc +++ /dev/null @@ -1,43 +0,0 @@ -{ - "type": "record", - "name": "DataPrivacyReview", - "namespace": "com.linkedin.dataset", - "doc": "Captures the current data privacy review status for a dataset. See go/faroprime/states for more details.", - "fields": [ - { - "name": "lastModified", - "type": "com.linkedin.common.AuditStamp", - "doc": "Audit stamp containing who last changed the review status and when." - }, - { - "name": "status", - "type": { - "type": "enum", - "name": "DataPrivacyReviewStatus", - "symbols": - [ - "PENDING_REVIEW", - "WHITELISTED", - "VIOLATION_SUSPECTED", - "BLACKLISTED", - "WHITELISTED_WITH_VIOLATION_SUPPRESSED" - ], - "symbolDocs": - { - "PENDING_REVIEW": "A review is pending for this dataset", - "WHITELISTED": "The dataset has been reviewed and whitelisted, i.e. not containing PII or HC data", - "VIOLATION_SUSPECTED": "The auto classifier has flagged the dataset as may contain PII or HC data", - "BLACKLISTED": "The dataset has been reviewed and blacklisted, i.e. containing PII or HC data", - "WHITELISTED_WITH_VIOLATION_SUPPRESSED": "The dataset has been whitelisted after suppressing classifier's detection" - } - }, - "doc": "The current review status for this dataset." - }, - { - "name": "notes", - "type": "string", - "doc": "Additional notes for the review", - "optional": true - } - ] -} diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetClassification.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetClassification.pdsc deleted file mode 100644 index 761de3b96afb6b..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/DatasetClassification.pdsc +++ /dev/null @@ -1,140 +0,0 @@ -{ - "type": "record", - "name": "DatasetClassification", - "namespace": "com.linkedin.dataset", - "doc": "Indication of whether the dataset contains certain category of information, each question needs to be answered by True or False", - "fields": [ - { - "name": "connectionsOrFollowersOrFollowing", - "type": "boolean", - "optional": true, - "doc": "Dataset contains information about member connections, followers or following" - }, - { - "name": "profile", - "type": "boolean", - "optional": true, - "doc": "Dataset contains member profile data" - }, - { - "name": "messaging", - "type": "boolean", - "optional": true, - "doc": "Dataset contains messaging data, including both the message content and metadata (sender, receiver, time, etc)" - }, - { - "name": "thirdPartyIntegrationsInUse", - "type": "boolean", - "optional": true, - "doc": "Dataset contains third party integration usage data" - }, - { - "name": "activity", - "type": "boolean", - "optional": true, - "doc": "Dataset contains member activity information that is viewable to other members (newsfeed posts, blog posts, shares, likes, etc)" - }, - { - "name": "settings", - "type": "boolean", - "optional": true, - "doc": "Dataset contains member personal settings" - }, - { - "name": "jobApplicationFlow", - "type": "boolean", - "optional": true, - "doc": "Dataset contains job application flow information: job applications, resumes, application status, etc" - }, - { - "name": "enterpriseProduct", - "type": "boolean", - "optional": true, - "doc": "Dataset contains enterprise product data: recruiter search data, sales navigator data, etc" - }, - { - "name": "accountStatus", - "type": "boolean", - "optional": true, - "doc": "Dataset contains member account status: premium, sales navigator user, etc" - }, - { - "name": "addressBookImports", - "type": "boolean", - "optional": true, - "doc": "Dataset contains address book import data or data derived from imported address book data" - }, - { - "name": "microsoftData", - "type": "boolean", - "optional": true, - "doc": "Dataset contains data coming over from Microsoft" - }, - { - "name": "subsidiaryData", - "type": "boolean", - "optional": true, - "doc": "Dataset contains data from companies LinkedIn acquired (Lynda, Slideshare, etc)" - }, - { - "name": "otherThirdPartyIntegrations", - "type": "boolean", - "optional": true, - "doc": "Dataset contains other third party integrations: cell phone uploads, etc" - }, - { - "name": "device", - "type": "boolean", - "optional": true, - "doc": "Dataset contains member device information: IP address, device ID, browser ID, etc" - }, - { - "name": "searchHistory", - "type": "boolean", - "optional": true, - "doc": "Dataset contains search history on LinkedIn platforms" - }, - { - "name": "courseViewingHistory", - "type": "boolean", - "optional": true, - "doc": "Dataset contains course viewing history on LinkedIn Learning" - }, - { - "name": "whoViewedMyProfile", - "type": "boolean", - "optional": true, - "doc": "Dataset contains data that is ingested/created/inferred/derived in relation to providing the WVMP feature" - }, - { - "name": "profileViewsByMe", - "type": "boolean", - "optional": true, - "doc": "Dataset contains data regarding which profiles a member viewed (member IDs for the other profiles, referring URLs, etc)" - }, - { - "name": "advertising", - "type": "boolean", - "optional": true, - "doc": "Dataset contains advertising data: data from advertising partners, ad clicks/views, etc" - }, - { - "name": "usageOrErrorOrConnectivity", - "type": "boolean", - "optional": true, - "doc": "Dataset contains member usage, error reporting or connectivity data" - }, - { - "name": "otherClickstreamOrBrowsingData", - "type": "boolean", - "optional": true, - "doc": "Dataset contains other click-related data that doesn't fit into a bucket above." - }, - { - "name": "employeeData", - "type": "boolean", - "optional": true, - "doc": "Dataset contains only employee data." - } - ] -} diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/ExportPolicy.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/ExportPolicy.pdsc deleted file mode 100644 index fde47d9f745ba3..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/ExportPolicy.pdsc +++ /dev/null @@ -1,58 +0,0 @@ -{ - "type": "record", - "name": "ExportPolicy", - "namespace": "com.linkedin.dataset", - "doc": "Export policy of a dataset. For more info refer to Data-export metadata annotations at go/dma", - "fields": [ - { - "name": "containsUserGeneratedContent", - "type": "boolean", - "doc": "Indicates that the dataset contains data directly created by the member" - }, - { - "name": "containsUserActionGeneratedContent", - "type": "boolean", - "doc": "Indicates that the dataset contains data created as a result of direct member action on the site" - }, - { - "name": "containsUserDerivedContent", - "type": "boolean", - "doc": "Indicates that the dataset contains data owned by the member but not directly created by the member or due to member actions on the site" - }, - { - "name": "dataset", - "type": "com.linkedin.common.DatasetUrn", - "doc": "[Deprecated] Do NOT use. Dataset this export policy is associated with.", - "optional": true, - "deprecated": true - }, - { - "name": "version", - "type": "long", - "doc": "[Deprecated] Do NOT use. The version of the export policy.", - "optional": true, - "default": 0, - "deprecated": true - }, - { - "name": "created", - "type": "com.linkedin.common.AuditStamp", - "doc": "[Deprecated] Do NOT use.", - "optional": true, - "deprecated": true - }, - { - "name": "lastModified", - "type": "com.linkedin.common.AuditStamp", - "doc": "Audit stamp containing who last modified the record and when.", - "optional": true - }, - { - "name": "deleted", - "type": "com.linkedin.common.AuditStamp", - "doc": "[Deprecated] Do NOT use.", - "optional": true, - "deprecated": true - } - ] -} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/FieldCompliance.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/FieldCompliance.pdsc deleted file mode 100644 index 90622389eb240b..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/FieldCompliance.pdsc +++ /dev/null @@ -1,74 +0,0 @@ -{ - "type": "record", - "name": "FieldCompliance", - "namespace": "com.linkedin.dataset", - "doc": "Description of a field that has possible security limitations.", - "fields": [ - { - "name": "fieldPath", - "type": "SchemaFieldPath", - "doc": "Schema field path to identify a specific dataset field. This should follow the TMS path spec. Refer http://go/tms/schema" - }, - { - "name": "pegasusFieldPath", - "type": "SchemaFieldPath", - "doc": "Pegasus field path spec to identify a specific dataset field. Refer http://go/pegasus-path. If the value of this field is empty string, it should be read as null. This would have occurred due to the clients producing the compliance would have only emitted tms path(fieldPath).", - "default": "" - }, - { - "name": "dataType", - "type": "ComplianceDataType", - "doc": "The The business / semantic meaning or data type of data fields" - }, - { - "name": "fieldFormat", - "optional": true, - "doc": "Specify the optional field format (go/gdpr-taxonomy). When data classification type is of ID type (MemberId, GroupId, CompanyId, CustomId, MixedId), the field format needs to be specified, otherwise it's optional", - "type": "FieldFormat" - }, - { - "name": "valuePattern", - "optional": true, - "doc": "Optional pattern for the value. Required for CUSTOM fieldFormat", - "type": "string" - }, - { - "name": "nonOwner", - "optional": true, - "doc": "[Deprecated] use purgeKey instead. Set this field to true if the field doesn't owns the entire row/record", - "type": "boolean", - "default": false - }, - { - "name": "purgeKey", - "type": "boolean", - "doc": "Set this to true if the field is the purge key of the entire row/record, and false otherwise. Applicable if compliance data type is of ID type.", - "optional": true - }, - { - "name": "securityClassification", - "type": "SecurityClassification", - "doc": "Security classification that governs handling of this field (go/dht). Optional when dataTypes is NONE.", - "optional": true - }, - { - "name": "providedByUser", - "type": "boolean", - "doc": "Whether this field contains data directly provided by LinkedIn users.", - "optional": true - }, - { - "name": "containingPersonalData", - "type": "boolean", - "doc": "Whether this field contains personal data. This is always derived from dataType and should never be set explicitly.", - "optional": true - }, - { - "name": "readonly", - "type": "boolean", - "doc": "Whether this field is system generated and thus readonly. This should never be set explicitly.", - "optional": true, - "default": false - } - ] -} diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/FieldFormat.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/FieldFormat.pdsc deleted file mode 100644 index 27eed5b288b6e8..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/FieldFormat.pdsc +++ /dev/null @@ -1,26 +0,0 @@ -{ - "type": "enum", - "name": "FieldFormat", - "namespace": "com.linkedin.dataset", - "symbols": [ - "NUMERIC", - "URN", - "REVERSED_URN", - "COMPOSITE_URN", - "RAW", - "CUSTOM", - "ENCODED", - "HASHED" - ], - "symbolDocs": { - "NUMERIC": "Numerical format, e.g. 12345, that can either be stored a fixed point number or a string", - "URN": "URN format, urn:li:member:12345", - "REVERSED_URN": "Reversed URN format, 12345:member:li:urn", - "COMPOSITE_URN": "[Deprecated] Use CUSTOM format + pattern instead", - "RAW": "Any unencoded string-based field that is neither numeric nor a URN, e.g. alphanumeric strings, GUID etc.", - "CUSTOM": "Any other non-standard format. A pattern for the value is expected to be provided", - "ENCODED": "Data is stored in reversible encoded/serialized/encrypted format", - "HASHED": "Data is stored in irreversible hashed format" - }, - "doc": "The field format" -} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/ReplicationConfig.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/ReplicationConfig.pdsc deleted file mode 100644 index 9eb3812b2ac84a..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/ReplicationConfig.pdsc +++ /dev/null @@ -1,14 +0,0 @@ -{ - "type": "record", - "name": "ReplicationConfig", - "namespace": "com.linkedin.dataset", - "doc": "Configurations for dataset replication", - "fields": [ - { - "name": "gobblinConfig", - "type": "string", - "doc": "The raw gobblin HOCON configuration as a string", - "optional": true - } - ] -} diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/RetentionPolicy.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/RetentionPolicy.pdsc deleted file mode 100644 index a9a448e33e494d..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/RetentionPolicy.pdsc +++ /dev/null @@ -1,25 +0,0 @@ -{ - "type": "record", - "name": "RetentionPolicy", - "namespace": "com.linkedin.dataset", - "doc": "Retention policy that describes the associated dataset. The policy is versioned and immutable upon creation", - "fields": [ - { - "name": "purgeType", - "doc": "Purge mechanism type specifies how dataset complies with existing legal requirements for data retention and clean-up policies.", - "type": "CompliancePurgeType" - }, - { - "name": "purgeNote", - "type": "string", - "doc": "The additional information about purging if the purge type is PURGE_EXEMPTED", - "optional": true - }, - { - "name": "lastModified", - "type": "com.linkedin.common.AuditStamp", - "doc": "Audit stamp containing who last modified the record and when.", - "optional": true - } - ] -} \ No newline at end of file diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataset/SecurityClassification.pdsc b/metadata-models/src/main/pegasus/com/linkedin/dataset/SecurityClassification.pdsc deleted file mode 100644 index e03e902f994a10..00000000000000 --- a/metadata-models/src/main/pegasus/com/linkedin/dataset/SecurityClassification.pdsc +++ /dev/null @@ -1,20 +0,0 @@ -{ - "type": "enum", - "name": "SecurityClassification", - "namespace": "com.linkedin.dataset", - "doc": "Data handling classification. For more details on classification refer to the doc: https://iwww.corp.linkedin.com/wiki/cf/display/HR/Exhibit+A+-+Data+Handling+Table (go/dht)", - "symbols": [ - "HIGHLY_CONFIDENTIAL", - "CONFIDENTIAL", - "LIMITED_DISTRIBUTION", - "GENERAL", - "PUBLIC" - ], - "symbolDocs": { - "HIGHLY_CONFIDENTIAL": "Highly sensitive or regulated data that is intended only for a limited audience within LinkedIn or whose release would likely have a material adverse financial or reputational effect on LinkedIn, LinkedIn workforces, or LinkedIn members and or consolidated subsidiaries. All data in this category will be restricted to a limited group with authorized need to know access.", - "CONFIDENTIAL": "Data that is intended only for a limited audience within LinkedIn or whose release would likely have an adverse financial or reputational effect on LinkedIn, LinkedIn workforces, or LinkedIn members and or consolidated subsidiaries. All LinkedIn members and workforce non-public personal data non-public financial data, and intellectual property shall have a minimum classification of confidential. Data in this category must be limited to only those with a business need for access.", - "LIMITED_DISTRIBUTION": "Data that the user has created and shared with a limited group of users or everyone, and is available in a shared interface. Limited distribution data handling restrictions must reflect LinkedIn Member privacy preferences.", - "GENERAL": "Data that is intended for distribution to anyone within LinkedIn, but should not be disclosed outside of the company. This classification is for data that is typically accessible to anyone working at LinkedIn to view and have access to, regardless of their job or business function.", - "PUBLIC": "Data that is readily available or disclosed to all parties. Public data does not need to be classified; however, copyright or trademark protections may apply." - } -}