Skip to content

Commit

Permalink
Improve attribution protection for images (#2072)
Browse files Browse the repository at this point in the history
* Use `DOMParser` and `textContent` to extract image author

* Update image-attribution.service.spec.ts

* Preserve newlines

* Update image-attribution.service.ts

* Update image-attribution.service.ts

* Update image-attribution.service.ts

* Update image-attribution.service.spec.ts

* Improve attribution protection

* Add test

* Make attributnion calls parallel

* Add tab

* Add test specific for tabs

* Use wikimedia commons instead of language specific wikipedia.

* Fix lint

---------
Related to #2066 
Co-authored-by: zstadler <[email protected]>
  • Loading branch information
HarelM authored Nov 9, 2024
1 parent 63508db commit 36c817b
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 35 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -172,16 +172,16 @@ export class PublicPoiSidebarComponent implements OnDestroy {
this.showLocationUpdate = true;
}
}
this.initFromFeature(feature);
await this.initFromFeature(feature);
}

private initFromFeature(feature: GeoJSON.Feature) {
private async initFromFeature(feature: GeoJSON.Feature) {
this.fullFeature = feature;
this.latlng = GeoJSONUtils.getLocation(feature);
this.sourceImageUrls = this.getSourceImageUrls(feature);
this.shareLinks = this.poiService.getPoiSocialLinks(feature);
this.contribution = this.poiService.getContribution(feature);
this.info = this.poiService.getEditableDataFromFeature(feature);
this.info = await this.poiService.getEditableDataFromFeature(feature);
const language = this.resources.getCurrentLanguageCodeSimplified();
this.titleService.set(GeoJSONUtils.getTitle(feature, language));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,12 @@ import { HttpTestingController, provideHttpClientTesting } from "@angular/common
import { inject, TestBed } from "@angular/core/testing";

import { ImageAttributionService } from "./image-attribution.service";
import { ResourcesService } from "./resources.service";

describe("ImageAttributionService", () => {

beforeEach(() => {
TestBed.configureTestingModule({
providers: [
{ provide: ResourcesService, useValue: { getCurrentLanguageCodeSimplified: () => "en" } },
ImageAttributionService,
provideHttpClient(withInterceptorsFromDi()),
provideHttpClientTesting()
Expand Down Expand Up @@ -38,7 +36,7 @@ describe("ImageAttributionService", () => {
it("should fetch data from wikipedia when getting wikimedia image", inject([ImageAttributionService, HttpTestingController],
async (service: ImageAttributionService, mockBackend: HttpTestingController) => {
const promise = service.getAttributionForImage("https://upload.wikimedia.org/wikipedia/commons/thumb/a/a1/IHM_Image.jpeg");
mockBackend.match(r => r.url.startsWith("https://en.wikipedia.org/"))[0].flush({
mockBackend.match(r => r.url.startsWith("https://commons.wikimedia.org/"))[0].flush({
query: {
pages: {
"-1": {
Expand All @@ -58,13 +56,13 @@ describe("ImageAttributionService", () => {

expect(response).not.toBeNull();
expect(response.author).toBe("hello");
expect(response.url).toBe("https://en.wikipedia.org/wiki/File:IHM_Image.jpeg");
expect(response.url).toBe("https://commons.wikimedia.org/wiki/File:IHM_Image.jpeg");
}));

it("should remove html tags and get the value inside", inject([ImageAttributionService, HttpTestingController],
async (service: ImageAttributionService, mockBackend: HttpTestingController) => {
const promise = service.getAttributionForImage("https://upload.wikimedia.org/wikipedia/commons/thumb/a/a1/IHM_Image.jpeg");
mockBackend.match(r => r.url.startsWith("https://en.wikipedia.org/"))[0].flush({
mockBackend.match(r => r.url.startsWith("https://commons.wikimedia.org/"))[0].flush({
query: {
pages: {
"-1": {
Expand All @@ -84,13 +82,40 @@ describe("ImageAttributionService", () => {

expect(response).not.toBeNull();
expect(response.author).toBe("hello");
expect(response.url).toBe("https://en.wikipedia.org/wiki/File:IHM_Image.jpeg");
expect(response.url).toBe("https://commons.wikimedia.org/wiki/File:IHM_Image.jpeg");
}));

it("should remove html tags, tabs and get the value inside", inject([ImageAttributionService, HttpTestingController],
async (service: ImageAttributionService, mockBackend: HttpTestingController) => {
const promise = service.getAttributionForImage("https://upload.wikimedia.org/wikipedia/commons/thumb/a/a1/IHM_Image.jpeg");
mockBackend.match(r => r.url.startsWith("https://commons.wikimedia.org/"))[0].flush({
query: {
pages: {
"-1": {
imageinfo: [{
extmetadata: {
Artist: {
value: "<span>\thello\tworld</span>"
}
}
}]
}
}
}
});

const response = await promise;

expect(response).not.toBeNull();
expect(response.author).toBe("hello world");
expect(response.url).toBe("https://commons.wikimedia.org/wiki/File:IHM_Image.jpeg");
}));

// Based on https://upload.wikimedia.org/wikipedia/commons/b/b5/Historical_map_series_for_the_area_of_Al-Manara%2C_Palestine_%281870s%29.jpg
it("should remove html tags and get the value inside for multiple html tags", inject([ImageAttributionService, HttpTestingController],
async (service: ImageAttributionService, mockBackend: HttpTestingController) => {
const promise = service.getAttributionForImage("https://upload.wikimedia.org/wikipedia/commons/thumb/a/a1/IHM_Image.jpeg");
mockBackend.match(r => r.url.startsWith("https://en.wikipedia.org/"))[0].flush({
mockBackend.match(r => r.url.startsWith("https://commons.wikimedia.org/"))[0].flush({
query: {
pages: {
"-1": {
Expand All @@ -116,14 +141,17 @@ describe("ImageAttributionService", () => {
const response = await promise;

expect(response).not.toBeNull();
expect(response.author).toBe("Sources for historical series of maps as follows:\n");
expect(response.url).toBe("https://en.wikipedia.org/wiki/File:IHM_Image.jpeg");
expect(response.author).toBe("Sources for historical series of maps as follows:\n" +
"PEF Survey of Palestine\n" +
"Survey of PalestineOverlay from Palestine Open Maps\n" +
"OpenStreetMap");
expect(response.url).toBe("https://commons.wikimedia.org/wiki/File:IHM_Image.jpeg");
}));

it("should return null when getting wikimedia image without artist", inject([ImageAttributionService, HttpTestingController],
async (service: ImageAttributionService, mockBackend: HttpTestingController) => {
const promise = service.getAttributionForImage("https://upload.wikimedia.org/wikipedia/commons/thumb/a/a1/IHM_Image.jpeg");
mockBackend.match(r => r.url.startsWith("https://en.wikipedia.org/"))[0].flush({
mockBackend.match(r => r.url.startsWith("https://commons.wikimedia.org/"))[0].flush({
query: {
pages: {
"-1": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ import { HttpClient } from "@angular/common/http";
import { inject, Injectable } from "@angular/core";
import { firstValueFrom, timeout } from "rxjs";

import { ResourcesService } from "./resources.service";

export type ImageAttribution = {
author: string;
url: string;
Expand All @@ -13,9 +11,14 @@ export type ImageAttribution = {
export class ImageAttributionService {
private attributionImageCache = new Map<string, ImageAttribution>();

private readonly resources = inject(ResourcesService);
private readonly httpClient = inject(HttpClient);

private extractPlainText(html: string): string {
const parser = new DOMParser();
const doc = parser.parseFromString(html, 'text/html');
return doc.documentElement.textContent.replace(/([ \t]*\n[ \t]*)+/g, '\n').replace(/[ \t]+/g, ' ').trim();
}

public async getAttributionForImage(imageUrl: string): Promise<ImageAttribution> {
if (imageUrl == null) {
return null;
Expand All @@ -37,21 +40,16 @@ export class ImageAttributionService {
}

const imageName = imageUrl.split("/").pop();
const language = this.resources.getCurrentLanguageCodeSimplified();
const address = `https://${language}.wikipedia.org/w/api.php?action=query&prop=imageinfo&iiprop=extmetadata&format=json&origin=*` +
const address = `https://commons.wikimedia.org/w/api.php?action=query&prop=imageinfo&iiprop=extmetadata&format=json&origin=*` +
`&titles=File:${imageName}`;
try {
const response: any = await firstValueFrom(this.httpClient.get(address).pipe(timeout(3000)));
const extmetadata = response.query.pages[-1].imageinfo[0].extmetadata;
const extmetadata = response.query.pages[Object.keys(response.query.pages)[0]].imageinfo[0].extmetadata;
if (extmetadata?.Artist.value) {
const match = extmetadata.Artist.value.match(/<[^>]*>([^<]*)<\/[^>]*>/);
let author = extmetadata.Artist.value as string;
if (match) {
author = match[1]; // Extract the content between the opening and closing tags
}
const author = this.extractPlainText(extmetadata.Artist.value as string);
const imageAttribution = {
author,
url: `https://${language}.wikipedia.org/wiki/File:${imageName}`
url: `https://commons.wikimedia.org/wiki/File:${imageName}`
};
this.attributionImageCache.set(imageUrl, imageAttribution);
return imageAttribution;
Expand Down
35 changes: 31 additions & 4 deletions IsraelHiking.Web/src/application/services/poi.service.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { ConnectionService } from "./connection.service";
import { OverpassTurboService } from "./overpass-turbo.service";
import { INatureService } from "./inature.service";
import { WikidataService } from "./wikidata.service";
import { ImageAttributionService } from "./image-attribution.service";
import { GeoJSONUtils } from "./geojson-utils";
import { GeoJsonParser } from "./geojson.parser";
import { Urls } from "../urls";
Expand Down Expand Up @@ -89,6 +90,7 @@ describe("Poi Service", () => {
createFeatureFromPageId: () => Promise.resolve()
} },
{ provide: ConnectionService, useValue: { stateChanged: { subscribe: () => {} }} },
{ provide: ImageAttributionService, useValue: { getAttributionForImage: () => "aaa" } },
GeoJsonParser,
RunningContextService,
WhatsAppService,
Expand Down Expand Up @@ -684,7 +686,7 @@ describe("Poi Service", () => {
);

it("Should allow adding a point from private marker",
inject([PoiService], (poiService: PoiService) => {
inject([PoiService], async (poiService: PoiService) => {
const feature = {
properties: {
poiSource: "OSM",
Expand All @@ -706,7 +708,7 @@ describe("Poi Service", () => {
};

poiService.mergeWithPoi(feature, markerData);
const info = poiService.getEditableDataFromFeature(feature);
const info = await poiService.getEditableDataFromFeature(feature);
const featureAfterConverstion = poiService.getFeatureFromEditableData(info);
GeoJSONUtils.setLocation(featureAfterConverstion, { lat: 2, lng: 1});
expect(GeoJSONUtils.getLocation(featureAfterConverstion).lat).toBe(2);
Expand All @@ -721,7 +723,7 @@ describe("Poi Service", () => {
);

it("Should filter out incompatible images",
inject([PoiService], (poiService: PoiService) => {
inject([PoiService], async (poiService: PoiService) => {
const feature = {
properties: {
poiSource: "OSM",
Expand All @@ -735,12 +737,37 @@ describe("Poi Service", () => {
}
} as GeoJSON.Feature;

const info = poiService.getEditableDataFromFeature(feature);
const info = await poiService.getEditableDataFromFeature(feature);
expect(info.imagesUrls.length).toBe(0);
}
)
);

it("Should filter out images with no attribution",
inject([PoiService, ImageAttributionService], async (poiService: PoiService, attributionService: ImageAttributionService) => {
const feature = {
properties: {
poiSource: "OSM",
poiId: "poiId",
identifier: "id",
image: "wikimedia.org/image-url",
image1: "wikimedia.org/image-url1",
image2: "wikimedia.org/image-url2",
} as any,
geometry: {
type: "Point",
coordinates: [1, 2]
}
} as GeoJSON.Feature;
spyOn(attributionService, "getAttributionForImage").and.returnValues(Promise.resolve(null), Promise.resolve("aaa") as any, Promise.resolve(null));

const info = await poiService.getEditableDataFromFeature(feature);
expect(info.imagesUrls.length).toBe(1);
expect(info.imagesUrls[0]).toBe("wikimedia.org/image-url1");
}
)
);

it("should get closest point from server", (inject([PoiService, HttpTestingController],
async (poiService: PoiService, mockBackend: HttpTestingController) => {

Expand Down
17 changes: 11 additions & 6 deletions IsraelHiking.Web/src/application/services/poi.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { OverpassTurboService } from "./overpass-turbo.service";
import { GeoJSONUtils } from "./geojson-utils";
import { INatureService } from "./inature.service";
import { WikidataService } from "./wikidata.service";
import { ImageAttributionService } from "./image-attribution.service";
import { LatLon, OsmTagsService, PoiProperties } from "./osm-tags.service";
import { AddToPoiQueueAction, RemoveFromPoiQueueAction } from "../reducers/offline.reducer";
import {
Expand Down Expand Up @@ -107,6 +108,7 @@ export class PoiService {
private readonly iNatureService = inject(INatureService);
private readonly wikidataService = inject(WikidataService);
private readonly overpassTurboService = inject(OverpassTurboService);
private readonly imageAttributinoService = inject(ImageAttributionService);
private readonly store = inject(Store);

constructor() {
Expand Down Expand Up @@ -679,7 +681,7 @@ export class PoiService {

public async updateComplexPoi(info: EditablePublicPointData, newLocation?: LatLngAlt) {
const originalFeature = this.store.selectSnapshot((s: ApplicationState) => s.poiState).selectedPointOfInterest;
const editableDataBeforeChanges = this.getEditableDataFromFeature(originalFeature);
const editableDataBeforeChanges = await this.getEditableDataFromFeature(originalFeature);
let hasChages = false;
const originalId = this.getFeatureId(originalFeature);
let featureContainingOnlyChanges = {
Expand Down Expand Up @@ -749,19 +751,22 @@ export class PoiService {
await this.addPointToUploadQueue(featureContainingOnlyChanges);
}

public getEditableDataFromFeature(feature: Immutable<GeoJSON.Feature>): EditablePublicPointData {
public async getEditableDataFromFeature(feature: Immutable<GeoJSON.Feature>): Promise<EditablePublicPointData> {
const language = this.resources.getCurrentLanguageCodeSimplified();
let imagesUrls = Object.keys(feature.properties)
.filter(k => k.startsWith("image"))
.map(k => feature.properties[k])
.filter(u => u.includes("wikimedia.org") || u.includes("inature.info") || u.includes("nakeb.co.il") || u.includes("jeepolog.com"));
const imageAttributions = await Promise.all(imagesUrls.map(u => this.imageAttributinoService.getAttributionForImage(u)));
imagesUrls = imagesUrls.filter((_, i) => imageAttributions[i] != null);
return {
id: this.getFeatureId(feature),
category: feature.properties.poiCategory,
description: GeoJSONUtils.getDescription(feature, language),
title: GeoJSONUtils.getTitle(feature, language),
icon: feature.properties.poiIcon,
iconColor: feature.properties.poiIconColor,
imagesUrls: Object.keys(feature.properties)
.filter(k => k.startsWith("image"))
.map(k => feature.properties[k])
.filter(u => u.includes("wikimedia.org") || u.includes("inature.info") || u.includes("nakeb.co.il") || u.includes("jeepolog.com")),
imagesUrls,
urls: Object.keys(feature.properties).filter(k => k.startsWith("website")).map(k => feature.properties[k]),
isPoint: feature.geometry.type === "Point" || feature.geometry.type === "MultiPoint",
canEditTitle: !feature.properties.poiMerged && !feature.properties["mtb:name"],
Expand Down

0 comments on commit 36c817b

Please sign in to comment.