Skip to content

Commit f79a63c

Browse files
authored
Merge pull request #1232 from qtomlinson/qt/optimize_recompute
Optimize recomputing definition
2 parents f8e77ad + c541259 commit f79a63c

File tree

23 files changed

+629
-142
lines changed

23 files changed

+629
-142
lines changed

business/definitionService.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ class DefinitionService {
286286
*/
287287
async compute(coordinates, curationSpec) {
288288
this.logger.debug('4:compute:blob:start', { ts: new Date().toISOString(), coordinates: coordinates.toString() })
289-
const raw = await this.harvestStore.getAll(coordinates)
289+
const raw = await this.harvestStore.getAllLatest(coordinates)
290290
this.logger.debug('4:compute:blob:end', { ts: new Date().toISOString(), coordinates: coordinates.toString() })
291291
coordinates = this._getCasedCoordinates(raw, coordinates)
292292
this.logger.debug('5:compute:summarize:start', {

lib/utils.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ function getLatestVersion(versions) {
8888
const normalizedCurrent = _normalizeVersion(current)
8989
if (!normalizedCurrent || semver.prerelease(normalizedCurrent) !== null) return max
9090
const normalizedMax = _normalizeVersion(max)
91+
if (!normalizedMax) return normalizedCurrent
9192
return semver.gt(normalizedCurrent, normalizedMax) ? current : max
9293
}, versions[0])
9394
}

providers/stores/abstractAzblobStore.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class AbstractAzBlobStore {
1111
constructor(options) {
1212
this.options = options
1313
this.containerName = options.containerName
14-
this.logger = logger()
14+
this.logger = this.options.logger || logger()
1515
}
1616

1717
async initialize() {

providers/stores/abstractFileStore.js

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,13 @@ const recursive = require('recursive-readdir')
77
const { promisify } = require('util')
88
const ResultCoordinates = require('../../lib/resultCoordinates')
99
const schema = require('../../schemas/definition-1.0')
10+
const { getLatestVersion } = require('../../lib/utils')
11+
const logger = require('../logging/logger')
1012

1113
class AbstractFileStore {
1214
constructor(options) {
13-
this.options = options
15+
this.options = options || {}
16+
this.logger = this.options.logger || logger()
1417
}
1518

1619
async initialize() {}
@@ -146,6 +149,25 @@ class AbstractFileStore {
146149
.join('/')
147150
.toLowerCase()
148151
}
152+
153+
static getLatestToolPaths(paths, toResultCoordinates = path => this.toResultCoordinatesFromStoragePath(path)) {
154+
const entries = paths
155+
.map(path => {
156+
const { tool, toolVersion } = toResultCoordinates(path)
157+
return { tool, toolVersion, path }
158+
})
159+
.reduce((latest, { tool, toolVersion, path }) => {
160+
if (!tool || !toolVersion) return latest
161+
latest[tool] = latest[tool] || {}
162+
//if the version is greater than the current version, replace it
163+
if (!latest[tool].toolVersion || getLatestVersion([toolVersion, latest[tool].toolVersion]) === toolVersion) {
164+
latest[tool] = { toolVersion, path }
165+
}
166+
return latest
167+
}, {})
168+
const latestPaths = Object.values(entries).map(entry => entry.path)
169+
return new Set(latestPaths)
170+
}
149171
}
150172

151173
module.exports = AbstractFileStore

providers/stores/azblobHarvestStore.js

Lines changed: 58 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -46,34 +46,40 @@ class AzHarvestBlobStore extends AbstractAzBlobStore {
4646
* @param {EntityCoordinates} coordinates - The component revision to report on
4747
* @returns An object with a property for each tool and tool version
4848
*/
49-
getAll(coordinates) {
50-
const name = this._toStoragePathFromCoordinates(coordinates)
49+
async getAll(coordinates) {
5150
// Note that here we are assuming the number of blobs will be small-ish (<10) and
5251
// a) all fit in memory reasonably, and
5352
// b) fit in one list call (i.e., <5000)
54-
const list = new Promise((resolve, reject) =>
53+
const allFilesList = await this._getListOfAllFiles(coordinates)
54+
return await this._getContent(allFilesList)
55+
}
56+
57+
_getListOfAllFiles(coordinates) {
58+
const name = this._toStoragePathFromCoordinates(coordinates)
59+
return new Promise((resolve, reject) =>
5560
this.blobService.listBlobsSegmentedWithPrefix(this.containerName, name, null, resultOrError(resolve, reject))
61+
).then(files =>
62+
files.entries.filter(file => {
63+
return (
64+
file.name.length === name.length || // either an exact match, or
65+
(file.name.length > name.length && // a longer string
66+
(file.name[name.length] === '/' || // where the next character starts extra tool indications
67+
file.name.substr(name.length) === '.json'))
68+
)
69+
})
70+
)
71+
}
72+
73+
_getContent(files) {
74+
const contents = Promise.all(
75+
files.map(file => {
76+
return new Promise((resolve, reject) =>
77+
this.blobService.getBlobToText(this.containerName, file.name, resultOrError(resolve, reject))
78+
).then(result => {
79+
return { name: file.name, content: JSON.parse(result) }
80+
})
81+
})
5682
)
57-
const contents = list.then(files => {
58-
return Promise.all(
59-
files.entries
60-
.filter(file => {
61-
return (
62-
file.name.length === name.length || // either an exact match, or
63-
(file.name.length > name.length && // a longer string
64-
(file.name[name.length] === '/' || // where the next character starts extra tool indications
65-
file.name.substr(name.length) === '.json')) // or is the end, identifying a json file extension
66-
)
67-
})
68-
.map(file => {
69-
return new Promise((resolve, reject) =>
70-
this.blobService.getBlobToText(this.containerName, file.name, resultOrError(resolve, reject))
71-
).then(result => {
72-
return { name: file.name, content: JSON.parse(result) }
73-
})
74-
})
75-
)
76-
})
7783
return contents.then(entries => {
7884
return entries.reduce((result, entry) => {
7985
const { tool, toolVersion } = this._toResultCoordinatesFromStoragePath(entry.name)
@@ -85,6 +91,35 @@ class AzHarvestBlobStore extends AbstractAzBlobStore {
8591
}, {})
8692
})
8793
}
94+
95+
/**
96+
* Get the latest version of each tool output for the given coordinates. The coordinates must be all the way down
97+
* to a revision.
98+
* @param {EntityCoordinates} coordinates - The component revision to report on
99+
* @returns {Promise} A promise that resolves to an object with a property for each tool and tool version
100+
*
101+
*/
102+
async getAllLatest(coordinates) {
103+
const allFilesList = await this._getListOfAllFiles(coordinates)
104+
const latestFilesList = this._getListOfLatestFiles(allFilesList)
105+
return await this._getContent(latestFilesList)
106+
}
107+
108+
_getListOfLatestFiles(allFiles) {
109+
let latestFiles = []
110+
const names = allFiles.map(file => file.name)
111+
try {
112+
const latest = this._getLatestToolPaths(names)
113+
latestFiles = allFiles.filter(file => latest.has(file.name))
114+
} catch (error) {
115+
this.logger.error('Error getting latest files', error)
116+
}
117+
return latestFiles.length === 0 ? allFiles : latestFiles
118+
}
119+
120+
_getLatestToolPaths(paths) {
121+
return AbstractFileStore.getLatestToolPaths(paths, path => this._toResultCoordinatesFromStoragePath(path))
122+
}
88123
}
89124

90125
module.exports = options => new AzHarvestBlobStore(options)

providers/stores/fileHarvestStore.js

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,24 @@ class FileHarvestStore extends AbstractFileStore {
4747
*/
4848
async getAll(coordinates) {
4949
// TODO validate/enforce that the coordinates are down to the component revision
50-
const root = this._toStoragePathFromCoordinates(coordinates)
5150
// Note that here we are assuming the number of blobs will be small-ish (<10) and
5251
// a) all fit in memory reasonably, and
5352
// b) fit in one list call (i.e., <5000)
54-
let files = null
53+
const allFilesList = await this._getListOfAllFiles(coordinates)
54+
return await this._getContent(allFilesList)
55+
}
56+
57+
async _getListOfAllFiles(coordinates) {
58+
const root = this._toStoragePathFromCoordinates(coordinates)
5559
try {
56-
files = await recursive(root, ['.DS_Store'])
60+
return await recursive(root, ['.DS_Store'])
5761
} catch (error) {
58-
if (error.code === 'ENOENT') return {}
62+
if (error.code === 'ENOENT') return []
5963
throw error
6064
}
65+
}
66+
67+
async _getContent(files) {
6168
const contents = await Promise.all(
6269
files.map(file => {
6370
return new Promise((resolve, reject) =>
@@ -74,6 +81,41 @@ class FileHarvestStore extends AbstractFileStore {
7481
return result
7582
}, {})
7683
}
84+
85+
/**
86+
* Get the latest version of each tool output for the given coordinates. The coordinates must be all the way down
87+
* to a revision.
88+
* @param {EntityCoordinates} coordinates - The component revision to report on
89+
* @returns {Promise} A promise that resolves to an object with a property for each tool and tool version
90+
*
91+
*/
92+
async getAllLatest(coordinates) {
93+
const allFilesList = await this._getListOfAllFiles(coordinates)
94+
const latestFilesList = this._getListOfLatestFiles(allFilesList)
95+
return await this._getContent(latestFilesList)
96+
}
97+
98+
_getListOfLatestFiles(allFiles) {
99+
let latestFiles = []
100+
try {
101+
const latest = this._getLatestToolVersions(allFiles)
102+
latestFiles = allFiles.filter(file => latest.has(file))
103+
} catch (error) {
104+
this.logger.error('Error getting latest files', error)
105+
}
106+
if (latestFiles.length === 0) {
107+
this.logger.debug('No latest files found, returning all files')
108+
return allFiles
109+
}
110+
if (latestFiles.length !== allFiles.length) {
111+
this.logger.debug(`Using latest: \n${latestFiles}`)
112+
}
113+
return latestFiles
114+
}
115+
116+
_getLatestToolVersions(paths) {
117+
return AbstractFileStore.getLatestToolPaths(paths, path => this._toResultCoordinatesFromStoragePath(path))
118+
}
77119
}
78120

79121
module.exports = options => new FileHarvestStore(options)

test/business/definitionServiceTest.js

Lines changed: 66 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ const deepEqualInAnyOrder = require('deep-equal-in-any-order')
1212
const chai = require('chai')
1313
chai.use(deepEqualInAnyOrder)
1414
const expect = chai.expect
15+
const FileHarvestStore = require('../../providers/stores/fileHarvestStore')
16+
const SummaryService = require('../../business/summarizer')
17+
const AggregatorService = require('../../business/aggregator')
1518

1619
describe('Definition Service', () => {
1720
it('invalidates single coordinate', async () => {
@@ -310,6 +313,68 @@ describe('Definition Service Facet management', () => {
310313
})
311314
})
312315

316+
describe('Integration test', () => {
317+
let fileHarvestStore
318+
beforeEach(() => {
319+
fileHarvestStore = createFileHarvestStore()
320+
})
321+
322+
it('computes the same definition with latest harvest data', async () => {
323+
const coordinates = EntityCoordinates.fromString('npm/npmjs/-/debug/3.1.0')
324+
const allHarvestData = await fileHarvestStore.getAll(coordinates)
325+
delete allHarvestData['scancode']['2.9.0+b1'] //remove invalid scancode version
326+
let service = setupDefinitionService(allHarvestData)
327+
const baseline_def = await service.compute(coordinates)
328+
329+
const latestHarvestData = await fileHarvestStore.getAllLatest(coordinates)
330+
service = setupDefinitionService(latestHarvestData)
331+
const comparison_def = await service.compute(coordinates)
332+
333+
//updated timestamp is not deterministic
334+
expect(comparison_def._meta.updated).to.not.equal(baseline_def._meta.updated)
335+
comparison_def._meta.updated = baseline_def._meta.updated
336+
expect(comparison_def).to.deep.equal(baseline_def)
337+
})
338+
})
339+
340+
function createFileHarvestStore() {
341+
const options = {
342+
location: 'test/fixtures/store',
343+
logger: {
344+
error: () => {},
345+
debug: () => {}
346+
}
347+
}
348+
return FileHarvestStore(options)
349+
}
350+
351+
function setupDefinitionService(rawHarvestData) {
352+
const harvestStore = { getAllLatest: () => Promise.resolve(rawHarvestData) }
353+
const summary = SummaryService({})
354+
355+
const tools = [['clearlydefined', 'reuse', 'licensee', 'scancode', 'fossology', 'cdsource']]
356+
const aggregator = AggregatorService({ precedence: tools })
357+
aggregator.logger = { info: sinon.stub() }
358+
const curator = {
359+
get: () => Promise.resolve(),
360+
apply: (_coordinates, _curationSpec, definition) => Promise.resolve(definition),
361+
autoCurate: () => {}
362+
}
363+
return setupWithDelegates(curator, harvestStore, summary, aggregator)
364+
}
365+
366+
function setupWithDelegates(curator, harvestStore, summary, aggregator) {
367+
const store = { delete: sinon.stub(), get: sinon.stub(), store: sinon.stub() }
368+
const search = { delete: sinon.stub(), store: sinon.stub() }
369+
const cache = { delete: sinon.stub(), get: sinon.stub(), set: sinon.stub() }
370+
371+
const harvestService = { harvest: () => sinon.stub() }
372+
const service = DefinitionService(harvestStore, harvestService, summary, aggregator, curator, store, search, cache)
373+
service.logger = { info: sinon.stub(), debug: () => {} }
374+
service._harvest = sinon.stub()
375+
return service
376+
}
377+
313378
function validate(definition) {
314379
// Tack on a dummy coordinates to keep the schema happy. Tool summarizations do not have to include coordinates
315380
definition.coordinates = { type: 'npm', provider: 'npmjs', namespace: null, name: 'foo', revision: '1.0' }
@@ -342,7 +407,7 @@ function setup(definition, coordinateSpec, curation) {
342407
return
343408
}
344409
}
345-
const harvestStore = { getAll: () => Promise.resolve(null) }
410+
const harvestStore = { getAllLatest: () => Promise.resolve(null) }
346411
const harvestService = { harvest: () => sinon.stub() }
347412
const summary = { summarizeAll: () => Promise.resolve(null) }
348413
const aggregator = { process: () => Promise.resolve(definition) }

test/fixtures/store/npm/npmjs/-/debug/revision/3.1.0/tool/clearlydefined/1.1.2.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

test/fixtures/store/npm/npmjs/-/debug/revision/3.1.0/tool/clearlydefined/1.3.4.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

test/fixtures/store/npm/npmjs/-/debug/revision/3.1.0/tool/clearlydefined/1.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)