diff --git a/__tests__/common/profile/import.ts b/__tests__/common/profile/import.ts index 56c7348ea..82e2eb64c 100644 --- a/__tests__/common/profile/import.ts +++ b/__tests__/common/profile/import.ts @@ -59,6 +59,7 @@ describe('UserExperienceType work import', () => { verified: false, createdAt: expect.any(Date), updatedAt: expect.any(Date), + flags: {}, }); const skills = await con .getRepository(UserExperienceSkill) @@ -96,6 +97,7 @@ describe('UserExperienceType work import', () => { updatedAt: expect.any(Date), userId: 'user-work-2', verified: false, + flags: {}, }); }); }); @@ -131,6 +133,7 @@ describe('UserExperienceType education import', () => { grade: null, createdAt: expect.any(Date), updatedAt: expect.any(Date), + flags: {}, }); }); @@ -163,6 +166,7 @@ describe('UserExperienceType education import', () => { grade: null, createdAt: expect.any(Date), updatedAt: expect.any(Date), + flags: {}, }); }); }); @@ -200,6 +204,7 @@ describe('UserExperienceType certification import', () => { url: null, createdAt: expect.any(Date), updatedAt: expect.any(Date), + flags: {}, }); }); @@ -235,6 +240,7 @@ describe('UserExperienceType certification import', () => { url: null, createdAt: expect.any(Date), updatedAt: expect.any(Date), + flags: {}, }); }); }); @@ -273,6 +279,7 @@ describe('UserExperienceType project import', () => { url: null, createdAt: expect.any(Date), updatedAt: expect.any(Date), + flags: {}, }); expect(skills.map((s) => s.value).sort()).toEqual( ['GraphQL', 'Node.js'].sort(), @@ -308,6 +315,7 @@ describe('UserExperienceType project import', () => { url: null, createdAt: expect.any(Date), updatedAt: expect.any(Date), + flags: {}, }); }); }); diff --git a/bin/importProfileFromJSON.ts b/bin/importProfileFromJSON.ts index c792780f0..b705eb92a 100644 --- a/bin/importProfileFromJSON.ts +++ b/bin/importProfileFromJSON.ts @@ -3,17 +3,26 @@ import '../src/config'; import { parseArgs } from 'node:util'; import { z } from 'zod'; import createOrGetConnection from '../src/db'; -import { type DataSource } from 'typeorm'; -import { readFile } from 'node:fs/promises'; +import { QueryFailedError, type DataSource } from 'typeorm'; +import { readFile, stat, readdir } from 'node:fs/promises'; import { importUserExperienceFromJSON } from '../src/common/profile/import'; +import path from 'node:path'; +import { randomUUID } from 'node:crypto'; /** * Import profile from JSON to user by id * - * npx ts-node bin/importProfileFromJSON.ts --path ~/Downloads/testuser.json -u testuser + * Single file usage: + * + * npx ts-node bin/importProfileFromJSON.ts --path ~/Downloads/testuser.json + * + * Directory usage: + * + * npx ts-node bin/importProfileFromJSON.ts --path ~/Downloads/profiles --limit 100 --offset 0 --import import_run_test */ const main = async () => { let con: DataSource | null = null; + let failedImports = 0; try { const { values } = parseArgs({ @@ -22,28 +31,103 @@ const main = async () => { type: 'string', short: 'p', }, - userId: { + limit: { + type: 'string', + short: 'l', + }, + offset: { + type: 'string', + short: 'o', + }, + uid: { type: 'string', - short: 'u', }, }, }); const paramsSchema = z.object({ path: z.string().nonempty(), - userId: z.string().nonempty(), + limit: z.coerce.number().int().positive().default(10), + offset: z.coerce.number().int().positive().default(0), + uid: z.string().nonempty().default(randomUUID()), }); const params = paramsSchema.parse(values); + console.log(`Starting import with ID: ${params.uid}`); + con = await createOrGetConnection(); - const dataJSON = JSON.parse(await readFile(params.path, 'utf-8')); + const pathStat = await stat(params.path); + + let filePaths = [params.path]; + + if (pathStat.isDirectory()) { + filePaths = await readdir(params.path, 'utf-8'); + } + + filePaths.sort(); // ensure consistent order for offset/limit - await importUserExperienceFromJSON({ - con: con.manager, - dataJson: dataJSON, - userId: params.userId, + console.log(`Found files: ${filePaths.length}`); + + console.log( + `Importing: ${Math.min(params.limit, filePaths.length)} (limit ${params.limit}, offset ${params.offset})`, + ); + + await con.transaction(async (entityManager) => { + for (const [index, fileName] of filePaths + .slice(params.offset, params.offset + params.limit) + .entries()) { + const filePath = + params.path === fileName + ? fileName + : path.join(params.path, fileName); + + try { + if (!filePath.endsWith('.json')) { + throw { type: 'not_json_ext', filePath }; + } + + const userId = filePath.split('/').pop()?.split('.json')[0]; + + if (!userId) { + throw { type: 'no_user_id', filePath }; + } + + const dataJSON = JSON.parse(await readFile(filePath, 'utf-8')); + + await importUserExperienceFromJSON({ + con: entityManager, + dataJson: dataJSON, + userId, + importId: params.uid, + }); + } catch (error) { + failedImports += 1; + + if (error instanceof QueryFailedError) { + console.error({ + type: 'db_query_failed', + message: error.message, + query: error.query, + filePath, + }); + } else if (error instanceof z.ZodError) { + console.error({ + type: 'zod_error', + message: error.issues[0].message, + path: error.issues[0].path, + filePath, + }); + } else { + console.error(error); + } + } + + if (index && index % 100 === 0) { + console.log(`Done so far: ${index}, failed: ${failedImports}`); + } + } }); } catch (error) { console.error(error instanceof z.ZodError ? z.prettifyError(error) : error); @@ -52,6 +136,12 @@ const main = async () => { con.destroy(); } + if (failedImports > 0) { + console.log(`Failed imports: ${failedImports}`); + } else { + console.log('Done!'); + } + process.exit(0); } }; diff --git a/src/common/profile/import.ts b/src/common/profile/import.ts index 8c3089395..60086eb7b 100644 --- a/src/common/profile/import.ts +++ b/src/common/profile/import.ts @@ -11,7 +11,7 @@ import { Company } from '../../../src/entity/Company'; import { UserExperienceWork } from '../../../src/entity/user/experiences/UserExperienceWork'; import { insertOrIgnoreUserExperienceSkills } from '../../../src/entity/user/experiences/UserExperienceSkill'; import { textFromEnumValue } from '../../../src/common'; -import { LocationType } from '@dailydotdev/schema'; +import { EmploymentType, LocationType } from '@dailydotdev/schema'; import { DatasetLocation } from '../../../src/entity/dataset/DatasetLocation'; import { UserExperienceEducation } from '../../../src/entity/user/experiences/UserExperienceEducation'; import { UserExperienceCertification } from '../../../src/entity/user/experiences/UserExperienceCertification'; @@ -37,8 +37,9 @@ const resolveUserCompanyPart = async ({ const company = await con .getRepository(Company) .createQueryBuilder() + .setParameter('companyName', name) .addSelect('id') - .addSelect(`similarity(name, '${name}')`, 'similarity') + .addSelect(`similarity(name, :companyName)`, 'similarity') .orderBy('similarity', 'DESC') .getRawOne & { similarity: number }>(); @@ -58,7 +59,11 @@ const resolveUserLocationPart = async ({ con, threshold = 0.5, }: { - location?: Partial>; + location?: { + city?: string | null; + subdivision?: string | null; + country?: string | null; + } | null; con: EntityManager; threshold?: number; }): Promise>> => { @@ -72,26 +77,32 @@ const resolveUserLocationPart = async ({ .addSelect('id'); if (location.city) { - datasetLocationQb.addSelect( - `coalesce(similarity(city, '${location.city}'), 0)`, - 'similarityCity', - ); + datasetLocationQb + .setParameter('locationCity', location.city) + .addSelect( + `coalesce(similarity(city, :locationCity), 0)`, + 'similarityCity', + ); datasetLocationQb.addOrderBy('"similarityCity"', 'DESC'); } if (location.subdivision) { - datasetLocationQb.addSelect( - `coalesce(similarity(subdivision, '${location.subdivision}'), 0)`, - 'similaritySubdivision', - ); + datasetLocationQb + .setParameter('locationSubdivision', location.subdivision) + .addSelect( + `coalesce(similarity(subdivision, :locationSubdivision), 0)`, + 'similaritySubdivision', + ); datasetLocationQb.addOrderBy('"similaritySubdivision"', 'DESC'); } if (location.country) { - datasetLocationQb.addSelect( - `coalesce(similarity(country, '${location.country}'), 0)`, - 'similarityCountry', - ); + datasetLocationQb + .setParameter('locationCountry', location.country) + .addSelect( + `coalesce(similarity(country, :locationCountry), 0)`, + 'similarityCountry', + ); datasetLocationQb.addOrderBy('"similarityCountry"', 'DESC'); } @@ -137,10 +148,13 @@ export const importUserExperienceWork = async ({ skills, ended_at: endedAt, location, + flags, + employment_type: employmentType, } = userExperience; const insertResult = await con.getRepository(UserExperienceWork).insert( con.getRepository(UserExperienceWork).create({ + flags, userId: userId, ...(await resolveUserCompanyPart({ name: company, @@ -153,7 +167,6 @@ export const importUserExperienceWork = async ({ locationType: locationType ? (Object.entries(LocationType).find(([, value]) => { return ( - // TODO cv-parsing remove this replace when cv is adjusted to not use prefix locationType.replace('LOCATION_TYPE_', '') === textFromEnumValue(LocationType, value) ); @@ -163,6 +176,14 @@ export const importUserExperienceWork = async ({ location, con: con, })), + employmentType: employmentType + ? (Object.entries(EmploymentType).find(([, value]) => { + return ( + employmentType.replace('EMPLOYMENT_TYPE_', '') === + textFromEnumValue(EmploymentType, value) + ); + })?.[1] as EmploymentType) + : undefined, }), ); @@ -188,7 +209,6 @@ export const importUserExperienceEducation = async ({ }): Promise<{ experienceId: string }> => { const userExperience = userExperienceEducationImportSchema.parse(data); - // TODO cv-parsing potentially won't be needed once cv is adjusted to use camelCase const { company, title, @@ -198,10 +218,13 @@ export const importUserExperienceEducation = async ({ ended_at: endedAt, location, subtitle, + flags, + grade, } = userExperience; const insertResult = await con.getRepository(UserExperienceEducation).insert( con.getRepository(UserExperienceEducation).create({ + flags, userId: userId, ...(await resolveUserCompanyPart({ name: company, @@ -216,6 +239,7 @@ export const importUserExperienceEducation = async ({ con: con, })), subtitle, + grade, }), ); @@ -246,12 +270,15 @@ export const importUserExperienceCertification = async ({ title, started_at: startedAt, ended_at: endedAt, + flags, + url, } = userExperience; const insertResult = await con .getRepository(UserExperienceCertification) .insert( con.getRepository(UserExperienceCertification).create({ + flags, userId: userId, ...(await resolveUserCompanyPart({ name: company, @@ -260,6 +287,7 @@ export const importUserExperienceCertification = async ({ title, startedAt, endedAt, + url, }), ); @@ -287,15 +315,19 @@ export const importUserExperienceProject = async ({ started_at: startedAt, ended_at: endedAt, skills, + flags, + url, } = userExperience; const insertResult = await con.getRepository(UserExperienceProject).insert( con.getRepository(UserExperienceProject).create({ + flags, userId: userId, title, description, startedAt, endedAt, + url, }), ); @@ -314,31 +346,59 @@ export const importUserExperienceFromJSON = async ({ con, dataJson, userId, + importId, + transaction = false, }: { con: EntityManager; dataJson: unknown; userId: string; + importId?: string; + transaction?: boolean; }) => { if (!userId) { throw new Error('userId is required'); } const data = z - .array( - userExperienceInputBaseSchema - .pick({ - type: true, - }) - .loose(), + .preprocess( + (item) => { + if (item === null) { + return []; + } + + if (typeof item === 'object' && !Array.isArray(item)) { + return []; + } + + return item; + }, + z.array( + userExperienceInputBaseSchema + .pick({ + type: true, + }) + .loose(), + ), ) .parse(dataJson); - await con.transaction(async (entityManager) => { + const transactionFn = async ( + callback: (entityManager: EntityManager) => Promise, + ) => { + return transaction ? con.transaction(callback) : callback(con); + }; + + await transactionFn(async (entityManager) => { for (const item of data) { - switch (item.type) { + const importData = { + ...item, + flags: importId ? { import: importId } : undefined, + }; + + switch (importData.type) { case UserExperienceType.Work: await importUserExperienceWork({ - data: item, + data: importData, con: entityManager, userId, }); @@ -346,7 +406,7 @@ export const importUserExperienceFromJSON = async ({ break; case UserExperienceType.Education: await importUserExperienceEducation({ - data: item, + data: importData, con: entityManager, userId, }); @@ -354,7 +414,7 @@ export const importUserExperienceFromJSON = async ({ break; case UserExperienceType.Certification: await importUserExperienceCertification({ - data: item, + data: importData, con: entityManager, userId, }); @@ -364,14 +424,14 @@ export const importUserExperienceFromJSON = async ({ case UserExperienceType.OpenSource: case UserExperienceType.Volunteering: await importUserExperienceProject({ - data: item, + data: importData, con: entityManager, userId, }); break; default: - throw new Error(`Unsupported experience type: ${item.type}`); + throw new Error(`Unsupported experience type: ${importData.type}`); } } }); diff --git a/src/common/schema/common.ts b/src/common/schema/common.ts index 84386c1b5..af592d99f 100644 --- a/src/common/schema/common.ts +++ b/src/common/schema/common.ts @@ -16,3 +16,21 @@ export const paginationSchema = z.object({ }); export type PaginationArgs = z.infer; + +const urlStartRegexMatch = /^https?:\/\//; + +// match http(s) urls and partials like daily.dev (without protocol ) +export const urlParseSchema = z.preprocess( + (val) => { + if (typeof val === 'string') { + return val.match(urlStartRegexMatch) ? val : `https://${val}`; + } + + return val; + }, + z.url({ + protocol: /^https?$/, + hostname: z.regexes.domain, + normalize: true, + }), +); diff --git a/src/common/schema/profile.ts b/src/common/schema/profile.ts index 09e26ef45..613426a3e 100644 --- a/src/common/schema/profile.ts +++ b/src/common/schema/profile.ts @@ -1,6 +1,6 @@ import z from 'zod'; import { UserExperienceType } from '../../entity/user/experiences/types'; -import { paginationSchema } from './common'; +import { paginationSchema, urlParseSchema } from './common'; export const userExperiencesSchema = z .object({ @@ -100,51 +100,85 @@ export const getExperienceSchema = (type: UserExperienceType) => { export const userExperienceWorkImportSchema = z.object({ type: z.string(), - company: z.string(), - title: z.string(), - description: z.string().optional(), + company: z.string().nullish(), + title: z + .string() + .nullish() + .transform((n) => (n === null ? undefined : n)) + .default('Work experience'), + description: z.string().nullish(), started_at: z.coerce.date().default(() => new Date()), - location_type: z.string().optional(), - skills: z.array(z.string()).optional(), + location_type: z.string().nullish(), + skills: z + .array(z.string()) + .nullish() + .transform((n) => (n === null ? undefined : n)) + .default([]), ended_at: z.coerce.date().nullish().default(null), location: z .object({ - city: z.string().optional(), - country: z.string(), + city: z.string().nullish(), + country: z.string().nullish(), }) - .optional(), + .nullish(), + flags: z.object({ import: z.string() }).partial().optional(), + employment_type: z.string().nullish(), }); export const userExperienceEducationImportSchema = z.object({ type: z.string(), - company: z.string().optional(), - title: z.string(), - description: z.string().optional(), + company: z.string().nullish(), + title: z + .string() + .nullish() + .transform((n) => (n === null ? undefined : n)) + .default('Education'), + description: z.string().nullish(), started_at: z.coerce.date().default(() => new Date()), ended_at: z.coerce.date().nullish().default(null), location: z .object({ - city: z.string().optional(), - country: z.string(), + city: z.string().nullish(), + country: z.string().nullish(), }) - .optional(), - skills: z.array(z.string()).optional(), - subtitle: z.string().optional(), + .nullish(), + skills: z + .array(z.string()) + .nullish() + .transform((n) => (n === null ? undefined : n)), + subtitle: z.string().nullish(), + flags: z.object({ import: z.string() }).partial().optional(), + grade: z.string().nullish(), }); export const userExperienceCertificationImportSchema = z.object({ type: z.string(), - company: z.string().optional(), - title: z.string(), + company: z.string().nullish(), + title: z + .string() + .nullish() + .transform((n) => (n === null ? undefined : n)) + .default('Certification'), started_at: z.coerce.date().default(() => new Date()), ended_at: z.coerce.date().nullish().default(null), + flags: z.object({ import: z.string() }).partial().optional(), + url: urlParseSchema.nullish().catch(undefined), }); export const userExperienceProjectImportSchema = z.object({ type: z.string(), - title: z.string(), - description: z.string(), + title: z + .string() + .nullish() + .transform((n) => (n === null ? undefined : n)) + .default('Project'), + description: z.string().nullish(), started_at: z.coerce.date().default(() => new Date()), ended_at: z.coerce.date().nullish().default(null), - skills: z.array(z.string()), + skills: z + .array(z.string()) + .nullish() + .transform((n) => (n === null ? undefined : n)), + flags: z.object({ import: z.string() }).partial().optional(), + url: urlParseSchema.nullish().catch(undefined), }); diff --git a/src/entity/user/experiences/UserExperience.ts b/src/entity/user/experiences/UserExperience.ts index 797f59f3e..8df32d5d6 100644 --- a/src/entity/user/experiences/UserExperience.ts +++ b/src/entity/user/experiences/UserExperience.ts @@ -15,6 +15,10 @@ import type { Company } from '../../Company'; import { LocationType } from '@dailydotdev/schema'; import type { DatasetLocation } from '../../dataset/DatasetLocation'; +export type UserExperienceFlags = Partial<{ + import: string; +}>; + @Entity() @TableInheritance({ column: { type: 'text', name: 'type' } }) export class UserExperience { @@ -84,4 +88,7 @@ export class UserExperience { @UpdateDateColumn({ type: 'timestamp' }) updatedAt: Date; + + @Column({ type: 'jsonb', default: {} }) + flags: UserExperienceFlags; } diff --git a/src/migration/1763996658211-UserExperienceFlagsImport.ts b/src/migration/1763996658211-UserExperienceFlagsImport.ts new file mode 100644 index 000000000..04c52ffd9 --- /dev/null +++ b/src/migration/1763996658211-UserExperienceFlagsImport.ts @@ -0,0 +1,19 @@ +import { MigrationInterface, QueryRunner } from 'typeorm'; + +export class UserExperienceFlagsImport1763996658211 + implements MigrationInterface +{ + name = 'UserExperienceFlagsImport1763996658211'; + + public async up(queryRunner: QueryRunner): Promise { + await queryRunner.query( + `ALTER TABLE "user_experience" ADD "flags" jsonb NOT NULL DEFAULT '{}'`, + ); + } + + public async down(queryRunner: QueryRunner): Promise { + await queryRunner.query( + `ALTER TABLE "user_experience" DROP COLUMN "flags"`, + ); + } +} diff --git a/src/workers/opportunity/parseCVProfile.ts b/src/workers/opportunity/parseCVProfile.ts index 298636022..f13b75601 100644 --- a/src/workers/opportunity/parseCVProfile.ts +++ b/src/workers/opportunity/parseCVProfile.ts @@ -92,6 +92,7 @@ export const parseCVProfileWorker: TypedWorker<'api.v1.candidate-preference-upda con: con.manager, dataJson, userId, + transaction: true, }); } catch (error) { // revert to previous date on error