Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: import opco files #287

Merged
merged 5 commits into from
Sep 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions server/src/commands.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { captureException } from "@sentry/node";
import { program } from "commander";
import HttpTerminator from "lil-http-terminator";
import { ObjectId } from "mongodb";

import logger from "@/common/logger";
import { closeMongodbConnection } from "@/common/utils/mongodbUtils";
Expand Down Expand Up @@ -257,6 +258,22 @@ program
}
});

// CLI command `import:document`: hands a document id to `addJob` under the
// "import:document" job name. `--sync` is forwarded untouched to `addJob`.
program
  .command("import:document")
  .description("Import document content")
  .requiredOption("-dId, --documentId <string>", "Document Id")
  .option("-s, --sync", "Run job synchronously")
  .action(async (options) => {
    const exitCode = await addJob({
      name: "import:document",
      // The id arrives as a string from the command line; the job payload carries an ObjectId.
      payload: { document_id: new ObjectId(options.documentId) },
      sync: options.sync,
    });

    // A falsy exit code is treated as success: nothing more to do.
    if (!exitCode) return;

    program.error("Command failed", { exitCode });
  });

program
.command("documents:save-columns")
.description("Gets columns from documents and save them in database")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { Db, MongoClient } from "mongodb";

/**
 * Forward migration: for every `organisations` and `persons` document that has
 * a non-empty `_meta.source`, stores that value as the single element of a new
 * `_meta.sources` array and removes `_meta.source`.
 *
 * @param db - Database handle the updates are run against.
 * @param _client - Unused.
 */
export const up = async (db: Db, _client: MongoClient) => {
  // Built fresh for each call so the two updates never share object instances.
  const withSingleSource = () => ({
    "_meta.source": { $exists: true, $ne: "" },
  });
  const sourceToSources = () => [
    { $set: { "_meta.sources": ["$_meta.source"] } },
    { $unset: "_meta.source" },
  ];

  // Collections are migrated one after the other, organisations first.
  for (const collectionName of ["organisations", "persons"]) {
    await db.collection(collectionName).updateMany(withSingleSource(), sourceToSources());
  }
};

/**
 * Rollback migration: for every `organisations` and `persons` document that
 * has `_meta.sources`, copies the first element of that array into
 * `_meta.source` and removes `_meta.sources`.
 *
 * Each update is awaited so the returned promise only resolves once both
 * collections have been rewritten, and a failed update rejects it instead of
 * surfacing as an unhandled rejection.
 *
 * @param db - Database handle the updates are run against.
 * @param _client - Unused.
 */
export const down = async (db: Db, _client: MongoClient) => {
  // Collections are rolled back one after the other, organisations first.
  for (const collectionName of ["organisations", "persons"]) {
    await db.collection(collectionName).updateMany(
      {
        "_meta.sources": { $exists: true },
      },
      [
        {
          $set: {
            // Only the first recorded source survives the rollback.
            "_meta.source": { $arrayElemAt: ["$_meta.sources", 0] },
          },
        },
        {
          $unset: "_meta.sources",
        },
      ]
    );
  }
};
68 changes: 68 additions & 0 deletions server/src/modules/actions/constructys.actions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import { DOCUMENT_TYPES } from "shared/constants/documents";
import { SIRET_REGEX } from "shared/constants/regex";
import { getSirenFromSiret } from "shared/helpers/common";

import { getDbCollection } from "../../common/utils/mongodbUtils";
import { updateOrganisationData } from "./organisations.actions";

/**
 * Raw line accepted by `parseConstructysContentLine`.
 */
interface ContentLine {
// Checked against SIRET_REGEX by the parser.
Siret: string;
// Comma-separated list of emails; the parser skips the line when it is missing or empty.
Mails?: string;
}

/**
 * Parsed Constructys line, as returned by `parseConstructysContentLine` and
 * consumed by `importConstructysContent`.
 */
export interface IConstructysParsedContentLine {
// Siret taken as-is from the raw line.
siret: string;
// Emails kept by the parser (valid format, deduplicated).
emails: string[];
}

/**
 * Turns one raw Constructys line into a siret plus its list of usable emails.
 *
 * @param line - Raw line holding a `Siret` and an optional comma-separated `Mails` list.
 * @returns The siret and the unique, well-formed emails, or `undefined` when
 * the line has no `Mails`, the siret does not match `SIRET_REGEX`, or no
 * email survives validation.
 */
export const parseConstructysContentLine = (line: ContentLine): IConstructysParsedContentLine | undefined => {
  if (!line.Mails) return;
  if (!SIRET_REGEX.test(line.Siret)) return;

  const { Siret: siret } = line;
  const emailPattern = /^[^\s@]+@[^\s@]+\.[^\s@]+$/;

  // Entries are trimmed before validation: lists are commonly written as
  // "a@x.fr, b@y.fr", and the pattern rejects any whitespace, so without the
  // trim every address following a ", " separator would be silently dropped.
  const validEmails = line.Mails.split(",")
    .map((entry) => entry.trim())
    .filter((entry) => emailPattern.test(entry));

  // remove duplicates
  const emails = [...new Set(validEmails)];

  if (!emails.length) return;

  return { siret, emails };
};

/**
 * Imports one parsed Constructys line.
 *
 * Passes the siren, the siret and the email domains to
 * `updateOrganisationData` with the CONSTRUCTYS source, then upserts one
 * `persons` document per distinct email, adding the siret (and the
 * organisation id when an organisation was returned) to that person.
 *
 * @param content - Parsed line holding the siret and its emails.
 */
export const importConstructysContent = async (content: IConstructysParsedContentLine) => {
  const siret = content.siret;
  const distinctEmails = Array.from(new Set(content.emails));
  // Domain is whatever follows the "@" of each address, deduplicated.
  const emailDomains = Array.from(new Set(distinctEmails.map((address) => address.split("@")[1])));

  const organisation = await updateOrganisationData({
    siren: getSirenFromSiret(siret),
    sirets: [siret],
    email_domains: emailDomains,
    source: DOCUMENT_TYPES.CONSTRUCTYS,
  });

  // Upserts a single person keyed by email.
  const upsertPerson = (email: string) => {
    // The organisation link is only added when an organisation came back.
    const addToSet = organisation
      ? { organisations: organisation._id.toString(), sirets: siret }
      : { sirets: siret };

    return getDbCollection("persons").updateOne(
      { email },
      { $addToSet: addToSet, $setOnInsert: { email } },
      { upsert: true }
    );
  };

  await Promise.all(distinctEmails.map((email) => upsertPerson(email)));
};
40 changes: 9 additions & 31 deletions server/src/modules/actions/deca.actions.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import companyEmailValidator from "company-email-validator";
import { IPostRoutes, IResponse } from "shared";
import { DOCUMENT_TYPES } from "shared/constants/documents";
import { SIRET_REGEX } from "shared/constants/regex";
import { getSirenFromSiret } from "shared/helpers/common";
import { IOrganisation } from "shared/models/organisation.model";

import { getDbCollection } from "../../common/utils/mongodbUtils";
import { findOrCreateOrganisation, findOrganisation, updateOrganisation } from "./organisations.actions";
import { findOrganisation, updateOrganisationData } from "./organisations.actions";
import { findPerson } from "./persons.actions";

interface ContentLine {
Expand Down Expand Up @@ -41,34 +41,12 @@ export const importDecaContent = async (emails: string[], siret: string) => {
const siren = getSirenFromSiret(siret);
const domains = [...new Set(uniqueEmails.map((e) => e.split("@")[1]))];

const organisation = await findOrCreateOrganisation(
{ siren },
{
siren,
etablissements: [{ siret }],
email_domains: domains,
}
);

const updateOrganisationData: Partial<IOrganisation> = {};
const etablissement = organisation.etablissements?.find((e) => e.siret === siret);

if (!etablissement) {
const etablissements = organisation.etablissements ?? [];
etablissements.push({ siret });
updateOrganisationData.etablissements = etablissements;
}

const newDomains = domains.filter(
(domain) => !organisation.email_domains?.includes(domain) && companyEmailValidator.isCompanyDomain(domain)
);

if (newDomains.length) {
updateOrganisationData.email_domains = organisation.email_domains ?? [];
updateOrganisationData.email_domains?.push(...newDomains);
}

await updateOrganisation(organisation, updateOrganisationData);
const organisation = await updateOrganisationData({
siren,
sirets: [siret],
email_domains: domains,
source: DOCUMENT_TYPES.DECA,
});

await Promise.all(
uniqueEmails.map((email) =>
Expand All @@ -78,7 +56,7 @@ export const importDecaContent = async (emails: string[], siret: string) => {
},
{
$addToSet: {
organisations: organisation._id.toString(),
...(organisation && { organisations: organisation._id.toString() }),
sirets: siret,
},
$setOnInsert: {
Expand Down
38 changes: 33 additions & 5 deletions server/src/modules/actions/documents.actions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,15 @@ import { clamav } from "@/services";
import { deleteFromStorage, getFromStorage, uploadToStorage } from "../../common/utils/ovhUtils";
import { parseCsv } from "../../common/utils/parserUtils";
import { noop } from "../server/utils/upload.utils";
import {
IConstructysParsedContentLine,
importConstructysContent,
parseConstructysContentLine,
} from "./constructys.actions";
import { DECAParsedContentLine, importDecaContent, parseContentLine } from "./deca.actions";
import { createDocumentContent, deleteDocumentContent } from "./documentContent.actions";
import { MAILING_LIST_DOCUMENT_PREFIX } from "./mailingLists.actions";
import { importOcapiatContent, IOcapiatParsedContentLine, parseOcapiatContentLine } from "./ocapiat.actions";

const testMode = config.env === "test";

Expand Down Expand Up @@ -404,6 +410,21 @@ export const importDocumentContent = async <TFileLine = unknown, TContentLine =
continue;
}

if (document.type_document === DOCUMENT_TYPES.DECA) {
const decaContent = contentLine as unknown as DECAParsedContentLine;
await importDecaContent(decaContent.emails, decaContent.siret);
}

if (document.type_document === DOCUMENT_TYPES.OCAPIAT) {
await importOcapiatContent(contentLine as unknown as IOcapiatParsedContentLine);
return [];
}

if (document.type_document === DOCUMENT_TYPES.CONSTRUCTYS) {
await importConstructysContent(contentLine as unknown as IConstructysParsedContentLine);
return [];
}

const documentContent = await createDocumentContent({
content: contentLine,
document_id: document._id.toString(),
Expand All @@ -412,11 +433,6 @@ export const importDocumentContent = async <TFileLine = unknown, TContentLine =

if (!documentContent) continue;

if (document.type_document === DOCUMENT_TYPES.DECA) {
const decaContent = contentLine as unknown as DECAParsedContentLine;
await importDecaContent(decaContent.emails, decaContent.siret);
}

documentContents = [...documentContents, documentContent];
}

Expand Down Expand Up @@ -462,6 +478,18 @@ export const handleDocumentFileContent = async ({ document_id }: Record<"documen
formatter: parseContentLine,
});
break;
case DOCUMENT_TYPES.OCAPIAT:
await extractDocumentContent({
document,
formatter: parseOcapiatContentLine,
});
break;
case DOCUMENT_TYPES.CONSTRUCTYS:
await extractDocumentContent({
document,
formatter: parseConstructysContentLine,
});
break;

default:
await extractDocumentContent({ document });
Expand Down
96 changes: 96 additions & 0 deletions server/src/modules/actions/ocapiat.actions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import companyEmailValidator from "company-email-validator";
import { DOCUMENT_TYPES } from "shared/constants/documents";
import { getSirenFromSiret } from "shared/helpers/common";

import { getDbCollection } from "../../common/utils/mongodbUtils";
import { updateOrganisationData } from "./organisations.actions";

/**
 * One OCAPIAT content line. Every field is an optional string.
 * `parseOcapiatContentLine` both accepts and returns this shape, and
 * `importOcapiatContent` reads `Siret`, `Email du contact`, `Nom du contact`
 * and `Prénom du contact` from it.
 * NOTE(review): keys presumably mirror the column headers of the source file; confirm.
 */
export interface IOcapiatParsedContentLine {
"Région INSEE"?: string;
"GUH Etablissement"?: string;
"Code OPSI"?: string;
// Used by importOcapiatContent to derive the siren and as the siret of the organisation/person.
Siret?: string;
"Raison sociale"?: string;
Département?: string;
"Etablissement Sous Contrat"?: string;
"Nom du groupe"?: string;
"Effectif Etab moyen annuel"?: string;
"Effectif Etab"?: string;
"Taille Collecte"?: string;
Branche?: string;
"Branche Secteur"?: string;
"Type Branche"?: string;
Conseiller?: string;
Civilite?: string;
// Stored as `nom` when importOcapiatContent inserts a new person.
"Nom du contact"?: string;
// Stored as `prenom` when importOcapiatContent inserts a new person.
"Prénom du contact"?: string;
"Titre du contact"?: string;
"Fonction du contact"?: string;
"Tél contact"?: string;
"Mobile contact"?: string;
// Key used by importOcapiatContent to upsert the person.
"Email du contact"?: string;
}

/**
 * Cleans one OCAPIAT line by dropping every attribute whose value is exactly
 * "-", which is considered empty. All other attributes are kept unchanged and
 * in their original order.
 *
 * @param line - Line to clean.
 * @returns A new object without the "-" attributes.
 */
export const parseOcapiatContentLine = (line: IOcapiatParsedContentLine): IOcapiatParsedContentLine | undefined => {
  const meaningfulEntries = Object.entries(line).filter(([, value]) => value !== "-");
  const cleaned: IOcapiatParsedContentLine = Object.fromEntries(meaningfulEntries);

  return cleaned;
};

/**
 * Imports one parsed OCAPIAT line.
 *
 * Passes the siren, the siret and, when the contact email is a company email,
 * its domain to `updateOrganisationData` with the OCAPIAT source. If the line
 * carries a contact email, the matching `persons` document is then upserted:
 * the siret, the OCAPIAT source and the organisation id (when an organisation
 * was returned) are added, `updated_at` is refreshed, and name fields and
 * `created_at` are set only on insert.
 *
 * The upsert is awaited so the returned promise resolves only after the write
 * has completed, and a failed write rejects it rather than being lost as an
 * unhandled rejection.
 *
 * @param content - Parsed OCAPIAT line.
 */
export const importOcapiatContent = async (content: IOcapiatParsedContentLine) => {
  const siret = content?.Siret ?? "";
  const siren = getSirenFromSiret(siret);
  const email = content?.["Email du contact"];
  let domains: string[] = [];

  // Only company emails contribute a domain to the organisation.
  if (email && companyEmailValidator.isCompanyEmail(email)) {
    domains = [email.split("@")[1]];
  }

  const organisation = await updateOrganisationData({
    siren,
    sirets: [siret],
    email_domains: domains,
    source: DOCUMENT_TYPES.OCAPIAT,
  });

  // Without an email there is no key to upsert a person on.
  if (!email) return;

  const date = new Date();

  await getDbCollection("persons").updateOne(
    {
      email,
    },
    {
      $set: {
        updated_at: date,
      },
      $addToSet: {
        ...(organisation && { organisations: organisation._id.toString() }),
        sirets: siret,
        "_meta.sources": DOCUMENT_TYPES.OCAPIAT,
      },
      // Identity fields are written only when the person is created.
      $setOnInsert: {
        email,
        ...(content?.["Nom du contact"] && {
          nom: content?.["Nom du contact"],
        }),
        ...(content?.["Prénom du contact"] && {
          prenom: content?.["Prénom du contact"],
        }),
        created_at: date,
      },
    },
    {
      upsert: true,
    }
  );
};
Loading