Skip to content

Commit fd852e4

Browse files
committed
chore: add script for pruning and refreshing member organizations
1 parent bd1a315 commit fd852e4

5 files changed

Lines changed: 142 additions & 1 deletion

File tree

services/apps/script_executor_worker/src/activities.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,11 @@ import {
4141
} from './activities/merge-members-with-similar-identities'
4242
import { getUnprocessedLLMApprovedSuggestions } from './activities/process-llm-verified-merges'
4343
import {
44+
getMemberOrganizationsToPrune,
4445
getOrganizationsToPrune,
46+
pruneMemberOrganization,
4547
pruneOrganization,
48+
refreshMemberAffiliations,
4649
} from './activities/prune-duplicate-organizations'
4750
import { deleteIndexedEntities } from './activities/sync/entity-index'
4851
import { getMembersForSync, syncMembersBatch } from './activities/sync/member'
@@ -86,4 +89,7 @@ export {
8689
calculateMemberAffiliations,
8790
getOrganizationsToPrune,
8891
pruneOrganization,
92+
getMemberOrganizationsToPrune,
93+
pruneMemberOrganization,
94+
refreshMemberAffiliations,
8995
}

services/apps/script_executor_worker/src/activities/prune-duplicate-organizations.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1+
import { refreshMemberOrganizationAffiliations } from '@crowd/data-access-layer/src/member-organization-affiliation'
2+
import { deleteMemberOrganizations } from '@crowd/data-access-layer/src/members'
13
import OrganizationRepository from '@crowd/data-access-layer/src/old/apps/script_executor_worker/organization.repo'
4+
import { pgpQx } from '@crowd/data-access-layer/src/queryExecutor'
25

36
import { svc } from '../main'
47

@@ -23,3 +26,38 @@ export async function getOrganizationsToPrune(
2326
throw error
2427
}
2528
}
29+
30+
export async function getMemberOrganizationsToPrune(
31+
batchSize: number,
32+
): Promise<{ id: string; memberId: string }[]> {
33+
try {
34+
const orgRepo = new OrganizationRepository(svc.postgres.reader.connection(), svc.log)
35+
return orgRepo.getMemberOrganizationsToPrune(batchSize)
36+
} catch (error) {
37+
svc.log.error(error, 'Error getting member organizations to prune!')
38+
throw error
39+
}
40+
}
41+
42+
export async function pruneMemberOrganization(
43+
memberOrganizationId: string,
44+
memberId: string,
45+
): Promise<void> {
46+
try {
47+
const qx = pgpQx(svc.postgres.writer.connection())
48+
await deleteMemberOrganizations(qx, memberId, [memberOrganizationId], false)
49+
} catch (error) {
50+
svc.log.error(error, 'Error pruning member organization!')
51+
throw error
52+
}
53+
}
54+
55+
export async function refreshMemberAffiliations(memberId: string): Promise<void> {
56+
try {
57+
const qx = pgpQx(svc.postgres.writer.connection())
58+
await refreshMemberOrganizationAffiliations(qx, memberId)
59+
} catch (error) {
60+
svc.log.error(error, 'Error refreshing member affiliations!')
61+
throw error
62+
}
63+
}

services/apps/script_executor_worker/src/workflows.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { fixBotMembersAffiliation } from './workflows/fix-bot-members-affiliatio
88
import { fixOrgIdentitiesWithWrongUrls } from './workflows/fixOrgIdentitiesWithWrongUrls'
99
import { processLLMVerifiedMerges } from './workflows/processLLMVerifiedMerges'
1010
import { pruneDuplicateOrganizations } from './workflows/prune-duplicate-organizations'
11+
import { pruneIncorrectMemberOrganizations } from './workflows/prune-incorrect-member-organizations'
1112
import { syncMembers } from './workflows/sync/members'
1213
import { syncOrganizations } from './workflows/sync/organizations'
1314

@@ -24,4 +25,5 @@ export {
2425
pruneDuplicateOrganizations,
2526
fixBotMembersAffiliation,
2627
blockOrganizationAffiliation,
28+
pruneIncorrectMemberOrganizations,
2729
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import { proxyActivities } from '@temporalio/workflow'
2+
3+
import * as activities from '../activities'
4+
import { IScriptBatchTestArgs } from '../types'
5+
import { chunkArray } from '../utils/common'
6+
7+
const { getMemberOrganizationsToPrune, pruneMemberOrganization, refreshMemberAffiliations } =
8+
proxyActivities<typeof activities>({
9+
startToCloseTimeout: '30 minutes',
10+
retry: { maximumAttempts: 3, backoffCoefficient: 3 },
11+
})
12+
13+
export async function pruneIncorrectMemberOrganizations(args: IScriptBatchTestArgs): Promise<void> {
14+
const BATCH_SIZE = args.batchSize ?? 100
15+
16+
const memberOrganizationsToPrune = await getMemberOrganizationsToPrune(BATCH_SIZE)
17+
18+
if (memberOrganizationsToPrune.length === 0) {
19+
console.log('No more member organizations to prune!')
20+
return
21+
}
22+
23+
const memberIdsToRefresh = new Set<string>()
24+
const CHUNK_SIZE = 25
25+
26+
for (const chunk of chunkArray(memberOrganizationsToPrune, CHUNK_SIZE)) {
27+
const cleanupTasks = chunk.map(async (mo) => {
28+
console.log('Pruning member organization', mo.id)
29+
await pruneMemberOrganization(mo.id, mo.memberId)
30+
memberIdsToRefresh.add(mo.memberId)
31+
})
32+
33+
await Promise.all(cleanupTasks).catch((err) => {
34+
console.error('Error pruning member organizations!', err)
35+
})
36+
}
37+
38+
for (const memberId of memberIdsToRefresh) {
39+
console.log('Refreshing member affiliations', memberId)
40+
await refreshMemberAffiliations(memberId)
41+
}
42+
}

services/libs/data-access-layer/src/old/apps/script_executor_worker/organization.repo.ts

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -254,14 +254,67 @@ class OrganizationRepository {
254254
FROM "memberOrganizations" mo
255255
WHERE mo."organizationId" = o.id
256256
AND mo."deletedAt" IS NULL
257-
AND (mo.title IS NOT NULL AND mo.title != '')
257+
AND (mo.title IS NOT NULL OR mo.title != '')
258258
AND (mo.source IS NOT NULL AND mo.source NOT IN ('email-domain'))
259259
)
260260
LIMIT $(batchSize);
261261
`,
262262
{ batchSize },
263263
)
264264
}
265+
266+
public async getMemberOrganizationsToPrune(
267+
batchSize: number,
268+
): Promise<{ id: string; memberId: string }[]> {
269+
return this.connection.query(
270+
`
271+
WITH email_providers AS (
272+
SELECT unnest(ARRAY[
273+
'gmail.com', 'gmail.co.uk', 'gmail.com.au', 'gmail.com.tr',
274+
'yahoo.com', 'yahoo.co.uk', 'yahoo.com.br', 'yahoo.co.in', 'yahoo.fr', 'yahoo.es', 'yahoo.it', 'yahoo.de', 'yahoo.ca', 'yahoo.com.au', 'yahoo.in', 'yahoo.co.jp', 'yahoo.com.ar', 'yahoo.com.mx', 'yahoo.co.id', 'yahoo.com.sg', 'yahoo.co.za', 'yahoo.com.ph', 'yahoo.com.tw', 'yahoo.com.hk', 'yahoo.com.vn',
275+
'hotmail.com', 'hotmail.co.uk', 'hotmail.fr', 'hotmail.ca', 'hotmail.it', 'hotmail.es', 'hotmail.de', 'hotmail.com.au', 'hotmail.com.mx',
276+
'icloud.com', 'icloud.com.cn',
277+
'fastmail.com', 'tutanota.com', 'tuta.io',
278+
'gmx.com', 'gmx.de', 'gmx.net', 'gmx.at', 'gmx.ch', 'gmx.fr', 'gmx.co.uk',
279+
'aol.com', 'aol.co.uk', 'aol.fr', 'aol.de',
280+
'msn.com', 'wanadoo.fr', 'orange.fr', 'comcast.net',
281+
'live.com', 'live.co.uk', 'live.fr', 'live.nl', 'live.it', 'live.com.au', 'live.ca', 'live.cn',
282+
'rediffmail.com', 'sify.com', 'indiatimes.com', 'free.fr', 'web.de',
283+
'yandex.ru', 'yandex.com', 'yandex.com.tr', 'ya.ru',
284+
'ymail.com', 'libero.it',
285+
'outlook.com', 'outlook.fr', 'outlook.co.uk', 'outlook.de', 'outlook.es', 'outlook.it', 'outlook.com.au', 'outlook.com.br', 'outlook.com.mx', 'outlook.co.jp', 'outlook.in', 'outlook.com.sg', 'outlook.co.za', 'outlook.co.in',
286+
'uol.com.br', 'bol.com.br',
287+
'mail.ru', 'inbox.ru', 'list.ru', 'bk.ru',
288+
'mail.com', 'mail.de', 'mail.co.uk',
289+
'cox.net', 'sbcglobal.net', 'sfr.fr', 'verizon.net', 'googlemail.com', 'ig.com.br', 'bigpond.com', 'bigpond.net.au', 'terra.com.br', 'neuf.fr', 'alice.it', 'rocketmail.com', 'att.net', 'laposte.net', 'bellsouth.net', 'charter.net', 'rambler.ru', 'tiscali.it', 'tiscali.co.uk', 'shaw.ca', 'sky.com', 'earthlink.net', 'optonline.net', 'freenet.de', 't-online.de', 'aliceadsl.fr', 'virgilio.it', 'home.nl', 'qq.com', 'vip.qq.com', 'telenet.be', 'pandora.be', 'me.com', 'voila.fr', 'planet.nl', 'tin.it', 'ntlworld.com', 'arcor.de', 'frontiernet.net', 'hetnet.nl', 'zonnet.nl', 'club-internet.fr', 'juno.com', 'optusnet.com.au', 'blueyonder.co.uk', 'bluewin.ch', 'skynet.be', 'sympatico.ca', 'windstream.net', 'mac.com', 'centurytel.net', 'chello.nl', 'aim.com',
290+
'protonmail.com', 'protonmail.ch', 'proton.me', 'pm.me', 'duck.com',
291+
'zoho.com', 'zohomail.com',
292+
'users.noreply.github.com',
293+
'126.com', '139.com', '163.com', '188.com', 'foxmail.com', 'tom.com', '21cn.com', 'yeah.net',
294+
'naver.com', 'daum.net', 'hanmail.net',
295+
'hey.com', 'inbox.com', 'lycos.com', 'excite.com', 'hushmail.com', 'mailfence.com', 'mailbox.org', 'posteo.de', 'startmail.com', 'runbox.com', 'countermail.com', 'mynet.com',
296+
'wp.pl', 'onet.pl', 'interia.pl', 'o2.pl',
297+
'seznam.cz', 'centrum.cz',
298+
'mailinator.com', 'guerrillamail.com', '10minutemail.com', 'tempmail.com'
299+
]) AS provider
300+
),
301+
orgs_with_email_provider AS (
302+
SELECT DISTINCT o.id
303+
FROM organizations o
304+
INNER JOIN "organizationIdentities" oi ON o.id = oi."organizationId"
305+
INNER JOIN email_providers ep ON LOWER(oi.value) = ep.provider
306+
WHERE o."deletedAt" IS NULL
307+
AND oi.type = 'primary-domain'
308+
)
309+
SELECT DISTINCT mo.id, mo."memberId"
310+
FROM "memberOrganizations" mo
311+
INNER JOIN orgs_with_email_provider oep ON mo."organizationId" = oep.id
312+
WHERE mo.source = 'email-domain' AND (mo.title IS NULL OR mo.title = '')
313+
LIMIT $(batchSize);
314+
`,
315+
{ batchSize },
316+
)
317+
}
265318
}
266319

267320
export default OrganizationRepository

0 commit comments

Comments
 (0)