how can I do in KQL to compare if a value contains another value in the same table in KQL?

Copper Contributor

Hi everyone,

 

I used the default rule "SharePointFileOperation across devices with previously unseen user agents" and I need to enhance the rule so that if the "USER_ID" column contains a variable I created. This variable performs a replacement of the above name with regex and removes the values beyond the "@". I have tried to make a "contain" expression but it doesn't work...is there another way...?

 

let threshold = 5;
let szSharePointFileOperation = "SharePointFileOperation";
let szOperations = dynamic(["FileDownloaded", "FileUploaded"]);
let starttime = 14d;
let endtime = 1d;
let historicalActivity =
    OfficeActivity
    | where TimeGenerated between(ago(starttime) .. ago(endtime))
    | where RecordType =~ szSharePointFileOperation
    | where Operation in~ (szOperations)
    | where isnotempty(UserAgent)
    | summarize historicalCount = count() by UserAgent, RecordType, Operation;
let ReplacedData =
    OfficeActivity
    | where TimeGenerated between(ago(starttime) .. ago(endtime))
    | where RecordType =~ szSharePointFileOperation
    | where Operation in~ (szOperations)
    | extend replaced=replace_regex(UserId, @'([^\\]*)@gmail.com', @'\1')
    | project replaced;
let recentActivity = OfficeActivity
    | where RecordType =~ szSharePointFileOperation
    | where Operation in~ (szOperations)
    | where TimeGenerated > ago(endtime)
    | where isnotempty(UserAgent)
    | summarize min(Start_Time), max(Start_Time), recentCount = count() by UserAgent, RecordType, Operation;
let RareUserAgent = recentActivity
    | join kind = leftanti (historicalActivity) on UserAgent
    | order by recentCount desc, UserAgent
    // More than 5 downloads/uploads from a new user agent today
    | where recentCount > threshold;
//OfficeActivity 
OfficeActivity 
| where TimeGenerated > ago(endtime) 
| where RecordType =~ szSharePointFileOperation 
| where Operation in~ (szOperations)
| where isnotempty(UserAgent)
| where Site_Url contains (ReplacedData)
| join kind= inner (RareUserAgent)
    on UserAgent, RecordType, Operation    
| where Start_Time between(min_Start_Time .. max_Start_Time)
| summarize StartTimeUtc = min(min_Start_Time), EndTimeUtc = max(max_Start_Time)
    by
    RecordType,
    Operation,
    UserAgent,
    UserType,
    UserId,
    ClientIP,
    OfficeWorkload,
    Site_Url,
    OfficeObjectId,
    UserAgentSeenCount = recentCount
| extend
    timestamp = StartTimeUtc,
    AccountCustomEntity = UserId,
    IPCustomEntity = ClientIP,
    URLCustomEntity = Site_Url
| extend replaced=replace_regex(UserId, @'([^\\]*)@gmail.com', @'\1')
| order by UserAgentSeenCount desc, UserAgent asc, Operation asc, UserId asc, replaced

 

Any idea how to get this query to work correctly. Or another way to come up with a solution.

 

Thanks.

Regards.

2 Replies
| where Site_Url in (ReplacedData)

You should use "in" rather than "contains" for the above line

Hi @Clive_Watson 

 

But the problem is that I would need that the string is not equal but that it is with the sentence "contains" or similar, because when using "IN" I am indicating that the parameter is equal and I don't want it to be equal but to contain it, because all the parameters are not going to be equal.

 

Regards.