Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion src/common/utils/events.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { AnalyticsEvent } from '../api/analyticsEvent';
import { Environment } from '../api/environment';
import { TelemetryEvent } from '../api/telemetry';
import { anonymizeFilePaths } from './telemetryUtils';

/**
* Enhances a `TelemetryEvent` by injecting environmental data to its properties and context
Expand Down Expand Up @@ -95,6 +96,7 @@ import { TelemetryEvent } from '../api/telemetry';
*/
/** Usernames that the tests in this project expect to be left as-is rather than replaced with `_username_`. */
export const IGNORED_USERS = ['user', 'gitpod', 'theia', 'vscode', 'redhat'];
/** Property names treated as technical metadata. NOTE(review): presumably skipped entirely by `sanitize` — confirm against its body. */
export const IGNORED_PROPERTIES = ['extension_name', 'extension_version', 'app_name', 'app_version', 'app_kind', 'app_remote', 'app_host', 'browser_name', 'browser_version', ''];
/**
 * Patterns tested against a property *name*; when any of them matches, the property value
 * is additionally run through `anonymizeFilePaths`.
 * Matching is case-insensitive so that camelCase / capitalised names such as `stackTrace`,
 * `errorMessage` or `Exception` are covered as well as their lowercase forms.
 */
export const REDACTED_PATH_PROPERTIES = [/error/i, /message/i, /stacktrace/i, /exception/i];

export function transform(event: TelemetryEvent, userId: string, environment: Environment): AnalyticsEvent {
//Inject Client name and version, Extension id and version, and timezone to the event properties
Expand Down Expand Up @@ -177,9 +179,14 @@ function sanitize(properties: any, environment: Environment): any {
continue;
}
const isObj = isObject(rawProperty);

let sanitizedProperty = isObj ? JSON.stringify(rawProperty) : rawProperty;

sanitizedProperty = (sanitizedProperty as string).replace(usernameRegexp, '_username_');
if (REDACTED_PATH_PROPERTIES.some(rpp => rpp.test(p))) {
sanitizedProperty = anonymizeFilePaths(sanitizedProperty as string);
}

sanitizedProperty = sanitizedProperty.replace(usernameRegexp, '_username_');
if (isObj) {
//let's try to deserialize into a sanitized object
try {
Expand Down
67 changes: 67 additions & 0 deletions src/common/utils/telemetryUtils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
// Copied/adapted from https://github.com/microsoft/vscode/blob/6115140fb0657d86350c2de8bcf53e61c731d8cd/src/vs/platform/telemetry/common/telemetryUtils.ts

// Regex patterns for path sanitization

// A `node_modules` (or `node_modules.asar`) path segment, optionally preceded by a separator.
const NODE_MODULES_REGEX = /[\\\/]?(node_modules|node_modules\.asar)[\\\/]/;
// Path-like runs: optional `file://`, optional drive letter or leading separator(s),
// then one or more `segment<separator>` groups followed by a final segment.
const FILE_REGEX_PATTERN = /(file:\/\/)?([a-zA-Z]:(\\\\|\\|\/)|(\\\\|\\|\/))?([\w-\._]+(\\\\|\\|\/))+[\w-\._]+/g;

/**
 * Replaces file paths found in a text (typically an error message or stack trace)
 * with `<REDACTED: user-file-path>`.
 *
 * Paths that contain a `node_modules` / `node_modules.asar` segment keep everything from
 * that segment onwards; only the portion in front of it is redacted. Paths that start
 * with the `node_modules` segment are left untouched.
 *
 * @param stack The text to sanitize
 * @returns The sanitized text; falsy input and input without any `/` or `\` is returned as-is
 */
export function anonymizeFilePaths(stack: string): string {
  // Fast check to see if it can contain a file path at all, to avoid unnecessary heavy regex work
  if (!stack || (!stack.includes('/') && !stack.includes('\\'))) {
    return stack;
  }

  const redacted = '<REDACTED: user-file-path>';
  // Fresh instance per call: a global regex carries `lastIndex` state between `exec` calls
  const fileRegex = new RegExp(FILE_REGEX_PATTERN);
  let updatedStack = '';
  let lastIndex = 0;

  let result: RegExpExecArray | null;
  while ((result = fileRegex.exec(stack)) !== null) {
    const match = result[0];
    // Text between the previous path (or the start of the input) and this one is copied verbatim
    const textBefore = stack.substring(lastIndex, result.index);
    // NODE_MODULES_REGEX is not global, so `exec` is stateless here
    const nodeModulesMatch = NODE_MODULES_REGEX.exec(match);

    if (nodeModulesMatch === null) {
      // Anonymize user file paths that do not need to be retained
      updatedStack += textBefore + redacted;
    } else if (nodeModulesMatch.index === 0) {
      // The path starts with node_modules (no user part): preserve the entire path
      updatedStack += textBefore + match;
    } else {
      // Anonymize the user part and preserve everything from the node_modules segment on
      updatedStack += textBefore + redacted + match.substring(nodeModulesMatch.index);
    }
    lastIndex = fileRegex.lastIndex;
  }

  // Append whatever follows the last matched path (empty when the match reached the end)
  return updatedStack + stack.substring(lastIndex);
}
60 changes: 44 additions & 16 deletions src/tests/utils/events.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ import { TelemetryEvent } from '../../common/api/telemetry';

const env: Environment = {
application: {
name:'SuperCode',
version:'6.6.6'
name: 'SuperCode',
version: '6.6.6'
},
extension: {
name: 'my-ext',
version: '1.2.3'
},
username:'Fred',
username: 'Fred',
platform: {
name: 'DeathStar II'
},
Expand All @@ -23,9 +23,9 @@ const USER_ID = "1234";
suite('Test events enhancements', () => {
test('should inject environment data', async () => {
const event: TelemetryEvent = {
name:'Something',
name: 'Something',
properties: {
foo: 'bar',
foo: 'http://bar',
}
}

Expand All @@ -34,19 +34,19 @@ suite('Test events enhancements', () => {
assert.strictEqual(betterEvent.properties.app_version, '6.6.6');
assert.strictEqual(betterEvent.properties.extension_name, 'my-ext');
assert.strictEqual(betterEvent.properties.extension_version, '1.2.3');
assert.strictEqual(betterEvent.properties.foo, 'bar');
assert.strictEqual(betterEvent.properties.foo, 'http://bar');
assert.strictEqual(betterEvent.context.ip, '0.0.0.0');

});

test('should anonymize data', async () => {
const event: TelemetryEvent = {
name:'Something',
name: 'Something',
properties: {
foo: 'Fred is Fred',
qty: 10,
active: false,
bar: 'That c:\\Fred\\bar looks like a path',
bar: 'That c:\\Fred\\bar looks like a path, but is not fully anonymized',
error: 'An error occurred in /Users/Fred/foo/bar.txt! But we\'re fine',
multiline: 'That url file://Fred/bar.txt is gone!\nNot that c:\\user\\bar though',
obj: {
Expand All @@ -61,19 +61,47 @@ suite('Test events enhancements', () => {
assert.strictEqual(betterEvent.properties.qty, 10);
assert.strictEqual(betterEvent.properties.active, false);
assert.strictEqual(betterEvent.properties.foo, '_username_ is _username_');
assert.strictEqual(betterEvent.properties.bar, 'That c:\\_username_\\bar looks like a path');
assert.strictEqual(betterEvent.properties.error, 'An error occurred in /Users/_username_/foo/bar.txt! But we\'re fine');
assert.strictEqual(betterEvent.properties.bar, 'That c:\\_username_\\bar looks like a path, but is not fully anonymized');
assert.strictEqual(betterEvent.properties.error, 'An error occurred in <REDACTED: user-file-path>! But we\'re fine');
assert.strictEqual(betterEvent.properties.multiline, 'That url file://_username_/bar.txt is gone!\nNot that c:\\user\\bar though');
assert.strictEqual(betterEvent.properties.obj.q, 'Who is _username_?');
assert.strictEqual(betterEvent.properties.obj.a, '_username_ who?');
});

// Verifies that a `stacktrace` property is path-anonymized by `transform`: the quoted
// '/myprojectname/target/classes' paths are expected to be replaced, while the dotted
// Java frames (which contain no slashes) are expected to come through unchanged.
test('should anonymize stacktraces', async () => {
// Raw stack trace as it would be reported; the template literal's line breaks are part of the value.
const stacktrace = `
An internal error occurred during: "Updating workspace".
Tree element '/myprojectname/target/classes' not found.
org.eclipse.core.internal.dtree.ObjectNotFoundException: Tree element '/myprojectname/target/classes' not found.
at org.eclipse.core.internal.dtree.AbstractDataTree.handleNotFound(AbstractDataTree.java:183)
at org.eclipse.core.internal.dtree.DeltaDataTree.getData(DeltaDataTree.java:572)
at org.eclipse.core.internal.dtree.DeltaDataTree.naiveCopyCompleteSubtree(DeltaDataTree.java:757)`;

// Same text with only the two slash-separated paths swapped for the redaction marker.
const expectedStacktrace = `
An internal error occurred during: "Updating workspace".
Tree element '<REDACTED: user-file-path>' not found.
org.eclipse.core.internal.dtree.ObjectNotFoundException: Tree element '<REDACTED: user-file-path>' not found.
at org.eclipse.core.internal.dtree.AbstractDataTree.handleNotFound(AbstractDataTree.java:183)
at org.eclipse.core.internal.dtree.DeltaDataTree.getData(DeltaDataTree.java:572)
at org.eclipse.core.internal.dtree.DeltaDataTree.naiveCopyCompleteSubtree(DeltaDataTree.java:757)`;

const event: TelemetryEvent = {
name: 'Something',
properties: {
stacktrace: stacktrace
}
}

// Run the event through the full transform pipeline and compare the sanitized property verbatim.
const betterEvent = utils.transform(event, USER_ID, env);
assert.strictEqual(betterEvent.properties.stacktrace, expectedStacktrace);
});

test('should not anonymize special usernames', async () => {
utils.IGNORED_USERS.forEach((user) => {
const cheEnv: Environment = {
application: {
name:'SuperCode',
version:'6.6.6'
name: 'SuperCode',
version: '6.6.6'
},
extension: {
name: 'my-ext',
Expand All @@ -86,7 +114,7 @@ suite('Test events enhancements', () => {
}

const event: TelemetryEvent = {
name:'Something',
name: 'Something',
properties: {
foo: 'vscode likes theia',
multiline: 'That gitpod \nusername is a redhat user',
Expand All @@ -102,8 +130,8 @@ suite('Test events enhancements', () => {
test('should not anonymize technical properties', async () => {
const someEnv: Environment = {
application: {
name:'codename',
version:'codename'
name: 'codename',
version: 'codename'
},
extension: {
name: 'codename',
Expand All @@ -116,7 +144,7 @@ suite('Test events enhancements', () => {
}

const event: TelemetryEvent = {
name:'Something',
name: 'Something',
properties: {
foo: 'codename likes vscode',
multiline: 'That gitpod \ncodename is a redhat user',
Expand Down
162 changes: 162 additions & 0 deletions src/tests/utils/telemetryUtils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import * as assert from 'assert';
import { anonymizeFilePaths } from '../../common/utils/telemetryUtils';

suite('Test anonymizeFilePaths', () => {
  const REDACTED = '<REDACTED: user-file-path>';

  /** Registers a test asserting that `input` is transformed into exactly `expected`. */
  const expectOutput = (title: string, input: string, expected: string): void => {
    test(title, () => {
      assert.strictEqual(anonymizeFilePaths(input), expected);
    });
  };

  /** Registers a test asserting that `input` is returned untouched. */
  const expectUnchanged = (title: string, input: string): void => {
    expectOutput(title, input, input);
  };

  /**
   * Registers a looser test: the output must contain the redaction marker, must not
   * contain any of `leaked`, and must still contain every entry of `retained`.
   */
  const expectPartialRedaction = (title: string, input: string, leaked: string[], retained: string[] = []): void => {
    test(title, () => {
      const output = anonymizeFilePaths(input);
      assert.ok(output.includes(REDACTED));
      for (const fragment of leaked) {
        assert.ok(!output.includes(fragment));
      }
      for (const fragment of retained) {
        assert.ok(output.includes(fragment));
      }
    });
  };

  // --- Inputs that are handed back as-is -------------------------------------------------
  expectUnchanged('should return empty string when input is empty', '');
  expectUnchanged('should return null when input is null', null as any);
  expectUnchanged('should return undefined when input is undefined', undefined as any);
  expectUnchanged('should return original string when no file paths are present', 'This is just a regular string without any paths');
  expectUnchanged('should return original string when no slashes are present', 'This string has no forward or backslashes');

  // --- Plain user paths in various notations ---------------------------------------------
  expectOutput('should anonymize Unix file paths', 'Error in /Users/john/workspace/project/src/file.ts', `Error in ${REDACTED}`);
  expectOutput('should anonymize Windows file paths', 'Error in C:\\Users\\john\\workspace\\project\\src\\file.ts', `Error in ${REDACTED}`);
  expectOutput('should anonymize Windows file paths with forward slashes', 'Error in C:/Users/john/workspace/project/src/file.ts', `Error in ${REDACTED}`);
  expectOutput('should anonymize file:// URLs', 'Error in file:///Users/john/workspace/project/src/file.ts', `Error in ${REDACTED}`);
  expectOutput(
    'should anonymize multiple file paths in same string',
    'Error in /Users/john/file1.ts and also in C:\\Users\\jane\\file2.ts',
    `Error in ${REDACTED} and also in ${REDACTED}`
  );

  // --- node_modules handling: the part in front of node_modules is redacted, the rest is kept ---
  expectOutput(
    'should not anonymize node_modules paths',
    'Error in /Users/john/node_modules/package/index.js',
    `Error in ${REDACTED}/node_modules/package/index.js`
  );
  expectOutput(
    'should not anonymize node_modules.asar paths',
    'Error in /Users/john/node_modules.asar/package/index.js',
    `Error in ${REDACTED}/node_modules.asar/package/index.js`
  );
  expectUnchanged('should not anonymize node_modules paths with leading slash', 'Error in /node_modules/package/index.js');
  expectUnchanged('should not anonymize node_modules paths with backslash', 'Error in \\node_modules\\package\\index.js');
  expectOutput(
    'should anonymize user paths but preserve node_modules paths',
    'Error in /Users/john/project/src/file.ts and /Users/john/project/node_modules/package/index.js',
    `Error in ${REDACTED} and ${REDACTED}/node_modules/package/index.js`
  );

  // Only loose checks here; the `:line:column` suffix is expected to remain after the
  // preserved node_modules part, as the last retained fragment shows.
  expectPartialRedaction(
    'should handle complex stack traces',
    [
      'Error: Something went wrong',
      'at Object.function (/Users/john/workspace/project/src/file.ts:10:5)',
      'at /Users/john/workspace/project/src/other.ts:15:20',
      'at /Users/john/node_modules/package/index.js:5:10'
    ].join('\n'),
    ['/Users/john/workspace/project/src/file.ts', '/Users/john/workspace/project/src/other.ts'],
    [`${REDACTED}/node_modules/package/index.js:5:10`]
  );

  // A path containing a space and parentheses may be redacted in several pieces,
  // so only the absence of the sensitive prefix is checked.
  expectPartialRedaction(
    'should handle paths with special characters',
    'Error in /Users/john/my-project (copy)/src/file.ts',
    ['/Users/john/my-project']
  );

  // --- Further exact-match cases ----------------------------------------------------------
  expectOutput('should handle paths with dots and dashes', 'Error in /Users/john/my-project.v2/src/file-name.ts', `Error in ${REDACTED}`);
  expectOutput('should handle relative paths', 'Error in ./src/file.ts and ../other/file.ts', `Error in ${REDACTED} and ${REDACTED}`);
  expectOutput('should handle paths without file extensions', 'Error in /Users/john/workspace/project/src/file', `Error in ${REDACTED}`);
  expectOutput(
    'should handle mixed content with and without paths',
    'Regular text /Users/john/file.ts more text C:\\Users\\jane\\file.ts end',
    `Regular text ${REDACTED} more text ${REDACTED} end`
  );

  const deepPath = `/Users/john/${'very/long/path/'.repeat(50)}file.ts`;
  expectOutput('should handle very long paths', `Error in ${deepPath}`, `Error in ${REDACTED}`);

  // A non-ASCII character may split the path into several redacted pieces,
  // so only the absence of the sensitive prefix is checked.
  expectPartialRedaction(
    'should handle paths with Unicode characters',
    'Error in /Users/jöhn/workspace/project/src/file.ts',
    ['/Users/jöhn']
  );

  expectOutput('should handle Windows UNC paths', 'Error in \\\\server\\share\\file.ts', `Error in ${REDACTED}`);

});