All files / languages/turtle turtle-document.ts

98.83% Statements 170/172
88.7% Branches 110/124
100% Functions 20/20
99.37% Lines 160/161

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456                                                  121x   121x     121x   121x       1x             2x             134x       3x   3x     2x 1x   1x   1x     2x         6x   6x 1x     5x       2x       2x     1x                       16x   16x     8x 8x   8x     8x           4x 4x   
4x 2x   2x                   2x 2x     2x 2x   2x 2x       2x   2x 1x 1x 1x   1x   1x                                     55x 55x 55x   55x 55x 55x       55x 55x   55x                   34x 34x   34x 39x   39x 1x     38x 2x       36x 16x       20x 1x       19x 6x       11x                 23x   23x                 5x   5x   1x 4x   1x     3x 3x   3x 3x   3x     3x 2x         1x                 51x 51x 51x 51x 51x 51x   51x       51x 107x     10x     10x 8x   8x 8x   10x         41x       10x     41x   41x       16x   16x 16x     16x 16x 16x 16x     15x   15x 1x     15x 15x 15x 15x     9x 9x 9x       2x   2x   1x 1x     1x   1x       1x 1x       107x     106x           17x   17x 7x   7x 7x     7x         33x 33x     33x   33x       40x 9x   9x   7x   7x   4x   4x         40x 9x   9x   7x   7x   4x   4x   3x   3x   2x     2x             1x   1x          
import * as vscode from 'vscode';
import { Position } from 'vscode-languageserver-types';
import { Quad_Subject, Quad_Object, Quad_Predicate } from '@rdfjs/types';
import { Store, Uri, _OWL, _RDF, _RDFS, _SH, _SKOS, _SKOS_XL, RDF } from '@faubulous/mentor-rdf';
import { IToken, RdfSyntax, TurtleReader, TurtleParser, RdfToken } from '@faubulous/mentor-rdf-parsers';
import { container } from 'tsyringe';
import { ServiceToken } from '@src/services/tokens';
import { DocumentContext } from '@src/services/document/document-context';
import { WorkspaceUri } from '@src/providers/workspace-uri';
import {
	countLeadingWhitespace,
	countTrailingWhitespace,
	getIriFromIriReference,
	getIriFromPrefixedName,
	getIriFromToken,
	getNamespaceDefinition,
	getTokenPosition
} from '@src/utilities';
 
/**
 * A document context for Turtle and TriG documents.
 */
export class TurtleDocument extends DocumentContext {
	readonly syntax: RdfSyntax;
 
	private _inferenceExecuted = false;
 
	private _tokens: IToken[] = [];
 
	/**
	 * Creates a new Turtle / TriG document context.
	 * @param uri The URI of the document; forwarded to the base class constructor.
	 * @param syntax The RDF syntax stored in the read-only `syntax` property.
	 */
	constructor(uri: vscode.Uri, syntax: RdfSyntax) {
		super(uri);

		this.syntax = syntax;
	}
 
	/**
	 * Indicates whether the document has tokens and at least one graph registered.
	 */
	get isLoaded(): boolean {
		if (this._tokens.length === 0) {
			return false;
		}

		return this.graphs.length > 0;
	}
 
	/**
	 * Indicates whether the token list of this document is non-empty.
	 */
	get hasTokens(): boolean {
		const tokenCount = this._tokens.length;

		return tokenCount > 0;
	}
 
	/**
	 * All tokens in the document.
	 * @returns The token array last assigned via `setTokens()` (the internal array itself, not a copy),
	 * or an empty array if no tokens have been set yet.
	 */
	get tokens(): IToken[] {
		return this._tokens;
	}
 
	/**
	 * Gets the IRI that the token at the given position refers to.
	 * @param position A position in the document.
	 * @returns The namespace IRI if the position is on the prefix part of a prefixed name,
	 * otherwise the IRI resolved from the token; `undefined` if there is no token at the position.
	 */
	public override getIriAtPosition(position: vscode.Position): string | undefined {
		const token = this.getTokenAtPosition(position);

		if (!token) {
			return undefined;
		}

		if (!this.isPrefixTokenAtPosition(token, position)) {
			return getIriFromToken(this.namespaces, token);
		}

		// The cursor is on the prefix part, so resolve the namespace IRI instead of the full IRI.
		const [prefix] = token.image.split(":");

		return this.namespaces[prefix];
	}
 
	/**
	 * Gets the content of the string literal at the given position without its quotes.
	 * @param position A position in the document.
	 * @returns The literal text with the delimiters removed, or `undefined` if the token
	 * at the position is missing, has no token type, or is not a string literal.
	 */
	public override getLiteralAtPosition(position: vscode.Position): string | undefined {
		const token = this.getTokenAtPosition(position);

		if (!token || !token.tokenType) {
			return undefined;
		}

		const typeName = token.tokenType.name;

		// Display the literal strings without the quotes for improved readability for long strings.
		const isShortString =
			typeName === RdfToken.STRING_LITERAL_SINGLE_QUOTE.name ||
			typeName === RdfToken.STRING_LITERAL_QUOTE.name;

		if (isShortString) {
			// One delimiter character on each side.
			return token.image.slice(1, -1);
		}

		const isLongString =
			typeName === RdfToken.STRING_LITERAL_LONG_QUOTE.name ||
			typeName === RdfToken.STRING_LITERAL_LONG_SINGLE_QUOTE.name;

		if (isLongString) {
			// Three delimiter characters on each side.
			return token.image.slice(3, -3);
		}

		return undefined;
	}
 
	/**
	 * Indicates whether the given position lies on the namespace prefix of a prefixed name token.
	 * @param token A token.
	 * @param position The position in the document.
	 * @returns `true` if the token is a prefixed name (`PNAME_NS` or `PNAME_LN`) and the character
	 * offset of the position relative to the token start is not beyond the colon, `false` otherwise.
	 */
	isPrefixTokenAtPosition(token: IToken, position: vscode.Position) {
		const { start } = getTokenPosition(token);
		const typeName = token.tokenType.name;

		if (typeName !== RdfToken.PNAME_NS.name && typeName !== RdfToken.PNAME_LN.name) {
			return false;
		}

		const colonIndex = token.image.indexOf(":");
		const offsetInToken = position.character - start.character;

		return offsetInToken <= colonIndex;
	}
 
	/**
	 * Executes inference on the graph of this document at most once per document instance.
	 * Nothing happens if the store has no reasoner or inference was already triggered.
	 */
	public override async infer(): Promise<void> {
		const store = container.resolve<Store>(ServiceToken.Store);

		if (!store.reasoner || this._inferenceExecuted) {
			return;
		}

		// Set the flag before running so that repeated calls do not trigger inference again.
		this._inferenceExecuted = true;

		const graphUri = WorkspaceUri.toCanonicalString(this.graphIri);

		store.executeInference(graphUri);
	}
 
	/**
	 * Parses the existing tokens of the document and adds the resulting quads to the triple store.
	 * This method assumes tokens have already been set via setTokens().
	 * Errors raised while resolving the store, parsing or adding quads are swallowed on purpose.
	 * @param data The file content (not used, parsing uses existing tokens).
	 */
	public override async loadTriples(data: string): Promise<void> {
		try {
			const store = container.resolve<Store>(ServiceToken.Store);

			// Register the document graph *before* parsing so that the graph list
			// is initialized even when loading the document fails.
			const graphUri = WorkspaceUri.toCanonicalString(this.graphIri);
			const graphNode = store.dataFactory.namedNode(graphUri);

			this.graphs.length = 0;
			this.graphs.push(graphUri);

			// The store is only updated if the document was parsed successfully.
			// Parsing uses the tokens that were set by the language server.
			const syntaxTree = new TurtleParser().parse(this._tokens);

			for (const { subject, predicate, object } of new TurtleReader().visit(syntaxTree)) {
				// Re-create every quad inside the graph of this document.
				const quad = store.dataFactory.quad(
					subject as Quad_Subject,
					predicate as Quad_Predicate,
					object as Quad_Object,
					graphNode
				);

				store.add(quad);
			}
		} catch {
			// This is not a critical error because the graph might be invalid.
		}
	}
 
	/**
	 * Handler for text document changes. Intentionally does nothing for this document type.
	 * @param e The text document change event (unused).
	 */
	override async onDidChangeDocument(e: vscode.TextDocumentChangeEvent): Promise<void> {
		// Auto-prefix definition is handled by TurtleAutoDefinePrefixProvider
		// which waits for fresh tokens from the language server before processing.
	}
 
	/**
	 * Gets the editor range covered by a token.
	 * @param token A token.
	 * @returns A range whose start and end exclude leading and trailing whitespace in the
	 * token image; the end position is placed one character after the last token character.
	 */
	getRangeFromToken(token: IToken): vscode.Range {
		// The token positions are 1-based, whereas the editor positions / locations are 0-based.
		// Missing (falsy) coordinates are mapped to 0.
		const toZeroBased = (value: number | undefined): number => (value ? value - 1 : 0);

		const startLine = toZeroBased(token.startLine);
		const startCharacter = toZeroBased(token.startColumn);
		const leadingWhitespace = countLeadingWhitespace(token.image);

		const endLine = toZeroBased(token.endLine);
		const endCharacter = toZeroBased(token.endColumn);
		const trailingWhitespace = countTrailingWhitespace(token.image);

		// Note: The millan parser incorrectly parses some tokens with leading and trailing whitespace.
		// We account for this by adjusting the start and end positions.
		const start = new vscode.Position(startLine, startCharacter + leadingWhitespace);
		const lastCharacter = new vscode.Position(endLine, endCharacter - trailingWhitespace);

		return new vscode.Range(start, lastCharacter.translate(0, 1));
	}
 
	/**
	 * Gets the index of the token at a given position.
	 *
	 * The lookup returns the first token (in document order) that starts on or before the line
	 * of the position and ends at or after the position. Tokens without complete line / column
	 * information are ignored.
	 * @param position A position in the document.
	 * @returns The index of the token at the given position, or -1 if no token is found.
	 */
	getTokenIndexAtPosition(position: Position): number {
		// The token lines are 1-based, but the position line is 0-based.
		const line = position.line + 1;
		// NOTE(review): the character offset is compared against the token columns as-is,
		// exactly like before; confirm whether a 0-based to 1-based conversion is intended.
		const character = position.character;

		for (let i = 0; i < this.tokens.length; i++) {
			const token = this.tokens[i];

			if (!token.startLine || !token.endLine || !token.startColumn || !token.endColumn) {
				continue;
			}

			// Tokens are visited in order, so nothing after this token can contain the position.
			if (token.startLine > line) {
				break;
			}

			// The token starts on or before the line and ends on a later line. This includes
			// multi-line tokens that *start* on the line of the position, which were previously missed.
			if (token.endLine > line) {
				return i;
			}

			// The token ends on the line of the position, at or after the given character.
			if (token.endLine === line && token.endColumn >= character) {
				return i;
			}
		}

		return -1;
	}
 
	/**
	 * Gets the first token at a given position.
	 * @param position A position in the document.
	 * @returns The token at the given position, if it exists, undefined otherwise.
	 */
	getTokenAtPosition(position: Position): IToken | undefined {
		const tokenIndex = this.getTokenIndexAtPosition(position);

		if (tokenIndex < 0) {
			return undefined;
		}

		return this.tokens[tokenIndex];
	}
 
	/**
	 * Gets the token that precedes the given position.
	 *
	 * If a token is found at the position, its predecessor in the token list is returned.
	 * Otherwise the token list is searched backwards for the last token that ends at or
	 * before the position.
	 * @param position A position in the document.
	 * @returns The token before the given position, if it exists, undefined otherwise.
	 */
	getTokenBeforePosition(position: Position): IToken | undefined {
		const index = this.getTokenIndexAtPosition(position);

		if (index > 0) {
			// Found a token at the position, return the previous one.
			return this.tokens[index - 1];
		}

		if (index === 0) {
			// The position is at the first token, so there is no previous token.
			return undefined;
		}

		// No token at the position (index === -1): find the last token that ends before it.
		// The token lines are 1-based, but the position line is 0-based.
		const line = position.line + 1;
		const character = position.character;

		for (let i = this.tokens.length - 1; i >= 0; i--) {
			const token = this.tokens[i];

			// Tokens without end coordinates cannot be compared against the position.
			if (!token.endLine || !token.endColumn) {
				continue;
			}

			// If the token ends before the cursor position, it is the one we want.
			if (token.endLine < line || (token.endLine === line && token.endColumn <= character)) {
				return token;
			}
		}

		return undefined;
	}
 
	/**
	 * Set the tokens of the document and update the namespaces, subjects, references, type assertions and type definitions.
	 *
	 * All previously collected indexes are discarded before the new tokens are processed.
	 * @param tokens An array of tokens.
	 * @note The registration is executed on a token level so that document types are supported that do not produce triples.
	 */
	setTokens(tokens: IToken[]): void {
		this.namespaces = {};
		this.namespaceDefinitions = {};
		this.subjects = {};
		this.references = {};
		this.typeAssertions = {};
		this.typeDefinitions = {};

		this._tokens = tokens;

		// The last non-comment token that was processed before the current one.
		let previousToken: IToken | undefined;

		tokens.forEach((t: IToken, i: number) => {
			switch (t.tokenType.name) {
				case RdfToken.PREFIX.name:
				case RdfToken.TTL_PREFIX.name: {
					const ns = getNamespaceDefinition(this.tokens, t);

					// Only register the namespace if a definition could be resolved for the prefix keyword.
					if (ns) {
						const r = this.getRangeFromToken(t);

						this.namespaces[ns.prefix] = ns.uri;
						this.namespaceDefinitions[ns.uri] = [r];
					}
					break;
				}
				case RdfToken.PNAME_NS.name:
				case RdfToken.PNAME_LN.name: {
					// NOTE(review): the original code contained a nested switch on the previous token type
					// (PREFIX, TTL_PREFIX, PNAME_NS) that was apparently meant to skip prefixed names inside
					// prefix definitions. Its `break` only left the nested switch, so it never skipped
					// anything. The effective behavior (no skipping) is preserved here; confirm the intent
					// before changing it.
					let iri = getIriFromPrefixedName(this.namespaces, t.image);

					if (!iri) break;

					// Remove any trailing slashes or hashes so that the IRIs are comparable
					// with the vscode.Uri.toString() output.
					iri = Uri.getNormalizedUri(iri);

					if (previousToken) {
						this._registerSubject(t, iri, previousToken);
					}

					this._handleTypeAssertion(tokens, t, iri, i);
					this._handleTypeDefinition(tokens, t, iri, i);
					this._handleResourceReference(tokens, t, iri);
					break;
				}
				case RdfToken.IRIREF.name: {
					const iri = getIriFromIriReference(t.image);

					// Only IRI references in the first column are considered as subject candidates.
					if (t.startColumn === 1 && previousToken) {
						this._registerSubject(t, iri, previousToken);
					}

					this._handleTypeAssertion(tokens, t, iri, i);
					this._handleTypeDefinition(tokens, t, iri, i);
					this._handleResourceReference(tokens, t, iri);
					break;
				}
				case RdfToken.A.name: {
					// The 'a' keyword is treated as rdf:type.
					this._handleTypeAssertion(tokens, t, RDF.type, i);
					this._handleTypeDefinition(tokens, t, RDF.type, i);
					break;
				}
				case RdfToken.LBRACKET.name: {
					// Store the position of anonymous blank nodes so they can be revealed in the editor.
					const id = t.payload?.blankNodeId;

					if (!id) break;

					this._handleResourceReference(tokens, t, id);
					break;
				}
				case RdfToken.BLANK_NODE_LABEL.name: {
					const id = t.image;

					// Only blank node labels in the first column are considered as subject candidates.
					if (t.startColumn === 1 && previousToken) {
						this._registerSubject(t, id, previousToken);
					}

					this._handleResourceReference(tokens, t, id);
					break;
				}
			}

			if (t.tokenType.name !== RdfToken.COMMENT.name) {
				// Skip comments for previous token tracking to avoid skipping important
				// registrations when comments are present between tokens.
				previousToken = t;
			}
		});
	}
 
	/**
	 * Registers the range of a token as a subject occurrence if it directly follows a period token.
	 * @param token The token of the potential subject.
	 * @param iriOrBlankId The IRI or blank node id under which the range is stored in `subjects`.
	 * @param previousToken The token preceding `token`; the range is only stored if it is a period.
	 */
	private _registerSubject(token: IToken, iriOrBlankId: string, previousToken: IToken) {
		if (previousToken.tokenType.name !== RdfToken.PERIOD.name) {
			return;
		}

		const range = this.getRangeFromToken(token);

		// Create the range list on first use.
		if (!this.subjects[iriOrBlankId]) {
			this.subjects[iriOrBlankId] = [];
		}

		this.subjects[iriOrBlankId].push(range);
	}
 
	/**
	 * Appends the range of a token to the list of references of a resource.
	 * @param tokens All tokens of the document (unused).
	 * @param token The token that references the resource.
	 * @param iriOrBlankId The IRI or blank node id under which the range is stored in `references`.
	 */
	private _handleResourceReference(tokens: IToken[], token: IToken, iriOrBlankId: string) {
		// Create the range list on first use.
		if (!this.references[iriOrBlankId]) {
			this.references[iriOrBlankId] = [];
		}

		const range = this.getRangeFromToken(token);

		this.references[iriOrBlankId].push(range);
	}
 
	/**
	 * Records a type assertion for the token that directly precedes an rdf:type predicate.
	 * @param tokens All tokens of the document.
	 * @param token The current token (unused).
	 * @param uri The IRI of the current token; nothing happens unless it equals `RDF.type`.
	 * @param index The index of the current token in `tokens`.
	 */
	private _handleTypeAssertion(tokens: IToken[], token: IToken, uri: string, index: number) {
		if (uri !== RDF.type) {
			return;
		}

		// The token before the predicate is taken as the subject.
		const subjectToken = tokens[index - 1];

		if (!subjectToken) {
			return;
		}

		const subjectUri = getIriFromToken(this.namespaces, subjectToken);

		if (!subjectUri) {
			return;
		}

		// Any previously recorded assertion range for this subject is replaced.
		this.typeAssertions[subjectUri] = [this.getRangeFromToken(subjectToken)];
	}
 
	/**
	 * Records a type definition for the token preceding an rdf:type predicate if the token
	 * following the predicate resolves to an IRI in one of the RDF, RDFS, OWL, SKOS, SKOS-XL
	 * or SHACL namespaces.
	 * @param tokens All tokens of the document.
	 * @param token The current token (unused).
	 * @param uri The IRI of the current token; nothing happens unless it equals `RDF.type`.
	 * @param index The index of the current token in `tokens`.
	 */
	private _handleTypeDefinition(tokens: IToken[], token: IToken, uri: string, index: number) {
		if (uri !== RDF.type) {
			return;
		}

		// The token before the predicate is taken as the subject.
		const subjectToken = tokens[index - 1];

		if (!subjectToken) {
			return;
		}

		const subjectUri = getIriFromToken(this.namespaces, subjectToken);

		if (!subjectUri) {
			return;
		}

		// The token after the predicate is taken as the object.
		const objectToken = tokens[index + 1];

		if (!objectToken) {
			return;
		}

		const objectUri = getIriFromToken(this.namespaces, objectToken);

		if (!objectUri) {
			return;
		}

		const namespaceUri = Uri.getNamespaceIri(objectUri);

		// TODO: Make this more explicit to reduce false positives.
		const definitionNamespaces = [_RDF, _RDFS, _OWL, _SKOS, _SKOS_XL, _SH];

		if (definitionNamespaces.some(ns => ns === namespaceUri)) {
			// Any previously recorded definition range for this subject is replaced.
			this.typeDefinitions[subjectUri] = [this.getRangeFromToken(subjectToken)];
		}
	}
}