-
Notifications
You must be signed in to change notification settings - Fork 2k
Rust: Add LiteralExpr sub classes
#19475
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,15 @@ | ||
| /** Provides sub classes of literal expressions. */ | ||
|
|
||
| private import internal.LiteralExprImpl | ||
|
|
||
| final class CharLiteralExpr = Impl::CharLiteralExpr; | ||
|
|
||
| final class StringLiteralExpr = Impl::StringLiteralExpr; | ||
|
|
||
| final class NumberLiteralExpr = Impl::NumberLiteralExpr; | ||
|
|
||
| final class IntegerLiteralExpr = Impl::IntegerLiteralExpr; | ||
|
|
||
| final class FloatLiteralExpr = Impl::FloatLiteralExpr; | ||
|
|
||
| final class BooleanLiteralExpr = Impl::BooleanLiteralExpr; | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -42,4 +42,191 @@ module Impl { | |||||||||||||||||||||
| ) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * A [character literal][1]. For example: | ||||||||||||||||||||||
| * | ||||||||||||||||||||||
| * ```rust | ||||||||||||||||||||||
| * 'x'; | ||||||||||||||||||||||
| * ``` | ||||||||||||||||||||||
| * | ||||||||||||||||||||||
| * [1]: https://doc.rust-lang.org/reference/tokens.html#character-literals | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| class CharLiteralExpr extends LiteralExpr { | ||||||||||||||||||||||
| CharLiteralExpr() { | ||||||||||||||||||||||
| // Deliberately simpler than the character-literal grammar in the Rust reference. | ||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What needs to be done to make this a "proper implementation"? I was initially concerned about escaped quote characters, but I think with lazy matching they may just work. Testing will confirm.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The logic is a lot simpler than the official spec, which is why I added the comment.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggest we remove the comment, or rephrase it away from "todo", if there is nothing specific we want to do here. |
||||||||||||||||||||||
| this.getTextValue().regexpMatch("'.*'") | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| override string getAPrimaryQlClass() { result = "CharLiteralExpr" } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * A [string literal][1]. For example: | ||||||||||||||||||||||
| * | ||||||||||||||||||||||
| * ```rust | ||||||||||||||||||||||
| * "Hello, world!"; | ||||||||||||||||||||||
| * ``` | ||||||||||||||||||||||
| * | ||||||||||||||||||||||
| * [1]: https://doc.rust-lang.org/reference/tokens.html#string-literals | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| class StringLiteralExpr extends LiteralExpr { | ||||||||||||||||||||||
| StringLiteralExpr() { | ||||||||||||||||||||||
| // Deliberately simpler than the string-literal grammar in the Rust reference. | ||||||||||||||||||||||
| this.getTextValue().regexpMatch("r?#*\".*\"#*") | ||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What needs to be done to make this a "proper implementation"? I think it's OK that this matches some invalid string literals (e.g.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as for char literals, the spec is more involved, but perhaps as you say the above is good enough. |
||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| override string getAPrimaryQlClass() { result = "StringLiteralExpr" } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * A number literal. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| abstract class NumberLiteralExpr extends LiteralExpr { } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| // Regular-expression fragments for Rust integer literals, following the token grammar at | ||||||||||||||||||||||
| // https://doc.rust-lang.org/reference/tokens.html#integer-literals | ||||||||||||||||||||||
| // | ||||||||||||||||||||||
| // NOTE: the `getSuffix` predicates of `IntegerLiteralExpr` and `FloatLiteralExpr` locate the | ||||||||||||||||||||||
| // suffix capture group by counting the `(` characters in the regexes built from these | ||||||||||||||||||||||
| // fragments. Every parenthesis emitted here must therefore open a capturing group; adding a | ||||||||||||||||||||||
| // non-capturing group or an escaped parenthesis would make that count wrong. | ||||||||||||||||||||||
| private module IntegerLiteralRegexs { | ||||||||||||||||||||||
| /** Gets `s` wrapped in parentheses, that is, as a regex capture group. */ | ||||||||||||||||||||||
| bindingset[s] | ||||||||||||||||||||||
| string paren(string s) { result = "(" + s + ")" } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * Gets a regex for an integer literal: a decimal, binary, octal or hexadecimal literal, | ||||||||||||||||||||||
| * optionally followed by an integer type suffix. The suffix is the last group opened. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| string integerLiteral() { | ||||||||||||||||||||||
| result = | ||||||||||||||||||||||
| paren(paren(decLiteral()) + "|" + paren(binLiteral()) + "|" + paren(octLiteral()) + "|" + | ||||||||||||||||||||||
| paren(hexLiteral())) + paren(suffix()) + "?" | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** Gets a regex alternation of the integer type suffixes. */ | ||||||||||||||||||||||
| private string suffix() { result = "u8|i8|u16|i16|u32|i32|u64|i64|u128|i128|usize|isize" } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** Gets a regex for a decimal digit followed by any number of decimal digits or underscores. */ | ||||||||||||||||||||||
| string decLiteral() { result = decDigit() + "(" + decDigit() + "|_)*" } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** Gets a regex for `0b` followed by binary digits and underscores, with at least one digit. */ | ||||||||||||||||||||||
| string binLiteral() { | ||||||||||||||||||||||
| result = "0b(" + binDigit() + "|_)*" + binDigit() + "(" + binDigit() + "|_)*" | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** Gets a regex for `0o` followed by octal digits and underscores, with at least one digit. */ | ||||||||||||||||||||||
| string octLiteral() { | ||||||||||||||||||||||
| result = "0o(" + octDigit() + "|_)*" + octDigit() + "(" + octDigit() + "|_)*" | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** Gets a regex for `0x` followed by hex digits and underscores, with at least one digit. */ | ||||||||||||||||||||||
| string hexLiteral() { | ||||||||||||||||||||||
| result = "0x(" + hexDigit() + "|_)*" + hexDigit() + "(" + hexDigit() + "|_)*" | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** Gets a regex character class for a single decimal digit. */ | ||||||||||||||||||||||
| string decDigit() { result = "[0-9]" } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** Gets a regex character class for a single binary digit. */ | ||||||||||||||||||||||
| string binDigit() { result = "[01]" } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** Gets a regex character class for a single octal digit. */ | ||||||||||||||||||||||
| string octDigit() { result = "[0-7]" } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** Gets a regex character class for a single hexadecimal digit (either case). */ | ||||||||||||||||||||||
| string hexDigit() { result = "[0-9a-fA-F]" } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * An [integer literal][1]. For example: | ||||||||||||||||||||||
| * | ||||||||||||||||||||||
| * ```rust | ||||||||||||||||||||||
| * 42; | ||||||||||||||||||||||
| * ``` | ||||||||||||||||||||||
| * | ||||||||||||||||||||||
| * [1]: https://doc.rust-lang.org/reference/tokens.html#integer-literals | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| class IntegerLiteralExpr extends NumberLiteralExpr { | ||||||||||||||||||||||
| IntegerLiteralExpr() { this.getTextValue().regexpMatch(IntegerLiteralRegexs::integerLiteral()) } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * Get the suffix of this integer literal, if any. | ||||||||||||||||||||||
| * | ||||||||||||||||||||||
| * For example, `42u8` has the suffix `u8`. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| string getSuffix() { | ||||||||||||||||||||||
| exists(string s, string reg, int last | | ||||||||||||||||||||||
| s = this.getTextValue() and | ||||||||||||||||||||||
| reg = IntegerLiteralRegexs::integerLiteral() and | ||||||||||||||||||||||
| last = strictcount(reg.indexOf("(")) and | ||||||||||||||||||||||
| result = s.regexpCapture(reg, last) | ||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a bit of a scary approach, I'm not convinced it will be reliable. Can't we just match against
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would really like to be able to just reuse the existing regexes; one way this could be made better is if QL supported named groups.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In general the number of opening brackets isn't going to be equal to the number of capture groups, for example the regex
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree. Would it be possible to write the regexes such that the thing we want is always in a fixed capture group? Then we could write something like
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, I can hard-code the numbers instead.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What about a helper predicate for getting the last capture group? Something like:
```ql
bindingset[s, reg]
string regexpCaptureLast(string s, string reg) {
  exists(int i | result = s.regexpCapture(reg, i) and not exists(s.regexpCapture(reg, i + 1)))
}
```
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that will work; it will give you the last group that matches, but the suffix part is optional. |
||||||||||||||||||||||
| ) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| override string getAPrimaryQlClass() { result = "IntegerLiteralExpr" } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| // Regular-expression fragments for Rust floating-point literals, following the token grammar at | ||||||||||||||||||||||
| // https://doc.rust-lang.org/reference/tokens.html#floating-point-literals | ||||||||||||||||||||||
| // | ||||||||||||||||||||||
| // NOTE: `FloatLiteralExpr.getSuffix` locates the suffix capture group by counting the `(` | ||||||||||||||||||||||
| // characters in `floatLiteralSuffix1()`, `floatLiteralSuffix2()` and `integerSuffixLiteral()`, | ||||||||||||||||||||||
| // so every parenthesis emitted by those predicates must open a capturing group. | ||||||||||||||||||||||
| private module FloatLiteralRegexs { | ||||||||||||||||||||||
| private import IntegerLiteralRegexs | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * Gets a regex for a floating-point literal in one of three forms: a decimal literal | ||||||||||||||||||||||
| * followed by `.` (such as `2.`), the form matched by `floatLiteralSuffix1()`, or the | ||||||||||||||||||||||
| * form matched by `floatLiteralSuffix2()`. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| string floatLiteral() { | ||||||||||||||||||||||
| result = | ||||||||||||||||||||||
| paren(decLiteral() + "\\.") + "|" + paren(floatLiteralSuffix1()) + "|" + | ||||||||||||||||||||||
| paren(floatLiteralSuffix2()) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * Gets a regex for a decimal literal, `.`, a decimal literal, and an optional float | ||||||||||||||||||||||
| * suffix (such as `123.0f64`). The suffix is the last group opened. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| string floatLiteralSuffix1() { | ||||||||||||||||||||||
| result = decLiteral() + "\\." + decLiteral() + paren(suffix()) + "?" | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * Gets a regex for a decimal literal, an optional fractional part, a mandatory exponent, | ||||||||||||||||||||||
| * and an optional float suffix (such as `12E+99_f64`). The suffix is the last group opened. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| string floatLiteralSuffix2() { | ||||||||||||||||||||||
| result = | ||||||||||||||||||||||
| decLiteral() + paren("\\." + decLiteral()) + "?" + paren(exponent()) + paren(suffix()) + "?" | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * Gets a regex for a decimal, binary, octal or hexadecimal integer literal followed by a | ||||||||||||||||||||||
| * mandatory float suffix (such as `5f32`). Uses this module's `suffix()`. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| string integerSuffixLiteral() { | ||||||||||||||||||||||
| result = | ||||||||||||||||||||||
| paren(paren(decLiteral()) + "|" + paren(binLiteral()) + "|" + paren(octLiteral()) + "|" + | ||||||||||||||||||||||
| paren(hexLiteral())) + paren(suffix()) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** Gets a regex alternation of the floating-point type suffixes. */ | ||||||||||||||||||||||
| private string suffix() { result = "f32|f64" } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * Gets a regex for an exponent: `e` or `E`, an optional sign, then decimal digits and | ||||||||||||||||||||||
| * underscores containing at least one digit. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| string exponent() { | ||||||||||||||||||||||
| result = "(e|E)(\\+|-)?(" + decDigit() + "|_)*" + decDigit() + "(" + decDigit() + "|_)*" | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * A [floating-point literal][1]. For example: | ||||||||||||||||||||||
| * | ||||||||||||||||||||||
| * ```rust | ||||||||||||||||||||||
| * 42.0; | ||||||||||||||||||||||
| * ``` | ||||||||||||||||||||||
| * | ||||||||||||||||||||||
| * [1]: https://doc.rust-lang.org/reference/tokens.html#floating-point-literals | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| class FloatLiteralExpr extends NumberLiteralExpr { | ||||||||||||||||||||||
| FloatLiteralExpr() { | ||||||||||||||||||||||
| this.getTextValue() | ||||||||||||||||||||||
| .regexpMatch([ | ||||||||||||||||||||||
| FloatLiteralRegexs::floatLiteral(), FloatLiteralRegexs::integerSuffixLiteral() | ||||||||||||||||||||||
| ]) and | ||||||||||||||||||||||
|
hvitved marked this conversation as resolved.
Outdated
|
||||||||||||||||||||||
| // E.g. `0x01_f32` is an integer, not a float | ||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm confused by the cases
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is exactly the restriction on this line that makes it an integer only; otherwise it would be considered a float as well.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't see how
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||||||||||||||||||||||
| not this instanceof IntegerLiteralExpr | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * Get the suffix of this floating-point literal, if any. | ||||||||||||||||||||||
| * | ||||||||||||||||||||||
| * For example, `42.0f32` has the suffix `f32`. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| string getSuffix() { | ||||||||||||||||||||||
| exists(string s, string reg, int last | | ||||||||||||||||||||||
| s = this.getTextValue() and | ||||||||||||||||||||||
| reg = | ||||||||||||||||||||||
| [ | ||||||||||||||||||||||
| FloatLiteralRegexs::floatLiteralSuffix1(), FloatLiteralRegexs::floatLiteralSuffix2(), | ||||||||||||||||||||||
| FloatLiteralRegexs::integerSuffixLiteral() | ||||||||||||||||||||||
| ] and | ||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using the helper predicate above we could instead do a disjunction here:
Suggested change
|
||||||||||||||||||||||
| last = strictcount(reg.indexOf("(")) and | ||||||||||||||||||||||
| result = s.regexpCapture(reg, last) | ||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same thoughts as for |
||||||||||||||||||||||
| ) | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| override string getAPrimaryQlClass() { result = "FloatLiteralExpr" } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * A Boolean literal. Either `true` or `false`. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** A literal expression whose source text is exactly `true` or `false`. */ | ||||||||||||||||||||||
| class BooleanLiteralExpr extends LiteralExpr { | ||||||||||||||||||||||
| BooleanLiteralExpr() { | ||||||||||||||||||||||
| exists(string text | text = this.getTextValue() | text = "false" or text = "true") | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| override string getAPrimaryQlClass() { result = "BooleanLiteralExpr" } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| charLiteral | ||
| | literal.rs:2:5:2:7 | 'a' | | ||
| | literal.rs:3:5:3:7 | 'b' | | ||
| stringLiteral | ||
| | literal.rs:8:5:8:9 | "foo" | | ||
| | literal.rs:9:5:9:10 | r"foo" | | ||
| | literal.rs:10:5:10:13 | "\\"foo\\"" | | ||
| | literal.rs:11:5:11:14 | r#""foo""# | | ||
| | literal.rs:13:5:13:18 | "foo #\\"# bar" | | ||
| | literal.rs:14:5:14:22 | r##"foo #"# bar"## | | ||
| | literal.rs:16:5:16:10 | "\\x52" | | ||
| | literal.rs:17:5:17:7 | "R" | | ||
| | literal.rs:18:5:18:8 | r"R" | | ||
| | literal.rs:19:5:19:11 | "\\\\x52" | | ||
| | literal.rs:20:5:20:11 | r"\\x52" | | ||
| integerLiteral | ||
| | literal.rs:25:5:25:7 | 123 | | | ||
| | literal.rs:26:5:26:10 | 123i32 | i32 | | ||
| | literal.rs:27:5:27:10 | 123u32 | u32 | | ||
| | literal.rs:28:5:28:11 | 123_u32 | u32 | | ||
| | literal.rs:30:5:30:8 | 0xff | | | ||
| | literal.rs:31:5:31:11 | 0xff_u8 | u8 | | ||
| | literal.rs:32:5:32:12 | 0x01_f32 | | | ||
| | literal.rs:33:5:33:11 | 0x01_e3 | | | ||
| | literal.rs:35:5:35:8 | 0o70 | | | ||
| | literal.rs:36:5:36:12 | 0o70_i16 | i16 | | ||
| | literal.rs:38:5:38:25 | 0b1111_1111_1001_0000 | | | ||
| | literal.rs:39:5:39:28 | 0b1111_1111_1001_0000i64 | i64 | | ||
| | literal.rs:40:5:40:15 | 0b________1 | | | ||
| | literal.rs:42:5:42:10 | 0usize | usize | | ||
| | literal.rs:45:5:46:10 | 128_i8 | i8 | | ||
| | literal.rs:47:5:48:10 | 256_u8 | u8 | | ||
| floatLiteral | ||
| | literal.rs:53:5:53:8 | 5f32 | f32 | | ||
| | literal.rs:55:5:55:12 | 123.0f64 | f64 | | ||
| | literal.rs:56:5:56:10 | 0.1f64 | f64 | | ||
| | literal.rs:57:5:57:10 | 0.1f32 | f32 | | ||
| | literal.rs:58:5:58:14 | 12E+99_f64 | f64 | | ||
| | literal.rs:59:18:59:19 | 2. | | | ||
| booleanLiteral | ||
| | literal.rs:63:5:63:8 | true | | ||
| | literal.rs:64:5:64:9 | false | |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
| import rust | ||
|
|
||
| // Holds for every character literal expression. | ||
| query predicate charLiteral(CharLiteralExpr e) { any() } | ||
|
|
||
| // Holds for every string literal expression. | ||
| query predicate stringLiteral(StringLiteralExpr e) { any() } | ||
|
|
||
| // Holds for every integer literal and its suffix. `concat` yields the empty string when | ||
| // `getSuffix()` has no result, so unsuffixed literals still produce a row. | ||
| query predicate integerLiteral(IntegerLiteralExpr e, string suffix) { | ||
| suffix = concat(e.getSuffix()) | ||
| } | ||
|
|
||
| // Holds for every floating-point literal and its suffix; `concat` is used as in `integerLiteral`. | ||
| query predicate floatLiteral(FloatLiteralExpr e, string suffix) { suffix = concat(e.getSuffix()) } | ||
|
|
||
| // Holds for every Boolean literal expression. | ||
| query predicate booleanLiteral(BooleanLiteralExpr e) { any() } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| fn char_literals() { | ||
| 'a'; | ||
| 'b'; | ||
|
geoffw0 marked this conversation as resolved.
|
||
| } | ||
|
|
||
| fn string_literals() { | ||
| // from https://doc.rust-lang.org/reference/tokens.html#string-literals | ||
| "foo"; | ||
| r"foo"; // foo | ||
| "\"foo\""; | ||
| r#""foo""#; // "foo" | ||
|
|
||
| "foo #\"# bar"; | ||
| r##"foo #"# bar"##; // foo #"# bar | ||
|
|
||
| "\x52"; | ||
| "R"; | ||
| r"R"; // R | ||
| "\\x52"; | ||
| r"\x52"; // \x52 | ||
| } | ||
|
|
||
| fn integer_literals() { | ||
| // from https://doc.rust-lang.org/reference/tokens.html#integer-literals | ||
| 123; | ||
| 123i32; | ||
| 123u32; | ||
| 123_u32; | ||
|
|
||
| 0xff; | ||
| 0xff_u8; | ||
| 0x01_f32; // integer 7986, not floating-point 1.0 | ||
| 0x01_e3; // integer 483, not floating-point 1000.0 | ||
|
|
||
| 0o70; | ||
| 0o70_i16; | ||
|
|
||
| 0b1111_1111_1001_0000; | ||
| 0b1111_1111_1001_0000i64; | ||
| 0b________1; | ||
|
|
||
| 0usize; | ||
|
|
||
| // These are too big for their type, but are accepted as literal expressions. | ||
| #[allow(overflowing_literals)] | ||
| 128_i8; | ||
| #[allow(overflowing_literals)] | ||
| 256_u8; | ||
| } | ||
|
|
||
| fn float_literals() { | ||
| // This is an integer literal, accepted as a floating-point literal expression. | ||
| 5f32; | ||
|
|
||
| 123.0f64; | ||
| 0.1f64; | ||
| 0.1f32; | ||
| 12E+99_f64; | ||
| let x: f64 = 2.; | ||
| } | ||
|
|
||
| fn boolean_literals() { | ||
| true; | ||
| false; | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What does the `Ext` suffix in the filename indicate? I can't find any existing files with that suffix. My first thought was it meant that these could be _ext_ended, but that's not the case.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it just means the file is an extension to the functionality provided in `LiteralExpr.qll`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Indeed.