add php support for semantic search

This commit is contained in:
KCaverly 2023-07-31 16:36:09 -04:00
parent 89edb3d1b5
commit 599f674827
7 changed files with 275 additions and 63 deletions

65
Cargo.lock generated
View file

@ -2341,7 +2341,7 @@ dependencies = [
"tree-sitter", "tree-sitter",
"tree-sitter-html", "tree-sitter-html",
"tree-sitter-rust", "tree-sitter-rust",
"tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)", "tree-sitter-typescript",
"unindent", "unindent",
"util", "util",
"workspace", "workspace",
@ -3851,7 +3851,7 @@ dependencies = [
"text", "text",
"theme", "theme",
"tree-sitter", "tree-sitter",
"tree-sitter-elixir 0.1.0 (git+https://github.com/elixir-lang/tree-sitter-elixir?rev=4ba9dab6e2602960d95b2b625f3386c27e08084e)", "tree-sitter-elixir",
"tree-sitter-embedded-template", "tree-sitter-embedded-template",
"tree-sitter-heex", "tree-sitter-heex",
"tree-sitter-html", "tree-sitter-html",
@ -3860,7 +3860,7 @@ dependencies = [
"tree-sitter-python", "tree-sitter-python",
"tree-sitter-ruby", "tree-sitter-ruby",
"tree-sitter-rust", "tree-sitter-rust",
"tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)", "tree-sitter-typescript",
"unicase", "unicase",
"unindent", "unindent",
"util", "util",
@ -6685,14 +6685,15 @@ dependencies = [
"theme", "theme",
"tiktoken-rs 0.5.0", "tiktoken-rs 0.5.0",
"tree-sitter", "tree-sitter",
"tree-sitter-cpp 0.20.2", "tree-sitter-cpp",
"tree-sitter-elixir 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter-elixir",
"tree-sitter-json 0.19.0", "tree-sitter-json 0.20.0",
"tree-sitter-lua", "tree-sitter-lua",
"tree-sitter-php",
"tree-sitter-ruby", "tree-sitter-ruby",
"tree-sitter-rust", "tree-sitter-rust",
"tree-sitter-toml 0.20.0", "tree-sitter-toml",
"tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)", "tree-sitter-typescript",
"unindent", "unindent",
"util", "util",
"workspace", "workspace",
@ -8257,16 +8258,6 @@ dependencies = [
"tree-sitter", "tree-sitter",
] ]
[[package]]
name = "tree-sitter-cpp"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c88fd925d0333e63ac64e521f5bd79c53019e569ffbbccfeef346a326f459e9"
dependencies = [
"cc",
"tree-sitter",
]
[[package]] [[package]]
name = "tree-sitter-css" name = "tree-sitter-css"
version = "0.19.0" version = "0.19.0"
@ -8276,16 +8267,6 @@ dependencies = [
"tree-sitter", "tree-sitter",
] ]
[[package]]
name = "tree-sitter-elixir"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a9916f3e1c80b3c8aab8582604e97e8720cb9b893489b347cf999f80f9d469e"
dependencies = [
"cc",
"tree-sitter",
]
[[package]] [[package]]
name = "tree-sitter-elixir" name = "tree-sitter-elixir"
version = "0.1.0" version = "0.1.0"
@ -8464,26 +8445,6 @@ dependencies = [
"tree-sitter", "tree-sitter",
] ]
[[package]]
name = "tree-sitter-toml"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca517f578a98b23d20780247cc2688407fa81effad5b627a5a364ec3339b53e8"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-typescript"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "079c695c32d39ad089101c66393aeaca30e967fba3486a91f573d2f0e12d290a"
dependencies = [
"cc",
"tree-sitter",
]
[[package]] [[package]]
name = "tree-sitter-typescript" name = "tree-sitter-typescript"
version = "0.20.2" version = "0.20.2"
@ -9923,9 +9884,9 @@ dependencies = [
"tree-sitter", "tree-sitter",
"tree-sitter-bash", "tree-sitter-bash",
"tree-sitter-c", "tree-sitter-c",
"tree-sitter-cpp 0.20.0", "tree-sitter-cpp",
"tree-sitter-css", "tree-sitter-css",
"tree-sitter-elixir 0.1.0 (git+https://github.com/elixir-lang/tree-sitter-elixir?rev=4ba9dab6e2602960d95b2b625f3386c27e08084e)", "tree-sitter-elixir",
"tree-sitter-elm", "tree-sitter-elm",
"tree-sitter-embedded-template", "tree-sitter-embedded-template",
"tree-sitter-glsl", "tree-sitter-glsl",
@ -9942,8 +9903,8 @@ dependencies = [
"tree-sitter-rust", "tree-sitter-rust",
"tree-sitter-scheme", "tree-sitter-scheme",
"tree-sitter-svelte", "tree-sitter-svelte",
"tree-sitter-toml 0.5.1", "tree-sitter-toml",
"tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)", "tree-sitter-typescript",
"tree-sitter-yaml", "tree-sitter-yaml",
"unindent", "unindent",
"url", "url",

View file

@ -54,11 +54,12 @@ tempdir.workspace = true
ctor.workspace = true ctor.workspace = true
env_logger.workspace = true env_logger.workspace = true
tree-sitter-typescript = "*" tree-sitter-typescript.workspace = true
tree-sitter-json = "*" tree-sitter-json.workspace = true
tree-sitter-rust = "*" tree-sitter-rust.workspace = true
tree-sitter-toml = "*" tree-sitter-toml.workspace = true
tree-sitter-cpp = "*" tree-sitter-cpp.workspace = true
tree-sitter-elixir = "*" tree-sitter-elixir.workspace = true
tree-sitter-lua = "*" tree-sitter-lua.workspace = true
tree-sitter-ruby = "*" tree-sitter-ruby.workspace = true
tree-sitter-php.workspace = true

View file

@ -200,7 +200,12 @@ impl CodeContextRetriever {
let mut document_content = String::new(); let mut document_content = String::new();
for context_range in &context_match.context_ranges { for context_range in &context_match.context_ranges {
document_content.push_str(&content[context_range.clone()]); add_content_from_range(
&mut document_content,
content,
context_range.clone(),
context_match.start_col,
);
document_content.push_str("\n"); document_content.push_str("\n");
} }

View file

@ -1017,6 +1017,156 @@ async fn test_code_context_retrieval_ruby() {
); );
} }
// Checks the documents that `CodeContextRetriever::parse_file` produces for a
// PHP fixture when run with the language returned by `php_lang()`.
//
// The fixture contains a commented free function, a trait with two methods,
// an interface and an enum; six documents are expected in total.
#[gpui::test]
async fn test_code_context_retrieval_php() {
let language = php_lang();
let mut retriever = CodeContextRetriever::new();
// PHP fixture source. The raw string is passed through `unindent()` before
// parsing, so its exact whitespace matters for the numbers asserted below.
let text = r#"
<?php
namespace LevelUp\Experience\Concerns;
/*
This is a multiple-lines comment block
that spans over multiple
lines
*/
function functionName() {
echo "Hello world!";
}
trait HasAchievements
{
/**
* @throws \Exception
*/
public function grantAchievement(Achievement $achievement, $progress = null): void
{
if ($progress > 100) {
throw new Exception(message: 'Progress cannot be greater than 100');
}
if ($this->achievements()->find($achievement->id)) {
throw new Exception(message: 'User already has this Achievement');
}
$this->achievements()->attach($achievement, [
'progress' => $progress ?? null,
]);
$this->when(value: ($progress === null) || ($progress === 100), callback: fn (): ?array => event(new AchievementAwarded(achievement: $achievement, user: $this)));
}
public function achievements(): BelongsToMany
{
return $this->belongsToMany(related: Achievement::class)
->withPivot(columns: 'progress')
->where('is_secret', false)
->using(AchievementUser::class);
}
}
interface Multiplier
{
public function qualifies(array $data): bool;
public function setMultiplier(): int;
}
enum AuditType: string
{
case Add = 'add';
case Remove = 'remove';
case Reset = 'reset';
case LevelUp = 'level_up';
}
?>"#
.unindent();
// Parsing is expected to succeed for this fixture; `unwrap()` fails the test otherwise.
let documents = retriever.parse_file(&text, language.clone()).unwrap();
// Each expected entry is (document text, number).
// NOTE(review): the number is presumably the byte offset at which the item
// starts inside `text` — confirm against `assert_documents_eq`.
assert_documents_eq(
&documents,
&[
// Free function together with the block comment that precedes it.
(
r#"
/*
This is a multiple-lines comment block
that spans over multiple
lines
*/
function functionName() {
echo "Hello world!";
}"#
.unindent(),
123,
),
// Trait as a whole: the method bodies appear as `{/* ... */}` here.
(
r#"
trait HasAchievements
{
/**
* @throws \Exception
*/
public function grantAchievement(Achievement $achievement, $progress = null): void
{/* ... */}
public function achievements(): BelongsToMany
{/* ... */}
}"#
.unindent(),
177,
),
// First trait method, with its doc comment and full body.
(r#"
/**
* @throws \Exception
*/
public function grantAchievement(Achievement $achievement, $progress = null): void
{
if ($progress > 100) {
throw new Exception(message: 'Progress cannot be greater than 100');
}
if ($this->achievements()->find($achievement->id)) {
throw new Exception(message: 'User already has this Achievement');
}
$this->achievements()->attach($achievement, [
'progress' => $progress ?? null,
]);
$this->when(value: ($progress === null) || ($progress === 100), callback: fn (): ?array => event(new AchievementAwarded(achievement: $achievement, user: $this)));
}"#.unindent(), 245),
// Second trait method, full body.
(r#"
public function achievements(): BelongsToMany
{
return $this->belongsToMany(related: Achievement::class)
->withPivot(columns: 'progress')
->where('is_secret', false)
->using(AchievementUser::class);
}"#.unindent(), 902),
// Interface declaration, reproduced in full.
(r#"
interface Multiplier
{
public function qualifies(array $data): bool;
public function setMultiplier(): int;
}"#.unindent(),
1146),
// Enum declaration, reproduced in full.
(r#"
enum AuditType: string
{
case Add = 'add';
case Remove = 'remove';
case Reset = 'reset';
case LevelUp = 'level_up';
}"#.unindent(), 1265)
],
);
}
#[gpui::test] #[gpui::test]
fn test_dot_product(mut rng: StdRng) { fn test_dot_product(mut rng: StdRng) {
assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.); assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.);
@ -1376,6 +1526,61 @@ fn lua_lang() -> Arc<Language> {
) )
} }
/// Builds the PHP `Language` fixture used by the PHP code-context retrieval test.
///
/// The language is named "PHP", matches the `php` path suffix, uses
/// `/* ... */` as its collapsed placeholder, and carries an embedding query
/// covering functions, traits, methods, interfaces and enums.
///
/// Panics if `with_embedding_query` rejects the query.
fn php_lang() -> Arc<Language> {
    let config = LanguageConfig {
        name: "PHP".into(),
        path_suffixes: vec!["php".into()],
        collapsed_placeholder: "/* ... */".into(),
        ..Default::default()
    };

    // Leading comments are captured as context for the item that follows them;
    // function and method bodies are marked collapsible with their braces kept.
    let embedding_query = r#"
(
(comment)* @context
.
[
(function_definition
"function" @name
name: (_) @name
body: (_
"{" @keep
"}" @keep) @collapse
)
(trait_declaration
"trait" @name
name: (_) @name)
(method_declaration
"function" @name
name: (_) @name
body: (_
"{" @keep
"}" @keep) @collapse
)
(interface_declaration
"interface" @name
name: (_) @name
)
(enum_declaration
"enum" @name
name: (_) @name
)
] @item
)
"#;

    let php = Language::new(config, Some(tree_sitter_php::language()))
        .with_embedding_query(embedding_query)
        .unwrap();

    Arc::new(php)
}
fn ruby_lang() -> Arc<Language> { fn ruby_lang() -> Arc<Language> {
Arc::new( Arc::new(
Language::new( Language::new(

View file

@ -9,3 +9,4 @@ brackets = [
{ start = "(", end = ")", close = true, newline = true }, { start = "(", end = ")", close = true, newline = true },
{ start = "\"", end = "\"", close = true, newline = false, not_in = ["string"] }, { start = "\"", end = "\"", close = true, newline = false, not_in = ["string"] },
] ]
collapsed_placeholder = "/* ... */"

View file

@ -0,0 +1,36 @@
; Embedding query for PHP.
;
; One pattern: zero or more `comment` nodes (captured as @context) anchored
; directly before one of the declarations listed in the alternation, which is
; captured as @item.
;
; Within each declaration the keyword token and the `name` field are captured
; as @name. For functions and methods the body is captured as @collapse, with
; its "{" and "}" tokens captured as @keep.
; NOTE(review): presumably @collapse bodies are replaced by the language's
; `collapsed_placeholder` while @keep tokens are retained — confirm against
; the code that consumes this query.
(
(comment)* @context
.
[
; Free functions.
(function_definition
"function" @name
name: (_) @name
body: (_
"{" @keep
"}" @keep) @collapse
)
; Traits (no body capture at this level).
(trait_declaration
"trait" @name
name: (_) @name)
; Methods.
(method_declaration
"function" @name
name: (_) @name
body: (_
"{" @keep
"}" @keep) @collapse
)
; Interfaces.
(interface_declaration
"interface" @name
name: (_) @name
)
; Enums.
(enum_declaration
"enum" @name
name: (_) @name
)
] @item
)

View file

@ -8,8 +8,6 @@
name: (_) @name name: (_) @name
) @item ) @item
(method_declaration (method_declaration
"function" @context "function" @context
name: (_) @name name: (_) @name
@ -24,3 +22,8 @@
"enum" @context "enum" @context
name: (_) @name name: (_) @name
) @item ) @item
(trait_declaration
"trait" @context
name: (_) @name
) @item