add php support for semantic search

This commit is contained in:
KCaverly 2023-07-31 16:36:09 -04:00
parent 89edb3d1b5
commit 599f674827
7 changed files with 275 additions and 63 deletions

65
Cargo.lock generated
View file

@ -2341,7 +2341,7 @@ dependencies = [
"tree-sitter",
"tree-sitter-html",
"tree-sitter-rust",
"tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)",
"tree-sitter-typescript",
"unindent",
"util",
"workspace",
@ -3851,7 +3851,7 @@ dependencies = [
"text",
"theme",
"tree-sitter",
"tree-sitter-elixir 0.1.0 (git+https://github.com/elixir-lang/tree-sitter-elixir?rev=4ba9dab6e2602960d95b2b625f3386c27e08084e)",
"tree-sitter-elixir",
"tree-sitter-embedded-template",
"tree-sitter-heex",
"tree-sitter-html",
@ -3860,7 +3860,7 @@ dependencies = [
"tree-sitter-python",
"tree-sitter-ruby",
"tree-sitter-rust",
"tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)",
"tree-sitter-typescript",
"unicase",
"unindent",
"util",
@ -6685,14 +6685,15 @@ dependencies = [
"theme",
"tiktoken-rs 0.5.0",
"tree-sitter",
"tree-sitter-cpp 0.20.2",
"tree-sitter-elixir 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tree-sitter-json 0.19.0",
"tree-sitter-cpp",
"tree-sitter-elixir",
"tree-sitter-json 0.20.0",
"tree-sitter-lua",
"tree-sitter-php",
"tree-sitter-ruby",
"tree-sitter-rust",
"tree-sitter-toml 0.20.0",
"tree-sitter-typescript 0.20.2 (registry+https://github.com/rust-lang/crates.io-index)",
"tree-sitter-toml",
"tree-sitter-typescript",
"unindent",
"util",
"workspace",
@ -8257,16 +8258,6 @@ dependencies = [
"tree-sitter",
]
[[package]]
name = "tree-sitter-cpp"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c88fd925d0333e63ac64e521f5bd79c53019e569ffbbccfeef346a326f459e9"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-css"
version = "0.19.0"
@ -8276,16 +8267,6 @@ dependencies = [
"tree-sitter",
]
[[package]]
name = "tree-sitter-elixir"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a9916f3e1c80b3c8aab8582604e97e8720cb9b893489b347cf999f80f9d469e"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-elixir"
version = "0.1.0"
@ -8464,26 +8445,6 @@ dependencies = [
"tree-sitter",
]
[[package]]
name = "tree-sitter-toml"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca517f578a98b23d20780247cc2688407fa81effad5b627a5a364ec3339b53e8"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-typescript"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "079c695c32d39ad089101c66393aeaca30e967fba3486a91f573d2f0e12d290a"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "tree-sitter-typescript"
version = "0.20.2"
@ -9923,9 +9884,9 @@ dependencies = [
"tree-sitter",
"tree-sitter-bash",
"tree-sitter-c",
"tree-sitter-cpp 0.20.0",
"tree-sitter-cpp",
"tree-sitter-css",
"tree-sitter-elixir 0.1.0 (git+https://github.com/elixir-lang/tree-sitter-elixir?rev=4ba9dab6e2602960d95b2b625f3386c27e08084e)",
"tree-sitter-elixir",
"tree-sitter-elm",
"tree-sitter-embedded-template",
"tree-sitter-glsl",
@ -9942,8 +9903,8 @@ dependencies = [
"tree-sitter-rust",
"tree-sitter-scheme",
"tree-sitter-svelte",
"tree-sitter-toml 0.5.1",
"tree-sitter-typescript 0.20.2 (git+https://github.com/tree-sitter/tree-sitter-typescript?rev=5d20856f34315b068c41edaee2ac8a100081d259)",
"tree-sitter-toml",
"tree-sitter-typescript",
"tree-sitter-yaml",
"unindent",
"url",

View file

@ -54,11 +54,12 @@ tempdir.workspace = true
ctor.workspace = true
env_logger.workspace = true
tree-sitter-typescript = "*"
tree-sitter-json = "*"
tree-sitter-rust = "*"
tree-sitter-toml = "*"
tree-sitter-cpp = "*"
tree-sitter-elixir = "*"
tree-sitter-lua = "*"
tree-sitter-ruby = "*"
tree-sitter-typescript.workspace = true
tree-sitter-json.workspace = true
tree-sitter-rust.workspace = true
tree-sitter-toml.workspace = true
tree-sitter-cpp.workspace = true
tree-sitter-elixir.workspace = true
tree-sitter-lua.workspace = true
tree-sitter-ruby.workspace = true
tree-sitter-php.workspace = true

View file

@ -200,7 +200,12 @@ impl CodeContextRetriever {
let mut document_content = String::new();
for context_range in &context_match.context_ranges {
document_content.push_str(&content[context_range.clone()]);
add_content_from_range(
&mut document_content,
content,
context_range.clone(),
context_match.start_col,
);
document_content.push_str("\n");
}

View file

@ -1017,6 +1017,156 @@ async fn test_code_context_retrieval_ruby() {
);
}
#[gpui::test]
async fn test_code_context_retrieval_php() {
let language = php_lang();
let mut retriever = CodeContextRetriever::new();
let text = r#"
<?php
namespace LevelUp\Experience\Concerns;
/*
This is a multiple-lines comment block
that spans over multiple
lines
*/
function functionName() {
echo "Hello world!";
}
trait HasAchievements
{
/**
* @throws \Exception
*/
public function grantAchievement(Achievement $achievement, $progress = null): void
{
if ($progress > 100) {
throw new Exception(message: 'Progress cannot be greater than 100');
}
if ($this->achievements()->find($achievement->id)) {
throw new Exception(message: 'User already has this Achievement');
}
$this->achievements()->attach($achievement, [
'progress' => $progress ?? null,
]);
$this->when(value: ($progress === null) || ($progress === 100), callback: fn (): ?array => event(new AchievementAwarded(achievement: $achievement, user: $this)));
}
public function achievements(): BelongsToMany
{
return $this->belongsToMany(related: Achievement::class)
->withPivot(columns: 'progress')
->where('is_secret', false)
->using(AchievementUser::class);
}
}
interface Multiplier
{
public function qualifies(array $data): bool;
public function setMultiplier(): int;
}
enum AuditType: string
{
case Add = 'add';
case Remove = 'remove';
case Reset = 'reset';
case LevelUp = 'level_up';
}
?>"#
.unindent();
let documents = retriever.parse_file(&text, language.clone()).unwrap();
assert_documents_eq(
&documents,
&[
(
r#"
/*
This is a multiple-lines comment block
that spans over multiple
lines
*/
function functionName() {
echo "Hello world!";
}"#
.unindent(),
123,
),
(
r#"
trait HasAchievements
{
/**
* @throws \Exception
*/
public function grantAchievement(Achievement $achievement, $progress = null): void
{/* ... */}
public function achievements(): BelongsToMany
{/* ... */}
}"#
.unindent(),
177,
),
(r#"
/**
* @throws \Exception
*/
public function grantAchievement(Achievement $achievement, $progress = null): void
{
if ($progress > 100) {
throw new Exception(message: 'Progress cannot be greater than 100');
}
if ($this->achievements()->find($achievement->id)) {
throw new Exception(message: 'User already has this Achievement');
}
$this->achievements()->attach($achievement, [
'progress' => $progress ?? null,
]);
$this->when(value: ($progress === null) || ($progress === 100), callback: fn (): ?array => event(new AchievementAwarded(achievement: $achievement, user: $this)));
}"#.unindent(), 245),
(r#"
public function achievements(): BelongsToMany
{
return $this->belongsToMany(related: Achievement::class)
->withPivot(columns: 'progress')
->where('is_secret', false)
->using(AchievementUser::class);
}"#.unindent(), 902),
(r#"
interface Multiplier
{
public function qualifies(array $data): bool;
public function setMultiplier(): int;
}"#.unindent(),
1146),
(r#"
enum AuditType: string
{
case Add = 'add';
case Remove = 'remove';
case Reset = 'reset';
case LevelUp = 'level_up';
}"#.unindent(), 1265)
],
);
}
#[gpui::test]
fn test_dot_product(mut rng: StdRng) {
assert_eq!(dot(&[1., 0., 0., 0., 0.], &[0., 1., 0., 0., 0.]), 0.);
@ -1376,6 +1526,61 @@ fn lua_lang() -> Arc<Language> {
)
}
fn php_lang() -> Arc<Language> {
Arc::new(
Language::new(
LanguageConfig {
name: "PHP".into(),
path_suffixes: vec!["php".into()],
collapsed_placeholder: "/* ... */".into(),
..Default::default()
},
Some(tree_sitter_php::language()),
)
.with_embedding_query(
r#"
(
(comment)* @context
.
[
(function_definition
"function" @name
name: (_) @name
body: (_
"{" @keep
"}" @keep) @collapse
)
(trait_declaration
"trait" @name
name: (_) @name)
(method_declaration
"function" @name
name: (_) @name
body: (_
"{" @keep
"}" @keep) @collapse
)
(interface_declaration
"interface" @name
name: (_) @name
)
(enum_declaration
"enum" @name
name: (_) @name
)
] @item
)
"#,
)
.unwrap(),
)
}
fn ruby_lang() -> Arc<Language> {
Arc::new(
Language::new(

View file

@ -9,3 +9,4 @@ brackets = [
{ start = "(", end = ")", close = true, newline = true },
{ start = "\"", end = "\"", close = true, newline = false, not_in = ["string"] },
]
collapsed_placeholder = "/* ... */"

View file

@ -0,0 +1,36 @@
(
(comment)* @context
.
[
(function_definition
"function" @name
name: (_) @name
body: (_
"{" @keep
"}" @keep) @collapse
)
(trait_declaration
"trait" @name
name: (_) @name)
(method_declaration
"function" @name
name: (_) @name
body: (_
"{" @keep
"}" @keep) @collapse
)
(interface_declaration
"interface" @name
name: (_) @name
)
(enum_declaration
"enum" @name
name: (_) @name
)
] @item
)

View file

@ -8,8 +8,6 @@
name: (_) @name
) @item
(method_declaration
"function" @context
name: (_) @name
@ -24,3 +22,8 @@
"enum" @context
name: (_) @name
) @item
(trait_declaration
"trait" @context
name: (_) @name
) @item