Added parsing support for <|S| and |E|> spans (#3184)
Added parsing support for the <|S| and |E|> span markers for inline generation.
This commit is contained in:
commit
1b9619ca7f
2 changed files with 96 additions and 32 deletions
|
@ -118,7 +118,7 @@ impl Codegen {
|
||||||
|
|
||||||
let (mut hunks_tx, mut hunks_rx) = mpsc::channel(1);
|
let (mut hunks_tx, mut hunks_rx) = mpsc::channel(1);
|
||||||
let diff = cx.background().spawn(async move {
|
let diff = cx.background().spawn(async move {
|
||||||
let chunks = strip_markdown_codeblock(response.await?);
|
let chunks = strip_invalid_spans_from_codeblock(response.await?);
|
||||||
futures::pin_mut!(chunks);
|
futures::pin_mut!(chunks);
|
||||||
let mut diff = StreamingDiff::new(selected_text.to_string());
|
let mut diff = StreamingDiff::new(selected_text.to_string());
|
||||||
|
|
||||||
|
@ -279,12 +279,13 @@ impl Codegen {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn strip_markdown_codeblock(
|
fn strip_invalid_spans_from_codeblock(
|
||||||
stream: impl Stream<Item = Result<String>>,
|
stream: impl Stream<Item = Result<String>>,
|
||||||
) -> impl Stream<Item = Result<String>> {
|
) -> impl Stream<Item = Result<String>> {
|
||||||
let mut first_line = true;
|
let mut first_line = true;
|
||||||
let mut buffer = String::new();
|
let mut buffer = String::new();
|
||||||
let mut starts_with_fenced_code_block = false;
|
let mut starts_with_markdown_codeblock = false;
|
||||||
|
let mut includes_start_or_end_span = false;
|
||||||
stream.filter_map(move |chunk| {
|
stream.filter_map(move |chunk| {
|
||||||
let chunk = match chunk {
|
let chunk = match chunk {
|
||||||
Ok(chunk) => chunk,
|
Ok(chunk) => chunk,
|
||||||
|
@ -292,11 +293,31 @@ fn strip_markdown_codeblock(
|
||||||
};
|
};
|
||||||
buffer.push_str(&chunk);
|
buffer.push_str(&chunk);
|
||||||
|
|
||||||
|
if buffer.len() > "<|S|".len() && buffer.starts_with("<|S|") {
|
||||||
|
includes_start_or_end_span = true;
|
||||||
|
|
||||||
|
buffer = buffer
|
||||||
|
.strip_prefix("<|S|>")
|
||||||
|
.or_else(|| buffer.strip_prefix("<|S|"))
|
||||||
|
.unwrap_or(&buffer)
|
||||||
|
.to_string();
|
||||||
|
} else if buffer.ends_with("|E|>") {
|
||||||
|
includes_start_or_end_span = true;
|
||||||
|
} else if buffer.starts_with("<|")
|
||||||
|
|| buffer.starts_with("<|S")
|
||||||
|
|| buffer.starts_with("<|S|")
|
||||||
|
|| buffer.ends_with("|")
|
||||||
|
|| buffer.ends_with("|E")
|
||||||
|
|| buffer.ends_with("|E|")
|
||||||
|
{
|
||||||
|
return future::ready(None);
|
||||||
|
}
|
||||||
|
|
||||||
if first_line {
|
if first_line {
|
||||||
if buffer == "" || buffer == "`" || buffer == "``" {
|
if buffer == "" || buffer == "`" || buffer == "``" {
|
||||||
return future::ready(None);
|
return future::ready(None);
|
||||||
} else if buffer.starts_with("```") {
|
} else if buffer.starts_with("```") {
|
||||||
starts_with_fenced_code_block = true;
|
starts_with_markdown_codeblock = true;
|
||||||
if let Some(newline_ix) = buffer.find('\n') {
|
if let Some(newline_ix) = buffer.find('\n') {
|
||||||
buffer.replace_range(..newline_ix + 1, "");
|
buffer.replace_range(..newline_ix + 1, "");
|
||||||
first_line = false;
|
first_line = false;
|
||||||
|
@ -306,16 +327,26 @@ fn strip_markdown_codeblock(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let text = if starts_with_fenced_code_block {
|
let mut text = buffer.to_string();
|
||||||
buffer
|
if starts_with_markdown_codeblock {
|
||||||
|
text = text
|
||||||
.strip_suffix("\n```\n")
|
.strip_suffix("\n```\n")
|
||||||
.or_else(|| buffer.strip_suffix("\n```"))
|
.or_else(|| text.strip_suffix("\n```"))
|
||||||
.or_else(|| buffer.strip_suffix("\n``"))
|
.or_else(|| text.strip_suffix("\n``"))
|
||||||
.or_else(|| buffer.strip_suffix("\n`"))
|
.or_else(|| text.strip_suffix("\n`"))
|
||||||
.or_else(|| buffer.strip_suffix('\n'))
|
.or_else(|| text.strip_suffix('\n'))
|
||||||
.unwrap_or(&buffer)
|
.unwrap_or(&text)
|
||||||
} else {
|
.to_string();
|
||||||
&buffer
|
}
|
||||||
|
|
||||||
|
if includes_start_or_end_span {
|
||||||
|
text = text
|
||||||
|
.strip_suffix("|E|>")
|
||||||
|
.or_else(|| text.strip_suffix("E|>"))
|
||||||
|
.or_else(|| text.strip_prefix("|>"))
|
||||||
|
.or_else(|| text.strip_prefix(">"))
|
||||||
|
.unwrap_or(&text)
|
||||||
|
.to_string();
|
||||||
};
|
};
|
||||||
|
|
||||||
if text.contains('\n') {
|
if text.contains('\n') {
|
||||||
|
@ -328,6 +359,7 @@ fn strip_markdown_codeblock(
|
||||||
} else {
|
} else {
|
||||||
Some(Ok(buffer.clone()))
|
Some(Ok(buffer.clone()))
|
||||||
};
|
};
|
||||||
|
|
||||||
buffer = remainder;
|
buffer = remainder;
|
||||||
future::ready(result)
|
future::ready(result)
|
||||||
})
|
})
|
||||||
|
@ -558,50 +590,82 @@ mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[gpui::test]
|
#[gpui::test]
|
||||||
async fn test_strip_markdown_codeblock() {
|
async fn test_strip_invalid_spans_from_codeblock() {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
strip_markdown_codeblock(chunks("Lorem ipsum dolor", 2))
|
strip_invalid_spans_from_codeblock(chunks("Lorem ipsum dolor", 2))
|
||||||
.map(|chunk| chunk.unwrap())
|
.map(|chunk| chunk.unwrap())
|
||||||
.collect::<String>()
|
.collect::<String>()
|
||||||
.await,
|
.await,
|
||||||
"Lorem ipsum dolor"
|
"Lorem ipsum dolor"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
strip_markdown_codeblock(chunks("```\nLorem ipsum dolor", 2))
|
strip_invalid_spans_from_codeblock(chunks("```\nLorem ipsum dolor", 2))
|
||||||
.map(|chunk| chunk.unwrap())
|
.map(|chunk| chunk.unwrap())
|
||||||
.collect::<String>()
|
.collect::<String>()
|
||||||
.await,
|
.await,
|
||||||
"Lorem ipsum dolor"
|
"Lorem ipsum dolor"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
strip_markdown_codeblock(chunks("```\nLorem ipsum dolor\n```", 2))
|
strip_invalid_spans_from_codeblock(chunks("```\nLorem ipsum dolor\n```", 2))
|
||||||
.map(|chunk| chunk.unwrap())
|
.map(|chunk| chunk.unwrap())
|
||||||
.collect::<String>()
|
.collect::<String>()
|
||||||
.await,
|
.await,
|
||||||
"Lorem ipsum dolor"
|
"Lorem ipsum dolor"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
strip_markdown_codeblock(chunks("```\nLorem ipsum dolor\n```\n", 2))
|
strip_invalid_spans_from_codeblock(chunks("```\nLorem ipsum dolor\n```\n", 2))
|
||||||
.map(|chunk| chunk.unwrap())
|
.map(|chunk| chunk.unwrap())
|
||||||
.collect::<String>()
|
.collect::<String>()
|
||||||
.await,
|
.await,
|
||||||
"Lorem ipsum dolor"
|
"Lorem ipsum dolor"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
strip_markdown_codeblock(chunks("```html\n```js\nLorem ipsum dolor\n```\n```", 2))
|
strip_invalid_spans_from_codeblock(chunks(
|
||||||
.map(|chunk| chunk.unwrap())
|
"```html\n```js\nLorem ipsum dolor\n```\n```",
|
||||||
.collect::<String>()
|
2
|
||||||
.await,
|
))
|
||||||
|
.map(|chunk| chunk.unwrap())
|
||||||
|
.collect::<String>()
|
||||||
|
.await,
|
||||||
"```js\nLorem ipsum dolor\n```"
|
"```js\nLorem ipsum dolor\n```"
|
||||||
);
|
);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
strip_markdown_codeblock(chunks("``\nLorem ipsum dolor\n```", 2))
|
strip_invalid_spans_from_codeblock(chunks("``\nLorem ipsum dolor\n```", 2))
|
||||||
.map(|chunk| chunk.unwrap())
|
.map(|chunk| chunk.unwrap())
|
||||||
.collect::<String>()
|
.collect::<String>()
|
||||||
.await,
|
.await,
|
||||||
"``\nLorem ipsum dolor\n```"
|
"``\nLorem ipsum dolor\n```"
|
||||||
);
|
);
|
||||||
|
assert_eq!(
|
||||||
|
strip_invalid_spans_from_codeblock(chunks("<|S|Lorem ipsum|E|>", 2))
|
||||||
|
.map(|chunk| chunk.unwrap())
|
||||||
|
.collect::<String>()
|
||||||
|
.await,
|
||||||
|
"Lorem ipsum"
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
strip_invalid_spans_from_codeblock(chunks("<|S|>Lorem ipsum", 2))
|
||||||
|
.map(|chunk| chunk.unwrap())
|
||||||
|
.collect::<String>()
|
||||||
|
.await,
|
||||||
|
"Lorem ipsum"
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
strip_invalid_spans_from_codeblock(chunks("```\n<|S|>Lorem ipsum\n```", 2))
|
||||||
|
.map(|chunk| chunk.unwrap())
|
||||||
|
.collect::<String>()
|
||||||
|
.await,
|
||||||
|
"Lorem ipsum"
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
strip_invalid_spans_from_codeblock(chunks("```\n<|S|Lorem ipsum|E|>\n```", 2))
|
||||||
|
.map(|chunk| chunk.unwrap())
|
||||||
|
.collect::<String>()
|
||||||
|
.await,
|
||||||
|
"Lorem ipsum"
|
||||||
|
);
|
||||||
fn chunks(text: &str, size: usize) -> impl Stream<Item = Result<String>> {
|
fn chunks(text: &str, size: usize) -> impl Stream<Item = Result<String>> {
|
||||||
stream::iter(
|
stream::iter(
|
||||||
text.chars()
|
text.chars()
|
||||||
|
|
|
@ -80,12 +80,12 @@ fn summarize(buffer: &BufferSnapshot, selected_range: Range<impl ToOffset>) -> S
|
||||||
if !flushed_selection {
|
if !flushed_selection {
|
||||||
// The collapsed node ends after the selection starts, so we'll flush the selection first.
|
// The collapsed node ends after the selection starts, so we'll flush the selection first.
|
||||||
summary.extend(buffer.text_for_range(offset..selected_range.start));
|
summary.extend(buffer.text_for_range(offset..selected_range.start));
|
||||||
summary.push_str("<|START|");
|
summary.push_str("<|S|");
|
||||||
if selected_range.end == selected_range.start {
|
if selected_range.end == selected_range.start {
|
||||||
summary.push_str(">");
|
summary.push_str(">");
|
||||||
} else {
|
} else {
|
||||||
summary.extend(buffer.text_for_range(selected_range.clone()));
|
summary.extend(buffer.text_for_range(selected_range.clone()));
|
||||||
summary.push_str("|END|>");
|
summary.push_str("|E|>");
|
||||||
}
|
}
|
||||||
offset = selected_range.end;
|
offset = selected_range.end;
|
||||||
flushed_selection = true;
|
flushed_selection = true;
|
||||||
|
@ -107,12 +107,12 @@ fn summarize(buffer: &BufferSnapshot, selected_range: Range<impl ToOffset>) -> S
|
||||||
// Flush selection if we haven't already done so.
|
// Flush selection if we haven't already done so.
|
||||||
if !flushed_selection && offset <= selected_range.start {
|
if !flushed_selection && offset <= selected_range.start {
|
||||||
summary.extend(buffer.text_for_range(offset..selected_range.start));
|
summary.extend(buffer.text_for_range(offset..selected_range.start));
|
||||||
summary.push_str("<|START|");
|
summary.push_str("<|S|");
|
||||||
if selected_range.end == selected_range.start {
|
if selected_range.end == selected_range.start {
|
||||||
summary.push_str(">");
|
summary.push_str(">");
|
||||||
} else {
|
} else {
|
||||||
summary.extend(buffer.text_for_range(selected_range.clone()));
|
summary.extend(buffer.text_for_range(selected_range.clone()));
|
||||||
summary.push_str("|END|>");
|
summary.push_str("|E|>");
|
||||||
}
|
}
|
||||||
offset = selected_range.end;
|
offset = selected_range.end;
|
||||||
}
|
}
|
||||||
|
@ -260,7 +260,7 @@ pub(crate) mod tests {
|
||||||
summarize(&snapshot, Point::new(1, 4)..Point::new(1, 4)),
|
summarize(&snapshot, Point::new(1, 4)..Point::new(1, 4)),
|
||||||
indoc! {"
|
indoc! {"
|
||||||
struct X {
|
struct X {
|
||||||
<|START|>a: usize,
|
<|S|>a: usize,
|
||||||
b: usize,
|
b: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -286,7 +286,7 @@ pub(crate) mod tests {
|
||||||
impl X {
|
impl X {
|
||||||
|
|
||||||
fn new() -> Self {
|
fn new() -> Self {
|
||||||
let <|START|a |END|>= 1;
|
let <|S|a |E|>= 1;
|
||||||
let b = 2;
|
let b = 2;
|
||||||
Self { a, b }
|
Self { a, b }
|
||||||
}
|
}
|
||||||
|
@ -307,7 +307,7 @@ pub(crate) mod tests {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl X {
|
impl X {
|
||||||
<|START|>
|
<|S|>
|
||||||
fn new() -> Self {}
|
fn new() -> Self {}
|
||||||
|
|
||||||
pub fn a(&self, param: bool) -> usize {}
|
pub fn a(&self, param: bool) -> usize {}
|
||||||
|
@ -333,7 +333,7 @@ pub(crate) mod tests {
|
||||||
|
|
||||||
pub fn b(&self) -> usize {}
|
pub fn b(&self) -> usize {}
|
||||||
}
|
}
|
||||||
<|START|>"}
|
<|S|>"}
|
||||||
);
|
);
|
||||||
|
|
||||||
// Ensure nested functions get collapsed properly.
|
// Ensure nested functions get collapsed properly.
|
||||||
|
@ -369,7 +369,7 @@ pub(crate) mod tests {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
summarize(&snapshot, Point::new(0, 0)..Point::new(0, 0)),
|
summarize(&snapshot, Point::new(0, 0)..Point::new(0, 0)),
|
||||||
indoc! {"
|
indoc! {"
|
||||||
<|START|>struct X {
|
<|S|>struct X {
|
||||||
a: usize,
|
a: usize,
|
||||||
b: usize,
|
b: usize,
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue