# json_in_md_parser.py
import json

from core.llm_generator.output_parser.errors import OutputParserError
  3. def parse_json_markdown(json_string: str) -> dict:
  4. # Get json from the backticks/braces
  5. json_string = json_string.strip()
  6. starts = ["```json", "```", "``", "`", "{"]
  7. ends = ["```", "``", "`", "}"]
  8. end_index = -1
  9. start_index = 0
  10. parsed: dict = {}
  11. for s in starts:
  12. start_index = json_string.find(s)
  13. if start_index != -1:
  14. if json_string[start_index] != "{":
  15. start_index += len(s)
  16. break
  17. if start_index != -1:
  18. for e in ends:
  19. end_index = json_string.rfind(e, start_index)
  20. if end_index != -1:
  21. if json_string[end_index] == "}":
  22. end_index += 1
  23. break
  24. if start_index != -1 and end_index != -1 and start_index < end_index:
  25. extracted_content = json_string[start_index:end_index].strip()
  26. parsed = json.loads(extracted_content)
  27. else:
  28. raise ValueError("could not find json block in the output.")
  29. return parsed
  30. def parse_and_check_json_markdown(text: str, expected_keys: list[str]) -> dict:
  31. try:
  32. json_obj = parse_json_markdown(text)
  33. except json.JSONDecodeError as e:
  34. raise OutputParserError(f"got invalid json object. error: {e}")
  35. for key in expected_keys:
  36. if key not in json_obj:
  37. raise OutputParserError(
  38. f"got invalid return object. expected key `{key}` to be present, but got {json_obj}"
  39. )
  40. return json_obj