Skip to content

Commit 92ea702

Browse files
authored
validate incoming JSON (#1699)
1 parent c4bbf20 commit 92ea702

File tree

7 files changed

+769
-15
lines changed

7 files changed

+769
-15
lines changed

apps/framework-cli/src/cli/local_webserver.rs

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,12 @@ use rdkafka::message::OwnedMessage;
3737
use rdkafka::producer::future_producer::OwnedDeliveryResult;
3838
use rdkafka::producer::{DeliveryFuture, FutureRecord};
3939
use rdkafka::util::Timeout;
40-
use serde::Deserialize;
4140
use serde::Serialize;
42-
use serde_json::Value;
41+
use serde::{Deserialize, Deserializer};
42+
use serde_json::Deserializer as JsonDeserializer;
4343

44+
use crate::framework::data_model::model::DataModel;
45+
use crate::utilities::validate_passthrough::{DataModelArrayVisitor, DataModelVisitor};
4446
use lazy_static::lazy_static;
4547
use log::Level::{Debug, Trace};
4648
use std::collections::{HashMap, HashSet};
@@ -412,12 +414,10 @@ fn route_not_found_response() -> hyper::http::Result<Response<Full<Bytes>>> {
412414
async fn send_payload_to_topic(
413415
configured_producer: &ConfiguredProducer,
414416
topic_name: &str,
415-
payload: Value,
417+
payload: Vec<u8>,
416418
metrics: Arc<Metrics>,
417419
route: PathBuf,
418420
) -> Result<(i32, i64), (KafkaError, OwnedMessage)> {
419-
let payload = serde_json::to_vec(&payload).unwrap();
420-
421421
debug!("Sending payload {:?} to topic: {}", payload, topic_name);
422422

423423
metrics
@@ -447,6 +447,7 @@ async fn to_reader(req: Request<Incoming>) -> bytes::buf::Reader<impl Buf + Size
447447
async fn handle_json_req(
448448
configured_producer: &ConfiguredProducer,
449449
topic_name: &str,
450+
data_model: &DataModel,
450451
req: Request<Incoming>,
451452
metrics: Arc<Metrics>,
452453
route: PathBuf,
@@ -456,7 +457,9 @@ async fn handle_json_req(
456457
let url = req.uri().to_string();
457458
let number_of_bytes = req.body().size_hint().exact().unwrap();
458459
let body = to_reader(req).await;
459-
let parsed: Result<Value, serde_json::Error> = serde_json::from_reader(body);
460+
461+
let parsed = JsonDeserializer::from_reader(body)
462+
.deserialize_any(&mut DataModelVisitor::new(&data_model.columns));
460463

461464
metrics
462465
.send_metric(MetricsMessage::PutIngestedBytesCount {
@@ -508,6 +511,7 @@ async fn wait_for_batch_complete(
508511
async fn handle_json_array_body(
509512
configured_producer: &ConfiguredProducer,
510513
topic_name: &str,
514+
data_model: &DataModel,
511515
req: Request<Incoming>,
512516
metrics: Arc<Metrics>,
513517
route: PathBuf,
@@ -522,7 +526,9 @@ async fn handle_json_array_body(
522526
"starting to parse json array with length {} for {}",
523527
number_of_bytes, topic_name
524528
);
525-
let parsed: Result<Vec<Value>, serde_json::Error> = serde_json::from_reader(body);
529+
let parsed = JsonDeserializer::from_reader(body).deserialize_seq(&mut DataModelArrayVisitor {
530+
inner: DataModelVisitor::new(&data_model.columns),
531+
});
526532

527533
debug!("parsed json array for {}", topic_name);
528534
metrics
@@ -541,8 +547,6 @@ async fn handle_json_array_body(
541547
let mut temp_res: Vec<Result<DeliveryFuture, KafkaError>> = Vec::new();
542548

543549
for (count, payload) in parsed.ok().unwrap().into_iter().enumerate() {
544-
let payload = serde_json::to_vec(&payload).unwrap();
545-
546550
debug!("Sending payload {:?} to topic: {}", payload, topic_name);
547551
let record = FutureRecord::to(topic_name)
548552
.key(topic_name) // This should probably be generated by the client that pushes data to the API
@@ -644,6 +648,7 @@ async fn ingest_route(
644648
EndpointIngestionFormat::Json => Ok(handle_json_req(
645649
&configured_producer,
646650
&route_meta.topic_name,
651+
&route_meta.data_model,
647652
req,
648653
metrics,
649654
route,
@@ -652,6 +657,7 @@ async fn ingest_route(
652657
EndpointIngestionFormat::JsonArray => Ok(handle_json_array_body(
653658
&configured_producer,
654659
&route_meta.topic_name,
660+
&route_meta.data_model,
655661
req,
656662
metrics,
657663
route,
@@ -859,11 +865,15 @@ impl Webserver {
859865
ApiChange::ApiEndpoint(Change::Added(api_endpoint)) => {
860866
log::info!("Adding route: {:?}", api_endpoint.path);
861867
match api_endpoint.api_type {
862-
APIType::INGRESS { target_topic } => {
868+
APIType::INGRESS {
869+
target_topic,
870+
data_model,
871+
} => {
863872
route_table.insert(
864873
api_endpoint.path.clone(),
865874
RouteMeta {
866875
format: api_endpoint.format.clone(),
876+
data_model: data_model.unwrap(),
867877
topic_name: target_topic,
868878
},
869879
);
@@ -879,14 +889,18 @@ impl Webserver {
879889
}
880890
ApiChange::ApiEndpoint(Change::Updated { before, after }) => {
881891
match &after.api_type {
882-
APIType::INGRESS { target_topic } => {
892+
APIType::INGRESS {
893+
target_topic,
894+
data_model,
895+
} => {
883896
log::info!("Replacing route: {:?} with {:?}", before, after);
884897

885898
route_table.remove(&before.path);
886899
route_table.insert(
887900
after.path.clone(),
888901
RouteMeta {
889902
format: after.format.clone(),
903+
data_model: data_model.as_ref().unwrap().clone(),
890904
topic_name: target_topic.clone(),
891905
},
892906
);

apps/framework-cli/src/cli/watcher.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ async fn process_data_models_changes(
214214
ingest_route,
215215
RouteMeta {
216216
topic_name: fo.topic.clone(),
217+
data_model: fo.data_model.clone(),
217218
format: fo.data_model.config.ingestion.format.clone(),
218219
},
219220
);

apps/framework-cli/src/framework/controller.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use super::core::code_loader::FrameworkObject;
22
use super::data_model::config::EndpointIngestionFormat;
33
use crate::framework::core::infrastructure::table::Table;
44
use crate::framework::core::plan::PlanningError;
5+
use crate::framework::data_model::model::DataModel;
56
use crate::infrastructure::migration::InitialDataLoadError;
67
use crate::infrastructure::olap;
78
use crate::infrastructure::olap::clickhouse::config::ClickHouseConfig;
@@ -31,6 +32,7 @@ use std::sync::Arc;
3132
#[derive(Debug, Clone)]
3233
pub struct RouteMeta {
3334
pub topic_name: String,
35+
pub data_model: DataModel,
3436
pub format: EndpointIngestionFormat,
3537
}
3638

@@ -481,6 +483,7 @@ pub async fn set_up_topic_and_tables_and_route(
481483
ingest_route.clone(),
482484
RouteMeta {
483485
topic_name,
486+
data_model: fo.data_model.clone(),
484487
format: fo.data_model.config.ingestion.format.clone(),
485488
},
486489
);

apps/framework-cli/src/framework/core/infrastructure/api_endpoint.rs

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,13 @@ use super::{topic::Topic, DataLineage, InfrastructureSignature};
1111

1212
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
1313
pub enum APIType {
14-
INGRESS { target_topic: String },
14+
INGRESS {
15+
target_topic: String,
16+
// in previous versions this is not stored,
17+
// so deserialization may fail if the value is not optional
18+
// our code does not depend on the stored field
19+
data_model: Option<DataModel>,
20+
},
1521
EGRESS,
1622
}
1723

@@ -41,9 +47,10 @@ impl ApiEndpoint {
4147
name: data_model.name.clone(),
4248
api_type: APIType::INGRESS {
4349
target_topic: topic.id(),
50+
data_model: Some(data_model.clone()),
4451
},
45-
// This implementation is actually removing the functionaliy of nestedness of paths in
46-
// data model to change the ingest path. Howevever, we are changin how this works with an
52+
// This implementation is actually removing the functionality of nestedness of paths in
53+
// data model to change the ingest path. However, we are changing how this works with an
4754
// explicit in ingest path and we have not seen people use that functionality yet.
4855
path: PathBuf::from("ingest")
4956
.join(data_model.name.clone())
@@ -95,7 +102,7 @@ impl DataLineage for ApiEndpoint {
95102

96103
fn pushes_data_to(&self) -> Vec<InfrastructureSignature> {
97104
match &self.api_type {
98-
APIType::INGRESS { target_topic } => {
105+
APIType::INGRESS { target_topic, .. } => {
99106
vec![InfrastructureSignature::Topic {
100107
id: target_topic.clone(),
101108
}]

apps/framework-cli/src/infrastructure/olap/clickhouse/queries.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,7 @@ fn basic_field_type_to_string(
272272
.iter()
273273
.map(|enum_member| match &enum_member.value {
274274
EnumValue::Int(int) => format!("'{}' = {}", enum_member.name, int),
275+
// "Numbers are assigned starting from 1 by default."
275276
EnumValue::String(string) => format!("'{}'", string),
276277
})
277278
.collect::<Vec<String>>()

apps/framework-cli/src/utilities.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ pub mod git;
77
pub mod package_managers;
88
pub mod retry;
99
pub mod system;
10+
pub mod validate_passthrough;
1011

1112
pub trait PathExt {
1213
fn ext_is_supported_lang(&self) -> bool;

0 commit comments

Comments
 (0)